From e2e0b5df1c41cfa68cbdd30adb5139bc758b3501 Mon Sep 17 00:00:00 2001
From: Todd Gamblin <tgamblin@llnl.gov>
Date: Sun, 7 Oct 2018 21:59:04 -0700
Subject: [PATCH] relicense: add `spack license` command

- `spack license list-files`: list all files that should have license headers
- `spack license list-lgpl`:  list files still under LGPL-2.1
- `spack license verify`:     check that license headers are correct

- Added `spack license verify` to style tests
---
 lib/spack/spack/cmd/license.py      | 156 ++++++++++++++++++++++++++++
 lib/spack/spack/test/cmd/license.py |  68 ++++++++++++
 share/spack/qa/run-flake8-tests     |   4 +
 3 files changed, 228 insertions(+)
 create mode 100644 lib/spack/spack/cmd/license.py
 create mode 100644 lib/spack/spack/test/cmd/license.py

diff --git a/lib/spack/spack/cmd/license.py b/lib/spack/spack/cmd/license.py
new file mode 100644
index 0000000000..feb82fe0ed
--- /dev/null
+++ b/lib/spack/spack/cmd/license.py
@@ -0,0 +1,156 @@
+# Copyright 2013-2018 Lawrence Livermore National Security, LLC and other
+# Spack Project Developers. See the top-level COPYRIGHT file for details.
+#
+# SPDX-License-Identifier: (Apache-2.0 OR MIT)
+
+from __future__ import print_function
+
+import os
+import re
+
+import llnl.util.tty as tty
+
+import spack.paths
+from spack.util.executable import which
+
+description = 'list and check license headers on files in spack'
+section = "developer"
+level = "long"
+
+#: git executable; `license()` dies if this is falsy (git unavailable)
+git = which('git')
+
+#: SPDX license id must appear in the first <license_lines> lines of a file
+license_lines = 6
+
+#: the only SPDX license expression `verify` accepts for a licensed file
+apache2_mit_spdx = "(Apache-2.0 OR MIT)"
+
+#: regular expressions matching root-relative paths of licensed files.
+licensed_files = [
+    # spack scripts
+    r'^bin/spack$',
+    r'^bin/spack-python$',
+    r'^bin/sbang$',
+
+    # all of spack core
+    r'^lib/spack/spack/.*\.py$',
+    r'^lib/spack/spack/.*\.sh$',
+    r'^lib/spack/llnl/.*\.py$',
+    r'^lib/spack/env/cc$',
+
+    # rst and python files in documentation
+    r'^lib/spack/docs/.*\.rst$',
+    r'^lib/spack/docs/.*\.py$',
+
+    # 2 files in external (dots escaped so they only match a literal '.')
+    r'^lib/spack/external/__init__\.py$',
+    r'^lib/spack/external/ordereddict_backport\.py$',
+
+    # shell scripts in share
+    r'^share/spack/.*\.sh$',
+    r'^share/spack/.*\.bash$',
+    r'^share/spack/.*\.csh$',
+    r'^share/spack/qa/run-[^/]*$',
+
+    # all packages
+    r'^var/spack/repos/.*/package\.py$'
+]
+
+#: licensed files that are allowed to contain LGPL language:
+#: this command and its test, which must spell out the text they look for
+lgpl_exceptions = [
+    r'^lib/spack/spack/cmd/license\.py$',
+    r'^lib/spack/spack/test/cmd/license\.py$',
+]
+
+
+def _all_spack_files(root=spack.paths.prefix):
+    """Yield the path, relative to ``root``, of every file below ``root``."""
+    for dirpath, _dirnames, filenames in os.walk(root):
+        for name in filenames:
+            full_path = os.path.join(dirpath, name)
+            yield os.path.relpath(full_path, root)
+
+
+def _licensed_files(root=spack.paths.prefix):
+    """Generates root-relative paths of files that need license headers."""
+    return (relpath for relpath in _all_spack_files(root)
+            if any(re.match(regex, relpath) for regex in licensed_files))
+
+
+def list_files(args):
+    """list files in spack that should have license headers"""
+    for relpath in _licensed_files():
+        print(os.path.join(spack.paths.prefix, relpath))  # the walked root
+
+
+def verify(args):
+    """verify that files in spack have the right license header"""
+    # pattern that captures the license id from an SPDX header line
+    spdx_expr = r'SPDX-License-Identifier: ([^\n]*)'
+    lgpl_expr = r'^# This program is free software'
+
+    # tallies of each kind of problem found
+    wrong_id = 0
+    no_header = 0
+    lgpl_header = 0
+
+    for relpath in _licensed_files(args.root):
+        path = os.path.join(args.root, relpath)
+        with open(path) as f:
+            lines = list(f)
+
+        # files on the exception list may legitimately mention the LGPL
+        exempt = any(re.match(regex, relpath) for regex in lgpl_exceptions)
+        if not exempt and any(re.match(lgpl_expr, ln) for ln in lines):
+            print('%s: has old LGPL license header' % path)
+            lgpl_header += 1
+            continue
+
+        # the SPDX id must appear within the first <license_lines> lines
+        match = re.search(spdx_expr, ''.join(lines[:license_lines]))
+        if not match:
+            print('%s: no license header' % path)
+            no_header += 1
+            continue
+
+        actual = match.group(1)
+        if actual != apache2_mit_spdx:
+            print("%s: labeled as '%s', but should be '%s'"
+                  % (path, actual, apache2_mit_spdx))
+            wrong_id += 1
+
+    total = wrong_id + no_header + lgpl_header
+    if total == 0:
+        tty.msg('No license issues found.')
+        return
+
+    tty.die(
+        '%d improperly licensed files' % total,
+        'files with no SPDX-License-Identifier:      %d' % no_header,
+        'files with wrong SPDX-License-Identifier:   %d' % wrong_id,
+        'files with old license header:              %d' % lgpl_header)
+
+
+def setup_parser(subparser):
+    """Add the ``list-files`` and ``verify`` subcommands to the parser."""
+    # only ``verify`` takes an option: an alternate tree to scan
+    root_help = 'scan a different prefix for license issues'
+    sp = subparser.add_subparsers(metavar='SUBCOMMAND', dest='license_command')
+    sp.add_parser('list-files', help=list_files.__doc__)
+    sp.add_parser('verify', help=verify.__doc__).add_argument(
+        '--root', action='store', default=spack.paths.prefix, help=root_help)
+
+
+def license(parser, args):
+    """Entry point for ``spack license``: run the chosen subcommand."""
+    if not git:
+        tty.die('spack license requires git in your environment')
+
+    # swap the pattern strings for compiled regexes, in place
+    licensed_files[:] = list(map(re.compile, licensed_files))
+
+    handlers = {'list-files': list_files, 'verify': verify}
+    handler = handlers[args.license_command]
+    return handler(args)
diff --git a/lib/spack/spack/test/cmd/license.py b/lib/spack/spack/test/cmd/license.py
new file mode 100644
index 0000000000..6294309c86
--- /dev/null
+++ b/lib/spack/spack/test/cmd/license.py
@@ -0,0 +1,68 @@
+# Copyright 2013-2018 Lawrence Livermore National Security, LLC and other
+# Spack Project Developers. See the top-level COPYRIGHT file for details.
+#
+# SPDX-License-Identifier: (Apache-2.0 OR MIT)
+
+import os.path
+import re
+
+from llnl.util.filesystem import touch, mkdirp
+
+import spack.paths
+from spack.main import SpackCommand
+
+license = SpackCommand('license')  # call it to run `spack license ...`
+
+
+def test_list_files():
+    listed = license('list-files').strip().split('\n')
+    assert all(path.startswith(spack.paths.prefix) for path in listed)
+    assert os.path.join(spack.paths.bin_path, 'spack') in listed
+    assert os.path.abspath(__file__) in listed  # this test file itself
+
+
+def test_verify(tmpdir):
+    """``verify`` reports each kind of bad header and exits with an error."""
+    source_dir = tmpdir.join('lib', 'spack', 'spack')
+    mkdirp(str(source_dir))
+
+    no_header = source_dir.join('no_header.py')
+    touch(str(no_header))
+
+    lgpl_header = source_dir.join('lgpl_header.py')
+    with lgpl_header.open('w') as f:
+        f.write("""\
+# Copyright 2013-2018 Lawrence Livermore National Security, LLC and other
+# Spack Project Developers. See the top-level COPYRIGHT file for details.
+#
+# SPDX-License-Identifier: LGPL-2.1-only
+""")
+
+    old_lgpl_header = source_dir.join('old_lgpl_header.py')
+    with old_lgpl_header.open('w') as f:
+        f.write("""\
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License (as
+# published by the Free Software Foundation) version 2.1, February 1999.
+""")
+
+    correct_header = source_dir.join('correct_header.py')
+    with correct_header.open('w') as f:
+        f.write("""\
+# Copyright 2013-2018 Lawrence Livermore National Security, LLC and other
+# Spack Project Developers. See the top-level COPYRIGHT file for details.
+#
+# SPDX-License-Identifier: (Apache-2.0 OR MIT)
+""")
+
+    out = license('verify', '--root', str(tmpdir), fail_on_error=False)
+
+    assert str(no_header) in out
+    assert str(lgpl_header) in out
+    assert str(old_lgpl_header) in out
+    assert str(correct_header) not in out
+    assert '3 improperly licensed files' in out
+    assert re.search(r'files with no SPDX-License-Identifier:\s*1', out)
+    assert re.search(r'files with wrong SPDX-License-Identifier:\s*1', out)
+    assert re.search(r'files with old license header:\s*1', out)
+    assert license.returncode == 1
diff --git a/share/spack/qa/run-flake8-tests b/share/spack/qa/run-flake8-tests
index 1b9beb4d16..f98cdb919c 100755
--- a/share/spack/qa/run-flake8-tests
+++ b/share/spack/qa/run-flake8-tests
@@ -17,4 +17,8 @@
 . "$(dirname $0)/setup.sh"
 check_dependencies flake8
 
+# verify that the code style is correct
 spack flake8
+
+# verify that the license headers are present and correct
+spack license verify
-- 
GitLab