Skip to content
Snippets Groups Projects
Commit 53ec16c9 authored by Peter Josef Scheibel's avatar Peter Josef Scheibel Committed by Peter Scheibel
Browse files

make license check more-strict

The license text is now expected to match almost exactly, apart from
the comment formatting that differs between file types (e.g. rst vs.
bash script vs. python).
parent 7e947742
No related branches found
No related tags found
No related merge requests found
...@@ -7,6 +7,7 @@ ...@@ -7,6 +7,7 @@
import os import os
import re import re
from collections import defaultdict
import llnl.util.tty as tty import llnl.util.tty as tty
...@@ -89,50 +90,103 @@ def list_files(args): ...@@ -89,50 +90,103 @@ def list_files(args):
print(os.path.join(spack.paths.spack_root, relpath)) print(os.path.join(spack.paths.spack_root, relpath))
# Error codes for license verification. All values are chosen such that
# bool(value) evaluates to True, so a returned code can be tested for
# truthiness to detect a failure.
OLD_LICENSE, SPDX_MISMATCH, GENERAL_MISMATCH = range(1, 4)


class LicenseError(object):
    """Tally of license-check failures, keyed by error code.

    Each recorded error is one of ``OLD_LICENSE``, ``SPDX_MISMATCH`` or
    ``GENERAL_MISMATCH``; the tally is summarized by ``error_messages``.
    """

    def __init__(self):
        # Maps an error code to the number of times it was recorded.
        self.error_counts = defaultdict(int)

    def add_error(self, error):
        """Record one more occurrence of the error code ``error``."""
        self.error_counts[error] += 1

    def has_errors(self):
        """Return True if at least one error has been recorded."""
        return sum(self.error_counts.values()) > 0

    def error_messages(self):
        """Return the summary as a 4-tuple of strings.

        The tuple holds, in order: the total number of errors, the number
        of SPDX mismatches, the number of old license headers and the
        number of general mismatches.
        """
        counts = self.error_counts
        total = sum(counts.values())
        # Use .get() instead of indexing: indexing a defaultdict inserts
        # the missing key, so merely building the report would otherwise
        # modify the tally.
        missing = counts.get(GENERAL_MISMATCH, 0)
        spdx_mismatch = counts.get(SPDX_MISMATCH, 0)
        old_license = counts.get(OLD_LICENSE, 0)
        return (
            '%d improperly licensed files' % (total),
            'files with wrong SPDX-License-Identifier: %d' % spdx_mismatch,
            'files with old license header: %d' % old_license,
            'files not containing expected license: %d' % missing)
def _check_license(lines, path):
license_lines = [
r'Copyright 2013-(?:201[789]|202\d) Lawrence Livermore National Security, LLC and other', # noqa: E501
r'Spack Project Developers\. See the top-level COPYRIGHT file for details.', # noqa: E501
r'SPDX-License-Identifier: \(Apache-2\.0 OR MIT\)'
]
strict_date = r'Copyright 2013-2019'
found = []
for line in lines:
line = re.sub(r'^[\s#\.]*', '', line)
line = line.rstrip()
for i, license_line in enumerate(license_lines):
if re.match(license_line, line):
# The first line of the license contains the copyright date.
# We allow it to be out of date but print a warning if it is
# out of date.
if i == 0:
if not re.search(strict_date, line):
tty.debug('{0}: copyright date mismatch'.format(path))
found.append(i)
if len(found) == len(license_lines) and found == list(sorted(found)):
return
def old_license(line, path):
if re.search('This program is free software', line):
print('{0}: has old LGPL license header'.format(path))
return OLD_LICENSE
# If the SPDX identifier is present, then there is a mismatch (since it
# did not match the above regex)
def wrong_spdx_identifier(line, path):
m = re.search(r'SPDX-License-Identifier: ([^\n]*)', line)
if m and m.group(1) != apache2_mit_spdx:
print('{0}: SPDX license identifier mismatch'
'(expecting {1}, found {2})'
.format(path, apache2_mit_spdx, m.group(1)))
return SPDX_MISMATCH
checks = [old_license, wrong_spdx_identifier]
for line in lines:
for check in checks:
error = check(line, path)
if error:
return error
print('{0}: the license does not match the expected format'.format(path))
return GENERAL_MISMATCH
def verify(args): def verify(args):
"""verify that files in spack have the right license header""" """verify that files in spack have the right license header"""
errors = 0
missing = 0 license_errors = LicenseError()
old_license = 0
for relpath in _licensed_files(args.root): for relpath in _licensed_files(args.root):
path = os.path.join(args.root, relpath) path = os.path.join(args.root, relpath)
with open(path) as f: with open(path) as f:
lines = [line for line in f] lines = [line for line in f][:license_lines]
if not any(re.match(regex, relpath) for regex in lgpl_exceptions): error = _check_license(lines, path)
if any(re.match(r'^# This program is free software', line) if error:
for line in lines): license_errors.add_error(error)
print('%s: has old LGPL license header' % path)
old_license += 1 if license_errors.has_errors():
continue tty.die(*license_errors.error_messages())
# how we'll find licenses in files
spdx_expr = r'SPDX-License-Identifier: ([^\n]*)'
# check first <license_lines> lines for required header
first_n_lines = ''.join(lines[:license_lines])
match = re.search(spdx_expr, first_n_lines)
if not match:
print('%s: no license header' % path)
missing += 1
continue
correct = apache2_mit_spdx
actual = match.group(1)
if actual != correct:
print("%s: labeled as '%s', but should be '%s'"
% (path, actual, correct))
errors += 1
continue
if any([errors, missing, old_license]):
tty.die(
'%d improperly licensed files' % (errors + missing + old_license),
'files with no SPDX-License-Identifier: %d' % missing,
'files with wrong SPDX-License-Identifier: %d' % errors,
'files with old license header: %d' % old_license)
else: else:
tty.msg('No license issues found.') tty.msg('No license issues found.')
......
...@@ -61,7 +61,7 @@ def test_verify(tmpdir): ...@@ -61,7 +61,7 @@ def test_verify(tmpdir):
assert str(old_lgpl_header) in out assert str(old_lgpl_header) in out
assert str(correct_header) not in out assert str(correct_header) not in out
assert '3 improperly licensed files' in out assert '3 improperly licensed files' in out
assert re.search(r'files with no SPDX-License-Identifier:\s*1', out) assert re.search(r'files not containing expected license:\s*1', out)
assert re.search(r'files with wrong SPDX-License-Identifier:\s*1', out) assert re.search(r'files with wrong SPDX-License-Identifier:\s*1', out)
assert re.search(r'files with old license header:\s*1', out) assert re.search(r'files with old license header:\s*1', out)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment