Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Create lit test for copyright header validation #15296

Open
wants to merge 7 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
validation-test/copyright: migrated bash script to python
Script that changes copyright headers
 to conform to their git creation/modification dates
  • Loading branch information
purarue authored and AndrewSB committed Jun 28, 2022
commit af94ef28cb91c1ffbd184d580c6b790a4df6d69b
226 changes: 226 additions & 0 deletions utils/copyright-conformance.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,226 @@
#!/usr/bin/env python3
# Validates that the given source files have correct copyright headers.
# Files that do not contain a copyright notice are ignored.
# With --patch, only headers whose years do not match the file's creation and
# last-modification years (according to git history) are rewritten.

import sys
import re
import datetime
import shlex
import subprocess
import argparse

import os.path
from distutils.util import strtobool

# Regexes used to recognise the different shapes of copyright header.
# COPYRIGHT_EXISTS_PATTERN accepts any four digits after "Copyright"; the
# single-year and range patterns only accept years from 1000-2999, and the
# range pattern requires the years to be separated by " - ".
COPYRIGHT_EXISTS_PATTERN = r'Copyright.*\d{4}'
COPYRIGHT_SINGLE_YEAR_PATTERN = r'[12][0-9]{3}'
COPYRIGHT_RANGE_PATTERN = r'[12][0-9]{3} - [12][0-9]{3}'
COPYRIGHT_LITERAL_PATTERN = r'Copyright \(c\) '

# Output switches read by debug() and verbose(); main() sets them from the
# --debug / --verbose command-line flags.
DEBUG = False
VERBOSE = False


def get_parser():
    """Build the command-line parser for the copyright conformance checker.

    Returns:
        An ``argparse.ArgumentParser`` with the mandatory ``-f`` path list and
        the boolean switches for recursion, debug/verbose output, patching and
        interactive confirmation.
    """
    summary = (
        'Validates that all source files given on command line '
        'have correct copyright headers, based on git modification dates. '
        'Note that this edits the modification date.'
    )
    parser = argparse.ArgumentParser(description=summary)

    # A dedicated group so the mandatory -f is listed separately in --help.
    required_group = parser.add_argument_group('required arguments')
    required_group.add_argument(
        '-f', nargs='+', required=True,
        help='One or more files/directories to check')

    # Every remaining option is a plain on/off switch that is False unless given.
    switches = (
        ('-r', '--recursive',
         'Recursively search through directories'),
        ('-d', '--debug',
         "Print files whose copyright headers could not be found, or don't exist"),
        ('-v', '--verbose',
         "Print additional info on when copyright headers are conformant, or when files are patched"),
        ('-p', '--patch',
         'Replace non-conforming copyright headers with the correct years'),
        ('-i', '--interactive',
         'Shows copyright notice before and after change. Asks for user confirmation when patching files'),
    )
    for short_flag, long_flag, help_text in switches:
        parser.add_argument(short_flag, long_flag, action='store_true', help=help_text)

    return parser


def debug(message):
    """Print ``message`` only when the module-level DEBUG flag is set."""
    if not DEBUG:
        return
    print(message)


def verbose(message):
    """Print ``message`` only when the module-level VERBOSE flag is set."""
    if not VERBOSE:
        return
    print(message)


def discover_files(filepaths, recursive_opt):
    """Resolve the given paths to a sorted list of absolute file paths.

    Args:
        filepaths: Paths of files and/or directories. The list is not modified.
        recursive_opt: When true, directories are expanded (recursively) into
            the files they contain; when false, passing a directory is an error.

    Returns:
        A sorted list of unique absolute paths of every non-directory found.

    Exits the process with status 1 (after printing to stderr) if a path does
    not exist, or if a directory is encountered while ``recursive_opt`` is false.
    """
    # TODO: Make paths relative to swift source directory
    discovered_files = set()

    # Work on a private list so the caller's argument is never extended.
    pending = list(filepaths)
    while pending:
        f = os.path.abspath(pending.pop())

        # exit if a path does not exist
        if not os.path.exists(f):
            print(f"Unexpected error: No such file or directory {f}", file=sys.stderr)
            sys.exit(1)

        if not os.path.isdir(f):
            discovered_files.add(f)
            continue

        if not recursive_opt:
            print(f"You passed in a directory ({f}), without specifying a recursive search. That doesn't make sense because we can't check a directory for conformance. Either omit the directory, or add the --recursive flag.", file=sys.stderr)
            sys.exit(1)

        # Queue the directory's entries; nested directories are expanded when
        # they are popped in a later iteration.
        pending.extend(os.path.join(f, entry) for entry in os.listdir(f))

    # Sorted so that the processing order is reproducible between runs.
    return sorted(discovered_files)


def has_copyright_notice(candidate_filepath):
    """Check whether a file contains a copyright notice.

    Args:
        candidate_filepath: Path of the file to inspect.

    Returns:
        True if the file's text matches COPYRIGHT_EXISTS_PATTERN. False if it
        does not, or if the file cannot be read as text (such files are
        skipped rather than aborting the whole run).
    """
    try:
        with open(candidate_filepath, "r") as f:
            contents = f.read()
    except UnicodeDecodeError:
        # Files that cannot be decoded as text cannot be checked, so they are
        # skipped; the path is reported so the file can be identified.
        debug(f"Skipping {candidate_filepath}: contents could not be decoded as text")
        return False
    except OSError as err:
        # Always name the offending file so a read failure can be diagnosed.
        print(f"Could not read {candidate_filepath}: {err}", file=sys.stderr)
        return False

    # Only the first occurrence is reported, so a single search is enough.
    match = re.search(COPYRIGHT_EXISTS_PATTERN, contents)
    if match is None:
        debug(f"Could not find copyright notice for {candidate_filepath}")
        return False

    verbose(f"Found copyright notice for {candidate_filepath}: {match.group(0)}")
    return True


def get_expected_copyright_notice(filepath):
    """Return the copyright notice a file should carry according to git history.

    Args:
        filepath: Path of the file to look up with ``git log``.

    Returns:
        ``"Copyright (c) YYYY"`` when the year of the oldest and newest listed
        commit are the same, otherwise ``"Copyright (c) YYYY - YYYY"``. A file
        without any git history gets the current year.

    Raises:
        subprocess.CalledProcessError: if ``git log`` exits with a non-zero status.
    """
    # Pass the arguments as a list: building a command string and splitting
    # it breaks on paths that contain spaces or quote characters.
    git_command = ["git", "log", "--format=%ad", "--date=format:%Y", "--", filepath]
    out = subprocess.check_output(git_command).decode("utf-8")
    # one author-date year per commit, in the order git log prints them
    years = out.splitlines()

    if not years:
        # a new file with no commit history: the notice should have the current year
        current_year = datetime.datetime.now().year
        expected_created_year, expected_last_modified_year = current_year, current_year
    else:
        # the last line printed is treated as the creation year and the first
        # as the last-modified year
        expected_created_year = years[-1]
        expected_last_modified_year = years[0]

    if expected_created_year == expected_last_modified_year:
        return "Copyright (c) {}".format(expected_created_year)
    return "Copyright (c) {} - {}".format(expected_created_year, expected_last_modified_year)


def validate_copyright_notice(filepath, copyright_notice, expected_copyright_notice):
    """Compare a file's copyright notice with the expected one.

    Prints a message when the two differ.

    Returns:
        None when the notice already matches, otherwise the expected notice
        that the file should be changed to.
    """
    if copyright_notice != expected_copyright_notice:
        print(
            f"Non-Conforming copyright notice for {filepath}. "
            f"Copyright notice is '{copyright_notice}', "
            f"should be '{expected_copyright_notice}'."
        )
        return expected_copyright_notice

    verbose(f"Copyright notice for {filepath} is conformant.")
    return None


def copyright_notice_is_conformant(filepath):
    """Check the years in a file's copyright notice against its git history.

    Args:
        filepath: Path of the file to check.

    Returns:
        The notice the file should carry if the current one is non-conforming.
        None if the notice already has the correct years, or if the file
        contains no copyright notice at all (nothing has to be changed).
    """
    # get file contents
    with open(filepath, "r") as f:
        contents = f.read()

    # Most specific pattern first: a range of years, then a single year, then
    # a final catch-all for notices with any other spacing or wording.
    candidate_patterns = (
        f"{COPYRIGHT_LITERAL_PATTERN}{COPYRIGHT_RANGE_PATTERN}",
        f"{COPYRIGHT_LITERAL_PATTERN}{COPYRIGHT_SINGLE_YEAR_PATTERN}",
        f"{COPYRIGHT_EXISTS_PATTERN}",
    )
    for pattern in candidate_patterns:
        match = re.search(pattern, contents)
        if match:
            expected_copyright_notice = get_expected_copyright_notice(filepath)
            return validate_copyright_notice(filepath, match.group(0), expected_copyright_notice)

    # No pattern matched: report nothing to change instead of failing on a
    # missing match object.
    debug(f"Could not find copyright notice for {filepath}")
    return None


def get_user_confirmation():
    """Ask the user for confirmation until they give a valid response.

    The accepted answers (case-insensitive) are the ones listed in the help
    message printed below. They are parsed here rather than with
    ``distutils.util.strtobool`` because distutils was removed from the
    standard library in Python 3.12.

    Returns:
        True for an affirmative answer, False for a negative one.
    """
    true_answers = {'y', 'yes', 't', 'true', 'on', '1'}
    false_answers = {'n', 'no', 'f', 'false', 'off', '0'}

    while True:
        user_input = input('> ')
        # Surrounding whitespace and letter case carry no meaning in a yes/no answer.
        answer = user_input.strip().lower()
        if answer in true_answers:
            return True
        if answer in false_answers:
            return False
        print("Could not interpret {} as a response.".format(user_input))
        print("True values are 'y', 'yes', 't', 'true', 'on', and '1'; false values are 'n', 'no', 'f', 'false', 'off', and '0'")


def find_copyright_line(lines):
    """Locate the first line that matches COPYRIGHT_EXISTS_PATTERN.

    Returns:
        A ``(index, match)`` tuple, where ``index`` is the zero-based position
        of the line in ``lines``, or None when no line matches.
    """
    searched = (
        (index, re.search(COPYRIGHT_EXISTS_PATTERN, text))
        for index, text in enumerate(lines)
    )
    return next(((index, found) for index, found in searched if found), None)


def patch(non_conforming_filepath, correct_copyright_notice, interactive_opt):
    """Overwrite the non-conforming copyright notice in a file.

    Args:
        non_conforming_filepath: Path of the file to rewrite.
        correct_copyright_notice: Text that replaces the matched notice on the
            first line containing one.
        interactive_opt: When true, show the line before and after the change
            and only write the file if the user confirms.

    The file is left untouched when no line contains a copyright notice or
    when the user declines the change.
    """
    # get file contents
    with open(non_conforming_filepath, "r") as f:
        lines = f.readlines()

    located = find_copyright_line(lines)
    if located is None:
        # Nothing to replace; unpacking the result would otherwise fail.
        debug("Could not find copyright notice to patch in {}".format(non_conforming_filepath))
        return
    line_number, match = located

    original_line = lines[line_number]
    # replace only the matched notice; the rest of the line is kept as-is
    conforming_line = original_line.replace(match.group(0), correct_copyright_notice, 1)

    # if in interactive mode, ask user for confirmation
    if interactive_opt:
        print("Line:\n{}Conforming Line:\n{}Replace this line in the file?".format(
            original_line, conforming_line
        ))
        if not get_user_confirmation():
            return

    lines[line_number] = conforming_line
    verbose("Patching file {} with correct copyright notice.".format(non_conforming_filepath))
    with open(non_conforming_filepath, "w") as f:
        f.writelines(lines)


def main():
    """Parse the command line, check every given file and optionally patch it.

    Exits with status 2 when --patch is requested while ``git status
    --porcelain`` reports any modified or untracked files.
    """
    global DEBUG
    global VERBOSE

    args = get_parser().parse_args()
    VERBOSE = args.verbose
    # verbose output implies debug output
    DEBUG = args.debug or VERBOSE

    if args.patch:
        # Patching is refused while the work tree is dirty:
        # git status --porcelain lists files that are untracked or modified.
        porcelain_status = subprocess.check_output(shlex.split("git status --porcelain")).decode("utf-8")
        if porcelain_status.strip():
            print("There are modified files in the git directory - Cannot patch files. Commit any changes and try again.", file=sys.stderr)
            sys.exit(2)

    # discover any files in subdirectories if we're searching recursively
    files = discover_files(args.f, args.recursive)

    # keep only the paths that contain a copyright notice
    copyright_notice_files = list(filter(has_copyright_notice, files))

    # NOTE(review): non-conforming files are reported but do not change the
    # exit status - confirm whether the lit test is expected to fail on them.
    for checked_path in copyright_notice_files:
        correct_copyright_notice = copyright_notice_is_conformant(checked_path)
        # None means the notice is already correct; otherwise replace the
        # header only when patching was requested
        if correct_copyright_notice is None or not args.patch:
            continue
        patch(checked_path, correct_copyright_notice, args.interactive)


if __name__ == "__main__":
    main()
3 changes: 3 additions & 0 deletions validation-test/Python/copyright-conformance.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
// Checks to make sure all copyright headers conform with their
// git creation/last modification date for silly legal reasons.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please remove the text "for silly legal reasons". Only lawyers are allowed to opine on whether legal issues are silly or not. 😄

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

quite sage

committed 👍

// RUN: python3 %utils/copyright-conformance.py -rf %swift_obj_root
145 changes: 0 additions & 145 deletions validation-test/copyright.test-sh

This file was deleted.