-
Notifications
You must be signed in to change notification settings - Fork 146
/
apply_legal_header.py
executable file
·470 lines (401 loc) · 16.2 KB
/
apply_legal_header.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
#!/usr/bin/env python
# -*- python -*-
#BEGIN_LEGAL
#
#Copyright (c) 2024 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
#END_LEGAL
from __future__ import print_function
from pathlib import Path
import sys
import os
import re
import datetime
from stat import *
import argparse
from collections import defaultdict, Counter
import subprocess
# Root of the checkout: the grandparent directory of this script file.
XED_ROOT = Path(__file__).parents[1]
# Legal header template; its "<CURRENT_YEAR>" token is substituted per file.
TEMPLATE_PATH = XED_ROOT / 'misc' / 'legal-header.txt'
# Today's year, kept as a string so it can be substituted into header text.
CURRENT_YEAR = datetime.date.today().strftime("%Y")
# Commits that only rewrote legal headers. A file whose latest commit is
# listed here is not flagged when its copyright year differs from that
# commit's year.
HEADER_SKIP_COMMITS = [
    '5d594f8b09224e77524098bbac43e8f7f680d9f9',
]
def print_final_lines(output):
    """Print a banner followed by every entry of *output*, tab-indented.

    output: iterable of strings (e.g. the captured lines of a child process).
    """
    print('[FINAL COMMAND OUTPUT]')
    for entry in output:
        tabbed = "\t" + entry
        print(tabbed)
def die(s):
    """Write "ERROR: <s>" to stdout and terminate with exit status 1.

    Raises:
        SystemExit: always, with code 1.
    """
    sys.stdout.write("ERROR: {0}\n".format(s))
    raise SystemExit(1)
def cond_die(v, cmd, msg, lines=None):
    """Abort the program when *v* is truthy; otherwise do nothing.

    Args:
        v: condition value (typically a child's return code).
        cmd: the command string, appended to the error message.
        msg: description of the failure.
        lines: optional captured output to print before dying. Defaults to
            None rather than a shared mutable list.
    """
    if not v:
        return
    if lines:
        print_final_lines(lines)
    die(msg + "\n [CMD] " + cmd)
def ensure_string(x):
    """Return *x* as text, decoding bytes as UTF-8.

    Non-bytes values are returned unchanged. Bytes that are not valid UTF-8
    yield an empty string (best effort: undecodable output is dropped).
    """
    if not isinstance(x, bytes):
        return x
    try:
        return x.decode('utf-8')
    except UnicodeDecodeError:
        # Only a decoding failure is expected here; anything else should surface.
        return ''
def run_subprocess(cmd, **kwargs):
    """Run *cmd* through the shell and capture its combined output.

    Args:
        cmd: command line string; it is interpreted by the shell, so callers
            must not interpolate untrusted text into it.
        **kwargs: extra keyword arguments forwarded to subprocess.Popen.

    Returns:
        (returncode, lines) where lines is the child's stdout and stderr
        (merged) as a list of text lines.
    """
    # The context manager closes the stdout pipe and waits for the child,
    # even if reading or decoding the output raises.
    with subprocess.Popen(cmd,
                          shell=True,
                          stdout=subprocess.PIPE,
                          stderr=subprocess.STDOUT,
                          **kwargs) as sub:
        lines = [ensure_string(raw) for raw in sub.stdout.readlines()]
    return sub.returncode, lines
def run_shell_command(cmd, **kwargs):
    """Run a shell command and return (returncode, lines), or die trying.

    Args:
        cmd: string with the command and all of its arguments.
        **kwargs: forwarded to run_subprocess.

    Returns:
        (returncode, lines); reached only when the return code is zero,
        because any non-zero code terminates the program via cond_die.
    """
    try:
        (returncode, lines) = run_subprocess(cmd, **kwargs)
        # A negative return code means the child was killed by a signal; a
        # positive one is an ordinary failing exit status. Report each
        # accurately instead of labelling every failure as a signal.
        if returncode < 0:
            msg = "Child was terminated by signal {0:d}".format(-returncode)
        else:
            msg = "Child exited with status {0:d}".format(returncode)
        cond_die(returncode, cmd, msg, lines)
        return (returncode, lines)
    except OSError as e:
        die("Execution failed: {0}".format(str(e)))
def get_mode(fn):
    """Return the permission bits of the file named *fn*.

    The value is suitable for passing to os.chmod().
    """
    return S_IMODE(os.stat(fn).st_mode)
def replace_original_with_new_file(file, newfile):
    """Replace *file* with *newfile*.

    os.replace overwrites the destination in a single step, so there is no
    window in which *file* has been deleted but *newfile* has not yet been
    moved into place.
    """
    os.replace(newfile, file)
def remove_existing_header(contents):
    """Return *contents* without its BEGIN_LEGAL..END_LEGAL section, if any.

    Both marker lines and everything between them are dropped. If a start
    marker has no matching end marker, everything from the start marker
    onward is dropped.
    """
    begin_re = re.compile(r"^(/[*][ ]*BEGIN_LEGAL)|(#[ ]*BEGIN_LEGAL)")
    end_re = re.compile(r"^[ ]*(END_LEGAL[ ]?[*]/)|(#[ ]*END_LEGAL)")
    kept = []
    in_header = False
    for text in contents:
        in_header = in_header or bool(begin_re.match(text))
        if not in_header:
            kept.append(text)
        elif end_re.match(text):
            # The end marker itself is dropped; copying resumes on the next line.
            in_header = False
    return kept
def prepend_script_comment(header):
    """Return a copy of *header* with "#" prefixed to every line."""
    return ["#" + text for text in header]
def replace_curr_year(file, year=''):
    """Substitute the "<CURRENT_YEAR>" token in every line of *file*.

    Args:
        file: list of header template lines.
        year: replacement text; when empty, the current year is used.

    Returns:
        A new list of lines with the token replaced.
    """
    stamp = year if year else CURRENT_YEAR
    return [text.replace("<CURRENT_YEAR>", stamp) for text in file]
def apply_header_to_source_file(header, file, year=''):
    """Rewrite *file* with the legal header wrapped in a C block comment.

    Any existing BEGIN_LEGAL..END_LEGAL section is removed first. The result
    is written to "<file>.new", given the original file's permission bits,
    and then moved over the original.

    Args:
        header: list of header template lines.
        file: path of the file to update.
        year: year substituted into the template (current year if empty).
    """
    # Context managers guarantee both handles are closed even on failure.
    with open(file, "r") as src:
        contents = src.readlines()
    mode = get_mode(file)
    trimmed_contents = remove_existing_header(contents)
    curr_header = replace_curr_year(header, year)
    newfile = file + ".new"
    with open(newfile, "w") as out:
        out.write("/* BEGIN_LEGAL \n")
        out.writelines(curr_header)
        out.write("END_LEGAL */\n")
        out.writelines(trimmed_contents)
    os.chmod(newfile, mode)
    replace_original_with_new_file(file, newfile)
# FIXME: this will flag files that have multiline C-style comments
# with -*- in them even though the splitter will not look for the
# comment properly
def shell_script(lines):
    """Return True if *lines* start like a script needing a top mode comment.

    That is the case when the first line begins with "#!" or when either of
    the first two lines contains an editor mode marker ("-*-").
    """
    first = lines[0] if len(lines) > 0 else ""
    second = lines[1] if len(lines) > 1 else ""
    if first.startswith("#!"):
        return True
    # Plain substring tests replace the regexes, whose non-raw "\*" escape
    # triggers a warning on recent Python versions.
    return "-*-" in first or "-*-" in second
def split_script(lines):
    """Return a tuple (header, body) for a script given as a list of lines.

    The header is the leading run of lines that start with "#" or contain
    "-*-"; the body is everything from the first other line onward. A file
    made up only of header lines yields an empty body instead of raising
    IndexError, and the input list is left unmodified.
    """
    split_at = len(lines)
    for idx, text in enumerate(lines):
        if not (text.startswith("#") or "-*-" in text):
            split_at = idx
            break
    return (list(lines[:split_at]), list(lines[split_at:]))
def write_script_header(o, lines):
    """Write *lines* to *o*, wrapped in the #BEGIN_LEGAL / #END_LEGAL markers.

    o: writable text stream; lines: the already comment-prefixed header lines.
    """
    o.writelines(["#BEGIN_LEGAL\n", *lines, "#END_LEGAL\n"])
def apply_header_to_data_file(header, file, year=''):
    """Rewrite *file* with the legal header in "#" comment style.

    Any existing BEGIN_LEGAL..END_LEGAL section is removed first. For files
    that look like scripts (see shell_script), the leading comment lines
    stay at the very top and the legal header is inserted after them. The
    result is written to "<file>.new", given the original file's permission
    bits, and then moved over the original.

    Args:
        header: list of header template lines.
        file: path of the file to update.
        year: year substituted into the template (current year if empty).
    """
    with open(file, "r") as src:
        contents = src.readlines()
    mode = get_mode(file)
    trimmed_contents = remove_existing_header(contents)
    augmented_header = prepend_script_comment(replace_curr_year(header, year))

    # Work out the layout before creating the output file, so a failure here
    # does not leave a half-written "<file>.new" behind.
    if shell_script(trimmed_contents):
        (script_header, script_body) = split_script(trimmed_contents)
    else:
        (script_header, script_body) = ([], trimmed_contents)

    newfile = file + ".new"
    with open(newfile, "w") as out:
        out.writelines(script_header)
        write_script_header(out, augmented_header)
        out.writelines(script_body)
    os.chmod(newfile, mode)
    replace_original_with_new_file(file, newfile)
def skip_file(file):
    """Return True if *file* must not have its legal header checked or changed.

    A path is skipped when it is a directory, lies under one of the skipped
    directories, carries a skipped suffix, is one of the explicitly listed
    files, or matches one of the skip patterns.

    Raises:
        FileNotFoundError: if *file* does not exist (strict resolve).
    """
    f = Path(file).resolve(strict=True)
    if f.is_dir():  # skip directories (e.g. submodule changes)
        return True

    skip_dirs = ['tests/resync',
                 '.github/actions/create-pull-request',  # Borrowed action
                 ]
    parent = f.parent
    for d in skip_dirs:
        skip_d = Path(XED_ROOT, d).resolve()
        # Ancestor-or-equal test; unlike os.path.commonpath this cannot
        # raise ValueError for paths located on different drives.
        if skip_d == parent or skip_d in parent.parents:
            return True

    skip_suffix = ['.pdf', '.msi', '.sln', '.vcproj', '.vcxproj', '.filters',
                   '.xsl', '.rtf', '.reference', '.rc', '.doc', '.html',
                   '.docx', '.msm', '.ico', '.bmp', '.exe', '.a', '.lib', '.csv', '.bz2',
                   '.zip', '.csproj', '.json', '.js', '.xz', '.TESTS', '.pyc', '.md', '.in']
    # Path.suffixes lists every suffix of the final component, so a file
    # such as "x.json.bak" is also skipped.
    if set(f.suffixes).intersection(skip_suffix):
        return True

    # skip specific files
    skip_list = ['misc/legal-header.txt', 'misc/API.NAMES.txt', 'misc/ci-branches.txt',
                 '.github/workflows/exclude_external.txt', 'LICENSE', 'scripts/run-pylint-scan',
                 'examples/xed-rc-template.txt', 'docsrc/xed-doxygen-header.txt']
    for se in skip_list:
        # Anchor at XED_ROOT, like skip_dirs, so the list also works when
        # the script is started from a directory other than the repo root.
        sf = Path(XED_ROOT, se).resolve()
        if sf.exists() and f.samefile(sf):
            return True

    # skip pattern
    skip_patterns = ['*.gitignore', '*README.*', 'tests/test*/*', 'tests/test*/**/*']
    for pattern in skip_patterns:
        if f.match(pattern):
            return True
    return False
def replace_headers(files, year=''):
    """Rewrite the legal header of every non-skipped file in *files*.

    Files whose names match the C/C++ source pattern receive a block-comment
    header; all other files (including Python scripts) receive a "#" header.

    Args:
        files: iterable of file path strings.
        year: year substituted into the template (current year if empty).
    """
    source_like = re.compile(r'[.][chp]$|(cpp)$', re.IGNORECASE)
    source_files = []
    data_files = []
    for file_name in files:
        # tests and the legal header template itself must be left alone
        if skip_file(file_name):
            print(f"skipped file: {file_name}")
            continue
        bucket = source_files if source_like.search(file_name) else data_files
        bucket.append(file_name)

    # load the legal header template once for all files
    with TEMPLATE_PATH.open('r') as header_template:
        legal_header = list(header_template)

    for path in source_files:
        apply_header_to_source_file(legal_header, path, year)
    for path in data_files:
        apply_header_to_data_file(legal_header, path, year)
def get_years_from_copyrights(copyrights):
    """Return the copyright year found in *copyrights*, or None.

    The year is a run of at least two digits that follows "copyright"
    (case-insensitive) on the same line, is preceded by a space and is
    followed by a space or comma. When a line carries several such numbers,
    the last one is returned.
    """
    # Raw string: "\d" in a plain literal is an invalid escape sequence.
    match = re.search(r'.*copyright.* (\d+\d+)[ ,].*', copyrights, re.IGNORECASE)
    return match.group(1) if match else None
def get_last_modified_data(file):
    """Return (year, commit_hash) of the last commit that touched *file*.

    When git reports no commit for the file (e.g. it was just added), the
    current year and an empty hash are returned.
    """
    cmd = f'git log -1 --format="%ad %H" {file}'
    _, out = run_shell_command(cmd)
    try:
        # out has format D M DATE H {YEAR} TZ {COMMIT_HASH}
        commit_info = out[0].split()
        modified_year = commit_info[4]
        commit_hash = commit_info[6]
    except IndexError:
        # No output line, or fewer fields than expected.
        modified_year = CURRENT_YEAR  # in case file was just added
        commit_hash = ''
    return modified_year, commit_hash
def check_years(curr_header, file, issues):
    """Check that the header's copyright year equals the last-modified year.

    Args:
        curr_header: text in which the copyright year is searched.
        file: path of the file; used for the git lookup and as the issue key.
        issues: dict that receives an entry for *file* on failure.

    Returns:
        True when the years agree, or when they disagree but the file's last
        commit is listed in HEADER_SKIP_COMMITS; False otherwise.
    """
    header_year = get_years_from_copyrights(curr_header)
    if not header_year:
        issues[file] = 'Copyright year was not found.'
        return False

    git_year, git_hash = get_last_modified_data(file)
    years_agree = int(header_year) == int(git_year)
    if years_agree or git_hash in HEADER_SKIP_COMMITS:
        return True

    issues[file] = f'Bad copyright years - found "{header_year}", expected "{git_year}."'
    return False
def remove_year_from_header(copyright_lines):
    """Return *copyright_lines* with the year-bearing copyright line removed.

    Only the first line matching "Copyright (C) ... Intel Corporation"
    (case-insensitive) is cut out; its line terminator is left in place.
    Text without such a line is returned unchanged.
    """
    # Raw string: "\(" in a plain literal is an invalid escape sequence.
    match = re.search(r'.*Copyright \(C\) .* intel corporation.*',
                      copyright_lines, re.IGNORECASE)
    if match is None:
        return copyright_lines
    years_start, years_end = match.span()
    return copyright_lines[:years_start] + copyright_lines[years_end:]
def get_clean_lines(copyright_text):
    """Strip comment decoration from every line of *copyright_text*.

    Leading and trailing '#', '*' and '/' characters are removed from each
    line first, then the surrounding whitespace. The cleaned lines are
    joined back together with newlines.
    """
    return '\n'.join(
        raw.strip('#*/').strip() for raw in copyright_text.splitlines()
    )
def get_copyright_from_file(file_content):
    """Return the legal header section contained in *file_content*.

    The slice runs from the start of the first line mentioning BEGIN_LEGAL
    to the end of the first line mentioning END_LEGAL, including any
    newlines that directly follow that line.

    Raises:
        AttributeError: if either marker is absent.
    """
    begin = re.search('.*BEGIN_LEGAL.*(\n)*', file_content, re.IGNORECASE)
    end = re.search('.*END_LEGAL.*(\n)*', file_content, re.IGNORECASE)
    return file_content[begin.start():end.end()]
def check_copyright_text(file_content, file, issues):
    """Check that the file's legal header wording matches the template.

    Both texts have their year-bearing copyright line removed and are split
    on whitespace; the resulting words are compared as multisets, so word
    order is not checked.

    Args:
        file_content: text of the file (its leading part is sufficient).
        file: key under which a mismatch is recorded.
        issues: dict that receives an entry for *file* on mismatch.

    Returns:
        True if the header matches the template, False otherwise.
    """
    with open(TEMPLATE_PATH) as f:
        template_text = f.read().strip()
    expected_words = remove_year_from_header(template_text).strip().split()

    # current header, with comment decoration and the year line removed
    found_text = get_clean_lines(get_copyright_from_file(file_content))
    found_words = remove_year_from_header(found_text).strip().split()
    # drop the BEGIN_LEGAL / END_LEGAL marker words at both ends
    del found_words[0], found_words[-1]

    if Counter(found_words) == Counter(expected_words):
        return True
    issues[file] = 'Wrong copyright header found.'
    return False
def check_header_existence(file_content, file, issues):
    """Check whether *file_content* contains an Intel legal header.

    A header is a BEGIN_LEGAL line, optionally followed by blank lines and
    further non-empty lines, and then a "Copyright (C) <digits> Intel
    Corporation" line (case-insensitive).

    Args:
        file_content: text to search.
        file: key under which a missing header is recorded.
        issues: dict that receives an entry for *file* when no header exists.

    Returns:
        The match object (truthy) when a header is found, otherwise None.
    """
    # Raw strings avoid the invalid "\(" and "\d" escapes of plain literals.
    # The middle group stays a capturing group so that group numbering is
    # unchanged (the year is group 3).
    start_hdr_pattern = (r'.*BEGIN_LEGAL.*(\n)*'
                         r'(.+\n)*'
                         r'.*Copyright \(C\) (\d+) intel corporation.*\n')
    match = re.search(start_hdr_pattern, file_content, re.IGNORECASE)
    if not match:
        issues[file] = 'Intel legal string not found.'
    return match
def print_copyright_errors(issues):
    """Print one "<file> : <problem>" line per entry under a banner.

    Nothing is printed when *issues* is empty.
    """
    if not issues:
        return
    print('======== Copyright Errors ========')
    for path, problem in issues.items():
        print(f'{path} : {problem}')
def check_copyrights(files, suppress_print=False):
    """Validate the legal header of every file in *files*.

    For each non-skipped file the first 100 lines are read and checked, in
    order, for header existence, copyright year and header wording; the
    first failing check ends the checks for that file.

    Args:
        files: iterable of file paths.
        suppress_print: when True, skipped files are not reported.

    Returns:
        Dict mapping each offending file to a description of its problem.
    """
    checks = (check_header_existence, check_years, check_copyright_text)
    issues = {}
    for file in files:
        if skip_file(file):
            if not suppress_print:
                print(f'skipping {file}')
            continue
        try:
            with open(file) as file_obj:
                file_content = ''.join(file_obj.readlines()[:100])
        except Exception as e:
            # Best effort: an unreadable file is reported, not fatal.
            issues[file] = f'Can not read from file: {e}'
            continue
        for check in checks:
            # each check records its own entry in issues when it fails
            if not check(file_content, file, issues):
                break
    print_copyright_errors(issues)
    return issues
def validate_copyrights(files):
    """Return how many of *files* have legal header issues."""
    return len(check_copyrights(files, suppress_print=True))
def replace_copyrights(files2scan):
    """Rewrite the legal header of every file in *files2scan* that fails validation.

    Offending files are grouped by the year of their last commit so that
    each group is stamped with its own year. Always returns 0.
    """
    # only files with invalid legal headers are rewritten
    invalid = check_copyrights(files2scan, suppress_print=True)

    files_by_year = defaultdict(list)
    for path in invalid:
        last_mod_year, _ = get_last_modified_data(path)
        files_by_year[last_mod_year].append(path)

    for last_mod_year, paths in files_by_year.items():
        replace_headers(paths, last_mod_year)
    return 0
def retrieve_files(dir):
    """Return the paths printed by "git ls-files --directory <dir>".

    Each output line is stripped of surrounding whitespace.
    """
    _, listing = run_shell_command(f"git ls-files --directory {dir}")
    return [entry.strip() for entry in listing]
def setup():
"""This function sets up the script env according to cmd line knobs."""
parser = argparse.ArgumentParser(description='XED Copyrights Checker')
parser.add_argument("--dir",
action="store",
dest="dir",
default=os.getcwd(),
help="directory to scan for copyrights")
# defaultly changes legal header (don't use -validate-headers to change the headers)
parser.add_argument("--validate-only",
action="store_true",
dest="validate_only",
default=False,
help="Only validate legal headers. Do not change the files' headers.")
env = vars(parser.parse_args())
return env
def main():
    """Script entry point: validate or rewrite legal headers, then exit.

    With --validate-only the exit status is the number of files with issues,
    capped at 255; otherwise headers are rewritten and the status is 0.
    """
    env = setup()
    files_for_scan = [str(Path(fname).resolve(strict=True))
                      for fname in retrieve_files(env['dir'])]
    if env['validate_only']:
        retval = validate_copyrights(files_for_scan)
    else:
        retval = replace_copyrights(files_for_scan)
    # Cap the status: values above 255 are truncated by the OS, so exactly
    # 256 issues would otherwise be reported as success (status 0).
    sys.exit(min(retval, 255))


if __name__ == '__main__':
    main()