diff --git a/.github/scripts/common/header_checker.py b/.github/scripts/common/header_checker.py index 6888694069..9aca62ef9b 100755 --- a/.github/scripts/common/header_checker.py +++ b/.github/scripts/common/header_checker.py @@ -1,67 +1,219 @@ #!/usr/bin/env python3 +# /* +# * FreeRTOS V202104.00 +# * Copyright (C) 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# * +# * Permission is hereby granted, free of charge, to any person obtaining a copy of +# * this software and associated documentation files (the "Software"), to deal in +# * the Software without restriction, including without limitation the rights to +# * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +# * the Software, and to permit persons to whom the Software is furnished to do so, +# * subject to the following conditions: +# * +# * The above copyright notice and this permission notice shall be included in all +# * copies or substantial portions of the Software. +# * +# * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +# * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +# * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +# * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +# * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +# * +# * https://www.FreeRTOS.org +# * https://github.com/FreeRTOS +# * +# */ -import os, sys, re +import os +import re from argparse import ArgumentParser from difflib import unified_diff from json import load + +import requests from colorama import Fore, Style + def dprint(msg): - print('[DEBUG]: %s' % str(msg)) + print("[DEBUG]: %s" % str(msg)) + class HeaderChecker: - def __init__(self, header, padding=1000, ignored_files=[], ignored_ext=[], ignored_patterns=[]): + def separateHeaderIntoSections(self, header): + """ + Separate header into text, copyright, and spdx sections. + """ + cur_headers = dict() + cur_headers["text"] = [] + cur_headers["copyright"] = [] + cur_headers["spdx"] = [] + for line in header: + if "Copyright" in line: + cur_headers["copyright"].append(line) + elif "SPDX-License-Identifier:" in line: + cur_headers["spdx"].append(line) + else: + cur_headers["text"].append(line) + return cur_headers + + def __init__( + self, + header, + padding=1000, + ignored_files=None, + ignored_ext=None, + ignored_patterns=None, + py_ext=None, + asm_ext=None, + third_party_patterns=None, + ): self.padding = padding self.header = header - self.ignorePatternList = ignored_patterns.copy() - self.ignoreFileList = ignored_files.copy() - self.ignoreExtList = ignored_ext.copy() + # Construct mutated header for assembly files + self.asm_header = [";" + line for line in header] + + # Construct mutated header for python files + self.py_header = ["#" + line for line in header] + + self.headers = self.separateHeaderIntoSections(self.header) + self.headers_asm = self.separateHeaderIntoSections(self.asm_header) + self.headers_py = self.separateHeaderIntoSections(self.py_header) + + self.ignorePatternList = [] + if ignored_patterns: + for p in ignored_patterns: + self.ignorePatternList.append(re.compile(p)) + + self.ignoreFileList = ignored_files.copy() if ignored_files else [] + self.ignoreExtList = ignored_ext.copy() if ignored_ext else [] + self.pyExtList = py_ext.copy() if py_ext else [] + self.asmExtList = asm_ext.copy() if asm_ext else [] + + self.thirdPartyPatternList = [] + if third_party_patterns: + for p in third_party_patterns: + self.thirdPartyPatternList.append(re.compile(p)) + + self.spdx_data = None + self.sdpx_regex = None def checkJSONList(self, path_json): - ''' + """ This is particularly useful when ingesting output from other programs, like git actions - ''' - assert os.path.exists(path_json), 'No such file: ' + path_json + """ + assert os.path.exists(path_json), "No such file: " + path_json # Get list of files to check from JSON file with open(path_json) as file_json: file_checklist = load(file_json) - assert isinstance(file_checklist, list), 'Expected list for singular JSON List entry' + assert isinstance( + file_checklist, list + ), "Expected list for singular JSON List entry" # Accrue how how files fail the check n_failed = 0 for path_file in file_checklist: - assert isinstance(path_file, str), 'Unexpected JSON format for ' + path_json + assert isinstance(path_file, str), "Unexpected JSON format for " + path_json if os.path.exists(path_file) and not self.isValidFile(path_file): n_failed += 1 return n_failed + def isValidHeaderSection(self, file_ext, section_name, section): + """Validate a given section based on file extentions and section name""" + valid = False + if file_ext in self.pyExtList: + valid = self.headers_py[section_name] == section + elif file_ext in self.asmExtList: + valid = self.headers[section_name] == section + valid |= self.headers_asm[section_name] == section + else: + valid = self.headers[section_name] == section + + return valid + + def getHeaderDiff(self, file_ext, header): + diff = [] + if file_ext in self.pyExtList: + diff = list(unified_diff(header[: len(self.py_header)], self.py_header)) + elif file_ext in self.asmExtList: + # For assembly files and headers, calculate diffs between both header types. + # Return the smallest diff. + diff_default = list(unified_diff(header[: len(self.header)], self.header)) + diff_asm = list( + unified_diff(header[: len(self.asm_header)], self.asm_header) + ) + if len(diff_asm) >= len(diff_default): + diff = diff_default + else: + diff = diff_asm + else: + diff = list(unified_diff(header[: len(self.header)], self.header)) + + return diff + def isValidFile(self, path): - assert os.path.exists(path), 'No such file: ' + path - print('-------------------------------------------------------------------------------------') + assert os.path.exists(path), "No such file: " + path + print("-" * 85) + + print("Checking file: %s..." % path, end="") - print('Checking file: %s...' % path, end='') + file_ext = os.path.splitext(path)[-1] if self.isIgnoredFile(path) or os.path.isdir(path): - print('SKIP') - print('-------------------------------------------------------------------------------------') + print("SKIP") + print("-" * 85) return True - # Don't need entire file. Read sufficiently large chunk of file that should contain the header - with open(path, encoding='utf-8', errors='ignore') as file: - chunk = file.read(len(''.join(self.header)) + self.padding) - lines = [('%s\n' % l) for l in chunk.strip().splitlines()][:len(self.header)] - if self.header == lines: - print('PASS') - print('-------------------------------------------------------------------------------------') + # Read sufficiently large chunk of the file which should contain the header + with open(path, encoding="utf-8", errors="ignore") as file: + chunk = file.read(len("".join(self.header)) + self.padding) + lines = [("%s\n" % line) for line in chunk.strip().splitlines()][ + : len(self.header) + ] + if (len(lines) > 0) and (lines[0].find("#!") == 0): + lines.remove(lines[0]) + + # Split lines in sections: + header_check = self.separateHeaderIntoSections(lines) + + text_equal = self.isValidHeaderSection( + file_ext, "text", header_check["text"] + ) + copyright_equal = self.isValidHeaderSection( + file_ext, "copyright", header_check["copyright"] + ) + spdx_equal = self.isValidHeaderSection( + file_ext, "spdx", header_check["spdx"] + ) + + # Most files should have exactly the same sections + if text_equal and copyright_equal and spdx_equal: + print("PASS") + print("-" * 85) return True + # Third party files may have a different copyright + elif self.isThirdPartyFile(path) and text_equal and spdx_equal: + print("PASS") + print("-" * 85) + return True + elif self.isThirdPartyFile(path) and self.validateSpdxLine( + header_check["spdx"] + ): + print("PASS") + print("-" * 85) + return True + elif self.isThirdPartyFile(path): + print("FAIL") + print("-" * 85) + return False else: - print('FAIL') - print('File Delta: %s' % path) - print(*unified_diff(lines[:len(self.header)], self.header)) - print('-------------------------------------------------------------------------------------') + print("FAIL") + print("File Delta: %s" % path) + print(*self.getHeaderDiff(file_ext, lines)) + print("-" * 85) return False def ignoreExtension(self, *args): @@ -76,69 +228,91 @@ class HeaderChecker: for p in args: self.ignorePatternList.append(re.compile(p)) + def addPyExtension(self, *args): + for p in args: + self.pyExtList.append(p) + + def addAsmExtension(self, *args): + for p in args: + self.asmExtList.append(p) + def isIgnoredFile(self, path): - ''' + """ There are multiple ways a file can be ignored. This is a catch all - ''' - assert os.path.exists(path), 'No such file: ' + path - + """ + assert os.path.exists(path), "No such file: " + path # Try simpler checks first - filename = os.path.split(path)[-1] + filename = os.path.split(path)[-1] extension = os.path.splitext(filename)[-1] if extension in self.ignoreExtList or filename in self.ignoreFileList: return True # Then iterate against regex patterns. In future consider Trie for pattern in self.ignorePatternList: + # print(pattern) if pattern.match(path): return True + # print("DEBUG: did not match any regex") + + return False + + def isThirdPartyFile(self, path): + # Tterate against regex patterns + for pattern in self.thirdPartyPatternList: + if pattern.match(path): + return True return False def showHelp(self, path_config): print(Fore.YELLOW) print( - "\n\n" - "*************************************************************************************************\n" - "* FreeRTOS Header Check %s(FAILED)%s *\n" - "*************************************************************************************************\n" - "* *\n" - "* %sWe do NOT require that all files contain the FreeRTOS File Header (copyright + license).%s *\n" - "* While some files in this change-set don't adhere with the FreeRTOS File Header, *\n" - "* they can be omitted from this check as needed. *\n" - "* *\n" - "* The Git PR check sources its scripts from your fork. *\n" - "* For FreeRTOS/FreeRTOS, ignored files are listed in '.github/scripts/core_checker.py' *\n" - "* For FreeRTOS/FreeRTOS-Kernel, ignored files are listed in '.github/scripts/kernel_checker.py' *\n" - "* *\n" - "* Please fix any offending files that should have the FreeRTOS header, *\n" - "* or add new files to the ignore list as needed to make the check pass. *\n" - "* *\n" - "* %sInclude the required updates to the '*_checker.py' script in your PR to make the check pass.%s *\n" - "*************************************************************************************************\n" - "\n\n" - % (Fore.RED, Fore.YELLOW, - Fore.RED, Fore.YELLOW, - Fore.RED, Fore.YELLOW) - ) + "\n\n \ + *************************************************************************************************\n\ + * FreeRTOS Header Check %s(FAILED)%s *\n\ + *************************************************************************************************\n\ + * *\n\ + * %sWe do NOT require that all files contain the FreeRTOS File Header (copyright + license).%s *\n\ + * While some files in this change-set don't adhere with the FreeRTOS File Header, *\n\ + * they can be omitted from this check as needed. *\n\ + * *\n\ + * The Git PR check sources its scripts from your fork. *\n\ + * For FreeRTOS/FreeRTOS, ignored files are listed in '.github/scripts/core_checker.py' *\n\ + * For FreeRTOS/FreeRTOS-Kernel, ignored files are listed in '.github/scripts/kernel_checker.py' *\n\ + * *\n\ + * Please fix any offending files that should have the FreeRTOS header, *\n\ + * or add new files to the ignore list as needed to make the check pass. *\n\ + * *\n\ + * %sInclude the required updates to the '*_checker.py' script in your PR to make the check pass.%s *\n\ + *************************************************************************************************\n\ + \n\n" # noqa: B950 + % (Fore.RED, Fore.YELLOW, Fore.RED, Fore.YELLOW, Fore.RED, Fore.YELLOW) + ) print(Style.RESET_ALL) @staticmethod def configArgParser(): - parser = ArgumentParser(description='FreeRTOS file header checker. We expect a consistent header across all ' - 'first party files. The header includes current version number, copyright, ' - 'and FreeRTOS license.') - - parser.add_argument('files_checked', - nargs = '+', - metavar = 'FILE_LIST', - help = 'Space separated list of files to check.') - - parser.add_argument('-j', '--json', - default = False, - action = 'store_true', - help = 'Treat arguments json files that store a list of files to check.') + parser = ArgumentParser( + description="FreeRTOS file header checker. We expect a consistent header across all " + "first party files. The header includes current version number, copyright, " + "and FreeRTOS license." + ) + + parser.add_argument( + "files_checked", + nargs="+", + metavar="FILE_LIST", + help="Space separated list of files to check.", + ) + + parser.add_argument( + "-j", + "--json", + default=False, + action="store_true", + help="Treat arguments json files that store a list of files to check.", + ) return parser def processArgs(self, args): @@ -151,3 +325,102 @@ class HeaderChecker: n_failed += not self.isValidFile(path) return n_failed + + def loadSpdxData(self): + """Load spdx license and license exception information from github.""" + spdx_data = dict() + spdx_url = "https://raw.githubusercontent.com/spdx/license-list-data/" + + licenses_url = spdx_url + "master/json/licenses.json" + licenses = requests.get(licenses_url).json() + + assert "licenses" in licenses + spdx_data["licenses"] = dict() + + for license in licenses["licenses"]: + spdx_data["licenses"][license["licenseId"]] = True + + exceptions_url = spdx_url + "master/json/exceptions.json" + exceptions = requests.get(exceptions_url).json() + + assert "exceptions" in exceptions + + spdx_data["exceptions"] = dict() + + for exception in exceptions["exceptions"]: + spdx_data["exceptions"][exception["licenseExceptionId"]] = True + self.spdx_data = spdx_data + + def validateSpdxTag(self, tag): + """ + Validate a given SPDX license tag against SPDX data from github + """ + + error_count = 0 + licenses = tag.split(" ") + license_exception_flag = False + paren_depth = 0 + last_paren_depth = 0 + for i in range(0, len(licenses)): + if licenses[i][0] == "(": + paren_depth += 1 + # skip "and" "or" keywords + if licenses[i] in ["and", "AND", "or", "OR"]: + pass + # "with" keyword denotes a license exception + elif licenses[i] in ["with", "WITH"]: + # Set flag for next iteration + license_exception_flag = True + elif license_exception_flag: + if not licenses[i].strip("()") in self.spdx_data["exceptions"]: + dprint( + "Invalid license exception id {} in SPDX tag {}".format( + licenses[i], tag + ) + ) + error_count += 1 + # No '(' character -> single license exception + if paren_depth <= last_paren_depth: + license_exception_flag = False + else: + if not licenses[i].strip("()") in self.spdx_data["licenses"]: + dprint( + 'Invalid license id "{}" in SPDX tag "{}"'.format( + licenses[i], tag + ) + ) + error_count += 1 + + last_paren_depth = paren_depth + if licenses[i][-1] == ")": + paren_depth -= 1 + + return error_count + + def validateSpdxLine(self, lines): + """ + Validate an SPDX license data line from a source file. + Like: /* SPDX-License-Identifier: MIT */ + """ + + if len(lines) == 0: + dprint("No SPDX identifier found.") + return False + + if not self.spdx_data: + self.loadSpdxData() + + if not self.sdpx_regex: + self.spdx_regex = re.compile( + r"^.*SPDX-License-Identifier:\s*(?P.*)\s*$" + ) + + error_count = 0 + + for line in lines: + matches = self.spdx_regex.match(line) + if "tag" in matches.groupdict(): + error_count += self.validateSpdxTag(matches.group("tag")) + else: + error_count += 1 + return error_count == 0 diff --git a/.github/scripts/common/requirements.txt b/.github/scripts/common/requirements.txt index 82049f76bb..dec20bcb8c 100644 --- a/.github/scripts/common/requirements.txt +++ b/.github/scripts/common/requirements.txt @@ -1,5 +1,5 @@ certifi==2020.12.5 -chardet==3.0.4 +chardet==4.0.0 colorama==0.4.4 Deprecated==1.2.10 gitdb==4.0.5 @@ -8,7 +8,7 @@ idna==2.10 PyGithub==1.54 PyJWT==1.7.1 PyYAML==5.4 -requests==2.24.0 +requests==2.25.1 smmap==3.0.4 -urllib3==1.25.11 +urllib3==1.26.3 wrapt==1.12.1