## @file | |
# Check a patch for various format issues | |
# | |
# Copyright (c) 2015 - 2021, Intel Corporation. All rights reserved.<BR> | |
# Copyright (C) 2020, Red Hat, Inc.<BR> | |
# Copyright (c) 2020, ARM Ltd. All rights reserved.<BR> | |
# | |
# SPDX-License-Identifier: BSD-2-Clause-Patent | |
# | |
from __future__ import print_function | |
VersionNumber = '0.1' | |
__copyright__ = "Copyright (c) 2015 - 2016, Intel Corporation All rights reserved." | |
import email | |
import argparse | |
import os | |
import re | |
import subprocess | |
import sys | |
import email.header | |
class Verbose: | |
SILENT, ONELINE, NORMAL = range(3) | |
level = NORMAL | |
class EmailAddressCheck: | |
"""Checks an email address.""" | |
def __init__(self, email, description): | |
self.ok = True | |
if email is None: | |
self.error('Email address is missing!') | |
return | |
if description is None: | |
self.error('Email description is missing!') | |
return | |
self.description = "'" + description + "'" | |
self.check_email_address(email) | |
def error(self, *err): | |
if self.ok and Verbose.level > Verbose.ONELINE: | |
print('The ' + self.description + ' email address is not valid:') | |
self.ok = False | |
if Verbose.level < Verbose.NORMAL: | |
return | |
count = 0 | |
for line in err: | |
prefix = (' *', ' ')[count > 0] | |
print(prefix, line) | |
count += 1 | |
email_re1 = re.compile(r'(?:\s*)(.*?)(\s*)<(.+)>\s*$', | |
re.MULTILINE|re.IGNORECASE) | |
def check_email_address(self, email): | |
email = email.strip() | |
mo = self.email_re1.match(email) | |
if mo is None: | |
self.error("Email format is invalid: " + email.strip()) | |
return | |
name = mo.group(1).strip() | |
if name == '': | |
self.error("Name is not provided with email address: " + | |
email) | |
else: | |
quoted = len(name) > 2 and name[0] == '"' and name[-1] == '"' | |
if name.find(',') >= 0 and not quoted: | |
self.error('Add quotes (") around name with a comma: ' + | |
name) | |
if mo.group(2) == '': | |
self.error("There should be a space between the name and " + | |
"email address: " + email) | |
if mo.group(3).find(' ') >= 0: | |
self.error("The email address cannot contain a space: " + | |
mo.group(3)) | |
if ' via Groups.Io' in name and mo.group(3).endswith('@groups.io'): | |
self.error("Email rewritten by lists DMARC / DKIM / SPF: " + | |
email) | |
class CommitMessageCheck: | |
"""Checks the contents of a git commit message.""" | |
def __init__(self, subject, message, author_email): | |
self.ok = True | |
if subject is None and message is None: | |
self.error('Commit message is missing!') | |
return | |
MergifyMerge = False | |
if "mergify[bot]@users.noreply.github.com" in author_email: | |
if "Merge branch" in subject: | |
MergifyMerge = True | |
self.subject = subject | |
self.msg = message | |
print (subject) | |
self.check_contributed_under() | |
if not MergifyMerge: | |
self.check_signed_off_by() | |
self.check_misc_signatures() | |
self.check_overall_format() | |
self.report_message_result() | |
url = 'https://github.com/tianocore/tianocore.github.io/wiki/Commit-Message-Format' | |
def report_message_result(self): | |
if Verbose.level < Verbose.NORMAL: | |
return | |
if self.ok: | |
# All checks passed | |
return_code = 0 | |
print('The commit message format passed all checks.') | |
else: | |
return_code = 1 | |
if not self.ok: | |
print(self.url) | |
def error(self, *err): | |
if self.ok and Verbose.level > Verbose.ONELINE: | |
print('The commit message format is not valid:') | |
self.ok = False | |
if Verbose.level < Verbose.NORMAL: | |
return | |
count = 0 | |
for line in err: | |
prefix = (' *', ' ')[count > 0] | |
print(prefix, line) | |
count += 1 | |
# Find 'contributed-under:' at the start of a line ignoring case and | |
# requires ':' to be present. Matches if there is white space before | |
# the tag or between the tag and the ':'. | |
contributed_under_re = \ | |
re.compile(r'^\s*contributed-under\s*:', re.MULTILINE|re.IGNORECASE) | |
def check_contributed_under(self): | |
match = self.contributed_under_re.search(self.msg) | |
if match is not None: | |
self.error('Contributed-under! (Note: this must be ' + | |
'removed by the code contributor!)') | |
@staticmethod | |
def make_signature_re(sig, re_input=False): | |
if re_input: | |
sub_re = sig | |
else: | |
sub_re = sig.replace('-', r'[-\s]+') | |
re_str = (r'^(?P<tag>' + sub_re + | |
r')(\s*):(\s*)(?P<value>\S.*?)(?:\s*)$') | |
try: | |
return re.compile(re_str, re.MULTILINE|re.IGNORECASE) | |
except Exception: | |
print("Tried to compile re:", re_str) | |
raise | |
sig_block_re = \ | |
re.compile(r'''^ | |
(?: (?P<tag>[^:]+) \s* : \s* | |
(?P<value>\S.*?) ) | |
| | |
(?: \[ (?P<updater>[^:]+) \s* : \s* | |
(?P<note>.+?) \s* \] ) | |
\s* $''', | |
re.VERBOSE | re.MULTILINE) | |
def find_signatures(self, sig): | |
if not sig.endswith('-by') and sig != 'Cc': | |
sig += '-by' | |
regex = self.make_signature_re(sig) | |
sigs = regex.findall(self.msg) | |
bad_case_sigs = filter(lambda m: m[0] != sig, sigs) | |
for s in bad_case_sigs: | |
self.error("'" +s[0] + "' should be '" + sig + "'") | |
for s in sigs: | |
if s[1] != '': | |
self.error('There should be no spaces between ' + sig + | |
" and the ':'") | |
if s[2] != ' ': | |
self.error("There should be a space after '" + sig + ":'") | |
EmailAddressCheck(s[3], sig) | |
return sigs | |
def check_signed_off_by(self): | |
sob='Signed-off-by' | |
if self.msg.find(sob) < 0: | |
self.error('Missing Signed-off-by! (Note: this must be ' + | |
'added by the code contributor!)') | |
return | |
sobs = self.find_signatures('Signed-off') | |
if len(sobs) == 0: | |
self.error('Invalid Signed-off-by format!') | |
return | |
sig_types = ( | |
'Reviewed', | |
'Reported', | |
'Tested', | |
'Suggested', | |
'Acked', | |
'Cc' | |
) | |
def check_misc_signatures(self): | |
for sig in self.sig_types: | |
self.find_signatures(sig) | |
cve_re = re.compile('CVE-[0-9]{4}-[0-9]{5}[^0-9]') | |
def check_overall_format(self): | |
lines = self.msg.splitlines() | |
if len(lines) >= 1 and lines[0].endswith('\r\n'): | |
empty_line = '\r\n' | |
else: | |
empty_line = '\n' | |
lines.insert(0, empty_line) | |
lines.insert(0, self.subject + empty_line) | |
count = len(lines) | |
if count <= 0: | |
self.error('Empty commit message!') | |
return | |
if count >= 1 and re.search(self.cve_re, lines[0]): | |
# | |
# If CVE-xxxx-xxxxx is present in subject line, then limit length of | |
# subject line to 92 characters | |
# | |
if len(lines[0].rstrip()) >= 93: | |
self.error( | |
'First line of commit message (subject line) is too long (%d >= 93).' % | |
(len(lines[0].rstrip())) | |
) | |
else: | |
# | |
# If CVE-xxxx-xxxxx is not present in subject line, then limit | |
# length of subject line to 75 characters | |
# | |
if len(lines[0].rstrip()) >= 76: | |
self.error( | |
'First line of commit message (subject line) is too long (%d >= 76).' % | |
(len(lines[0].rstrip())) | |
) | |
if count >= 1 and len(lines[0].strip()) == 0: | |
self.error('First line of commit message (subject line) ' + | |
'is empty.') | |
if count >= 2 and lines[1].strip() != '': | |
self.error('Second line of commit message should be ' + | |
'empty.') | |
for i in range(2, count): | |
if (len(lines[i]) >= 76 and | |
len(lines[i].split()) > 1 and | |
not lines[i].startswith('git-svn-id:') and | |
not lines[i].startswith('Reviewed-by') and | |
not lines[i].startswith('Acked-by:') and | |
not lines[i].startswith('Tested-by:') and | |
not lines[i].startswith('Reported-by:') and | |
not lines[i].startswith('Suggested-by:') and | |
not lines[i].startswith('Signed-off-by:') and | |
not lines[i].startswith('Cc:')): | |
# | |
# Print a warning if body line is longer than 75 characters | |
# | |
print( | |
'WARNING - Line %d of commit message is too long (%d >= 76).' % | |
(i + 1, len(lines[i])) | |
) | |
print(lines[i]) | |
last_sig_line = None | |
for i in range(count - 1, 0, -1): | |
line = lines[i] | |
mo = self.sig_block_re.match(line) | |
if mo is None: | |
if line.strip() == '': | |
break | |
elif last_sig_line is not None: | |
err2 = 'Add empty line before "%s"?' % last_sig_line | |
self.error('The line before the signature block ' + | |
'should be empty', err2) | |
else: | |
self.error('The signature block was not found') | |
break | |
last_sig_line = line.strip() | |
(START, PRE_PATCH, PATCH) = range(3) | |
class GitDiffCheck: | |
"""Checks the contents of a git diff.""" | |
def __init__(self, diff): | |
self.ok = True | |
self.format_ok = True | |
self.lines = diff.splitlines(True) | |
self.count = len(self.lines) | |
self.line_num = 0 | |
self.state = START | |
self.new_bin = [] | |
while self.line_num < self.count and self.format_ok: | |
line_num = self.line_num | |
self.run() | |
assert(self.line_num > line_num) | |
self.report_message_result() | |
def report_message_result(self): | |
if Verbose.level < Verbose.NORMAL: | |
return | |
if self.ok: | |
print('The code passed all checks.') | |
if self.new_bin: | |
print('\nWARNING - The following binary files will be added ' + | |
'into the repository:') | |
for binary in self.new_bin: | |
print(' ' + binary) | |
def run(self): | |
line = self.lines[self.line_num] | |
if self.state in (PRE_PATCH, PATCH): | |
if line.startswith('diff --git'): | |
self.state = START | |
if self.state == PATCH: | |
if line.startswith('@@ '): | |
self.state = PRE_PATCH | |
elif len(line) >= 1 and line[0] not in ' -+' and \ | |
not line.startswith('\r\n') and \ | |
not line.startswith(r'\ No newline ') and not self.binary: | |
for line in self.lines[self.line_num + 1:]: | |
if line.startswith('diff --git'): | |
self.format_error('diff found after end of patch') | |
break | |
self.line_num = self.count | |
return | |
if self.state == START: | |
if line.startswith('diff --git'): | |
self.state = PRE_PATCH | |
self.filename = line[13:].split(' ', 1)[0] | |
self.is_newfile = False | |
self.force_crlf = True | |
self.force_notabs = True | |
if self.filename.endswith('.sh') or \ | |
self.filename.startswith('BaseTools/BinWrappers/PosixLike/') or \ | |
self.filename.startswith('BaseTools/BinPipWrappers/PosixLike/') or \ | |
self.filename == 'BaseTools/BuildEnv': | |
# | |
# Do not enforce CR/LF line endings for linux shell scripts. | |
# Some linux shell scripts don't end with the ".sh" extension, | |
# they are identified by their path. | |
# | |
self.force_crlf = False | |
if self.filename == '.gitmodules' or \ | |
self.filename == 'BaseTools/Conf/diff.order': | |
# | |
# .gitmodules and diff orderfiles are used internally by git | |
# use tabs and LF line endings. Do not enforce no tabs and | |
# do not enforce CR/LF line endings. | |
# | |
self.force_crlf = False | |
self.force_notabs = False | |
if os.path.basename(self.filename) == 'GNUmakefile' or \ | |
os.path.basename(self.filename) == 'Makefile': | |
self.force_notabs = False | |
elif len(line.rstrip()) != 0: | |
self.format_error("didn't find diff command") | |
self.line_num += 1 | |
elif self.state == PRE_PATCH: | |
if line.startswith('@@ '): | |
self.state = PATCH | |
self.binary = False | |
elif line.startswith('GIT binary patch') or \ | |
line.startswith('Binary files'): | |
self.state = PATCH | |
self.binary = True | |
if self.is_newfile: | |
self.new_bin.append(self.filename) | |
elif line.startswith('new file mode 160000'): | |
# | |
# New submodule. Do not enforce CR/LF line endings | |
# | |
self.force_crlf = False | |
else: | |
ok = False | |
self.is_newfile = self.newfile_prefix_re.match(line) | |
for pfx in self.pre_patch_prefixes: | |
if line.startswith(pfx): | |
ok = True | |
if not ok: | |
self.format_error("didn't find diff hunk marker (@@)") | |
self.line_num += 1 | |
elif self.state == PATCH: | |
if self.binary: | |
pass | |
elif line.startswith('-'): | |
pass | |
elif line.startswith('+'): | |
self.check_added_line(line[1:]) | |
elif line.startswith('\r\n'): | |
pass | |
elif line.startswith(r'\ No newline '): | |
pass | |
elif not line.startswith(' '): | |
self.format_error("unexpected patch line") | |
self.line_num += 1 | |
pre_patch_prefixes = ( | |
'--- ', | |
'+++ ', | |
'index ', | |
'new file ', | |
'deleted file ', | |
'old mode ', | |
'new mode ', | |
'similarity index ', | |
'copy from ', | |
'copy to ', | |
'rename ', | |
) | |
line_endings = ('\r\n', '\n\r', '\n', '\r') | |
newfile_prefix_re = \ | |
re.compile(r'''^ | |
index\ 0+\.\. | |
''', | |
re.VERBOSE) | |
def added_line_error(self, msg, line): | |
lines = [ msg ] | |
if self.filename is not None: | |
lines.append('File: ' + self.filename) | |
lines.append('Line: ' + line) | |
self.error(*lines) | |
old_debug_re = \ | |
re.compile(r''' | |
DEBUG \s* \( \s* \( \s* | |
(?: DEBUG_[A-Z_]+ \s* \| \s*)* | |
EFI_D_ ([A-Z_]+) | |
''', | |
re.VERBOSE) | |
def check_added_line(self, line): | |
eol = '' | |
for an_eol in self.line_endings: | |
if line.endswith(an_eol): | |
eol = an_eol | |
line = line[:-len(eol)] | |
stripped = line.rstrip() | |
if self.force_crlf and eol != '\r\n' and (line.find('Subproject commit') == -1): | |
self.added_line_error('Line ending (%s) is not CRLF' % repr(eol), | |
line) | |
if self.force_notabs and '\t' in line: | |
self.added_line_error('Tab character used', line) | |
if len(stripped) < len(line): | |
self.added_line_error('Trailing whitespace found', line) | |
mo = self.old_debug_re.search(line) | |
if mo is not None: | |
self.added_line_error('EFI_D_' + mo.group(1) + ' was used, ' | |
'but DEBUG_' + mo.group(1) + | |
' is now recommended', line) | |
split_diff_re = re.compile(r''' | |
(?P<cmd> | |
^ diff \s+ --git \s+ a/.+ \s+ b/.+ $ | |
) | |
(?P<index> | |
^ index \s+ .+ $ | |
) | |
''', | |
re.IGNORECASE | re.VERBOSE | re.MULTILINE) | |
def format_error(self, err): | |
self.format_ok = False | |
err = 'Patch format error: ' + err | |
err2 = 'Line: ' + self.lines[self.line_num].rstrip() | |
self.error(err, err2) | |
def error(self, *err): | |
if self.ok and Verbose.level > Verbose.ONELINE: | |
print('Code format is not valid:') | |
self.ok = False | |
if Verbose.level < Verbose.NORMAL: | |
return | |
count = 0 | |
for line in err: | |
prefix = (' *', ' ')[count > 0] | |
print(prefix, line) | |
count += 1 | |
class CheckOnePatch: | |
"""Checks the contents of a git email formatted patch. | |
Various checks are performed on both the commit message and the | |
patch content. | |
""" | |
def __init__(self, name, patch): | |
self.patch = patch | |
self.find_patch_pieces() | |
email_check = EmailAddressCheck(self.author_email, 'Author') | |
email_ok = email_check.ok | |
msg_check = CommitMessageCheck(self.commit_subject, self.commit_msg, self.author_email) | |
msg_ok = msg_check.ok | |
diff_ok = True | |
if self.diff is not None: | |
diff_check = GitDiffCheck(self.diff) | |
diff_ok = diff_check.ok | |
self.ok = email_ok and msg_ok and diff_ok | |
if Verbose.level == Verbose.ONELINE: | |
if self.ok: | |
result = 'ok' | |
else: | |
result = list() | |
if not msg_ok: | |
result.append('commit message') | |
if not diff_ok: | |
result.append('diff content') | |
result = 'bad ' + ' and '.join(result) | |
print(name, result) | |
git_diff_re = re.compile(r''' | |
^ diff \s+ --git \s+ a/.+ \s+ b/.+ $ | |
''', | |
re.IGNORECASE | re.VERBOSE | re.MULTILINE) | |
stat_re = \ | |
re.compile(r''' | |
(?P<commit_message> [\s\S\r\n]* ) | |
(?P<stat> | |
^ --- $ [\r\n]+ | |
(?: ^ \s+ .+ \s+ \| \s+ \d+ \s+ \+* \-* | |
$ [\r\n]+ )+ | |
[\s\S\r\n]+ | |
) | |
''', | |
re.IGNORECASE | re.VERBOSE | re.MULTILINE) | |
subject_prefix_re = \ | |
re.compile(r'''^ | |
\s* (\[ | |
[^\[\]]* # Allow all non-brackets | |
\])* \s* | |
''', | |
re.VERBOSE) | |
def find_patch_pieces(self): | |
if sys.version_info < (3, 0): | |
patch = self.patch.encode('ascii', 'ignore') | |
else: | |
patch = self.patch | |
self.commit_msg = None | |
self.stat = None | |
self.commit_subject = None | |
self.commit_prefix = None | |
self.diff = None | |
if patch.startswith('diff --git'): | |
self.diff = patch | |
return | |
pmail = email.message_from_string(patch) | |
parts = list(pmail.walk()) | |
assert(len(parts) == 1) | |
assert(parts[0].get_content_type() == 'text/plain') | |
content = parts[0].get_payload(decode=True).decode('utf-8', 'ignore') | |
mo = self.git_diff_re.search(content) | |
if mo is not None: | |
self.diff = content[mo.start():] | |
content = content[:mo.start()] | |
mo = self.stat_re.search(content) | |
if mo is None: | |
self.commit_msg = content | |
else: | |
self.stat = mo.group('stat') | |
self.commit_msg = mo.group('commit_message') | |
# | |
# Parse subject line from email header. The subject line may be | |
# composed of multiple parts with different encodings. Decode and | |
# combine all the parts to produce a single string with the contents of | |
# the decoded subject line. | |
# | |
parts = email.header.decode_header(pmail.get('subject')) | |
subject = '' | |
for (part, encoding) in parts: | |
if encoding: | |
part = part.decode(encoding) | |
else: | |
try: | |
part = part.decode() | |
except: | |
pass | |
subject = subject + part | |
self.commit_subject = subject.replace('\r\n', '') | |
self.commit_subject = self.commit_subject.replace('\n', '') | |
self.commit_subject = self.subject_prefix_re.sub('', self.commit_subject, 1) | |
self.author_email = pmail['from'] | |
class CheckGitCommits: | |
"""Reads patches from git based on the specified git revision range. | |
The patches are read from git, and then checked. | |
""" | |
def __init__(self, rev_spec, max_count): | |
commits = self.read_commit_list_from_git(rev_spec, max_count) | |
if len(commits) == 1 and Verbose.level > Verbose.ONELINE: | |
commits = [ rev_spec ] | |
self.ok = True | |
blank_line = False | |
for commit in commits: | |
if Verbose.level > Verbose.ONELINE: | |
if blank_line: | |
print() | |
else: | |
blank_line = True | |
print('Checking git commit:', commit) | |
email = self.read_committer_email_address_from_git(commit) | |
self.ok &= EmailAddressCheck(email, 'Committer').ok | |
patch = self.read_patch_from_git(commit) | |
self.ok &= CheckOnePatch(commit, patch).ok | |
if not commits: | |
print("Couldn't find commit matching: '{}'".format(rev_spec)) | |
def read_commit_list_from_git(self, rev_spec, max_count): | |
# Run git to get the commit patch | |
cmd = [ 'rev-list', '--abbrev-commit', '--no-walk' ] | |
if max_count is not None: | |
cmd.append('--max-count=' + str(max_count)) | |
cmd.append(rev_spec) | |
out = self.run_git(*cmd) | |
return out.split() if out else [] | |
def read_patch_from_git(self, commit): | |
# Run git to get the commit patch | |
return self.run_git('show', '--pretty=email', '--no-textconv', | |
'--no-use-mailmap', commit) | |
def read_committer_email_address_from_git(self, commit): | |
# Run git to get the committer email | |
return self.run_git('show', '--pretty=%cn <%ce>', '--no-patch', | |
'--no-use-mailmap', commit) | |
def run_git(self, *args): | |
cmd = [ 'git' ] | |
cmd += args | |
p = subprocess.Popen(cmd, | |
stdout=subprocess.PIPE, | |
stderr=subprocess.STDOUT) | |
Result = p.communicate() | |
return Result[0].decode('utf-8', 'ignore') if Result[0] and Result[0].find(b"fatal")!=0 else None | |
class CheckOnePatchFile: | |
"""Performs a patch check for a single file. | |
stdin is used when the filename is '-'. | |
""" | |
def __init__(self, patch_filename): | |
if patch_filename == '-': | |
patch = sys.stdin.read() | |
patch_filename = 'stdin' | |
else: | |
f = open(patch_filename, 'rb') | |
patch = f.read().decode('utf-8', 'ignore') | |
f.close() | |
if Verbose.level > Verbose.ONELINE: | |
print('Checking patch file:', patch_filename) | |
self.ok = CheckOnePatch(patch_filename, patch).ok | |
class CheckOneArg: | |
"""Performs a patch check for a single command line argument. | |
The argument will be handed off to a file or git-commit based | |
checker. | |
""" | |
def __init__(self, param, max_count=None): | |
self.ok = True | |
if param == '-' or os.path.exists(param): | |
checker = CheckOnePatchFile(param) | |
else: | |
checker = CheckGitCommits(param, max_count) | |
self.ok = checker.ok | |
class PatchCheckApp: | |
"""Checks patches based on the command line arguments.""" | |
def __init__(self): | |
self.parse_options() | |
patches = self.args.patches | |
if len(patches) == 0: | |
patches = [ 'HEAD' ] | |
self.ok = True | |
self.count = None | |
for patch in patches: | |
self.process_one_arg(patch) | |
if self.count is not None: | |
self.process_one_arg('HEAD') | |
if self.ok: | |
self.retval = 0 | |
else: | |
self.retval = -1 | |
def process_one_arg(self, arg): | |
if len(arg) >= 2 and arg[0] == '-': | |
try: | |
self.count = int(arg[1:]) | |
return | |
except ValueError: | |
pass | |
self.ok &= CheckOneArg(arg, self.count).ok | |
self.count = None | |
def parse_options(self): | |
parser = argparse.ArgumentParser(description=__copyright__) | |
parser.add_argument('--version', action='version', | |
version='%(prog)s ' + VersionNumber) | |
parser.add_argument('patches', nargs='*', | |
help='[patch file | git rev list]') | |
group = parser.add_mutually_exclusive_group() | |
group.add_argument("--oneline", | |
action="store_true", | |
help="Print one result per line") | |
group.add_argument("--silent", | |
action="store_true", | |
help="Print nothing") | |
self.args = parser.parse_args() | |
if self.args.oneline: | |
Verbose.level = Verbose.ONELINE | |
if self.args.silent: | |
Verbose.level = Verbose.SILENT | |
if __name__ == "__main__": | |
sys.exit(PatchCheckApp().retval) |