summaryrefslogtreecommitdiffstats
path: root/Tools/patchcheck
diff options
context:
space:
mode:
authorVictor Stinner <vstinner@python.org>2022-10-12 08:09:21 (GMT)
committerGitHub <noreply@github.com>2022-10-12 08:09:21 (GMT)
commit0895c2a066c64c84cab0821886dfa66efc1bdc2f (patch)
treef2371b25cdbf64201c90316f03370f5d0000fe6e /Tools/patchcheck
parentc39a0c335486fa8eac0f3030930f9e8769118a4f (diff)
downloadcpython-0895c2a066c64c84cab0821886dfa66efc1bdc2f.zip
cpython-0895c2a066c64c84cab0821886dfa66efc1bdc2f.tar.gz
cpython-0895c2a066c64c84cab0821886dfa66efc1bdc2f.tar.bz2
gh-97669: Create Tools/patchcheck/ directory (#98186)
Move patchcheck.py, reindent.py and untabify.py scripts to a new Tools/patchcheck/ directory.
Diffstat (limited to 'Tools/patchcheck')
-rwxr-xr-xTools/patchcheck/patchcheck.py312
-rwxr-xr-xTools/patchcheck/reindent.py333
-rwxr-xr-xTools/patchcheck/untabify.py55
3 files changed, 700 insertions, 0 deletions
diff --git a/Tools/patchcheck/patchcheck.py b/Tools/patchcheck/patchcheck.py
new file mode 100755
index 0000000..a324eaf
--- /dev/null
+++ b/Tools/patchcheck/patchcheck.py
@@ -0,0 +1,312 @@
+#!/usr/bin/env python3
+"""Check proposed changes for common issues."""
+import re
+import sys
+import shutil
+import os.path
+import subprocess
+import sysconfig
+
+import reindent
+import untabify
+
+
# Excluded directories which are copies of external libraries:
# don't check their coding style
EXCLUDE_DIRS = [os.path.join('Modules', '_ctypes', 'libffi_osx'),
                os.path.join('Modules', '_ctypes', 'libffi_msvc'),
                os.path.join('Modules', '_decimal', 'libmpdec'),
                os.path.join('Modules', 'expat'),
                os.path.join('Modules', 'zlib')]
# 'srcdir' from the running Python's build configuration; all git commands
# below run with cwd=SRCDIR.  May be None when not built from a source tree.
SRCDIR = sysconfig.get_config_var('srcdir')
+
+
def n_files_str(count):
    """Return 'N file(s)' with the proper plurality on 'file'."""
    suffix = "" if count == 1 else "s"
    return f"{count} file{suffix}"
+
+
def status(message, modal=False, info=None):
    """Decorator factory: print *message*, run the function, report the result.

    With *info*, print info(result); with *modal*, print "yes"/"NO" from the
    result's truthiness; otherwise just print "done".  The wrapped function's
    return value is passed through unchanged.
    """
    def decorator(fxn):
        def wrapper(*args, **kwargs):
            sys.stdout.write(f"{message} ... ")
            sys.stdout.flush()
            result = fxn(*args, **kwargs)
            if info:
                print(info(result))
            elif modal:
                print("yes" if result else "NO")
            else:
                print("done")
            return result
        return wrapper
    return decorator
+
+
def get_git_branch():
    """Return the symbolic name for the current git branch, or None on error.

    The output is stripped of the trailing newline emitted by git; without
    the strip, comparisons against plain branch names (e.g. in
    get_base_branch()) can never match.
    """
    cmd = "git rev-parse --abbrev-ref HEAD".split()
    try:
        return subprocess.check_output(cmd,
                                       stderr=subprocess.DEVNULL,
                                       cwd=SRCDIR,
                                       encoding='UTF-8').strip()
    except subprocess.CalledProcessError:
        # Not a git checkout (or git unavailable).
        return None
+
+
def get_git_upstream_remote():
    """Return the remote name to use for upstream branches.

    Prefers "upstream" when such a remote is configured, "origin" otherwise.
    """
    probe = "git remote get-url upstream".split()
    try:
        # Only the exit status matters; the URL itself is discarded.
        subprocess.check_output(probe,
                                stderr=subprocess.DEVNULL,
                                cwd=SRCDIR,
                                encoding='UTF-8')
        return "upstream"
    except subprocess.CalledProcessError:
        return "origin"
+
+
def get_git_remote_default_branch(remote_name):
    """Return the default branch name for *remote_name*, or None on error.

    Typically 'main', but may differ; parsed from `git remote show` output.
    """
    cmd = f"git remote show {remote_name}".split()
    # Force the C locale so the "HEAD branch:" label is never translated.
    env = dict(os.environ, LANG='C')
    try:
        remote_info = subprocess.check_output(cmd,
                                              stderr=subprocess.DEVNULL,
                                              cwd=SRCDIR,
                                              encoding='UTF-8',
                                              env=env)
    except subprocess.CalledProcessError:
        return None
    for line in remote_info.splitlines():
        if "HEAD branch:" in line:
            return line.split(":")[1].strip()
    return None
+
+
+@status("Getting base branch for PR",
+ info=lambda x: x if x is not None else "not a PR branch")
+def get_base_branch():
+ if not os.path.exists(os.path.join(SRCDIR, '.git')):
+ # Not a git checkout, so there's no base branch
+ return None
+ upstream_remote = get_git_upstream_remote()
+ version = sys.version_info
+ if version.releaselevel == 'alpha':
+ base_branch = get_git_remote_default_branch(upstream_remote)
+ else:
+ base_branch = "{0.major}.{0.minor}".format(version)
+ this_branch = get_git_branch()
+ if this_branch is None or this_branch == base_branch:
+ # Not on a git PR branch, so there's no base branch
+ return None
+ return upstream_remote + "/" + base_branch
+
+
+@status("Getting the list of files that have been added/changed",
+ info=lambda x: n_files_str(len(x)))
+def changed_files(base_branch=None):
+ """Get the list of changed or added files from git."""
+ if os.path.exists(os.path.join(SRCDIR, '.git')):
+ # We just use an existence check here as:
+ # directory = normal git checkout/clone
+ # file = git worktree directory
+ if base_branch:
+ cmd = 'git diff --name-status ' + base_branch
+ else:
+ cmd = 'git status --porcelain'
+ filenames = []
+ with subprocess.Popen(cmd.split(),
+ stdout=subprocess.PIPE,
+ cwd=SRCDIR) as st:
+ if st.wait() != 0:
+ sys.exit(f'error running {cmd}')
+ for line in st.stdout:
+ line = line.decode().rstrip()
+ status_text, filename = line.split(maxsplit=1)
+ status = set(status_text)
+ # modified, added or unmerged files
+ if not status.intersection('MAU'):
+ continue
+ if ' -> ' in filename:
+ # file is renamed
+ filename = filename.split(' -> ', 2)[1].strip()
+ filenames.append(filename)
+ else:
+ sys.exit('need a git checkout to get modified files')
+
+ filenames2 = []
+ for filename in filenames:
+ # Normalize the path to be able to match using .startswith()
+ filename = os.path.normpath(filename)
+ if any(filename.startswith(path) for path in EXCLUDE_DIRS):
+ # Exclude the file
+ continue
+ filenames2.append(filename)
+
+ return filenames2
+
+
def report_modified_files(file_paths):
    """Format *file_paths* as a file count plus an indented listing."""
    count = len(file_paths)
    if not file_paths:
        return n_files_str(count)
    lines = ["{}:".format(n_files_str(count))]
    lines.extend("  {}".format(path) for path in file_paths)
    return "\n".join(lines)
+
+
+@status("Fixing Python file whitespace", info=report_modified_files)
+def normalize_whitespace(file_paths):
+ """Make sure that the whitespace for .py files have been normalized."""
+ reindent.makebackup = False # No need to create backups.
+ fixed = [path for path in file_paths if path.endswith('.py') and
+ reindent.check(os.path.join(SRCDIR, path))]
+ return fixed
+
+
+@status("Fixing C file whitespace", info=report_modified_files)
+def normalize_c_whitespace(file_paths):
+ """Report if any C files """
+ fixed = []
+ for path in file_paths:
+ abspath = os.path.join(SRCDIR, path)
+ with open(abspath, 'r') as f:
+ if '\t' not in f.read():
+ continue
+ untabify.process(abspath, 8, verbose=False)
+ fixed.append(path)
+ return fixed
+
+
# Trailing whitespace preceding a (possibly CRLF) line ending.
ws_re = re.compile(br'\s+(\r?\n)$')

@status("Fixing docs whitespace", info=report_modified_files)
def normalize_docs_whitespace(file_paths):
    """Strip trailing whitespace from doc files, keeping .bak backups."""
    fixed = []
    for path in file_paths:
        abspath = os.path.join(SRCDIR, path)
        try:
            with open(abspath, 'rb') as f:
                original = f.readlines()
            cleaned = [ws_re.sub(br'\1', line) for line in original]
            if cleaned == original:
                continue
            shutil.copyfile(abspath, abspath + '.bak')
            with open(abspath, 'wb') as f:
                f.writelines(cleaned)
            fixed.append(path)
        except Exception as err:
            print('Cannot fix %s: %s' % (path, err))
    return fixed
+
+
+@status("Docs modified", modal=True)
+def docs_modified(file_paths):
+ """Report if any file in the Doc directory has been changed."""
+ return bool(file_paths)
+
+
+@status("Misc/ACKS updated", modal=True)
+def credit_given(file_paths):
+ """Check if Misc/ACKS has been changed."""
+ return os.path.join('Misc', 'ACKS') in file_paths
+
+
+@status("Misc/NEWS.d updated with `blurb`", modal=True)
+def reported_news(file_paths):
+ """Check if Misc/NEWS.d has been changed."""
+ return any(p.startswith(os.path.join('Misc', 'NEWS.d', 'next'))
+ for p in file_paths)
+
+@status("configure regenerated", modal=True, info=str)
+def regenerated_configure(file_paths):
+ """Check if configure has been regenerated."""
+ if 'configure.ac' in file_paths:
+ return "yes" if 'configure' in file_paths else "no"
+ else:
+ return "not needed"
+
+@status("pyconfig.h.in regenerated", modal=True, info=str)
+def regenerated_pyconfig_h_in(file_paths):
+ """Check if pyconfig.h.in has been regenerated."""
+ if 'configure.ac' in file_paths:
+ return "yes" if 'pyconfig.h.in' in file_paths else "no"
+ else:
+ return "not needed"
+
def ci(pull_request):
    """Pass/fail whitespace check used on CI; exits non-zero on issues."""
    if pull_request == 'false':
        print('Not a pull request; skipping')
        return
    base_branch = get_base_branch()
    file_paths = changed_files(base_branch)
    python_files = [fn for fn in file_paths if fn.endswith('.py')]
    c_files = [fn for fn in file_paths if fn.endswith(('.c', '.h'))]
    doc_files = [fn for fn in file_paths
                 if fn.startswith('Doc') and fn.endswith(('.rst', '.inc'))]
    fixed = []
    for fixer, files in ((normalize_whitespace, python_files),
                         (normalize_c_whitespace, c_files),
                         (normalize_docs_whitespace, doc_files)):
        fixed.extend(fixer(files))
    if not fixed:
        print('No whitespace issues found')
        return
    print(f'Please fix the {len(fixed)} file(s) with whitespace issues')
    print('(on UNIX you can run `make patchcheck` to make the fixes)')
    sys.exit(1)
+
def main():
    """Run the interactive patchcheck pass over the changed files."""
    base_branch = get_base_branch()
    file_paths = changed_files(base_branch)
    python_files = [fn for fn in file_paths if fn.endswith('.py')]
    c_files = [fn for fn in file_paths if fn.endswith(('.c', '.h'))]
    doc_files = [fn for fn in file_paths
                 if fn.startswith('Doc') and fn.endswith(('.rst', '.inc'))]
    misc_files = {p for p in file_paths if p.startswith('Misc')}

    # Whitespace enforcement: PEP 8 for Python files, untabify for C files,
    # trailing-whitespace removal for the docs.
    normalize_whitespace(python_files)
    normalize_c_whitespace(c_files)
    normalize_docs_whitespace(doc_files)
    # Reminders that typically accompany a change.
    docs_modified(doc_files)
    credit_given(misc_files)
    reported_news(misc_files)
    regenerated_configure(file_paths)
    regenerated_pyconfig_h_in(file_paths)

    # Test suite run and passed.
    if python_files or c_files:
        tail = " and check for refleaks?" if c_files else "?"
        print()
        print("Did you run the test suite" + tail)
+
+
if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser(description=__doc__)
    # '--ci' receives the pull-request value ('false' when not a PR); its
    # presence selects the stricter pass/fail ci() mode over main().
    parser.add_argument('--ci',
                        help='Perform pass/fail checks')
    args = parser.parse_args()
    if args.ci:
        ci(args.ci)
    else:
        main()
diff --git a/Tools/patchcheck/reindent.py b/Tools/patchcheck/reindent.py
new file mode 100755
index 0000000..f6dadaa
--- /dev/null
+++ b/Tools/patchcheck/reindent.py
@@ -0,0 +1,333 @@
+#! /usr/bin/env python3
+
+# Released to the public domain, by Tim Peters, 03 October 2000.
+
+"""reindent [-d][-r][-v] [ path ... ]
+
+-d (--dryrun) Dry run. Analyze, but don't make any changes to, files.
+-r (--recurse) Recurse. Search for all .py files in subdirectories too.
+-n (--nobackup) No backup. Does not make a ".bak" file before reindenting.
+-v (--verbose) Verbose. Print informative msgs; else no output.
+ (--newline) Newline. Specify the newline character to use (CRLF, LF).
+ Default is the same as the original file.
+-h (--help) Help. Print this usage information and exit.
+
+Change Python (.py) files to use 4-space indents and no hard tab characters.
+Also trim excess spaces and tabs from ends of lines, and remove empty lines
+at the end of files. Also ensure the last line ends with a newline.
+
+If no paths are given on the command line, reindent operates as a filter,
+reading a single source file from standard input and writing the transformed
+source to standard output. In this case, the -d, -r and -v flags are
+ignored.
+
+You can pass one or more file and/or directory paths. When a directory
+path, all .py files within the directory will be examined, and, if the -r
+option is given, likewise recursively for subdirectories.
+
+If output is not to standard output, reindent overwrites files in place,
+renaming the originals with a .bak extension. If it finds nothing to
+change, the file is left alone. If reindent does change a file, the changed
+file is a fixed-point for future runs (i.e., running reindent on the
+resulting .py file won't change it again).
+
+The hard part of reindenting is figuring out what to do with comment
+lines. So long as the input files get a clean bill of health from
+tabnanny.py, reindent should do a good job.
+
+The backup file is a copy of the one that is being reindented. The ".bak"
+file is generated with shutil.copy(), but some corner cases regarding
+user/group and permissions could leave the backup file more readable than
+you'd prefer. You can always use the --nobackup option to prevent this.
+"""
+
+__version__ = "1"
+
+import tokenize
+import os
+import shutil
+import sys
+
# Global option flags; main() overwrites these from the command line.
verbose = False
recurse = False
dryrun = False
makebackup = True
# A specified newline to be used in the output (set by --newline option)
spec_newline = None
+
+
def usage(msg=None):
    """Print *msg* (the module docstring when None) to stderr."""
    print(__doc__ if msg is None else msg, file=sys.stderr)
+
+
def errprint(*args):
    """Write the space-joined string forms of *args* to stderr, plus newline."""
    sys.stderr.write(" ".join(str(arg) for arg in args) + "\n")
+
def main():
    """Parse command-line options, then reindent each path (or filter stdin)."""
    import getopt
    global verbose, recurse, dryrun, makebackup, spec_newline
    try:
        opts, args = getopt.getopt(sys.argv[1:], "drnvh",
            ["dryrun", "recurse", "nobackup", "verbose", "newline=", "help"])
    except getopt.error as msg:
        usage(msg)
        return
    for o, a in opts:
        if o in ('-d', '--dryrun'):
            dryrun = True
        elif o in ('-r', '--recurse'):
            recurse = True
        elif o in ('-n', '--nobackup'):
            makebackup = False
        elif o in ('-v', '--verbose'):
            verbose = True
        elif o in ('--newline',):
            # Only the two symbolic names are accepted; anything else is usage error.
            if not a.upper() in ('CRLF', 'LF'):
                usage()
                return
            spec_newline = dict(CRLF='\r\n', LF='\n')[a.upper()]
        elif o in ('-h', '--help'):
            usage()
            return
    if not args:
        # No paths given: act as a filter from stdin to stdout
        # (-d, -r and -v are ignored in this mode, per the module docstring).
        r = Reindenter(sys.stdin)
        r.run()
        r.write(sys.stdout)
        return
    for arg in args:
        check(arg)
+
+
def check(file):
    """Reindent *file*; recurse into it when it is a directory.

    Returns True when the file was rewritten, False when left unchanged,
    and None on error (or after handling a directory).
    """
    if os.path.isdir(file) and not os.path.islink(file):
        if verbose:
            print("listing directory", file)
        names = os.listdir(file)
        for name in names:
            fullname = os.path.join(file, name)
            # Descend into non-hidden subdirectories only with --recurse;
            # always visit .py files.
            if ((recurse and os.path.isdir(fullname) and
                 not os.path.islink(fullname) and
                 not os.path.split(fullname)[1].startswith("."))
                or name.lower().endswith(".py")):
                check(fullname)
        return

    if verbose:
        print("checking", file, "...", end=' ')
    with open(file, 'rb') as f:
        try:
            # Honor any PEP 263 coding cookie / BOM when re-reading the file.
            encoding, _ = tokenize.detect_encoding(f.readline)
        except SyntaxError as se:
            errprint("%s: SyntaxError: %s" % (file, str(se)))
            return
    try:
        with open(file, encoding=encoding) as f:
            r = Reindenter(f)
    except IOError as msg:
        errprint("%s: I/O Error: %s" % (file, str(msg)))
        return

    # --newline wins; otherwise reuse whatever newline convention the file had.
    newline = spec_newline if spec_newline else r.newlines
    if isinstance(newline, tuple):
        # f.newlines is a tuple when the file mixes newline styles.
        errprint("%s: mixed newlines detected; cannot continue without --newline" % file)
        return

    if r.run():
        if verbose:
            print("changed.")
        if dryrun:
            print("But this is a dry run, so leaving it alone.")
        if not dryrun:
            bak = file + ".bak"
            if makebackup:
                shutil.copyfile(file, bak)
                if verbose:
                    print("backed up", file, "to", bak)
            with open(file, "w", encoding=encoding, newline=newline) as f:
                r.write(f)
            if verbose:
                print("wrote new", file)
        return True
    else:
        if verbose:
            print("unchanged.")
        return False
+
+
+def _rstrip(line, JUNK='\n \t'):
+ """Return line stripped of trailing spaces, tabs, newlines.
+
+ Note that line.rstrip() instead also strips sundry control characters,
+ but at least one known Emacs user expects to keep junk like that, not
+ mentioning Barry by name or anything <wink>.
+ """
+
+ i = len(line)
+ while i > 0 and line[i - 1] in JUNK:
+ i -= 1
+ return line[:i]
+
+
class Reindenter:
    """Compute a 4-space-indent version of the Python source read from *f*.

    run() builds the transformed source in self.after and reports whether it
    differs from the input; write() emits it to an open file.
    """

    def __init__(self, f):
        # *f* is an open text file; it is fully consumed here.
        self.find_stmt = 1 # next token begins a fresh stmt?
        self.level = 0 # current indent level

        # Raw file lines.
        self.raw = f.readlines()

        # File lines, rstripped & tab-expanded. Dummy at start is so
        # that we can use tokenize's 1-based line numbering easily.
        # Note that a line is all-blank iff it's "\n".
        self.lines = [_rstrip(line).expandtabs() + "\n"
                      for line in self.raw]
        self.lines.insert(0, None)
        self.index = 1 # index into self.lines of next line

        # List of (lineno, indentlevel) pairs, one for each stmt and
        # comment line. indentlevel is -1 for comment lines, as a
        # signal that tokenize doesn't know what to do about them;
        # indeed, they're our headache!
        self.stats = []

        # Save the newlines found in the file so they can be used to
        # create output without mutating the newlines.
        self.newlines = f.newlines

    def run(self):
        """Tokenize the input and build self.after; return True if changed."""
        tokens = tokenize.generate_tokens(self.getline)
        for _token in tokens:
            self.tokeneater(*_token)
        # Remove trailing empty lines.
        lines = self.lines
        while lines and lines[-1] == "\n":
            lines.pop()
        # Sentinel.
        stats = self.stats
        stats.append((len(lines), 0))
        # Map count of leading spaces to # we want.
        have2want = {}
        # Program after transformation.
        after = self.after = []
        # Copy over initial empty lines -- there's nothing to do until
        # we see a line with *something* on it.
        i = stats[0][0]
        after.extend(lines[1:i])
        for i in range(len(stats) - 1):
            thisstmt, thislevel = stats[i]
            nextstmt = stats[i + 1][0]
            have = getlspace(lines[thisstmt])
            want = thislevel * 4
            if want < 0:
                # A comment line.
                if have:
                    # An indented comment line. If we saw the same
                    # indentation before, reuse what it most recently
                    # mapped to.
                    want = have2want.get(have, -1)
                    if want < 0:
                        # Then it probably belongs to the next real stmt.
                        for j in range(i + 1, len(stats) - 1):
                            jline, jlevel = stats[j]
                            if jlevel >= 0:
                                if have == getlspace(lines[jline]):
                                    want = jlevel * 4
                                break
                        if want < 0: # Maybe it's a hanging
                            # comment like this one,
                            # in which case we should shift it like its base
                            # line got shifted.
                            for j in range(i - 1, -1, -1):
                                jline, jlevel = stats[j]
                                if jlevel >= 0:
                                    want = have + (getlspace(after[jline - 1]) -
                                                   getlspace(lines[jline]))
                                    break
                        if want < 0:
                            # Still no luck -- leave it alone.
                            want = have
                else:
                    want = 0
            assert want >= 0
            have2want[have] = want
            diff = want - have
            if diff == 0 or have == 0:
                # Already correct, or an all-blank line: copy unchanged.
                after.extend(lines[thisstmt:nextstmt])
            else:
                for line in lines[thisstmt:nextstmt]:
                    if diff > 0:
                        if line == "\n":
                            after.append(line)
                        else:
                            after.append(" " * diff + line)
                    else:
                        # Dedent, but never strip non-space characters.
                        remove = min(getlspace(line), -diff)
                        after.append(line[remove:])
        return self.raw != self.after

    def write(self, f):
        """Write the transformed source to open file *f*."""
        f.writelines(self.after)

    # Line-getter for tokenize.
    def getline(self):
        if self.index >= len(self.lines):
            line = ""
        else:
            line = self.lines[self.index]
            self.index += 1
        return line

    # Line-eater for tokenize.
    def tokeneater(self, type, token, slinecol, end, line,
                   INDENT=tokenize.INDENT,
                   DEDENT=tokenize.DEDENT,
                   NEWLINE=tokenize.NEWLINE,
                   COMMENT=tokenize.COMMENT,
                   NL=tokenize.NL):

        if type == NEWLINE:
            # A program statement, or ENDMARKER, will eventually follow,
            # after some (possibly empty) run of tokens of the form
            # (NL | COMMENT)* (INDENT | DEDENT+)?
            self.find_stmt = 1

        elif type == INDENT:
            self.find_stmt = 1
            self.level += 1

        elif type == DEDENT:
            self.find_stmt = 1
            self.level -= 1

        elif type == COMMENT:
            if self.find_stmt:
                self.stats.append((slinecol[0], -1))
                # but we're still looking for a new stmt, so leave
                # find_stmt alone

        elif type == NL:
            pass

        elif self.find_stmt:
            # This is the first "real token" following a NEWLINE, so it
            # must be the first token of the next program statement, or an
            # ENDMARKER.
            self.find_stmt = 0
            if line: # not endmarker
                self.stats.append((slinecol[0], self.level))
+
+
# Count number of leading blanks.
def getlspace(line):
    """Return the number of leading space characters in *line*."""
    return len(line) - len(line.lstrip(" "))
+
+
if __name__ == '__main__':
    # Run as a script: parse argv and reindent the given paths (or stdin).
    main()
diff --git a/Tools/patchcheck/untabify.py b/Tools/patchcheck/untabify.py
new file mode 100755
index 0000000..861c83c
--- /dev/null
+++ b/Tools/patchcheck/untabify.py
@@ -0,0 +1,55 @@
+#! /usr/bin/env python3
+
+"Replace tabs with spaces in argument files. Print names of changed files."
+
+import os
+import sys
+import getopt
+import tokenize
+
def main():
    """Expand tabs in each file named on the command line (-t sets tab width)."""
    tabsize = 8
    try:
        options, filenames = getopt.getopt(sys.argv[1:], "t:")
        if not filenames:
            raise getopt.error("At least one file argument required")
    except getopt.error as msg:
        print(msg)
        print("usage:", sys.argv[0], "[-t tabwidth] file ...")
        return
    for name, value in options:
        if name == '-t':
            tabsize = int(value)

    for filename in filenames:
        process(filename, tabsize)
+
+
def process(filename, tabsize, verbose=True):
    """Replace tabs with spaces in *filename*, editing it in place.

    A backup is kept as "filename~" when possible and the file's original
    encoding is preserved.  When *verbose* is true and the file changed,
    its name is printed.
    """
    try:
        with tokenize.open(filename) as f:
            text = f.read()
            encoding = f.encoding
    except IOError as msg:
        print("%r: I/O error: %s" % (filename, msg))
        return
    newtext = text.expandtabs(tabsize)
    if newtext == text:
        # Already tab-free: leave the file untouched.
        return
    backup = filename + "~"
    # Best effort: drop any stale backup, then move the original aside.
    for operation in ((os.unlink, backup), (os.rename, filename, backup)):
        try:
            operation[0](*operation[1:])
        except OSError:
            pass
    with open(filename, "w", encoding=encoding) as f:
        f.write(newtext)
    if verbose:
        print(filename)
+
+
if __name__ == '__main__':
    # Run as a script: untabify the files named on the command line.
    main()