From 0895c2a066c64c84cab0821886dfa66efc1bdc2f Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Wed, 12 Oct 2022 10:09:21 +0200 Subject: gh-97669: Create Tools/patchcheck/ directory (#98186) Move patchcheck.py, reindent.py and untabify.py scripts to a new Tools/patchcheck/ directory. --- .azure-pipelines/posix-steps.yml | 2 +- Lib/test/test_tools/test_reindent.py | 4 +- Makefile.pre.in | 4 +- Tools/patchcheck/patchcheck.py | 312 ++++++++++++++++++++++++++++++++ Tools/patchcheck/reindent.py | 333 +++++++++++++++++++++++++++++++++++ Tools/patchcheck/untabify.py | 55 ++++++ Tools/scripts/README | 3 - Tools/scripts/patchcheck.py | 312 -------------------------------- Tools/scripts/reindent.py | 333 ----------------------------------- Tools/scripts/untabify.py | 55 ------ 10 files changed, 705 insertions(+), 708 deletions(-) create mode 100755 Tools/patchcheck/patchcheck.py create mode 100755 Tools/patchcheck/reindent.py create mode 100755 Tools/patchcheck/untabify.py delete mode 100755 Tools/scripts/patchcheck.py delete mode 100755 Tools/scripts/reindent.py delete mode 100755 Tools/scripts/untabify.py diff --git a/.azure-pipelines/posix-steps.yml b/.azure-pipelines/posix-steps.yml index 29b43e0..9d7c5e1 100644 --- a/.azure-pipelines/posix-steps.yml +++ b/.azure-pipelines/posix-steps.yml @@ -68,7 +68,7 @@ steps: - ${{ if eq(parameters.patchcheck, 'true') }}: - script: | git fetch origin - ./python Tools/scripts/patchcheck.py --ci true + ./python Tools/patchcheck/patchcheck.py --ci true displayName: 'Run patchcheck.py' condition: and(succeeded(), eq(variables['Build.Reason'], 'PullRequest')) diff --git a/Lib/test/test_tools/test_reindent.py b/Lib/test/test_tools/test_reindent.py index 34df0c5..3b0c793 100644 --- a/Lib/test/test_tools/test_reindent.py +++ b/Lib/test/test_tools/test_reindent.py @@ -9,12 +9,12 @@ import unittest from test.support.script_helper import assert_python_ok from test.support import findfile -from test.test_tools import scriptsdir, skip_if_missing +from test.test_tools import toolsdir, skip_if_missing skip_if_missing() class ReindentTests(unittest.TestCase): - script = os.path.join(scriptsdir, 'reindent.py') + script = os.path.join(toolsdir, 'patchcheck', 'reindent.py') def test_noargs(self): assert_python_ok(self.script) diff --git a/Makefile.pre.in b/Makefile.pre.in index 4602db6..7e25671 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -2386,7 +2386,7 @@ Python/dtoa.o: Python/dtoa.c # Run reindent on the library reindent: - ./$(BUILDPYTHON) $(srcdir)/Tools/scripts/reindent.py -r $(srcdir)/Lib + ./$(BUILDPYTHON) $(srcdir)/Tools/patchcheck/reindent.py -r $(srcdir)/Lib # Rerun configure with the same options as it was run last time, # provided the config.status script exists @@ -2546,7 +2546,7 @@ funny: # Perform some verification checks on any modified files. patchcheck: all - $(RUNSHARED) ./$(BUILDPYTHON) $(srcdir)/Tools/scripts/patchcheck.py + $(RUNSHARED) ./$(BUILDPYTHON) $(srcdir)/Tools/patchcheck/patchcheck.py check-limited-abi: all $(RUNSHARED) ./$(BUILDPYTHON) $(srcdir)/Tools/scripts/stable_abi.py --all $(srcdir)/Misc/stable_abi.toml diff --git a/Tools/patchcheck/patchcheck.py b/Tools/patchcheck/patchcheck.py new file mode 100755 index 0000000..a324eaf --- /dev/null +++ b/Tools/patchcheck/patchcheck.py @@ -0,0 +1,312 @@ +#!/usr/bin/env python3 +"""Check proposed changes for common issues.""" +import re +import sys +import shutil +import os.path +import subprocess +import sysconfig + +import reindent +import untabify + + +# Excluded directories which are copies of external libraries: +# don't check their coding style +EXCLUDE_DIRS = [os.path.join('Modules', '_ctypes', 'libffi_osx'), + os.path.join('Modules', '_ctypes', 'libffi_msvc'), + os.path.join('Modules', '_decimal', 'libmpdec'), + os.path.join('Modules', 'expat'), + os.path.join('Modules', 'zlib')] +SRCDIR = sysconfig.get_config_var('srcdir') + + +def n_files_str(count): + """Return 'N file(s)' with the proper plurality on 'file'.""" + return "{} file{}".format(count, "s" if count != 1 else "") + + +def status(message, modal=False, info=None): + """Decorator to output status info to stdout.""" + def decorated_fxn(fxn): + def call_fxn(*args, **kwargs): + sys.stdout.write(message + ' ... ') + sys.stdout.flush() + result = fxn(*args, **kwargs) + if not modal and not info: + print("done") + elif info: + print(info(result)) + else: + print("yes" if result else "NO") + return result + return call_fxn + return decorated_fxn + + +def get_git_branch(): + """Get the symbolic name for the current git branch""" + cmd = "git rev-parse --abbrev-ref HEAD".split() + try: + return subprocess.check_output(cmd, + stderr=subprocess.DEVNULL, + cwd=SRCDIR, + encoding='UTF-8') + except subprocess.CalledProcessError: + return None + + +def get_git_upstream_remote(): + """Get the remote name to use for upstream branches + + Uses "upstream" if it exists, "origin" otherwise + """ + cmd = "git remote get-url upstream".split() + try: + subprocess.check_output(cmd, + stderr=subprocess.DEVNULL, + cwd=SRCDIR, + encoding='UTF-8') + except subprocess.CalledProcessError: + return "origin" + return "upstream" + + +def get_git_remote_default_branch(remote_name): + """Get the name of the default branch for the given remote + + It is typically called 'main', but may differ + """ + cmd = "git remote show {}".format(remote_name).split() + env = os.environ.copy() + env['LANG'] = 'C' + try: + remote_info = subprocess.check_output(cmd, + stderr=subprocess.DEVNULL, + cwd=SRCDIR, + encoding='UTF-8', + env=env) + except subprocess.CalledProcessError: + return None + for line in remote_info.splitlines(): + if "HEAD branch:" in line: + base_branch = line.split(":")[1].strip() + return base_branch + return None + + +@status("Getting base branch for PR", + info=lambda x: x if x is not None else "not a PR branch") +def get_base_branch(): + if not os.path.exists(os.path.join(SRCDIR, '.git')): + # Not a git checkout, so there's no base branch + return None + upstream_remote = get_git_upstream_remote() + version = sys.version_info + if version.releaselevel == 'alpha': + base_branch = get_git_remote_default_branch(upstream_remote) + else: + base_branch = "{0.major}.{0.minor}".format(version) + this_branch = get_git_branch() + if this_branch is None or this_branch == base_branch: + # Not on a git PR branch, so there's no base branch + return None + return upstream_remote + "/" + base_branch + + +@status("Getting the list of files that have been added/changed", + info=lambda x: n_files_str(len(x))) +def changed_files(base_branch=None): + """Get the list of changed or added files from git.""" + if os.path.exists(os.path.join(SRCDIR, '.git')): + # We just use an existence check here as: + # directory = normal git checkout/clone + # file = git worktree directory + if base_branch: + cmd = 'git diff --name-status ' + base_branch + else: + cmd = 'git status --porcelain' + filenames = [] + with subprocess.Popen(cmd.split(), + stdout=subprocess.PIPE, + cwd=SRCDIR) as st: + if st.wait() != 0: + sys.exit(f'error running {cmd}') + for line in st.stdout: + line = line.decode().rstrip() + status_text, filename = line.split(maxsplit=1) + status = set(status_text) + # modified, added or unmerged files + if not status.intersection('MAU'): + continue + if ' -> ' in filename: + # file is renamed + filename = filename.split(' -> ', 2)[1].strip() + filenames.append(filename) + else: + sys.exit('need a git checkout to get modified files') + + filenames2 = [] + for filename in filenames: + # Normalize the path to be able to match using .startswith() + filename = os.path.normpath(filename) + if any(filename.startswith(path) for path in EXCLUDE_DIRS): + # Exclude the file + continue + filenames2.append(filename) + + return filenames2 + + +def report_modified_files(file_paths): + count = len(file_paths) + if count == 0: + return n_files_str(count) + else: + lines = ["{}:".format(n_files_str(count))] + for path in file_paths: + lines.append(" {}".format(path)) + return "\n".join(lines) + + +@status("Fixing Python file whitespace", info=report_modified_files) +def normalize_whitespace(file_paths): + """Make sure that the whitespace for .py files have been normalized.""" + reindent.makebackup = False # No need to create backups. + fixed = [path for path in file_paths if path.endswith('.py') and + reindent.check(os.path.join(SRCDIR, path))] + return fixed + + +@status("Fixing C file whitespace", info=report_modified_files) +def normalize_c_whitespace(file_paths): + """Report if any C files """ + fixed = [] + for path in file_paths: + abspath = os.path.join(SRCDIR, path) + with open(abspath, 'r') as f: + if '\t' not in f.read(): + continue + untabify.process(abspath, 8, verbose=False) + fixed.append(path) + return fixed + + +ws_re = re.compile(br'\s+(\r?\n)$') + +@status("Fixing docs whitespace", info=report_modified_files) +def normalize_docs_whitespace(file_paths): + fixed = [] + for path in file_paths: + abspath = os.path.join(SRCDIR, path) + try: + with open(abspath, 'rb') as f: + lines = f.readlines() + new_lines = [ws_re.sub(br'\1', line) for line in lines] + if new_lines != lines: + shutil.copyfile(abspath, abspath + '.bak') + with open(abspath, 'wb') as f: + f.writelines(new_lines) + fixed.append(path) + except Exception as err: + print('Cannot fix %s: %s' % (path, err)) + return fixed + + +@status("Docs modified", modal=True) +def docs_modified(file_paths): + """Report if any file in the Doc directory has been changed.""" + return bool(file_paths) + + +@status("Misc/ACKS updated", modal=True) +def credit_given(file_paths): + """Check if Misc/ACKS has been changed.""" + return os.path.join('Misc', 'ACKS') in file_paths + + +@status("Misc/NEWS.d updated with `blurb`", modal=True) +def reported_news(file_paths): + """Check if Misc/NEWS.d has been changed.""" + return any(p.startswith(os.path.join('Misc', 'NEWS.d', 'next')) + for p in file_paths) + +@status("configure regenerated", modal=True, info=str) +def regenerated_configure(file_paths): + """Check if configure has been regenerated.""" + if 'configure.ac' in file_paths: + return "yes" if 'configure' in file_paths else "no" + else: + return "not needed" + +@status("pyconfig.h.in regenerated", modal=True, info=str) +def regenerated_pyconfig_h_in(file_paths): + """Check if pyconfig.h.in has been regenerated.""" + if 'configure.ac' in file_paths: + return "yes" if 'pyconfig.h.in' in file_paths else "no" + else: + return "not needed" + +def ci(pull_request): + if pull_request == 'false': + print('Not a pull request; skipping') + return + base_branch = get_base_branch() + file_paths = changed_files(base_branch) + python_files = [fn for fn in file_paths if fn.endswith('.py')] + c_files = [fn for fn in file_paths if fn.endswith(('.c', '.h'))] + doc_files = [fn for fn in file_paths if fn.startswith('Doc') and + fn.endswith(('.rst', '.inc'))] + fixed = [] + fixed.extend(normalize_whitespace(python_files)) + fixed.extend(normalize_c_whitespace(c_files)) + fixed.extend(normalize_docs_whitespace(doc_files)) + if not fixed: + print('No whitespace issues found') + else: + print(f'Please fix the {len(fixed)} file(s) with whitespace issues') + print('(on UNIX you can run `make patchcheck` to make the fixes)') + sys.exit(1) + +def main(): + base_branch = get_base_branch() + file_paths = changed_files(base_branch) + python_files = [fn for fn in file_paths if fn.endswith('.py')] + c_files = [fn for fn in file_paths if fn.endswith(('.c', '.h'))] + doc_files = [fn for fn in file_paths if fn.startswith('Doc') and + fn.endswith(('.rst', '.inc'))] + misc_files = {p for p in file_paths if p.startswith('Misc')} + # PEP 8 whitespace rules enforcement. + normalize_whitespace(python_files) + # C rules enforcement. + normalize_c_whitespace(c_files) + # Doc whitespace enforcement. + normalize_docs_whitespace(doc_files) + # Docs updated. + docs_modified(doc_files) + # Misc/ACKS changed. + credit_given(misc_files) + # Misc/NEWS changed. + reported_news(misc_files) + # Regenerated configure, if necessary. + regenerated_configure(file_paths) + # Regenerated pyconfig.h.in, if necessary. + regenerated_pyconfig_h_in(file_paths) + + # Test suite run and passed. + if python_files or c_files: + end = " and check for refleaks?" if c_files else "?" + print() + print("Did you run the test suite" + end) + + +if __name__ == '__main__': + import argparse + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument('--ci', + help='Perform pass/fail checks') + args = parser.parse_args() + if args.ci: + ci(args.ci) + else: + main() diff --git a/Tools/patchcheck/reindent.py b/Tools/patchcheck/reindent.py new file mode 100755 index 0000000..f6dadaa --- /dev/null +++ b/Tools/patchcheck/reindent.py @@ -0,0 +1,333 @@ +#! /usr/bin/env python3 + +# Released to the public domain, by Tim Peters, 03 October 2000. + +"""reindent [-d][-r][-v] [ path ... ] + +-d (--dryrun) Dry run. Analyze, but don't make any changes to, files. +-r (--recurse) Recurse. Search for all .py files in subdirectories too. +-n (--nobackup) No backup. Does not make a ".bak" file before reindenting. +-v (--verbose) Verbose. Print informative msgs; else no output. + (--newline) Newline. Specify the newline character to use (CRLF, LF). + Default is the same as the original file. +-h (--help) Help. Print this usage information and exit. + +Change Python (.py) files to use 4-space indents and no hard tab characters. +Also trim excess spaces and tabs from ends of lines, and remove empty lines +at the end of files. Also ensure the last line ends with a newline. + +If no paths are given on the command line, reindent operates as a filter, +reading a single source file from standard input and writing the transformed +source to standard output. In this case, the -d, -r and -v flags are +ignored. + +You can pass one or more file and/or directory paths. When a directory +path, all .py files within the directory will be examined, and, if the -r +option is given, likewise recursively for subdirectories. + +If output is not to standard output, reindent overwrites files in place, +renaming the originals with a .bak extension. If it finds nothing to +change, the file is left alone. If reindent does change a file, the changed +file is a fixed-point for future runs (i.e., running reindent on the +resulting .py file won't change it again). + +The hard part of reindenting is figuring out what to do with comment +lines. So long as the input files get a clean bill of health from +tabnanny.py, reindent should do a good job. + +The backup file is a copy of the one that is being reindented. The ".bak" +file is generated with shutil.copy(), but some corner cases regarding +user/group and permissions could leave the backup file more readable than +you'd prefer. You can always use the --nobackup option to prevent this. +""" + +__version__ = "1" + +import tokenize +import os +import shutil +import sys + +verbose = False +recurse = False +dryrun = False +makebackup = True +# A specified newline to be used in the output (set by --newline option) +spec_newline = None + + +def usage(msg=None): + if msg is None: + msg = __doc__ + print(msg, file=sys.stderr) + + +def errprint(*args): + sys.stderr.write(" ".join(str(arg) for arg in args)) + sys.stderr.write("\n") + +def main(): + import getopt + global verbose, recurse, dryrun, makebackup, spec_newline + try: + opts, args = getopt.getopt(sys.argv[1:], "drnvh", + ["dryrun", "recurse", "nobackup", "verbose", "newline=", "help"]) + except getopt.error as msg: + usage(msg) + return + for o, a in opts: + if o in ('-d', '--dryrun'): + dryrun = True + elif o in ('-r', '--recurse'): + recurse = True + elif o in ('-n', '--nobackup'): + makebackup = False + elif o in ('-v', '--verbose'): + verbose = True + elif o in ('--newline',): + if not a.upper() in ('CRLF', 'LF'): + usage() + return + spec_newline = dict(CRLF='\r\n', LF='\n')[a.upper()] + elif o in ('-h', '--help'): + usage() + return + if not args: + r = Reindenter(sys.stdin) + r.run() + r.write(sys.stdout) + return + for arg in args: + check(arg) + + +def check(file): + if os.path.isdir(file) and not os.path.islink(file): + if verbose: + print("listing directory", file) + names = os.listdir(file) + for name in names: + fullname = os.path.join(file, name) + if ((recurse and os.path.isdir(fullname) and + not os.path.islink(fullname) and + not os.path.split(fullname)[1].startswith(".")) + or name.lower().endswith(".py")): + check(fullname) + return + + if verbose: + print("checking", file, "...", end=' ') + with open(file, 'rb') as f: + try: + encoding, _ = tokenize.detect_encoding(f.readline) + except SyntaxError as se: + errprint("%s: SyntaxError: %s" % (file, str(se))) + return + try: + with open(file, encoding=encoding) as f: + r = Reindenter(f) + except IOError as msg: + errprint("%s: I/O Error: %s" % (file, str(msg))) + return + + newline = spec_newline if spec_newline else r.newlines + if isinstance(newline, tuple): + errprint("%s: mixed newlines detected; cannot continue without --newline" % file) + return + + if r.run(): + if verbose: + print("changed.") + if dryrun: + print("But this is a dry run, so leaving it alone.") + if not dryrun: + bak = file + ".bak" + if makebackup: + shutil.copyfile(file, bak) + if verbose: + print("backed up", file, "to", bak) + with open(file, "w", encoding=encoding, newline=newline) as f: + r.write(f) + if verbose: + print("wrote new", file) + return True + else: + if verbose: + print("unchanged.") + return False + + +def _rstrip(line, JUNK='\n \t'): + """Return line stripped of trailing spaces, tabs, newlines. + + Note that line.rstrip() instead also strips sundry control characters, + but at least one known Emacs user expects to keep junk like that, not + mentioning Barry by name or anything . + """ + + i = len(line) + while i > 0 and line[i - 1] in JUNK: + i -= 1 + return line[:i] + + +class Reindenter: + + def __init__(self, f): + self.find_stmt = 1 # next token begins a fresh stmt? + self.level = 0 # current indent level + + # Raw file lines. + self.raw = f.readlines() + + # File lines, rstripped & tab-expanded. Dummy at start is so + # that we can use tokenize's 1-based line numbering easily. + # Note that a line is all-blank iff it's "\n". + self.lines = [_rstrip(line).expandtabs() + "\n" + for line in self.raw] + self.lines.insert(0, None) + self.index = 1 # index into self.lines of next line + + # List of (lineno, indentlevel) pairs, one for each stmt and + # comment line. indentlevel is -1 for comment lines, as a + # signal that tokenize doesn't know what to do about them; + # indeed, they're our headache! + self.stats = [] + + # Save the newlines found in the file so they can be used to + # create output without mutating the newlines. + self.newlines = f.newlines + + def run(self): + tokens = tokenize.generate_tokens(self.getline) + for _token in tokens: + self.tokeneater(*_token) + # Remove trailing empty lines. + lines = self.lines + while lines and lines[-1] == "\n": + lines.pop() + # Sentinel. + stats = self.stats + stats.append((len(lines), 0)) + # Map count of leading spaces to # we want. + have2want = {} + # Program after transformation. + after = self.after = [] + # Copy over initial empty lines -- there's nothing to do until + # we see a line with *something* on it. + i = stats[0][0] + after.extend(lines[1:i]) + for i in range(len(stats) - 1): + thisstmt, thislevel = stats[i] + nextstmt = stats[i + 1][0] + have = getlspace(lines[thisstmt]) + want = thislevel * 4 + if want < 0: + # A comment line. + if have: + # An indented comment line. If we saw the same + # indentation before, reuse what it most recently + # mapped to. + want = have2want.get(have, -1) + if want < 0: + # Then it probably belongs to the next real stmt. + for j in range(i + 1, len(stats) - 1): + jline, jlevel = stats[j] + if jlevel >= 0: + if have == getlspace(lines[jline]): + want = jlevel * 4 + break + if want < 0: # Maybe it's a hanging + # comment like this one, + # in which case we should shift it like its base + # line got shifted. + for j in range(i - 1, -1, -1): + jline, jlevel = stats[j] + if jlevel >= 0: + want = have + (getlspace(after[jline - 1]) - + getlspace(lines[jline])) + break + if want < 0: + # Still no luck -- leave it alone. + want = have + else: + want = 0 + assert want >= 0 + have2want[have] = want + diff = want - have + if diff == 0 or have == 0: + after.extend(lines[thisstmt:nextstmt]) + else: + for line in lines[thisstmt:nextstmt]: + if diff > 0: + if line == "\n": + after.append(line) + else: + after.append(" " * diff + line) + else: + remove = min(getlspace(line), -diff) + after.append(line[remove:]) + return self.raw != self.after + + def write(self, f): + f.writelines(self.after) + + # Line-getter for tokenize. + def getline(self): + if self.index >= len(self.lines): + line = "" + else: + line = self.lines[self.index] + self.index += 1 + return line + + # Line-eater for tokenize. + def tokeneater(self, type, token, slinecol, end, line, + INDENT=tokenize.INDENT, + DEDENT=tokenize.DEDENT, + NEWLINE=tokenize.NEWLINE, + COMMENT=tokenize.COMMENT, + NL=tokenize.NL): + + if type == NEWLINE: + # A program statement, or ENDMARKER, will eventually follow, + # after some (possibly empty) run of tokens of the form + # (NL | COMMENT)* (INDENT | DEDENT+)? + self.find_stmt = 1 + + elif type == INDENT: + self.find_stmt = 1 + self.level += 1 + + elif type == DEDENT: + self.find_stmt = 1 + self.level -= 1 + + elif type == COMMENT: + if self.find_stmt: + self.stats.append((slinecol[0], -1)) + # but we're still looking for a new stmt, so leave + # find_stmt alone + + elif type == NL: + pass + + elif self.find_stmt: + # This is the first "real token" following a NEWLINE, so it + # must be the first token of the next program statement, or an + # ENDMARKER. + self.find_stmt = 0 + if line: # not endmarker + self.stats.append((slinecol[0], self.level)) + + +# Count number of leading blanks. +def getlspace(line): + i, n = 0, len(line) + while i < n and line[i] == " ": + i += 1 + return i + + +if __name__ == '__main__': + main() diff --git a/Tools/patchcheck/untabify.py b/Tools/patchcheck/untabify.py new file mode 100755 index 0000000..861c83c --- /dev/null +++ b/Tools/patchcheck/untabify.py @@ -0,0 +1,55 @@ +#! /usr/bin/env python3 + +"Replace tabs with spaces in argument files. Print names of changed files." + +import os +import sys +import getopt +import tokenize + +def main(): + tabsize = 8 + try: + opts, args = getopt.getopt(sys.argv[1:], "t:") + if not args: + raise getopt.error("At least one file argument required") + except getopt.error as msg: + print(msg) + print("usage:", sys.argv[0], "[-t tabwidth] file ...") + return + for optname, optvalue in opts: + if optname == '-t': + tabsize = int(optvalue) + + for filename in args: + process(filename, tabsize) + + +def process(filename, tabsize, verbose=True): + try: + with tokenize.open(filename) as f: + text = f.read() + encoding = f.encoding + except IOError as msg: + print("%r: I/O error: %s" % (filename, msg)) + return + newtext = text.expandtabs(tabsize) + if newtext == text: + return + backup = filename + "~" + try: + os.unlink(backup) + except OSError: + pass + try: + os.rename(filename, backup) + except OSError: + pass + with open(filename, "w", encoding=encoding) as f: + f.write(newtext) + if verbose: + print(filename) + + +if __name__ == '__main__': + main() diff --git a/Tools/scripts/README b/Tools/scripts/README index 9943d4c..b952268 100644 --- a/Tools/scripts/README +++ b/Tools/scripts/README @@ -5,9 +5,6 @@ useful while building, extending or managing Python. combinerefs.py A helper for analyzing PYTHONDUMPREFS output idle3 Main program to start IDLE parse_html5_entities.py Utility for parsing HTML5 entity definitions -patchcheck.py Perform common checks and cleanup before committing pydoc3 Python documentation browser -reindent.py Change .py files to use 4-space indents run_tests.py Run the test suite with more sensible default options stable_abi.py Stable ABI checks and file generators. -untabify.py Replace tabs with spaces in argument files diff --git a/Tools/scripts/patchcheck.py b/Tools/scripts/patchcheck.py deleted file mode 100755 index a324eaf..0000000 --- a/Tools/scripts/patchcheck.py +++ /dev/null @@ -1,312 +0,0 @@ -#!/usr/bin/env python3 -"""Check proposed changes for common issues.""" -import re -import sys -import shutil -import os.path -import subprocess -import sysconfig - -import reindent -import untabify - - -# Excluded directories which are copies of external libraries: -# don't check their coding style -EXCLUDE_DIRS = [os.path.join('Modules', '_ctypes', 'libffi_osx'), - os.path.join('Modules', '_ctypes', 'libffi_msvc'), - os.path.join('Modules', '_decimal', 'libmpdec'), - os.path.join('Modules', 'expat'), - os.path.join('Modules', 'zlib')] -SRCDIR = sysconfig.get_config_var('srcdir') - - -def n_files_str(count): - """Return 'N file(s)' with the proper plurality on 'file'.""" - return "{} file{}".format(count, "s" if count != 1 else "") - - -def status(message, modal=False, info=None): - """Decorator to output status info to stdout.""" - def decorated_fxn(fxn): - def call_fxn(*args, **kwargs): - sys.stdout.write(message + ' ... ') - sys.stdout.flush() - result = fxn(*args, **kwargs) - if not modal and not info: - print("done") - elif info: - print(info(result)) - else: - print("yes" if result else "NO") - return result - return call_fxn - return decorated_fxn - - -def get_git_branch(): - """Get the symbolic name for the current git branch""" - cmd = "git rev-parse --abbrev-ref HEAD".split() - try: - return subprocess.check_output(cmd, - stderr=subprocess.DEVNULL, - cwd=SRCDIR, - encoding='UTF-8') - except subprocess.CalledProcessError: - return None - - -def get_git_upstream_remote(): - """Get the remote name to use for upstream branches - - Uses "upstream" if it exists, "origin" otherwise - """ - cmd = "git remote get-url upstream".split() - try: - subprocess.check_output(cmd, - stderr=subprocess.DEVNULL, - cwd=SRCDIR, - encoding='UTF-8') - except subprocess.CalledProcessError: - return "origin" - return "upstream" - - -def get_git_remote_default_branch(remote_name): - """Get the name of the default branch for the given remote - - It is typically called 'main', but may differ - """ - cmd = "git remote show {}".format(remote_name).split() - env = os.environ.copy() - env['LANG'] = 'C' - try: - remote_info = subprocess.check_output(cmd, - stderr=subprocess.DEVNULL, - cwd=SRCDIR, - encoding='UTF-8', - env=env) - except subprocess.CalledProcessError: - return None - for line in remote_info.splitlines(): - if "HEAD branch:" in line: - base_branch = line.split(":")[1].strip() - return base_branch - return None - - -@status("Getting base branch for PR", - info=lambda x: x if x is not None else "not a PR branch") -def get_base_branch(): - if not os.path.exists(os.path.join(SRCDIR, '.git')): - # Not a git checkout, so there's no base branch - return None - upstream_remote = get_git_upstream_remote() - version = sys.version_info - if version.releaselevel == 'alpha': - base_branch = get_git_remote_default_branch(upstream_remote) - else: - base_branch = "{0.major}.{0.minor}".format(version) - this_branch = get_git_branch() - if this_branch is None or this_branch == base_branch: - # Not on a git PR branch, so there's no base branch - return None - return upstream_remote + "/" + base_branch - - -@status("Getting the list of files that have been added/changed", - info=lambda x: n_files_str(len(x))) -def changed_files(base_branch=None): - """Get the list of changed or added files from git.""" - if os.path.exists(os.path.join(SRCDIR, '.git')): - # We just use an existence check here as: - # directory = normal git checkout/clone - # file = git worktree directory - if base_branch: - cmd = 'git diff --name-status ' + base_branch - else: - cmd = 'git status --porcelain' - filenames = [] - with subprocess.Popen(cmd.split(), - stdout=subprocess.PIPE, - cwd=SRCDIR) as st: - if st.wait() != 0: - sys.exit(f'error running {cmd}') - for line in st.stdout: - line = line.decode().rstrip() - status_text, filename = line.split(maxsplit=1) - status = set(status_text) - # modified, added or unmerged files - if not status.intersection('MAU'): - continue - if ' -> ' in filename: - # file is renamed - filename = filename.split(' -> ', 2)[1].strip() - filenames.append(filename) - else: - sys.exit('need a git checkout to get modified files') - - filenames2 = [] - for filename in filenames: - # Normalize the path to be able to match using .startswith() - filename = os.path.normpath(filename) - if any(filename.startswith(path) for path in EXCLUDE_DIRS): - # Exclude the file - continue - filenames2.append(filename) - - return filenames2 - - -def report_modified_files(file_paths): - count = len(file_paths) - if count == 0: - return n_files_str(count) - else: - lines = ["{}:".format(n_files_str(count))] - for path in file_paths: - lines.append(" {}".format(path)) - return "\n".join(lines) - - -@status("Fixing Python file whitespace", info=report_modified_files) -def normalize_whitespace(file_paths): - """Make sure that the whitespace for .py files have been normalized.""" - reindent.makebackup = False # No need to create backups. - fixed = [path for path in file_paths if path.endswith('.py') and - reindent.check(os.path.join(SRCDIR, path))] - return fixed - - -@status("Fixing C file whitespace", info=report_modified_files) -def normalize_c_whitespace(file_paths): - """Report if any C files """ - fixed = [] - for path in file_paths: - abspath = os.path.join(SRCDIR, path) - with open(abspath, 'r') as f: - if '\t' not in f.read(): - continue - untabify.process(abspath, 8, verbose=False) - fixed.append(path) - return fixed - - -ws_re = re.compile(br'\s+(\r?\n)$') - -@status("Fixing docs whitespace", info=report_modified_files) -def normalize_docs_whitespace(file_paths): - fixed = [] - for path in file_paths: - abspath = os.path.join(SRCDIR, path) - try: - with open(abspath, 'rb') as f: - lines = f.readlines() - new_lines = [ws_re.sub(br'\1', line) for line in lines] - if new_lines != lines: - shutil.copyfile(abspath, abspath + '.bak') - with open(abspath, 'wb') as f: - f.writelines(new_lines) - fixed.append(path) - except Exception as err: - print('Cannot fix %s: %s' % (path, err)) - return fixed - - -@status("Docs modified", modal=True) -def docs_modified(file_paths): - """Report if any file in the Doc directory has been changed.""" - return bool(file_paths) - - -@status("Misc/ACKS updated", modal=True) -def credit_given(file_paths): - """Check if Misc/ACKS has been changed.""" - return os.path.join('Misc', 'ACKS') in file_paths - - -@status("Misc/NEWS.d updated with `blurb`", modal=True) -def reported_news(file_paths): - """Check if Misc/NEWS.d has been changed.""" - return any(p.startswith(os.path.join('Misc', 'NEWS.d', 'next')) - for p in file_paths) - -@status("configure regenerated", modal=True, info=str) -def regenerated_configure(file_paths): - """Check if configure has been regenerated.""" - if 'configure.ac' in file_paths: - return "yes" if 'configure' in file_paths else "no" - else: - return "not needed" - -@status("pyconfig.h.in regenerated", modal=True, info=str) -def regenerated_pyconfig_h_in(file_paths): - """Check if pyconfig.h.in has been regenerated.""" - if 'configure.ac' in file_paths: - return "yes" if 'pyconfig.h.in' in file_paths else "no" - else: - return "not needed" - -def ci(pull_request): - if pull_request == 'false': - print('Not a pull request; skipping') - return - base_branch = get_base_branch() - file_paths = changed_files(base_branch) - python_files = [fn for fn in file_paths if fn.endswith('.py')] - c_files = [fn for fn in file_paths if fn.endswith(('.c', '.h'))] - doc_files = [fn for fn in file_paths if fn.startswith('Doc') and - fn.endswith(('.rst', '.inc'))] - fixed = [] - fixed.extend(normalize_whitespace(python_files)) - fixed.extend(normalize_c_whitespace(c_files)) - fixed.extend(normalize_docs_whitespace(doc_files)) - if not fixed: - print('No whitespace issues found') - else: - print(f'Please fix the {len(fixed)} file(s) with whitespace issues') - print('(on UNIX you can run `make patchcheck` to make the fixes)') - sys.exit(1) - -def main(): - base_branch = get_base_branch() - file_paths = changed_files(base_branch) - python_files = [fn for fn in file_paths if fn.endswith('.py')] - c_files = [fn for fn in file_paths if fn.endswith(('.c', '.h'))] - doc_files = [fn for fn in file_paths if fn.startswith('Doc') and - fn.endswith(('.rst', '.inc'))] - misc_files = {p for p in file_paths if p.startswith('Misc')} - # PEP 8 whitespace rules enforcement. - normalize_whitespace(python_files) - # C rules enforcement. - normalize_c_whitespace(c_files) - # Doc whitespace enforcement. - normalize_docs_whitespace(doc_files) - # Docs updated. - docs_modified(doc_files) - # Misc/ACKS changed. - credit_given(misc_files) - # Misc/NEWS changed. - reported_news(misc_files) - # Regenerated configure, if necessary. - regenerated_configure(file_paths) - # Regenerated pyconfig.h.in, if necessary. - regenerated_pyconfig_h_in(file_paths) - - # Test suite run and passed. - if python_files or c_files: - end = " and check for refleaks?" if c_files else "?" - print() - print("Did you run the test suite" + end) - - -if __name__ == '__main__': - import argparse - parser = argparse.ArgumentParser(description=__doc__) - parser.add_argument('--ci', - help='Perform pass/fail checks') - args = parser.parse_args() - if args.ci: - ci(args.ci) - else: - main() diff --git a/Tools/scripts/reindent.py b/Tools/scripts/reindent.py deleted file mode 100755 index f6dadaa..0000000 --- a/Tools/scripts/reindent.py +++ /dev/null @@ -1,333 +0,0 @@ -#! /usr/bin/env python3 - -# Released to the public domain, by Tim Peters, 03 October 2000. - -"""reindent [-d][-r][-v] [ path ... ] - --d (--dryrun) Dry run. Analyze, but don't make any changes to, files. --r (--recurse) Recurse. Search for all .py files in subdirectories too. --n (--nobackup) No backup. Does not make a ".bak" file before reindenting. --v (--verbose) Verbose. Print informative msgs; else no output. - (--newline) Newline. Specify the newline character to use (CRLF, LF). - Default is the same as the original file. --h (--help) Help. Print this usage information and exit. - -Change Python (.py) files to use 4-space indents and no hard tab characters. -Also trim excess spaces and tabs from ends of lines, and remove empty lines -at the end of files. Also ensure the last line ends with a newline. - -If no paths are given on the command line, reindent operates as a filter, -reading a single source file from standard input and writing the transformed -source to standard output. In this case, the -d, -r and -v flags are -ignored. - -You can pass one or more file and/or directory paths. When a directory -path, all .py files within the directory will be examined, and, if the -r -option is given, likewise recursively for subdirectories. - -If output is not to standard output, reindent overwrites files in place, -renaming the originals with a .bak extension. If it finds nothing to -change, the file is left alone. If reindent does change a file, the changed -file is a fixed-point for future runs (i.e., running reindent on the -resulting .py file won't change it again). - -The hard part of reindenting is figuring out what to do with comment -lines. So long as the input files get a clean bill of health from -tabnanny.py, reindent should do a good job. - -The backup file is a copy of the one that is being reindented. The ".bak" -file is generated with shutil.copy(), but some corner cases regarding -user/group and permissions could leave the backup file more readable than -you'd prefer. You can always use the --nobackup option to prevent this. -""" - -__version__ = "1" - -import tokenize -import os -import shutil -import sys - -verbose = False -recurse = False -dryrun = False -makebackup = True -# A specified newline to be used in the output (set by --newline option) -spec_newline = None - - -def usage(msg=None): - if msg is None: - msg = __doc__ - print(msg, file=sys.stderr) - - -def errprint(*args): - sys.stderr.write(" ".join(str(arg) for arg in args)) - sys.stderr.write("\n") - -def main(): - import getopt - global verbose, recurse, dryrun, makebackup, spec_newline - try: - opts, args = getopt.getopt(sys.argv[1:], "drnvh", - ["dryrun", "recurse", "nobackup", "verbose", "newline=", "help"]) - except getopt.error as msg: - usage(msg) - return - for o, a in opts: - if o in ('-d', '--dryrun'): - dryrun = True - elif o in ('-r', '--recurse'): - recurse = True - elif o in ('-n', '--nobackup'): - makebackup = False - elif o in ('-v', '--verbose'): - verbose = True - elif o in ('--newline',): - if not a.upper() in ('CRLF', 'LF'): - usage() - return - spec_newline = dict(CRLF='\r\n', LF='\n')[a.upper()] - elif o in ('-h', '--help'): - usage() - return - if not args: - r = Reindenter(sys.stdin) - r.run() - r.write(sys.stdout) - return - for arg in args: - check(arg) - - -def check(file): - if os.path.isdir(file) and not os.path.islink(file): - if verbose: - print("listing directory", file) - names = os.listdir(file) - for name in names: - fullname = os.path.join(file, name) - if ((recurse and os.path.isdir(fullname) and - not os.path.islink(fullname) and - not os.path.split(fullname)[1].startswith(".")) - or name.lower().endswith(".py")): - check(fullname) - return - - if verbose: - print("checking", file, "...", end=' ') - with open(file, 'rb') as f: - try: - encoding, _ = tokenize.detect_encoding(f.readline) - except SyntaxError as se: - errprint("%s: SyntaxError: %s" % (file, str(se))) - return - try: - with open(file, encoding=encoding) as f: - r = Reindenter(f) - except IOError as msg: - errprint("%s: I/O Error: %s" % (file, str(msg))) - return - - newline = spec_newline if spec_newline else r.newlines - if isinstance(newline, tuple): - errprint("%s: mixed newlines detected; cannot continue without --newline" % file) - return - - if r.run(): - if verbose: - print("changed.") - if dryrun: - print("But this is a dry run, so leaving it alone.") - if not dryrun: - bak = file + ".bak" - if makebackup: - shutil.copyfile(file, bak) - if verbose: - print("backed up", file, "to", bak) - with open(file, "w", encoding=encoding, newline=newline) as f: - r.write(f) - if verbose: - print("wrote new", file) - return True - else: - if verbose: - print("unchanged.") - return False - - -def _rstrip(line, JUNK='\n \t'): - """Return line stripped of trailing spaces, tabs, newlines. - - Note that line.rstrip() instead also strips sundry control characters, - but at least one known Emacs user expects to keep junk like that, not - mentioning Barry by name or anything . - """ - - i = len(line) - while i > 0 and line[i - 1] in JUNK: - i -= 1 - return line[:i] - - -class Reindenter: - - def __init__(self, f): - self.find_stmt = 1 # next token begins a fresh stmt? - self.level = 0 # current indent level - - # Raw file lines. - self.raw = f.readlines() - - # File lines, rstripped & tab-expanded. Dummy at start is so - # that we can use tokenize's 1-based line numbering easily. - # Note that a line is all-blank iff it's "\n". - self.lines = [_rstrip(line).expandtabs() + "\n" - for line in self.raw] - self.lines.insert(0, None) - self.index = 1 # index into self.lines of next line - - # List of (lineno, indentlevel) pairs, one for each stmt and - # comment line. indentlevel is -1 for comment lines, as a - # signal that tokenize doesn't know what to do about them; - # indeed, they're our headache! - self.stats = [] - - # Save the newlines found in the file so they can be used to - # create output without mutating the newlines. - self.newlines = f.newlines - - def run(self): - tokens = tokenize.generate_tokens(self.getline) - for _token in tokens: - self.tokeneater(*_token) - # Remove trailing empty lines. - lines = self.lines - while lines and lines[-1] == "\n": - lines.pop() - # Sentinel. - stats = self.stats - stats.append((len(lines), 0)) - # Map count of leading spaces to # we want. - have2want = {} - # Program after transformation. - after = self.after = [] - # Copy over initial empty lines -- there's nothing to do until - # we see a line with *something* on it. - i = stats[0][0] - after.extend(lines[1:i]) - for i in range(len(stats) - 1): - thisstmt, thislevel = stats[i] - nextstmt = stats[i + 1][0] - have = getlspace(lines[thisstmt]) - want = thislevel * 4 - if want < 0: - # A comment line. - if have: - # An indented comment line. If we saw the same - # indentation before, reuse what it most recently - # mapped to. - want = have2want.get(have, -1) - if want < 0: - # Then it probably belongs to the next real stmt. - for j in range(i + 1, len(stats) - 1): - jline, jlevel = stats[j] - if jlevel >= 0: - if have == getlspace(lines[jline]): - want = jlevel * 4 - break - if want < 0: # Maybe it's a hanging - # comment like this one, - # in which case we should shift it like its base - # line got shifted. - for j in range(i - 1, -1, -1): - jline, jlevel = stats[j] - if jlevel >= 0: - want = have + (getlspace(after[jline - 1]) - - getlspace(lines[jline])) - break - if want < 0: - # Still no luck -- leave it alone. - want = have - else: - want = 0 - assert want >= 0 - have2want[have] = want - diff = want - have - if diff == 0 or have == 0: - after.extend(lines[thisstmt:nextstmt]) - else: - for line in lines[thisstmt:nextstmt]: - if diff > 0: - if line == "\n": - after.append(line) - else: - after.append(" " * diff + line) - else: - remove = min(getlspace(line), -diff) - after.append(line[remove:]) - return self.raw != self.after - - def write(self, f): - f.writelines(self.after) - - # Line-getter for tokenize. - def getline(self): - if self.index >= len(self.lines): - line = "" - else: - line = self.lines[self.index] - self.index += 1 - return line - - # Line-eater for tokenize. - def tokeneater(self, type, token, slinecol, end, line, - INDENT=tokenize.INDENT, - DEDENT=tokenize.DEDENT, - NEWLINE=tokenize.NEWLINE, - COMMENT=tokenize.COMMENT, - NL=tokenize.NL): - - if type == NEWLINE: - # A program statement, or ENDMARKER, will eventually follow, - # after some (possibly empty) run of tokens of the form - # (NL | COMMENT)* (INDENT | DEDENT+)? - self.find_stmt = 1 - - elif type == INDENT: - self.find_stmt = 1 - self.level += 1 - - elif type == DEDENT: - self.find_stmt = 1 - self.level -= 1 - - elif type == COMMENT: - if self.find_stmt: - self.stats.append((slinecol[0], -1)) - # but we're still looking for a new stmt, so leave - # find_stmt alone - - elif type == NL: - pass - - elif self.find_stmt: - # This is the first "real token" following a NEWLINE, so it - # must be the first token of the next program statement, or an - # ENDMARKER. - self.find_stmt = 0 - if line: # not endmarker - self.stats.append((slinecol[0], self.level)) - - -# Count number of leading blanks. -def getlspace(line): - i, n = 0, len(line) - while i < n and line[i] == " ": - i += 1 - return i - - -if __name__ == '__main__': - main() diff --git a/Tools/scripts/untabify.py b/Tools/scripts/untabify.py deleted file mode 100755 index 861c83c..0000000 --- a/Tools/scripts/untabify.py +++ /dev/null @@ -1,55 +0,0 @@ -#! /usr/bin/env python3 - -"Replace tabs with spaces in argument files. Print names of changed files." - -import os -import sys -import getopt -import tokenize - -def main(): - tabsize = 8 - try: - opts, args = getopt.getopt(sys.argv[1:], "t:") - if not args: - raise getopt.error("At least one file argument required") - except getopt.error as msg: - print(msg) - print("usage:", sys.argv[0], "[-t tabwidth] file ...") - return - for optname, optvalue in opts: - if optname == '-t': - tabsize = int(optvalue) - - for filename in args: - process(filename, tabsize) - - -def process(filename, tabsize, verbose=True): - try: - with tokenize.open(filename) as f: - text = f.read() - encoding = f.encoding - except IOError as msg: - print("%r: I/O error: %s" % (filename, msg)) - return - newtext = text.expandtabs(tabsize) - if newtext == text: - return - backup = filename + "~" - try: - os.unlink(backup) - except OSError: - pass - try: - os.rename(filename, backup) - except OSError: - pass - with open(filename, "w", encoding=encoding) as f: - f.write(newtext) - if verbose: - print(filename) - - -if __name__ == '__main__': - main() -- cgit v0.12