From 568973181aa523bbcf7f827b3a2eb2affd96ea67 Mon Sep 17 00:00:00 2001
From: Georg Brandl
Date: Wed, 24 Aug 2005 18:32:30 +0000
Subject: Patch [ 784089 ] A program to scan python files and list those require coding

---
 Misc/NEWS                     |   5 ++
 Tools/scripts/findnocoding.py | 106 ++++++++++++++++++++++++++++++++++
 Tools/scripts/pysource.py     | 130 ++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 241 insertions(+)
 create mode 100755 Tools/scripts/findnocoding.py
 create mode 100644 Tools/scripts/pysource.py

diff --git a/Misc/NEWS b/Misc/NEWS
index 765565c..7e21b7a 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -502,6 +502,11 @@ New platforms
 Tools/Demos
 -----------
 
+- Added two new files to Tools/scripts: pysource.py, which recursively
+  finds Python source files, and findnocoding.py, which finds Python
+  source files that need an encoding declaration.
+  Patch #784089, credits to Oleg Broytmann.
+
 - Bug #1072853: pindent.py used an uninitialized variable.
 
 - Patch #1177597: Correct Complex.__init__.
diff --git a/Tools/scripts/findnocoding.py b/Tools/scripts/findnocoding.py
new file mode 100755
index 0000000..707bf23
--- /dev/null
+++ b/Tools/scripts/findnocoding.py
@@ -0,0 +1,106 @@
+#!/usr/bin/env python
+
+"""List all those Python files that require a coding directive
+
+Usage: findnocoding.py [-cd] dir1 [dir2...]
+"""
+
+__author__ = "Oleg Broytmann, Reinhold Birkenfeld"
+
+import sys, os, re, getopt
+
+# our pysource module finds Python source files
+try:
+    import pysource
+except ImportError:
+    # emulate the module with a simple os.walk
+    class pysource:
+        has_python_ext = looks_like_python = can_be_compiled = None
+        def walk_python_files(self, paths, *args, **kwargs):
+            for path in paths:
+                if os.path.isfile(path) and path.endswith(".py"):
+                    yield path  # the file name itself, not the result of the test
+                elif os.path.isdir(path):
+                    for root, dirs, files in os.walk(path):
+                        for filename in files:
+                            if filename.endswith(".py"):
+                                yield os.path.join(root, filename)
+    pysource = pysource()
+
+
+    print >>sys.stderr, ("The pysource module is not available; "
+                         "no sophisticated Python source file search will be done.")
+
+
+decl_re = re.compile(r"coding[=:]\s*([-\w.]+)")
+
+def get_declaration(line):
+    match = decl_re.search(line)
+    if match:
+        return match.group(1)
+    return ''
+
+def has_correct_encoding(text, codec):
+    try:
+        unicode(text, codec)
+    except UnicodeDecodeError:
+        return False
+    else:
+        return True
+
+def needs_declaration(fullpath):
+    try:
+        infile = open(fullpath, 'rU')
+    except IOError: # Oops, the file was removed - ignore it
+        return None
+
+    line1 = infile.readline()
+    line2 = infile.readline()
+
+    if get_declaration(line1) or get_declaration(line2):
+        # the file does have an encoding declaration, so trust it
+        infile.close()
+        return False
+
+    # check the whole file for non-ASCII characters
+    rest = infile.read()
+    infile.close()
+
+    if has_correct_encoding(line1+line2+rest, "ascii"):
+        return False
+
+    return True
+
+
+usage = """Usage: %s [-cd] paths...
+    -c: recognize Python source files trying to compile them
+    -d: debug output""" % sys.argv[0]
+
+try:
+    opts, args = getopt.getopt(sys.argv[1:], 'cd')
+except getopt.error, msg:
+    print >>sys.stderr, msg
+    print >>sys.stderr, usage
+    sys.exit(1)
+
+is_python = pysource.looks_like_python
+debug = False
+
+for o, a in opts:
+    if o == '-c':
+        is_python = pysource.can_be_compiled
+    elif o == '-d':
+        debug = True
+
+if not args:
+    print >>sys.stderr, usage
+    sys.exit(1)
+
+for fullpath in pysource.walk_python_files(args, is_python):
+    if debug:
+        print "Testing for coding: %s" % fullpath
+    result = needs_declaration(fullpath)
+    if result:
+        print fullpath
+
+
diff --git a/Tools/scripts/pysource.py b/Tools/scripts/pysource.py
new file mode 100644
index 0000000..3b01bfc
--- /dev/null
+++ b/Tools/scripts/pysource.py
@@ -0,0 +1,130 @@
+#!/usr/bin/env python
+
+"""\
+List python source files.
+
+There are three functions to check whether a file is a Python source, listed
+here with increasing complexity:
+
+- has_python_ext() checks whether a file name ends in '.py[w]'.
+- looks_like_python() checks whether the file is not binary and either has
+  the '.py[w]' extension or the first line contains the word 'python'.
+- can_be_compiled() checks whether the file can be compiled by compile().
+
+The file also must be of appropriate size - not bigger than a megabyte.
+
+walk_python_files() recursively lists all Python files under the given directories.
+"""
+__author__ = "Oleg Broytmann, Reinhold Birkenfeld"
+
+__all__ = ["has_python_ext", "looks_like_python", "can_be_compiled", "walk_python_files"]
+
+
+import sys, os, re
+
+binary_re = re.compile('[\x00-\x08\x0E-\x1F\x7F]')
+
+debug = False
+
+def print_debug(msg):
+    if debug: print msg
+
+
+def _open(fullpath):
+    try:
+        size = os.stat(fullpath).st_size
+    except OSError, err: # Permission denied - ignore the file
+        print_debug("%s: permission denied: %s" % (fullpath, err))
+        return None
+
+    if size > 1024*1024: # too big
+        print_debug("%s: the file is too big: %d bytes" % (fullpath, size))
+        return None
+
+    try:
+        return open(fullpath, 'rU')
+    except IOError, err: # Access denied, or a special file - ignore it
+        print_debug("%s: access denied: %s" % (fullpath, err))
+        return None
+
+def has_python_ext(fullpath):
+    return fullpath.endswith(".py") or fullpath.endswith(".pyw")
+
+def looks_like_python(fullpath):
+    infile = _open(fullpath)
+    if infile is None:
+        return False
+
+    line = infile.readline()
+    infile.close()
+
+    if binary_re.search(line):
+        # file appears to be binary
+        print_debug("%s: appears to be binary" % fullpath)
+        return False
+
+    if has_python_ext(fullpath):
+        return True
+    elif "python" in line:
+        # disguised Python script (e.g. CGI)
+        return True
+
+    return False
+
+def can_be_compiled(fullpath):
+    infile = _open(fullpath)
+    if infile is None:
+        return False
+
+    code = infile.read()
+    infile.close()
+
+    try:
+        compile(code, fullpath, "exec")
+    except Exception, err:
+        print_debug("%s: cannot compile: %s" % (fullpath, err))
+        return False
+
+    return True
+
+
+def walk_python_files(paths, is_python=looks_like_python, exclude_dirs=None):
+    """\
+    Recursively yield all Python source files below the given paths.
+
+    paths: a list of files and/or directories to be checked.
+    is_python: a function that takes a file name and checks whether it is a
+               Python source file
+    exclude_dirs: a list of directory base names that should be excluded in
+                  the search
+    """
+    if exclude_dirs is None:
+        exclude_dirs=[]
+
+    for path in paths:
+        print_debug("testing: %s" % path)
+        if os.path.isfile(path):
+            if is_python(path):
+                yield path
+        elif os.path.isdir(path):
+            print_debug(" it is a directory")
+            for dirpath, dirnames, filenames in os.walk(path):
+                for exclude in exclude_dirs:
+                    if exclude in dirnames:
+                        dirnames.remove(exclude)
+                for filename in filenames:
+                    fullpath = os.path.join(dirpath, filename)
+                    print_debug("testing: %s" % fullpath)
+                    if is_python(fullpath):
+                        yield fullpath
+        else:
+            print_debug(" unknown type")
+
+
+if __name__ == "__main__":
+    # Two simple examples/tests
+    for fullpath in walk_python_files(['.']):
+        print fullpath
+    print "----------"
+    for fullpath in walk_python_files(['.'], is_python=can_be_compiled):
+        print fullpath
-- 
cgit v0.12