diff options
author | Georg Brandl <georg@python.org> | 2005-08-24 18:32:30 (GMT) |
---|---|---|
committer | Georg Brandl <georg@python.org> | 2005-08-24 18:32:30 (GMT) |
commit | 568973181aa523bbcf7f827b3a2eb2affd96ea67 (patch) | |
tree | dc00a1741aea07b0d41fd39c441b00ef93dbb462 /Tools | |
parent | 0a5d4a20e7b2b758c974766cc3ac02af5a342483 (diff) | |
download | cpython-568973181aa523bbcf7f827b3a2eb2affd96ea67.zip cpython-568973181aa523bbcf7f827b3a2eb2affd96ea67.tar.gz cpython-568973181aa523bbcf7f827b3a2eb2affd96ea67.tar.bz2 |
Patch [ 784089 ] A program to scan Python files and list those that require a coding declaration
Diffstat (limited to 'Tools')
-rwxr-xr-x | Tools/scripts/findnocoding.py | 106 | ||||
-rw-r--r-- | Tools/scripts/pysource.py | 130 |
2 files changed, 236 insertions, 0 deletions
diff --git a/Tools/scripts/findnocoding.py b/Tools/scripts/findnocoding.py
new file mode 100755
index 0000000..707bf23
--- /dev/null
+++ b/Tools/scripts/findnocoding.py
@@ -0,0 +1,106 @@
+#!/usr/bin/env python
+
+"""List all those Python files that require a coding directive
+
+Usage: findnocoding.py [-cd] dir1 [dir2...]
+"""
+
+__author__ = "Oleg Broytmann, Reinhold Birkenfeld"
+
+import sys, os, re, getopt
+
+# our pysource module finds Python source files
+try:
+    import pysource
+except ImportError:
+    # pysource is missing: emulate it with a plain os.walk over '*.py' files
+    class pysource:
+        has_python_ext = looks_like_python = can_be_compiled = None
+        def walk_python_files(self, paths, *args, **kwargs):
+            for path in paths:
+                if os.path.isfile(path):
+                    if path.endswith(".py"):
+                        yield path
+                elif os.path.isdir(path):
+                    for root, dirs, files in os.walk(path):
+                        for filename in files:
+                            if filename.endswith(".py"):
+                                yield os.path.join(root, filename)
+    pysource = pysource()
+
+    print >>sys.stderr, ("The pysource module is not available; "
+                         "no sophisticated Python source file search will be done.")
+
+
+# a coding declaration: "coding:" or "coding=" followed by the codec name
+decl_re = re.compile(r"coding[=:]\s*([-\w.]+)")
+
+def get_declaration(line):
+    """Return the codec name declared in line, or '' if there is none."""
+    match = decl_re.search(line)
+    if match:
+        return match.group(1)
+    return ''
+
+def has_correct_encoding(text, codec):
+    """Return True if text decodes with codec, False on UnicodeDecodeError."""
+    try:
+        unicode(text, codec)
+    except UnicodeDecodeError:
+        return False
+    return True
+
+def needs_declaration(fullpath):
+    """Tell whether the file lacks a coding declaration it needs.
+
+    Return None if the file cannot be opened, False if one of its first
+    two lines declares a coding or the whole text decodes as ASCII, and
+    True otherwise.
+    """
+    try:
+        infile = open(fullpath, 'rU')
+    except IOError: # Oops, the file was removed - ignore it
+        return None
+    try:
+        line1 = infile.readline()
+        line2 = infile.readline()
+        if get_declaration(line1) or get_declaration(line2):
+            # the file does have an encoding declaration, so trust it
+            return False
+        # check the whole file for non-ASCII characters
+        rest = infile.read()
+    finally:
+        infile.close()
+    return not has_correct_encoding(line1+line2+rest, "ascii")
+
+
+usage = """Usage: %s [-cd] paths...
+    -c: recognize Python source files trying to compile them
+    -d: debug output""" % sys.argv[0]
+
+try:
+    opts, args = getopt.getopt(sys.argv[1:], 'cd')
+except getopt.error, msg:
+    print >>sys.stderr, msg
+    print >>sys.stderr, usage
+    sys.exit(1)
+
+is_python = pysource.looks_like_python
+debug = False
+
+for o, a in opts:
+    if o == '-c':
+        is_python = pysource.can_be_compiled
+    elif o == '-d':
+        debug = True
+
+if not args:
+    print >>sys.stderr, usage
+    sys.exit(1)
+
+for fullpath in pysource.walk_python_files(args, is_python):
+    if debug:
+        print "Testing for coding: %s" % fullpath
+    result = needs_declaration(fullpath)
+    if result:
+        print fullpath
diff --git a/Tools/scripts/pysource.py b/Tools/scripts/pysource.py
new file mode 100644
index 0000000..3b01bfc
--- /dev/null
+++ b/Tools/scripts/pysource.py
@@ -0,0 +1,130 @@
+#!/usr/bin/env python
+
+"""\
+List python source files.
+
+There are three functions to check whether a file is a Python source, listed
+here with increasing complexity:
+
+- has_python_ext() checks whether a file name ends in '.py[w]'.
+- looks_like_python() checks whether the file is not binary and either has
+  the '.py[w]' extension or the first line contains the word 'python'.
+- can_be_compiled() checks whether the file can be compiled by compile().
+
+The file also must be of appropriate size - not bigger than a megabyte.
+
+walk_python_files() recursively lists all Python files under the given directories.
+"""
+__author__ = "Oleg Broytmann, Reinhold Birkenfeld"
+
+__all__ = ["has_python_ext", "looks_like_python", "can_be_compiled", "walk_python_files"]
+
+
+import sys, os, re
+
+binary_re = re.compile('[\x00-\x08\x0E-\x1F\x7F]')
+
+debug = False
+
+def print_debug(msg):
+    """Print msg, but only when the module-level debug flag is set."""
+    if debug: print msg
+
+
+def _open(fullpath):
+    """Open fullpath in 'rU' mode; return None if it is over 1 MB or unreadable."""
+    try:
+        size = os.stat(fullpath).st_size
+    except OSError, err: # Permission denied - ignore the file
+        print_debug("%s: permission denied: %s" % (fullpath, err))
+        return None
+
+    if size > 1024*1024: # too big
+        print_debug("%s: the file is too big: %d bytes" % (fullpath, size))
+        return None
+
+    try:
+        return open(fullpath, 'rU')
+    except IOError, err: # Access denied, or a special file - ignore it
+        print_debug("%s: access denied: %s" % (fullpath, err))
+        return None
+
+def has_python_ext(fullpath):
+    """Return True if the file name ends in '.py' or '.pyw'."""
+    return fullpath.endswith(".py") or fullpath.endswith(".pyw")
+
+def looks_like_python(fullpath):
+    """Check line 1: not binary, and a '.py[w]' name or 'python' in the line."""
+    infile = _open(fullpath)
+    if infile is None:
+        return False
+
+    line = infile.readline()
+    infile.close()
+
+    if binary_re.search(line):
+        # file appears to be binary
+        print_debug("%s: appears to be binary" % fullpath)
+        return False
+
+    # a '.py[w]' name, or a disguised Python script (e.g. CGI)
+    return has_python_ext(fullpath) or "python" in line
+
+def can_be_compiled(fullpath):
+    """Return True if compile() accepts the file's contents in "exec" mode."""
+    infile = _open(fullpath)
+    if infile is None:
+        return False
+
+    code = infile.read()
+    infile.close()
+
+    try:
+        compile(code, fullpath, "exec")
+    except Exception, err:
+        print_debug("%s: cannot compile: %s" % (fullpath, err))
+        return False
+
+    return True
+
+
+def walk_python_files(paths, is_python=looks_like_python, exclude_dirs=None):
+    """\
+    Recursively yield all Python source files below the given paths.
+
+    paths: a list of files and/or directories to be checked.
+    is_python: a function that takes a file name and checks whether it is a
+               Python source file
+    exclude_dirs: a list of directory base names that should be excluded in
+                  the search
+    """
+    if exclude_dirs is None:
+        exclude_dirs=[]
+
+    for path in paths:
+        print_debug("testing: %s" % path)
+        if os.path.isfile(path):
+            if is_python(path):
+                yield path
+        elif os.path.isdir(path):
+            print_debug(" it is a directory")
+            for dirpath, dirnames, filenames in os.walk(path):
+                for exclude in exclude_dirs:
+                    if exclude in dirnames:
+                        dirnames.remove(exclude)
+                for filename in filenames:
+                    fullpath = os.path.join(dirpath, filename)
+                    print_debug("testing: %s" % fullpath)
+                    if is_python(fullpath):
+                        yield fullpath
+        else:
+            print_debug(" unknown type")
+
+
+if __name__ == "__main__":
+    # Two simple examples/tests
+    for fullpath in walk_python_files(['.']):
+        print fullpath
+    print "----------"
+    for fullpath in walk_python_files(['.'], is_python=can_be_compiled):
+        print fullpath