summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorGeorg Brandl <georg@python.org>2005-08-24 18:32:30 (GMT)
committerGeorg Brandl <georg@python.org>2005-08-24 18:32:30 (GMT)
commit568973181aa523bbcf7f827b3a2eb2affd96ea67 (patch)
treedc00a1741aea07b0d41fd39c441b00ef93dbb462
parent0a5d4a20e7b2b758c974766cc3ac02af5a342483 (diff)
downloadcpython-568973181aa523bbcf7f827b3a2eb2affd96ea67.zip
cpython-568973181aa523bbcf7f827b3a2eb2affd96ea67.tar.gz
cpython-568973181aa523bbcf7f827b3a2eb2affd96ea67.tar.bz2
Patch [ 784089 ] A program to scan python files and list those require coding
-rw-r--r--Misc/NEWS5
-rwxr-xr-xTools/scripts/findnocoding.py106
-rw-r--r--Tools/scripts/pysource.py130
3 files changed, 241 insertions, 0 deletions
diff --git a/Misc/NEWS b/Misc/NEWS
index 765565c..7e21b7a 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -502,6 +502,11 @@ New platforms
Tools/Demos
-----------
+- Added two new files to Tools/scripts: pysource.py, which recursively
+ finds Python source files, and findnocoding.py, which finds Python
+ source files that need an encoding declaration.
+ Patch #784089, credits to Oleg Broytmann.
+
- Bug #1072853: pindent.py used an uninitialized variable.
- Patch #1177597: Correct Complex.__init__.
diff --git a/Tools/scripts/findnocoding.py b/Tools/scripts/findnocoding.py
new file mode 100755
index 0000000..707bf23
--- /dev/null
+++ b/Tools/scripts/findnocoding.py
@@ -0,0 +1,106 @@
+#!/usr/bin/env python
+
+"""List all those Python files that require a coding directive
+
+Usage: findnocoding.py [-cd] dir1 [dir2...]
+"""
+
+__author__ = "Oleg Broytmann, Reinhold Birkenfeld"
+
+import sys, os, re, getopt
+
+# Our pysource module finds Python source files; when it cannot be
+# imported, fall back to a minimal stand-in offering the same entry point.
+try:
+    import pysource
+except ImportError:
+    # Emulate the module with a simple os.walk.  Only names ending in ".py"
+    # are reported and the is_python checkers are not available (None).
+    class pysource:
+        has_python_ext = looks_like_python = can_be_compiled = None
+
+        def walk_python_files(self, paths, *args, **kwargs):
+            """Yield the path of every "*.py" file named by or below paths.
+
+            Extra positional and keyword arguments are accepted so the
+            call made by this script works unchanged; they are ignored.
+            """
+            for path in paths:
+                if os.path.isfile(path):
+                    # Yield the path itself, not the boolean test result.
+                    if path.endswith(".py"):
+                        yield path
+                elif os.path.isdir(path):
+                    for root, dirs, files in os.walk(path):
+                        for filename in files:
+                            if filename.endswith(".py"):
+                                yield os.path.join(root, filename)
+    pysource = pysource()
+
+    print >>sys.stderr, ("The pysource module is not available; "
+                         "no sophisticated Python source file search will be done.")
+
+
+# Matches "coding=<name>" or "coding: <name>"; group 1 is the name.
+decl_re = re.compile(r"coding[=:]\s*([-\w.]+)")
+
+def get_declaration(line):
+    """Return the encoding name declared in line, or '' if there is none."""
+    found = decl_re.search(line)
+    if found is None:
+        return ''
+    return found.group(1)
+
+def has_correct_encoding(text, codec):
+    """Return True if text can be decoded using codec, else False."""
+    try:
+        unicode(text, codec)
+    except UnicodeDecodeError:
+        # at least one byte is not valid in this codec
+        return False
+    return True
+
+def needs_declaration(fullpath):
+    """Tell whether the file at fullpath lacks a needed coding declaration.
+
+    Returns None if the file cannot be opened, False if one of its first
+    two lines carries a coding declaration or if its whole content decodes
+    as ASCII, and True otherwise.
+    """
+    try:
+        infile = open(fullpath, 'rU')
+    except IOError: # Oops, the file was removed - ignore it
+        return None
+
+    # try/finally makes sure the file is closed even if reading fails
+    try:
+        line1 = infile.readline()
+        line2 = infile.readline()
+
+        if get_declaration(line1) or get_declaration(line2):
+            # the file does have an encoding declaration, so trust it
+            return False
+
+        # check the whole file for non-ASCII characters
+        rest = infile.read()
+    finally:
+        infile.close()
+
+    return not has_correct_encoding(line1+line2+rest, "ascii")
+
+
+usage = """Usage: %s [-cd] paths...
+ -c: recognize Python source files trying to compile them
+ -d: debug output""" % sys.argv[0]
+
+# Parse the command line; on a bad option report it and show the usage.
+try:
+    opts, args = getopt.getopt(sys.argv[1:], 'cd')
+except getopt.error, msg:
+    print >>sys.stderr, msg
+    print >>sys.stderr, usage
+    sys.exit(1)
+
+# Defaults: detect sources with looks_like_python, no debug output.
+is_python = pysource.looks_like_python
+debug = False
+
+for o, a in opts:
+    if o == '-d':
+        debug = True
+    elif o == '-c':
+        is_python = pysource.can_be_compiled
+
+# At least one path must be given.
+if not args:
+    print >>sys.stderr, usage
+    sys.exit(1)
+
+# Print the name of every file found that needs a coding declaration.
+for fullpath in pysource.walk_python_files(args, is_python):
+    if debug:
+        print "Testing for coding: %s" % fullpath
+    result = needs_declaration(fullpath)
+    if not result:
+        continue
+    print fullpath
+
+
diff --git a/Tools/scripts/pysource.py b/Tools/scripts/pysource.py
new file mode 100644
index 0000000..3b01bfc
--- /dev/null
+++ b/Tools/scripts/pysource.py
@@ -0,0 +1,130 @@
+#!/usr/bin/env python
+
+"""\
+List python source files.
+
+There are three functions to check whether a file is a Python source, listed
+here with increasing complexity:
+
+- has_python_ext() checks whether a file name ends in '.py[w]'.
+- looks_like_python() checks whether the file is not binary and either has
+ the '.py[w]' extension or the first line contains the word 'python'.
+- can_be_compiled() checks whether the file can be compiled by compile().
+
+The file also must be of appropriate size - not bigger than a megabyte.
+
+walk_python_files() recursively lists all Python files under the given directories.
+"""
+__author__ = "Oleg Broytmann, Reinhold Birkenfeld"
+
+__all__ = ["has_python_ext", "looks_like_python", "can_be_compiled", "walk_python_files"]
+
+
+import sys, os, re
+
+# Control characters; looks_like_python() treats a first line containing
+# any of them as a sign of a binary file.
+binary_re = re.compile('[\x00-\x08\x0E-\x1F\x7F]')
+
+# Set to True to make print_debug() print its messages.
+debug = False
+
+def print_debug(msg):
+    """Print msg, but only when the module-level debug flag is true."""
+    if not debug:
+        return
+    print msg
+
+
+def _open(fullpath):
+    """Open fullpath for reading in universal-newline mode.
+
+    Returns the open file object, or None if the file cannot be stat'ed,
+    is larger than one megabyte, or cannot be opened.  The reason for a
+    refusal is reported through print_debug().
+    """
+    max_size = 1024*1024
+
+    try:
+        size = os.stat(fullpath).st_size
+    except OSError, err: # Permission denied - ignore the file
+        print_debug("%s: permission denied: %s" % (fullpath, err))
+        return None
+
+    if size > max_size: # too big
+        print_debug("%s: the file is too big: %d bytes" % (fullpath, size))
+        return None
+
+    try:
+        infile = open(fullpath, 'rU')
+    except IOError, err: # Access denied, or a special file - ignore it
+        print_debug("%s: access denied: %s" % (fullpath, err))
+        return None
+    return infile
+
+def has_python_ext(fullpath):
+    """Return True if fullpath ends in '.py' or '.pyw', else False."""
+    for ext in (".py", ".pyw"):
+        if fullpath.endswith(ext):
+            return True
+    return False
+
+def looks_like_python(fullpath):
+    """Guess from its name and first line whether fullpath is Python source.
+
+    Returns False if _open() refuses the file or if the first line contains
+    a character matched by binary_re.  Otherwise returns True if the name
+    has a Python extension or the first line contains the word 'python',
+    and False if neither holds.
+    """
+    infile = _open(fullpath)
+    if infile is None:
+        return False
+
+    # close the file even if reading the first line fails
+    try:
+        line = infile.readline()
+    finally:
+        infile.close()
+
+    if binary_re.search(line):
+        # file appears to be binary
+        print_debug("%s: appears to be binary" % fullpath)
+        return False
+
+    # a Python extension, or a disguised Python script (e.g. CGI)
+    return has_python_ext(fullpath) or "python" in line
+
+def can_be_compiled(fullpath):
+    """Return True if the content of fullpath compiles as Python code.
+
+    Returns False if _open() refuses the file or if compile() raises an
+    exception for its content.
+    """
+    infile = _open(fullpath)
+    if infile is None:
+        return False
+
+    # close the file even if reading it fails
+    try:
+        code = infile.read()
+    finally:
+        infile.close()
+
+    try:
+        compile(code, fullpath, "exec")
+    except Exception, err:
+        print_debug("%s: cannot compile: %s" % (fullpath, err))
+        return False
+
+    return True
+
+
+def walk_python_files(paths, is_python=looks_like_python, exclude_dirs=None):
+    """\
+    Recursively yield all Python source files below the given paths.
+
+    paths: a list of files and/or directories to be checked.
+    is_python: a function that takes a file name and checks whether it is a
+               Python source file
+    exclude_dirs: a list of directory base names that should be excluded in
+                  the search
+    """
+    if exclude_dirs is None:
+        exclude_dirs = []
+
+    for path in paths:
+        print_debug("testing: %s" % path)
+        if os.path.isfile(path):
+            if is_python(path):
+                yield path
+        elif os.path.isdir(path):
+            print_debug("    it is a directory")
+            for dirpath, dirnames, filenames in os.walk(path):
+                # prune in place so os.walk does not descend into
+                # excluded directories
+                dirnames[:] = [name for name in dirnames
+                               if name not in exclude_dirs]
+                for filename in filenames:
+                    candidate = os.path.join(dirpath, filename)
+                    print_debug("testing: %s" % candidate)
+                    if is_python(candidate):
+                        yield candidate
+        else:
+            print_debug("    unknown type")
+
+
+if __name__ == "__main__":
+    # Two simple examples/tests: list the files below the current directory
+    # using each checker in turn, with a separator line in between.
+    for index, checker in enumerate([looks_like_python, can_be_compiled]):
+        if index:
+            print "----------"
+        for fullpath in walk_python_files(['.'], is_python=checker):
+            print fullpath