diff options
-rw-r--r-- | Tools/idle/pyclbr.py | 336 | ||||
-rwxr-xr-x | Tools/idle/tabnanny.py | 372 |
2 files changed, 0 insertions, 708 deletions
diff --git a/Tools/idle/pyclbr.py b/Tools/idle/pyclbr.py deleted file mode 100644 index 74b7ff7..0000000 --- a/Tools/idle/pyclbr.py +++ /dev/null @@ -1,336 +0,0 @@ -"""Parse a Python file and retrieve classes and methods. - -Parse enough of a Python file to recognize class and method -definitions and to find out the superclasses of a class. - -The interface consists of a single function: - readmodule(module, path) -module is the name of a Python module, path is an optional list of -directories where the module is to be searched. If present, path is -prepended to the system search path sys.path. -The return value is a dictionary. The keys of the dictionary are -the names of the classes defined in the module (including classes -that are defined via the from XXX import YYY construct). The values -are class instances of the class Class defined here. - -A class is described by the class Class in this module. Instances -of this class have the following instance variables: - name -- the name of the class - super -- a list of super classes (Class instances) - methods -- a dictionary of methods - file -- the file in which the class was defined - lineno -- the line in the file on which the class statement occurred -The dictionary of methods uses the method names as keys and the line -numbers on which the method was defined as values. -If the name of a super class is not recognized, the corresponding -entry in the list of super classes is not a class instance but a -string giving the name of the super class. Since import statements -are recognized and imported modules are scanned as well, this -shouldn't happen often. - -BUGS -- Continuation lines are not dealt with at all. -- While triple-quoted strings won't confuse it, lines that look like - def, class, import or "from ... import" stmts inside backslash-continued - single-quoted strings are treated like code. The expense of stopping - that isn't worth it. -- Code that doesn't pass tabnanny or python -t will confuse it, unless - you set the module TABWIDTH vrbl (default 8) to the correct tab width - for the file. - -PACKAGE RELATED BUGS -- If you have a package and a module inside that or another package - with the same name, module caching doesn't work properly since the - key is the base name of the module/package. -- The only entry that is returned when you readmodule a package is a - __path__ whose value is a list which confuses certain class browsers. -- When code does: - from package import subpackage - class MyClass(subpackage.SuperClass): - ... - It can't locate the parent. It probably needs to have the same - hairy logic that the import locator already does. (This logic - exists coded in Python in the freeze package.) -""" - -import os -import sys -import imp -import re -import string - -TABWIDTH = 8 - -_getnext = re.compile(r""" - (?P<String> - \""" [^"\\]* (?: - (?: \\. | "(?!"") ) - [^"\\]* - )* - \""" - - | ''' [^'\\]* (?: - (?: \\. | '(?!'') ) - [^'\\]* - )* - ''' - ) - -| (?P<Method> - ^ - (?P<MethodIndent> [ \t]* ) - def [ \t]+ - (?P<MethodName> [a-zA-Z_] \w* ) - [ \t]* \( - ) - -| (?P<Class> - ^ - (?P<ClassIndent> [ \t]* ) - class [ \t]+ - (?P<ClassName> [a-zA-Z_] \w* ) - [ \t]* - (?P<ClassSupers> \( [^)\n]* \) )? - [ \t]* : - ) - -| (?P<Import> - ^ import [ \t]+ - (?P<ImportList> [^#;\n]+ ) - ) - -| (?P<ImportFrom> - ^ from [ \t]+ - (?P<ImportFromPath> - [a-zA-Z_] \w* - (?: - [ \t]* \. [ \t]* [a-zA-Z_] \w* - )* - ) - [ \t]+ - import [ \t]+ - (?P<ImportFromList> [^#;\n]+ ) - ) -""", re.VERBOSE | re.DOTALL | re.MULTILINE).search - -_modules = {} # cache of modules we've seen - -# each Python class is represented by an instance of this class -class Class: - '''Class to represent a Python class.''' - def __init__(self, module, name, super, file, lineno): - self.module = module - self.name = name - if super is None: - super = [] - self.super = super - self.methods = {} - self.file = file - self.lineno = lineno - - def _addmethod(self, name, lineno): - self.methods[name] = lineno - -class Function(Class): - '''Class to represent a top-level Python function''' - def __init__(self, module, name, file, lineno): - Class.__init__(self, module, name, None, file, lineno) - def _addmethod(self, name, lineno): - assert 0, "Function._addmethod() shouldn't be called" - -def readmodule(module, path=[], inpackage=0): - '''Backwards compatible interface. - - Like readmodule_ex() but strips Function objects from the - resulting dictionary.''' - - dict = readmodule_ex(module, path, inpackage) - res = {} - for key, value in dict.items(): - if not isinstance(value, Function): - res[key] = value - return res - -def readmodule_ex(module, path=[], inpackage=0): - '''Read a module file and return a dictionary of classes. - - Search for MODULE in PATH and sys.path, read and parse the - module and return a dictionary with one entry for each class - found in the module.''' - - dict = {} - - i = string.rfind(module, '.') - if i >= 0: - # Dotted module name - package = string.strip(module[:i]) - submodule = string.strip(module[i+1:]) - parent = readmodule(package, path, inpackage) - child = readmodule(submodule, parent['__path__'], 1) - return child - - if _modules.has_key(module): - # we've seen this module before... - return _modules[module] - if module in sys.builtin_module_names: - # this is a built-in module - _modules[module] = dict - return dict - - # search the path for the module - f = None - if inpackage: - try: - f, file, (suff, mode, type) = \ - imp.find_module(module, path) - except ImportError: - f = None - if f is None: - fullpath = list(path) + sys.path - f, file, (suff, mode, type) = imp.find_module(module, fullpath) - if type == imp.PKG_DIRECTORY: - dict['__path__'] = [file] - _modules[module] = dict - path = [file] + path - f, file, (suff, mode, type) = \ - imp.find_module('__init__', [file]) - if type != imp.PY_SOURCE: - # not Python source, can't do anything with this module - f.close() - _modules[module] = dict - return dict - - _modules[module] = dict - imports = [] - classstack = [] # stack of (class, indent) pairs - src = f.read() - f.close() - - # To avoid having to stop the regexp at each newline, instead - # when we need a line number we simply string.count the number of - # newlines in the string since the last time we did this; i.e., - # lineno = lineno + \ - # string.count(src, '\n', last_lineno_pos, here) - # last_lineno_pos = here - countnl = string.count - lineno, last_lineno_pos = 1, 0 - i = 0 - while 1: - m = _getnext(src, i) - if not m: - break - start, i = m.span() - - if m.start("Method") >= 0: - # found a method definition or function - thisindent = _indent(m.group("MethodIndent")) - meth_name = m.group("MethodName") - lineno = lineno + \ - countnl(src, '\n', - last_lineno_pos, start) - last_lineno_pos = start - # close all classes indented at least as much - while classstack and \ - classstack[-1][1] >= thisindent: - del classstack[-1] - if classstack: - # it's a class method - cur_class = classstack[-1][0] - cur_class._addmethod(meth_name, lineno) - else: - # it's a function - f = Function(module, meth_name, - file, lineno) - dict[meth_name] = f - - elif m.start("String") >= 0: - pass - - elif m.start("Class") >= 0: - # we found a class definition - thisindent = _indent(m.group("ClassIndent")) - # close all classes indented at least as much - while classstack and \ - classstack[-1][1] >= thisindent: - del classstack[-1] - lineno = lineno + \ - countnl(src, '\n', last_lineno_pos, start) - last_lineno_pos = start - class_name = m.group("ClassName") - inherit = m.group("ClassSupers") - if inherit: - # the class inherits from other classes - inherit = string.strip(inherit[1:-1]) - names = [] - for n in string.splitfields(inherit, ','): - n = string.strip(n) - if dict.has_key(n): - # we know this super class - n = dict[n] - else: - c = string.splitfields(n, '.') - if len(c) > 1: - # super class - # is of the - # form module.class: - # look in - # module for class - m = c[-2] - c = c[-1] - if _modules.has_key(m): - d = _modules[m] - if d.has_key(c): - n = d[c] - names.append(n) - inherit = names - # remember this class - cur_class = Class(module, class_name, inherit, - file, lineno) - dict[class_name] = cur_class - classstack.append((cur_class, thisindent)) - - elif m.start("Import") >= 0: - # import module - for n in string.split(m.group("ImportList"), ','): - n = string.strip(n) - try: - # recursively read the imported module - d = readmodule(n, path, inpackage) - except: - ##print 'module', n, 'not found' - pass - - elif m.start("ImportFrom") >= 0: - # from module import stuff - mod = m.group("ImportFromPath") - names = string.split(m.group("ImportFromList"), ',') - try: - # recursively read the imported module - d = readmodule(mod, path, inpackage) - except: - ##print 'module', mod, 'not found' - continue - # add any classes that were defined in the - # imported module to our name space if they - # were mentioned in the list - for n in names: - n = string.strip(n) - if d.has_key(n): - dict[n] = d[n] - elif n == '*': - # only add a name if not - # already there (to mimic what - # Python does internally) - # also don't add names that - # start with _ - for n in d.keys(): - if n[0] != '_' and \ - not dict.has_key(n): - dict[n] = d[n] - else: - assert 0, "regexp _getnext found something unexpected" - - return dict - -def _indent(ws, _expandtabs=string.expandtabs): - return len(_expandtabs(ws, TABWIDTH)) diff --git a/Tools/idle/tabnanny.py b/Tools/idle/tabnanny.py deleted file mode 100755 index 8d3eab5..0000000 --- a/Tools/idle/tabnanny.py +++ /dev/null @@ -1,372 +0,0 @@ -#! /usr/bin/env python - -"""The Tab Nanny despises ambiguous indentation. She knows no mercy.""" - -# Released to the public domain, by Tim Peters, 15 April 1998. - -# XXX Note: this is now a standard library module. -# XXX The API needs to undergo changes however; the current code is too -# XXX script-like. This will be addressed later. - -__version__ = "6" - -import os -import sys -import string -import getopt -import tokenize - -verbose = 0 -filename_only = 0 - -def errprint(*args): - sep = "" - for arg in args: - sys.stderr.write(sep + str(arg)) - sep = " " - sys.stderr.write("\n") - -def main(): - global verbose, filename_only - try: - opts, args = getopt.getopt(sys.argv[1:], "qv") - except getopt.error, msg: - errprint(msg) - return - for o, a in opts: - if o == '-q': - filename_only = filename_only + 1 - if o == '-v': - verbose = verbose + 1 - if not args: - errprint("Usage:", sys.argv[0], "[-v] file_or_directory ...") - return - for arg in args: - check(arg) - -class NannyNag: - def __init__(self, lineno, msg, line): - self.lineno, self.msg, self.line = lineno, msg, line - def get_lineno(self): - return self.lineno - def get_msg(self): - return self.msg - def get_line(self): - return self.line - -def check(file): - if os.path.isdir(file) and not os.path.islink(file): - if verbose: - print "%s: listing directory" % `file` - names = os.listdir(file) - for name in names: - fullname = os.path.join(file, name) - if (os.path.isdir(fullname) and - not os.path.islink(fullname) or - os.path.normcase(name[-3:]) == ".py"): - check(fullname) - return - - try: - f = open(file) - except IOError, msg: - errprint("%s: I/O Error: %s" % (`file`, str(msg))) - return - - if verbose > 1: - print "checking", `file`, "..." - - reset_globals() - try: - tokenize.tokenize(f.readline, tokeneater) - - except tokenize.TokenError, msg: - errprint("%s: Token Error: %s" % (`file`, str(msg))) - return - - except NannyNag, nag: - badline = nag.get_lineno() - line = nag.get_line() - if verbose: - print "%s: *** Line %d: trouble in tab city! ***" % ( - `file`, badline) - print "offending line:", `line` - print nag.get_msg() - else: - if ' ' in file: file = '"' + file + '"' - if filename_only: print file - else: print file, badline, `line` - return - - if verbose: - print "%s: Clean bill of health." % `file` - -class Whitespace: - # the characters used for space and tab - S, T = ' \t' - - # members: - # raw - # the original string - # n - # the number of leading whitespace characters in raw - # nt - # the number of tabs in raw[:n] - # norm - # the normal form as a pair (count, trailing), where: - # count - # a tuple such that raw[:n] contains count[i] - # instances of S * i + T - # trailing - # the number of trailing spaces in raw[:n] - # It's A Theorem that m.indent_level(t) == - # n.indent_level(t) for all t >= 1 iff m.norm == n.norm. - # is_simple - # true iff raw[:n] is of the form (T*)(S*) - - def __init__(self, ws): - self.raw = ws - S, T = Whitespace.S, Whitespace.T - count = [] - b = n = nt = 0 - for ch in self.raw: - if ch == S: - n = n + 1 - b = b + 1 - elif ch == T: - n = n + 1 - nt = nt + 1 - if b >= len(count): - count = count + [0] * (b - len(count) + 1) - count[b] = count[b] + 1 - b = 0 - else: - break - self.n = n - self.nt = nt - self.norm = tuple(count), b - self.is_simple = len(count) <= 1 - - # return length of longest contiguous run of spaces (whether or not - # preceding a tab) - def longest_run_of_spaces(self): - count, trailing = self.norm - return max(len(count)-1, trailing) - - def indent_level(self, tabsize): - # count, il = self.norm - # for i in range(len(count)): - # if count[i]: - # il = il + (i/tabsize + 1)*tabsize * count[i] - # return il - - # quicker: - # il = trailing + sum (i/ts + 1)*ts*count[i] = - # trailing + ts * sum (i/ts + 1)*count[i] = - # trailing + ts * sum i/ts*count[i] + count[i] = - # trailing + ts * [(sum i/ts*count[i]) + (sum count[i])] = - # trailing + ts * [(sum i/ts*count[i]) + num_tabs] - # and note that i/ts*count[i] is 0 when i < ts - - count, trailing = self.norm - il = 0 - for i in range(tabsize, len(count)): - il = il + i/tabsize * count[i] - return trailing + tabsize * (il + self.nt) - - # return true iff self.indent_level(t) == other.indent_level(t) - # for all t >= 1 - def equal(self, other): - return self.norm == other.norm - - # return a list of tuples (ts, i1, i2) such that - # i1 == self.indent_level(ts) != other.indent_level(ts) == i2. - # Intended to be used after not self.equal(other) is known, in which - # case it will return at least one witnessing tab size. - def not_equal_witness(self, other): - n = max(self.longest_run_of_spaces(), - other.longest_run_of_spaces()) + 1 - a = [] - for ts in range(1, n+1): - if self.indent_level(ts) != other.indent_level(ts): - a.append( (ts, - self.indent_level(ts), - other.indent_level(ts)) ) - return a - - # Return true iff self.indent_level(t) < other.indent_level(t) - # for all t >= 1. - # The algorithm is due to Vincent Broman. - # Easy to prove it's correct. - # XXXpost that. - # Trivial to prove n is sharp (consider T vs ST). - # Unknown whether there's a faster general way. I suspected so at - # first, but no longer. - # For the special (but common!) case where M and N are both of the - # form (T*)(S*), M.less(N) iff M.len() < N.len() and - # M.num_tabs() <= N.num_tabs(). Proof is easy but kinda long-winded. - # XXXwrite that up. - # Note that M is of the form (T*)(S*) iff len(M.norm[0]) <= 1. - def less(self, other): - if self.n >= other.n: - return 0 - if self.is_simple and other.is_simple: - return self.nt <= other.nt - n = max(self.longest_run_of_spaces(), - other.longest_run_of_spaces()) + 1 - # the self.n >= other.n test already did it for ts=1 - for ts in range(2, n+1): - if self.indent_level(ts) >= other.indent_level(ts): - return 0 - return 1 - - # return a list of tuples (ts, i1, i2) such that - # i1 == self.indent_level(ts) >= other.indent_level(ts) == i2. - # Intended to be used after not self.less(other) is known, in which - # case it will return at least one witnessing tab size. - def not_less_witness(self, other): - n = max(self.longest_run_of_spaces(), - other.longest_run_of_spaces()) + 1 - a = [] - for ts in range(1, n+1): - if self.indent_level(ts) >= other.indent_level(ts): - a.append( (ts, - self.indent_level(ts), - other.indent_level(ts)) ) - return a - -def format_witnesses(w): - import string - firsts = map(lambda tup: str(tup[0]), w) - prefix = "at tab size" - if len(w) > 1: - prefix = prefix + "s" - return prefix + " " + string.join(firsts, ', ') - -# The collection of globals, the reset_globals() function, and the -# tokeneater() function, depend on which version of tokenize is -# in use. - -if hasattr(tokenize, 'NL'): - # take advantage of Guido's patch! - - indents = [] - check_equal = 0 - - def reset_globals(): - global indents, check_equal - check_equal = 0 - indents = [Whitespace("")] - - def tokeneater(type, token, start, end, line, - INDENT=tokenize.INDENT, - DEDENT=tokenize.DEDENT, - NEWLINE=tokenize.NEWLINE, - JUNK=(tokenize.COMMENT, tokenize.NL) ): - global indents, check_equal - - if type == NEWLINE: - # a program statement, or ENDMARKER, will eventually follow, - # after some (possibly empty) run of tokens of the form - # (NL | COMMENT)* (INDENT | DEDENT+)? - # If an INDENT appears, setting check_equal is wrong, and will - # be undone when we see the INDENT. - check_equal = 1 - - elif type == INDENT: - check_equal = 0 - thisguy = Whitespace(token) - if not indents[-1].less(thisguy): - witness = indents[-1].not_less_witness(thisguy) - msg = "indent not greater e.g. " + format_witnesses(witness) - raise NannyNag(start[0], msg, line) - indents.append(thisguy) - - elif type == DEDENT: - # there's nothing we need to check here! what's important is - # that when the run of DEDENTs ends, the indentation of the - # program statement (or ENDMARKER) that triggered the run is - # equal to what's left at the top of the indents stack - - # Ouch! This assert triggers if the last line of the source - # is indented *and* lacks a newline -- then DEDENTs pop out - # of thin air. - # assert check_equal # else no earlier NEWLINE, or an earlier INDENT - check_equal = 1 - - del indents[-1] - - elif check_equal and type not in JUNK: - # this is the first "real token" following a NEWLINE, so it - # must be the first token of the next program statement, or an - # ENDMARKER; the "line" argument exposes the leading whitespace - # for this statement; in the case of ENDMARKER, line is an empty - # string, so will properly match the empty string with which the - # "indents" stack was seeded - check_equal = 0 - thisguy = Whitespace(line) - if not indents[-1].equal(thisguy): - witness = indents[-1].not_equal_witness(thisguy) - msg = "indent not equal e.g. " + format_witnesses(witness) - raise NannyNag(start[0], msg, line) - -else: - # unpatched version of tokenize - - nesting_level = 0 - indents = [] - check_equal = 0 - - def reset_globals(): - global nesting_level, indents, check_equal - nesting_level = check_equal = 0 - indents = [Whitespace("")] - - def tokeneater(type, token, start, end, line, - INDENT=tokenize.INDENT, - DEDENT=tokenize.DEDENT, - NEWLINE=tokenize.NEWLINE, - COMMENT=tokenize.COMMENT, - OP=tokenize.OP): - global nesting_level, indents, check_equal - - if type == INDENT: - check_equal = 0 - thisguy = Whitespace(token) - if not indents[-1].less(thisguy): - witness = indents[-1].not_less_witness(thisguy) - msg = "indent not greater e.g. " + format_witnesses(witness) - raise NannyNag(start[0], msg, line) - indents.append(thisguy) - - elif type == DEDENT: - del indents[-1] - - elif type == NEWLINE: - if nesting_level == 0: - check_equal = 1 - - elif type == COMMENT: - pass - - elif check_equal: - check_equal = 0 - thisguy = Whitespace(line) - if not indents[-1].equal(thisguy): - witness = indents[-1].not_equal_witness(thisguy) - msg = "indent not equal e.g. " + format_witnesses(witness) - raise NannyNag(start[0], msg, line) - - if type == OP and token in ('{', '[', '('): - nesting_level = nesting_level + 1 - - elif type == OP and token in ('}', ']', ')'): - if nesting_level == 0: - raise NannyNag(start[0], - "unbalanced bracket '" + token + "'", - line) - nesting_level = nesting_level - 1 - -if __name__ == '__main__': - main() - |