diff options
-rw-r--r-- | Tools/idle/pyclbr.py | 336 |
1 files changed, 336 insertions, 0 deletions
"""Parse a Python file and retrieve classes, methods and functions.

Parse enough of a Python file to recognize class and method
definitions and to find out the superclasses of a class.

The interface consists of two functions:
        readmodule(module, path)
        readmodule_ex(module, path)
module is the name of a Python module, path is an optional list of
directories where the module is to be searched.  If present, path is
prepended to the system search path sys.path.
The return value is a dictionary.  The keys of the dictionary are
the names of the classes defined in the module (including classes
that are defined via the from XXX import YYY construct).  The values
are class instances of the class Class defined here.  readmodule_ex()
additionally returns top-level functions as Function instances;
readmodule() strips them.

A class is described by the class Class in this module.  Instances
of this class have the following instance variables:
        module -- the name of the module in which the class was defined
        name -- the name of the class
        super -- a list of super classes (Class instances)
        methods -- a dictionary of methods
        file -- the file in which the class was defined
        lineno -- the line in the file on which the class statement occurred
The dictionary of methods uses the method names as keys and the line
numbers on which the method was defined as values.
If the name of a super class is not recognized, the corresponding
entry in the list of super classes is not a class instance but a
string giving the name of the super class.  Since import statements
are recognized and imported modules are scanned as well, this
shouldn't happen often.

BUGS
- Continuation lines are not dealt with at all.
- While triple-quoted strings won't confuse it, lines that look like
  def, class, import or "from ... import" stmts inside backslash-continued
  single-quoted strings are treated like code.  The expense of stopping
  that isn't worth it.
- Code that doesn't pass tabnanny or python -t will confuse it, unless
  you set the module variable TABWIDTH (default 8) to the correct tab
  width for the file.

PACKAGE RELATED BUGS
- If you have a package and a module inside that or another package
  with the same name, module caching doesn't work properly since the
  key is the base name of the module/package.
- The only entry that is returned when you readmodule a package is a
  __path__ whose value is a list which confuses certain class browsers.
- When code does:
  from package import subpackage
  class MyClass(subpackage.SuperClass):
    ...
  It can't locate the parent.  It probably needs to have the same
  hairy logic that the import locator already does.  (This logic
  exists coded in Python in the freeze package.)
"""

import os
import sys
try:
    import imp
except ImportError:
    # Keep the module importable on interpreters that no longer ship
    # ``imp``; readmodule_ex() reports the problem when it needs it.
    imp = None
import re
import string

# Tab width used when converting leading whitespace to an indent column.
TABWIDTH = 8

# Scanner: bound ``search`` of one big alternation.  Exactly one of the
# named groups String / Method / Class / Import / ImportFrom matches.
# Triple-quoted strings are matched (and then ignored) so that code-like
# text inside them is skipped.
_getnext = re.compile(r"""
    (?P<String>
       \""" [^"\\]* (?:
                        (?: \\. | "(?!"") )
                        [^"\\]*
                    )*
       \"""

    |   ''' [^'\\]* (?:
                        (?: \\. | '(?!'') )
                        [^'\\]*
                    )*
        '''
    )

|   (?P<Method>
        ^
        (?P<MethodIndent> [ \t]* )
        def [ \t]+
        (?P<MethodName> [a-zA-Z_] \w* )
        [ \t]* \(
    )

|   (?P<Class>
        ^
        (?P<ClassIndent> [ \t]* )
        class [ \t]+
        (?P<ClassName> [a-zA-Z_] \w* )
        [ \t]*
        (?P<ClassSupers> \( [^)\n]* \) )?
        [ \t]* :
    )

|   (?P<Import>
        ^ import [ \t]+
        (?P<ImportList> [^#;\n]+ )
    )

|   (?P<ImportFrom>
        ^ from [ \t]+
        (?P<ImportFromPath>
            [a-zA-Z_] \w*
            (?:
                [ \t]* \. [ \t]* [a-zA-Z_] \w*
            )*
        )
        [ \t]+
        import [ \t]+
        (?P<ImportFromList> [^#;\n]+ )
    )
""", re.VERBOSE | re.DOTALL | re.MULTILINE).search

_modules = {}                           # cache of modules we've seen


# each Python class is represented by an instance of this class
class Class:
    '''Class to represent a Python class.

    Attributes: module, name, super (list of Class instances or plain
    name strings; empty when no bases were given), methods (dict mapping
    method name to line number), file and lineno.
    '''

    def __init__(self, module, name, super, file, lineno):
        self.module = module
        self.name = name
        if super is None:
            super = []
        self.super = super
        self.methods = {}
        self.file = file
        self.lineno = lineno

    def _addmethod(self, name, lineno):
        '''Record that method NAME is defined on line LINENO.'''
        self.methods[name] = lineno


class Function(Class):
    '''Class to represent a top-level Python function'''

    def __init__(self, module, name, file, lineno):
        Class.__init__(self, module, name, None, file, lineno)

    def _addmethod(self, name, lineno):
        assert 0, "Function._addmethod() shouldn't be called"


def readmodule(module, path=None, inpackage=0):
    '''Backwards compatible interface.

    Like readmodule_ex() but strips Function objects from the
    resulting dictionary.  A new dictionary is returned on every call.'''

    res = {}
    for key, value in readmodule_ex(module, path, inpackage).items():
        if not isinstance(value, Function):
            res[key] = value
    return res


def readmodule_ex(module, path=None, inpackage=0):
    '''Read a module file and return a dictionary of classes.

    Search for MODULE in PATH and sys.path, read and parse the
    module and return a dictionary with one entry for each class
    and each top-level function found in the module.

    PATH is an optional list of directories searched before sys.path.
    When INPACKAGE is true, PATH alone is tried first.

    The dictionary is cached in _modules under MODULE and the cached
    object itself is returned on later calls.  Built-in modules and
    modules that are not Python source yield a dictionary without any
    class entries.  For a package the dictionary has a '__path__' entry.

    Raises ImportError if the module cannot be located, and KeyError
    if MODULE is dotted but its parent is not a package.'''

    if path is None:
        path = []
    classes = {}

    i = module.rfind('.')
    if i >= 0:
        # Dotted module name: resolve the parent package first, then
        # look the last component up inside the package directory.
        package = module[:i].strip()
        submodule = module[i+1:].strip()
        parent = readmodule(package, path, inpackage)
        # Use readmodule_ex for the child so Function entries survive;
        # readmodule() callers still get them stripped by readmodule().
        return readmodule_ex(submodule, parent['__path__'], 1)

    if module in _modules:
        # we've seen this module before...
        return _modules[module]
    if module in sys.builtin_module_names:
        # this is a built-in module
        _modules[module] = classes
        return classes

    if imp is None:
        raise ImportError("the 'imp' module is required to locate %r "
                          "but is not available" % (module,))

    # search the path for the module
    f = None
    if inpackage:
        try:
            f, filename, (_suffix, _mode, kind) = \
                    imp.find_module(module, path)
        except ImportError:
            f = None
    if f is None:
        fullpath = list(path) + sys.path
        f, filename, (_suffix, _mode, kind) = \
                imp.find_module(module, fullpath)
    if kind == imp.PKG_DIRECTORY:
        classes['__path__'] = [filename]
        _modules[module] = classes
        path = [filename] + path
        f, filename, (_suffix, _mode, kind) = \
                imp.find_module('__init__', [filename])
    if kind != imp.PY_SOURCE:
        # not Python source, can't do anything with this module
        if f is not None:
            f.close()
        _modules[module] = classes
        return classes

    # Register before scanning so that recursive imports of this module
    # hit the cache instead of recursing forever.
    _modules[module] = classes
    try:
        src = f.read()
    finally:
        f.close()

    _scan_source(src, module, filename, path, inpackage, classes)
    return classes


def _resolve_supers(inherit, classes):
    '''Turn the text "(A, mod.B)" of a class statement into a list.

    Each entry is the Class instance when the name is found in CLASSES
    (or, for a dotted name, in the cached module named by the
    next-to-last component); otherwise it is the name string itself.
    Empty names (from "()" or a trailing comma) are skipped.'''
    names = []
    for n in inherit[1:-1].strip().split(','):
        n = n.strip()
        if not n:
            continue
        if n in classes:
            # we know this super class
            n = classes[n]
        else:
            parts = n.split('.')
            if len(parts) > 1:
                # super class is of the form module.class:
                # look in module for class
                modname = parts[-2]
                clsname = parts[-1]
                if modname in _modules:
                    d = _modules[modname]
                    if clsname in d:
                        n = d[clsname]
        names.append(n)
    return names


def _scan_source(src, module, file, path, inpackage, classes):
    '''Scan the source text SRC and fill CLASSES in place.

    MODULE and FILE are stored on the created Class/Function objects;
    PATH and INPACKAGE are passed on when imported modules are read.'''
    classstack = []                     # stack of (class, indent) pairs

    # To avoid having to stop the regexp at each newline, the line
    # number is advanced lazily: whenever one is needed, count the
    # newlines between the previous counted position and the new match.
    lineno, last_lineno_pos = 1, 0
    i = 0
    while 1:
        m = _getnext(src, i)
        if m is None:
            break
        start, i = m.span()

        if m.start("Method") >= 0:
            # found a method definition or function
            thisindent = _indent(m.group("MethodIndent"))
            meth_name = m.group("MethodName")
            lineno = lineno + src.count('\n', last_lineno_pos, start)
            last_lineno_pos = start
            # close all classes indented at least as much
            while classstack and classstack[-1][1] >= thisindent:
                del classstack[-1]
            if classstack:
                # it's a class method
                cur_class = classstack[-1][0]
                cur_class._addmethod(meth_name, lineno)
            else:
                # it's a function
                classes[meth_name] = Function(module, meth_name,
                                              file, lineno)

        elif m.start("String") >= 0:
            # triple-quoted string: skip its contents
            pass

        elif m.start("Class") >= 0:
            # we found a class definition
            thisindent = _indent(m.group("ClassIndent"))
            # close all classes indented at least as much
            while classstack and classstack[-1][1] >= thisindent:
                del classstack[-1]
            lineno = lineno + src.count('\n', last_lineno_pos, start)
            last_lineno_pos = start
            class_name = m.group("ClassName")
            inherit = m.group("ClassSupers")
            if inherit:
                # the class inherits from other classes
                inherit = _resolve_supers(inherit, classes)
            # remember this class
            cur_class = Class(module, class_name, inherit, file, lineno)
            classes[class_name] = cur_class
            classstack.append((cur_class, thisindent))

        elif m.start("Import") >= 0:
            # import module
            for n in m.group("ImportList").split(','):
                n = n.strip()
                try:
                    # recursively read the imported module; only the
                    # side effect of filling the cache is wanted
                    readmodule_ex(n, path, inpackage)
                except Exception:
                    # best effort: a module we can't find or read
                    # must not abort the scan
                    pass

        elif m.start("ImportFrom") >= 0:
            # from module import stuff
            mod = m.group("ImportFromPath")
            names = m.group("ImportFromList").split(',')
            try:
                # recursively read the imported module
                d = readmodule(mod, path, inpackage)
            except Exception:
                # best effort, as above
                continue
            # add any classes that were defined in the imported module
            # to our name space if they were mentioned in the list
            for n in names:
                n = n.strip()
                if n in d:
                    classes[n] = d[n]
                elif n == '*':
                    # only add a name if not already there (to mimic
                    # what Python does internally); also don't add
                    # names that start with _
                    for name in d.keys():
                        if name[0] != '_' and name not in classes:
                            classes[name] = d[name]
        else:
            assert 0, "regexp _getnext found something unexpected"


def _indent(ws, _expandtabs=str.expandtabs):
    '''Return the column width of the whitespace WS, expanding tabs to
    TABWIDTH (read from the module at call time).'''
    return len(_expandtabs(ws, TABWIDTH))