From 0a6f954766cd091b117a1c5b16bdb0d2a837c2a8 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Tue, 3 Dec 2002 08:14:35 +0000 Subject: Another big update, fixing all known bugs related to nesting functions and classes. Also add a mini main program that dumps the results for a given file or module. --- Lib/pyclbr.py | 130 ++++++++++++++++++++++++++++++++++++---------------------- 1 file changed, 80 insertions(+), 50 deletions(-) diff --git a/Lib/pyclbr.py b/Lib/pyclbr.py index aa00f6f..26cc0ce 100644 --- a/Lib/pyclbr.py +++ b/Lib/pyclbr.py @@ -1,18 +1,19 @@ -"""Parse a Python file and retrieve classes and methods. +"""Parse a Python module and describe its classes and methods. -Parse enough of a Python file to recognize class and method -definitions and to find out the superclasses of a class. +Parse enough of a Python file to recognize imports and class and +method definitions, and to find out the superclasses of a class. The interface consists of a single function: - readmodule_ex(module [, path[, inpackage]]) -module is the name of a Python module, path is an optional list of -directories where the module is to be searched. If present, path is -prepended to the system search path sys.path. (inpackage is used -internally to search for a submodule of a package.) -The return value is a dictionary. The keys of the dictionary are -the names of the classes defined in the module (including classes -that are defined via the from XXX import YYY construct). The values -are class instances of the class Class defined here. + readmodule_ex(module [, path]) +where module is the name of a Python module, and path is an optional +list of directories where the module is to be searched. If present, +path is prepended to the system search path sys.path. The return +value is a dictionary. The keys of the dictionary are the names of +the classes defined in the module (including classes that are defined +via the from XXX import YYY construct). The values are class +instances of the class Class defined here. One special key/value pair +is present for packages: the key '__path__' has a list as its value +which contains the package search path. A class is described by the class Class in this module. Instances of this class have the following instance variables: @@ -36,21 +37,12 @@ Instances of this class have the following instance variables: name -- the name of the class file -- the file in which the class was defined lineno -- the line in the file on which the class statement occurred - - -BUGS -- Nested classes and functions can confuse it. - -PACKAGE CAVEAT -- When you call readmodule_ex for a package, dict['__path__'] is a - list, which may confuse older class browsers. (readmodule filters - these out though.) """ import sys import imp import tokenize # Python tokenizer -from token import NAME +from token import NAME, DEDENT, NEWLINE __all__ = ["readmodule", "readmodule_ex", "Class", "Function"] @@ -86,14 +78,14 @@ def readmodule(module, path=[]): Call readmodule_ex() and then only keep Class objects from the resulting dictionary.''' - dict = readmodule_ex(module, path) + dict = _readmodule(module, path) res = {} for key, value in dict.items(): if isinstance(value, Class): res[key] = value return res -def readmodule_ex(module, path=[], inpackage=None): +def readmodule_ex(module, path=[]): '''Read a module file and return a dictionary of classes. Search for MODULE in PATH and sys.path, read and parse the @@ -105,7 +97,10 @@ def readmodule_ex(module, path=[], inpackage=None): package search path; otherwise, we are searching for a top-level module, and PATH is combined with sys.path. ''' + return _readmodule(module, path) +def _readmodule(module, path, inpackage=None): + '''Do the hard work for readmodule[_ex].''' # Compute the full module name (prepending inpackage if set) if inpackage: fullmodule = "%s.%s" % (inpackage, module) @@ -129,10 +124,10 @@ def readmodule_ex(module, path=[], inpackage=None): if i >= 0: package = module[:i] submodule = module[i+1:] - parent = readmodule_ex(package, path, inpackage) + parent = _readmodule(package, path, inpackage) if inpackage: package = "%s.%s" % (inpackage, package) - return readmodule_ex(submodule, parent['__path__'], package) + return _readmodule(submodule, parent['__path__'], package) # Search the path for the module f = None @@ -150,36 +145,42 @@ def readmodule_ex(module, path=[], inpackage=None): f.close() return dict - classstack = [] # stack of (class, indent) pairs + stack = [] # stack of (class, indent) pairs g = tokenize.generate_tokens(f.readline) try: for tokentype, token, start, end, line in g: - if token == 'def': + if tokentype == DEDENT: + lineno, thisindent = start + # close nested classes and defs + while stack and stack[-1][1] >= thisindent: + del stack[-1] + elif token == 'def': lineno, thisindent = start + # close previous nested classes and defs + while stack and stack[-1][1] >= thisindent: + del stack[-1] tokentype, meth_name, start, end, line = g.next() if tokentype != NAME: continue # Syntax error - # close all classes indented at least as much - while classstack and \ - classstack[-1][1] >= thisindent: - del classstack[-1] - if classstack: - # it's a class method - cur_class = classstack[-1][0] - cur_class._addmethod(meth_name, lineno) + if stack: + cur_class = stack[-1][0] + if isinstance(cur_class, Class): + # it's a method + cur_class._addmethod(meth_name, lineno) + # else it's a nested def else: # it's a function dict[meth_name] = Function(module, meth_name, file, lineno) + stack.append((None, thisindent)) # Marker for nested fns elif token == 'class': lineno, thisindent = start + # close previous nested classes and defs + while stack and stack[-1][1] >= thisindent: + del stack[-1] tokentype, class_name, start, end, line = g.next() if tokentype != NAME: continue # Syntax error - # close all classes indented at least as much - while classstack and \ - classstack[-1][1] >= thisindent: - del classstack[-1] # parse what follows the class name tokentype, token, start, end, line = g.next() inherit = None @@ -208,6 +209,7 @@ def readmodule_ex(module, path=[], inpackage=None): if c in d: n = d[c] names.append(n) + super = [] if token == '(': level += 1 elif token == ')': @@ -220,20 +222,21 @@ def readmodule_ex(module, path=[], inpackage=None): super.append(token) inherit = names cur_class = Class(module, class_name, inherit, file, lineno) - dict[class_name] = cur_class - classstack.append((cur_class, thisindent)) + if not stack: + dict[class_name] = cur_class + stack.append((cur_class, thisindent)) elif token == 'import' and start[1] == 0: modules = _getnamelist(g) for mod, mod2 in modules: try: # Recursively read the imported module if not inpackage: - readmodule_ex(mod, path) + _readmodule(mod, path) else: try: - readmodule_ex(mod, path, inpackage) + _readmodule(mod, path, inpackage) except ImportError: - readmodule_ex(mod) + _readmodule(mod, []) except: # If we can't find or parse the imported module, # too bad -- don't die here. @@ -245,7 +248,7 @@ def readmodule_ex(module, path=[], inpackage=None): names = _getnamelist(g) try: # Recursively read the imported module - d = readmodule_ex(mod, path, inpackage) + d = _readmodule(mod, path, inpackage) except: # If we can't find or parse the imported module, # too bad -- don't die here. @@ -256,11 +259,9 @@ def readmodule_ex(module, path=[], inpackage=None): if n in d: dict[n2 or n] = d[n] elif n == '*': - # only add a name if not already there (to mimic - # what Python does internally) also don't add - # names that start with _ + # don't add names that start with _ for n in d: - if n[0] != '_' and not n in dict: + if n[0] != '_': dict[n] = d[n] except StopIteration: pass @@ -306,3 +307,32 @@ def _getname(g): break parts.append(token) return (".".join(parts), token) + +def _main(): + # Main program for testing. + import os + mod = sys.argv[1] + if os.path.exists(mod): + path = [os.path.dirname(mod)] + mod = os.path.basename(mod) + if mod.lower().endswith(".py"): + mod = mod[:-3] + else: + path = [] + dict = readmodule_ex(mod, path) + objs = dict.values() + objs.sort(lambda a, b: cmp(getattr(a, 'lineno', 0), + getattr(b, 'lineno', 0))) + for obj in objs: + if isinstance(obj, Class): + print "class", obj.name, obj.super, obj.lineno + methods = obj.methods.items() + methods.sort(lambda a, b: cmp(a[1], b[1])) + for name, lineno in methods: + if name != "__path__": + print " def", name, lineno + elif isinstance(obj, Function): + print "def", obj.name, obj.lineno + +if __name__ == "__main__": + _main() -- cgit v0.12