diff options
author | csabella <cheryl.sabella@gmail.com> | 2017-07-04 01:31:25 (GMT) |
---|---|---|
committer | terryjreedy <tjreedy@udel.edu> | 2017-07-04 01:31:25 (GMT) |
commit | 246ff3bd00f97658e567a7087645a6b76e056491 (patch) | |
tree | 08110a51b6b57a925494b76c25547251cf6f5dac /Lib/pyclbr.py | |
parent | 6969eaf4682beb01bc95eeb14f5ce6c01312e297 (diff) | |
download | cpython-246ff3bd00f97658e567a7087645a6b76e056491.zip cpython-246ff3bd00f97658e567a7087645a6b76e056491.tar.gz cpython-246ff3bd00f97658e567a7087645a6b76e056491.tar.bz2 |
bpo-6691: Pyclbr now reports nested classes and functions. (#2503)
Original patch by Guilherme Polo. Revisions by Cheryl Sabella.
Diffstat (limited to 'Lib/pyclbr.py')
-rw-r--r-- | Lib/pyclbr.py | 311 |
1 file changed, 180 insertions, 131 deletions
diff --git a/Lib/pyclbr.py b/Lib/pyclbr.py index d7dba97..2c798df 100644 --- a/Lib/pyclbr.py +++ b/Lib/pyclbr.py @@ -1,42 +1,41 @@ -"""Parse a Python module and describe its classes and methods. +"""Parse a Python module and describe its classes and functions. Parse enough of a Python file to recognize imports and class and -method definitions, and to find out the superclasses of a class. +function definitions, and to find out the superclasses of a class. The interface consists of a single function: - readmodule_ex(module [, path]) + readmodule_ex(module, path=None) where module is the name of a Python module, and path is an optional list of directories where the module is to be searched. If present, -path is prepended to the system search path sys.path. The return -value is a dictionary. The keys of the dictionary are the names of -the classes defined in the module (including classes that are defined -via the from XXX import YYY construct). The values are class -instances of the class Class defined here. One special key/value pair -is present for packages: the key '__path__' has a list as its value -which contains the package search path. - -A class is described by the class Class in this module. Instances -of this class have the following instance variables: - module -- the module name - name -- the name of the class - super -- a list of super classes (Class instances) - methods -- a dictionary of methods - file -- the file in which the class was defined - lineno -- the line in the file on which the class statement occurred -The dictionary of methods uses the method names as keys and the line -numbers on which the method was defined as values. +path is prepended to the system search path sys.path. The return value +is a dictionary. The keys of the dictionary are the names of the +classes and functions defined in the module (including classes that are +defined via the from XXX import YYY construct). The values are +instances of classes Class and Function. 
One special key/value pair is +present for packages: the key '__path__' has a list as its value which +contains the package search path. + +Classes and Functions have a common superclass: _Object. Every instance +has the following attributes: + module -- name of the module; + name -- name of the object; + file -- file in which the object is defined; + lineno -- line in the file where the object's definition starts; + parent -- parent of this object, if any; + children -- nested objects contained in this object. +The 'children' attribute is a dictionary mapping names to objects. + +Instances of Function describe functions with the attributes from _Object. + +Instances of Class describe classes with the attributes from _Object, +plus the following: + super -- list of super classes (Class instances if possible); + methods -- mapping of method names to beginning line numbers. If the name of a super class is not recognized, the corresponding entry in the list of super classes is not a class instance but a string giving the name of the super class. Since import statements are recognized and imported modules are scanned as well, this shouldn't happen often. - -A function is described by the class Function in this module. -Instances of this class have the following instance variables: - module -- the module name - name -- the name of the class - file -- the file in which the class was defined - lineno -- the line in the file on which the class statement occurred """ import io @@ -47,37 +46,59 @@ from token import NAME, DEDENT, OP __all__ = ["readmodule", "readmodule_ex", "Class", "Function"] -_modules = {} # cache of modules we've seen +_modules = {} # Initialize cache of modules we've seen. -# each Python class is represented by an instance of this class -class Class: - '''Class to represent a Python class.''' - def __init__(self, module, name, super, file, lineno): + +class _Object: + "Information about Python class or function."
+ def __init__(self, module, name, file, lineno, parent): self.module = module self.name = name - if super is None: - super = [] - self.super = super - self.methods = {} self.file = file self.lineno = lineno + self.parent = parent + self.children = {} + + def _addchild(self, name, obj): + self.children[name] = obj + + +class Function(_Object): + "Information about a Python function, including methods." + def __init__(self, module, name, file, lineno, parent=None): + _Object.__init__(self, module, name, file, lineno, parent) + + +class Class(_Object): + "Information about a Python class." + def __init__(self, module, name, super, file, lineno, parent=None): + _Object.__init__(self, module, name, file, lineno, parent) + self.super = [] if super is None else super + self.methods = {} def _addmethod(self, name, lineno): self.methods[name] = lineno -class Function: - '''Class to represent a top-level Python function''' - def __init__(self, module, name, file, lineno): - self.module = module - self.name = name - self.file = file - self.lineno = lineno + +def _nest_function(ob, func_name, lineno): + "Return a Function after nesting within ob." + newfunc = Function(ob.module, func_name, ob.file, lineno, ob) + ob._addchild(func_name, newfunc) + if isinstance(ob, Class): + ob._addmethod(func_name, lineno) + return newfunc + +def _nest_class(ob, class_name, lineno, super=None): + "Return a Class after nesting within ob." + newclass = Class(ob.module, class_name, super, ob.file, lineno, ob) + ob._addchild(class_name, newclass) + return newclass def readmodule(module, path=None): - '''Backwards compatible interface. + """Return Class objects for the top-level classes in module. - Call readmodule_ex() and then only keep Class objects from the - resulting dictionary.''' + This is the original interface, before Functions were added. 
+ """ res = {} for key, value in _readmodule(module, path or []).items(): @@ -86,41 +107,41 @@ def readmodule(module, path=None): return res def readmodule_ex(module, path=None): - '''Read a module file and return a dictionary of classes. + """Return a dictionary with all functions and classes in module. - Search for MODULE in PATH and sys.path, read and parse the - module and return a dictionary with one entry for each class - found in the module. - ''' + Search for module in PATH + sys.path. + If possible, include imported superclasses. + Do this by reading source, without importing (and executing) it. + """ return _readmodule(module, path or []) def _readmodule(module, path, inpackage=None): - '''Do the hard work for readmodule[_ex]. + """Do the hard work for readmodule[_ex]. - If INPACKAGE is given, it must be the dotted name of the package in + If inpackage is given, it must be the dotted name of the package in which we are searching for a submodule, and then PATH must be the package search path; otherwise, we are searching for a top-level - module, and PATH is combined with sys.path. - ''' - # Compute the full module name (prepending inpackage if set) + module, and path is combined with sys.path. + """ + # Compute the full module name (prepending inpackage if set). if inpackage is not None: fullmodule = "%s.%s" % (inpackage, module) else: fullmodule = module - # Check in the cache + # Check in the cache. if fullmodule in _modules: return _modules[fullmodule] - # Initialize the dict for this module's contents - dict = {} + # Initialize the dict for this module's contents. + tree = {} - # Check if it is a built-in module; we don't do much for these + # Check if it is a built-in module; we don't do much for these. if module in sys.builtin_module_names and inpackage is None: - _modules[module] = dict - return dict + _modules[module] = tree + return tree - # Check for a dotted module name + # Check for a dotted module name. 
i = module.rfind('.') if i >= 0: package = module[:i] @@ -132,88 +153,97 @@ def _readmodule(module, path, inpackage=None): raise ImportError('No package named {}'.format(package)) return _readmodule(submodule, parent['__path__'], package) - # Search the path for the module + # Search the path for the module. f = None if inpackage is not None: search_path = path else: search_path = path + sys.path - # XXX This will change once issue19944 lands. spec = importlib.util._find_spec_from_path(fullmodule, search_path) - _modules[fullmodule] = dict - # is module a package? + _modules[fullmodule] = tree + # Is module a package? if spec.submodule_search_locations is not None: - dict['__path__'] = spec.submodule_search_locations + tree['__path__'] = spec.submodule_search_locations try: source = spec.loader.get_source(fullmodule) if source is None: - return dict + return tree except (AttributeError, ImportError): - # not Python source, can't do anything with this module - return dict + # If module is not Python source, we cannot do anything. + return tree fname = spec.loader.get_filename(fullmodule) + return _create_tree(fullmodule, path, fname, source, tree, inpackage) + +def _create_tree(fullmodule, path, fname, source, tree, inpackage): + """Return the tree for a particular module. + + fullmodule (full module name), inpackage+module, becomes o.module. + path is passed to recursive calls of _readmodule. + fname becomes o.file. + source is tokenized. Imports cause recursive calls to _readmodule. + tree is {} or {'__path__': <submodule search locations>}. + inpackage, None or string, is passed to recursive calls of _readmodule. + + The effect of recursive calls is mutation of global _modules. + """ f = io.StringIO(source) - stack = [] # stack of (class, indent) pairs + stack = [] # Initialize stack of (class, indent) pairs. 
g = tokenize.generate_tokens(f.readline) try: for tokentype, token, start, _end, _line in g: if tokentype == DEDENT: lineno, thisindent = start - # close nested classes and defs + # Close previous nested classes and defs. while stack and stack[-1][1] >= thisindent: del stack[-1] elif token == 'def': lineno, thisindent = start - # close previous nested classes and defs + # Close previous nested classes and defs. while stack and stack[-1][1] >= thisindent: del stack[-1] - tokentype, meth_name, start = next(g)[0:3] + tokentype, func_name, start = next(g)[0:3] if tokentype != NAME: - continue # Syntax error + continue # Skip def with syntax error. + cur_func = None if stack: - cur_class = stack[-1][0] - if isinstance(cur_class, Class): - # it's a method - cur_class._addmethod(meth_name, lineno) - # else it's a nested def + cur_obj = stack[-1][0] + cur_func = _nest_function(cur_obj, func_name, lineno) else: - # it's a function - dict[meth_name] = Function(fullmodule, meth_name, - fname, lineno) - stack.append((None, thisindent)) # Marker for nested fns + # It is just a function. + cur_func = Function(fullmodule, func_name, fname, lineno) + tree[func_name] = cur_func + stack.append((cur_func, thisindent)) elif token == 'class': lineno, thisindent = start - # close previous nested classes and defs + # Close previous nested classes and defs. while stack and stack[-1][1] >= thisindent: del stack[-1] tokentype, class_name, start = next(g)[0:3] if tokentype != NAME: - continue # Syntax error - # parse what follows the class name + continue # Skip class with syntax error. + # Parse what follows the class name. tokentype, token, start = next(g)[0:3] inherit = None if token == '(': - names = [] # List of superclasses - # there's a list of superclasses + names = [] # Initialize list of superclasses. level = 1 - super = [] # Tokens making up current superclass + super = [] # Tokens making up current superclass. 
while True: tokentype, token, start = next(g)[0:3] if token in (')', ',') and level == 1: n = "".join(super) - if n in dict: - # we know this super class - n = dict[n] + if n in tree: + # We know this super class. + n = tree[n] else: c = n.split('.') if len(c) > 1: - # super class is of the form - # module.class: look in module for - # class + # Super class form is module.class: + # look in module for class. m = c[-2] c = c[-1] if m in _modules: @@ -230,21 +260,25 @@ def _readmodule(module, path, inpackage=None): break elif token == ',' and level == 1: pass - # only use NAME and OP (== dot) tokens for type name + # Only use NAME and OP (== dot) tokens for type name. elif tokentype in (NAME, OP) and level == 1: super.append(token) - # expressions in the base list are not supported + # Expressions in the base list are not supported. inherit = names - cur_class = Class(fullmodule, class_name, inherit, - fname, lineno) - if not stack: - dict[class_name] = cur_class + if stack: + cur_obj = stack[-1][0] + cur_class = _nest_class( + cur_obj, class_name, lineno, inherit) + else: + cur_class = Class(fullmodule, class_name, inherit, + fname, lineno) + tree[class_name] = cur_class stack.append((cur_class, thisindent)) elif token == 'import' and start[1] == 0: modules = _getnamelist(g) for mod, _mod2 in modules: try: - # Recursively read the imported module + # Recursively read the imported module. if inpackage is None: _readmodule(mod, path) else: @@ -262,32 +296,34 @@ def _readmodule(module, path, inpackage=None): continue names = _getnamelist(g) try: - # Recursively read the imported module + # Recursively read the imported module. d = _readmodule(mod, path, inpackage) except: # If we can't find or parse the imported module, # too bad -- don't die here. 
continue - # add any classes that were defined in the imported module - # to our name space if they were mentioned in the list + # Add any classes that were defined in the imported module + # to our name space if they were mentioned in the list. for n, n2 in names: if n in d: - dict[n2 or n] = d[n] + tree[n2 or n] = d[n] elif n == '*': - # don't add names that start with _ + # Don't add names that start with _. for n in d: if n[0] != '_': - dict[n] = d[n] + tree[n] = d[n] except StopIteration: pass f.close() - return dict + return tree + def _getnamelist(g): - # Helper to get a comma-separated list of dotted names plus 'as' - # clauses. Return a list of pairs (name, name2) where name2 is - # the 'as' name, or None if there is no 'as' clause. + """Return list of (dotted-name, as-name or None) tuples for token source g. + + An as-name is the name that follows 'as' in an as clause. + """ names = [] while True: name, token = _getname(g) @@ -304,10 +340,9 @@ def _getnamelist(g): break return names + def _getname(g): - # Helper to get a dotted name, return a pair (name, token) where - # name is the dotted name, or None if there was no dotted name, - # and token is the next input token. + "Return (dotted-name or None, next-token) tuple for token source g." parts = [] tokentype, token = next(g)[0:2] if tokentype != NAME and token != '*': @@ -323,11 +358,14 @@ def _getname(g): parts.append(token) return (".".join(parts), token) + def _main(): - # Main program for testing. + "Print module output (default this file) for quick visual check." 
import os - from operator import itemgetter - mod = sys.argv[1] + try: + mod = sys.argv[1] + except: + mod = __file__ if os.path.exists(mod): path = [os.path.dirname(mod)] mod = os.path.basename(mod) @@ -335,18 +373,29 @@ def _main(): mod = mod[:-3] else: path = [] - dict = readmodule_ex(mod, path) - objs = list(dict.values()) - objs.sort(key=lambda a: getattr(a, 'lineno', 0)) - for obj in objs: + tree = readmodule_ex(mod, path) + lineno_key = lambda a: getattr(a, 'lineno', 0) + objs = sorted(tree.values(), key=lineno_key, reverse=True) + indent_level = 2 + while objs: + obj = objs.pop() + if isinstance(obj, list): + # Value is a __path__ key. + continue + if not hasattr(obj, 'indent'): + obj.indent = 0 + + if isinstance(obj, _Object): + new_objs = sorted(obj.children.values(), + key=lineno_key, reverse=True) + for ob in new_objs: + ob.indent = obj.indent + indent_level + objs.extend(new_objs) if isinstance(obj, Class): - print("class", obj.name, obj.super, obj.lineno) - methods = sorted(obj.methods.items(), key=itemgetter(1)) - for name, lineno in methods: - if name != "__path__": - print(" def", name, lineno) + print("{}class {} {} {}" + .format(' ' * obj.indent, obj.name, obj.super, obj.lineno)) elif isinstance(obj, Function): - print("def", obj.name, obj.lineno) + print("{}def {} {}".format(' ' * obj.indent, obj.name, obj.lineno)) if __name__ == "__main__": _main() |