From 246ff3bd00f97658e567a7087645a6b76e056491 Mon Sep 17 00:00:00 2001 From: csabella Date: Mon, 3 Jul 2017 21:31:25 -0400 Subject: bpo-6691: Pyclbr now reports nested classes and functions. (#2503) Original patch by Guilherme Polo. Revisions by Cheryl Sabella. --- Doc/library/pyclbr.rst | 147 ++++++++++++++--------- Lib/pyclbr.py | 311 ++++++++++++++++++++++++++++-------------------- Lib/test/test_pyclbr.py | 66 ++++++++++ 3 files changed, 338 insertions(+), 186 deletions(-) diff --git a/Doc/library/pyclbr.rst b/Doc/library/pyclbr.rst index 3284271..ea34dd0 100644 --- a/Doc/library/pyclbr.rst +++ b/Doc/library/pyclbr.rst @@ -10,107 +10,144 @@ -------------- -The :mod:`pyclbr` module can be used to determine some limited information -about the classes, methods and top-level functions defined in a module. The -information provided is sufficient to implement a traditional three-pane -class browser. The information is extracted from the source code rather -than by importing the module, so this module is safe to use with untrusted -code. This restriction makes it impossible to use this module with modules -not implemented in Python, including all standard and optional extension +The :mod:`pyclbr` module provides limited information about the +functions, classes, and methods defined in a python-coded module. The +information is sufficient to implement a module browser. The +information is extracted from the python source code rather than by +importing the module, so this module is safe to use with untrusted code. +This restriction makes it impossible to use this module with modules not +implemented in Python, including all standard and optional extension modules. .. function:: readmodule(module, path=None) - Read a module and return a dictionary mapping class names to class - descriptor objects. The parameter *module* should be the name of a - module as a string; it may be the name of a module within a package. 
The - *path* parameter should be a sequence, and is used to augment the value - of ``sys.path``, which is used to locate module source code. + Return a dictionary mapping module-level class names to class + descriptors. If possible, descriptors for imported base classes are + included. Parameter *module* is a string with the name of the module + to read; it may be the name of a module within a package. If given, + *path* is a sequence of directory paths prepended to ``sys.path``, + which is used to locate the module source code. .. function:: readmodule_ex(module, path=None) - Like :func:`readmodule`, but the returned dictionary, in addition to - mapping class names to class descriptor objects, also maps top-level - function names to function descriptor objects. Moreover, if the module - being read is a package, the key ``'__path__'`` in the returned - dictionary has as its value a list which contains the package search - path. + Return a dictionary-based tree containing a function or class + descriptor for each function and class defined in the module with a + ``def`` or ``class`` statement. The returned dictionary maps + module-level function and class names to their descriptors. Nested + objects are entered into the children dictionary of their parent. As + with readmodule, *module* names the module to be read and *path* is + prepended to sys.path. If the module being read is a package, the + returned dictionary has a key ``'__path__'`` whose value is a list + containing the package search path. +.. versionadded:: 3.7 + Descriptors for nested definitions. They are accessed through the + new children attribute. Each has a new parent attribute. -.. _pyclbr-class-objects: +The descriptors returned by these functions are instances of +Function and Class classes. Users are not expected to create instances +of these classes. 
-Class Objects -------------- -The :class:`Class` objects used as values in the dictionary returned by -:func:`readmodule` and :func:`readmodule_ex` provide the following data -attributes: +.. _pyclbr-function-objects: +Function Objects +---------------- +Class :class:`Function` instances describe functions defined by def +statements. They have the following attributes: -.. attribute:: Class.module - The name of the module defining the class described by the class descriptor. +.. attribute:: Function.file + Name of the file in which the function is defined. -.. attribute:: Class.name - The name of the class. +.. attribute:: Function.module + The name of the module defining the function described. -.. attribute:: Class.super - A list of :class:`Class` objects which describe the immediate base - classes of the class being described. Classes which are named as - superclasses but which are not discoverable by :func:`readmodule` are - listed as a string with the class name instead of as :class:`Class` - objects. +.. attribute:: Function.name + + The name of the function. -.. attribute:: Class.methods +.. attribute:: Function.lineno + + The line number in the file where the definition starts. + + +.. attribute:: Function.parent + + For top-level functions, None. For nested functions, the parent. + + .. versionadded:: 3.7 + + +.. attribute:: Function.children + + A dictionary mapping names to descriptors for nested functions and + classes. - A dictionary mapping method names to line numbers. + .. versionadded:: 3.7 + + +.. _pyclbr-class-objects: + +Class Objects +------------- +Class :class:`Class` instances describe classes defined by class +statements. They have the same attributes as Functions and two more. .. attribute:: Class.file - Name of the file containing the ``class`` statement defining the class. + Name of the file in which the class is defined. -.. attribute:: Class.lineno +.. 
attribute:: Class.module - The line number of the ``class`` statement within the file named by - :attr:`~Class.file`. + The name of the module defining the class described. -.. _pyclbr-function-objects: +.. attribute:: Class.name -Function Objects ----------------- + The name of the class. -The :class:`Function` objects used as values in the dictionary returned by -:func:`readmodule_ex` provide the following attributes: +.. attribute:: Class.lineno -.. attribute:: Function.module + The line number in the file where the definition starts. - The name of the module defining the function described by the function - descriptor. +.. attribute:: Class.parent -.. attribute:: Function.name + For top-level classes, None. For nested classes, the parent. - The name of the function. + .. versionadded:: 3.7 -.. attribute:: Function.file +.. attribute:: Class.children - Name of the file containing the ``def`` statement defining the function. + A dictionary mapping names to descriptors for nested functions and + classes. + .. versionadded:: 3.7 -.. attribute:: Function.lineno - The line number of the ``def`` statement within the file named by - :attr:`~Function.file`. +.. attribute:: Class.super + + A list of :class:`Class` objects which describe the immediate base + classes of the class being described. Classes which are named as + superclasses but which are not discoverable by :func:`readmodule_ex` + are listed as a string with the class name instead of as + :class:`Class` objects. + + +.. attribute:: Class.methods + A dictionary mapping method names to line numbers. This can be + derived from the newer children dictionary, but remains for + back-compatibility. diff --git a/Lib/pyclbr.py b/Lib/pyclbr.py index d7dba97..2c798df 100644 --- a/Lib/pyclbr.py +++ b/Lib/pyclbr.py @@ -1,42 +1,41 @@ -"""Parse a Python module and describe its classes and methods. +"""Parse a Python module and describe its classes and functions. 
Parse enough of a Python file to recognize imports and class and -method definitions, and to find out the superclasses of a class. +function definitions, and to find out the superclasses of a class. The interface consists of a single function: - readmodule_ex(module [, path]) + readmodule_ex(module, path=None) where module is the name of a Python module, and path is an optional list of directories where the module is to be searched. If present, -path is prepended to the system search path sys.path. The return -value is a dictionary. The keys of the dictionary are the names of -the classes defined in the module (including classes that are defined -via the from XXX import YYY construct). The values are class -instances of the class Class defined here. One special key/value pair -is present for packages: the key '__path__' has a list as its value -which contains the package search path. - -A class is described by the class Class in this module. Instances -of this class have the following instance variables: - module -- the module name - name -- the name of the class - super -- a list of super classes (Class instances) - methods -- a dictionary of methods - file -- the file in which the class was defined - lineno -- the line in the file on which the class statement occurred -The dictionary of methods uses the method names as keys and the line -numbers on which the method was defined as values. +path is prepended to the system search path sys.path. The return value +is a dictionary. The keys of the dictionary are the names of the +classes and functions defined in the module (including classes that are +defined via the from XXX import YYY construct). The values are +instances of classes Class and Function. One special key/value pair is +present for packages: the key '__path__' has a list as its value which +contains the package search path. + +Classes and Functions have a common superclass: _Object. 
Every instance +has the following attributes: + module -- name of the module; + name -- name of the object; + file -- file in which the object is defined; + lineno -- line in the file where the object's definition starts; + parent -- parent of this object, if any; + children -- nested objects contained in this object. +The 'children' attribute is a dictionary mapping names to objects. + +Instances of Function describe functions with the attributes from _Object. + +Instances of Class describe classes with the attributes from _Object, +plus the following: + super -- list of super classes (Class instances if possible); + methods -- mapping of method names to beginning line numbers. If the name of a super class is not recognized, the corresponding entry in the list of super classes is not a class instance but a string giving the name of the super class. Since import statements are recognized and imported modules are scanned as well, this shouldn't happen often. - -A function is described by the class Function in this module. -Instances of this class have the following instance variables: - module -- the module name - name -- the name of the class - file -- the file in which the class was defined - lineno -- the line in the file on which the class statement occurred """ import io @@ -47,37 +46,59 @@ from token import NAME, DEDENT, OP __all__ = ["readmodule", "readmodule_ex", "Class", "Function"] -_modules = {} # cache of modules we've seen +_modules = {} # Initialize cache of modules we've seen. -# each Python class is represented by an instance of this class -class Class: - '''Class to represent a Python class.''' - def __init__(self, module, name, super, file, lineno): + +class _Object: + "Information about Python class or function." 
+ def __init__(self, module, name, file, lineno, parent): self.module = module self.name = name - if super is None: - super = [] - self.super = super - self.methods = {} self.file = file self.lineno = lineno + self.parent = parent + self.children = {} + + def _addchild(self, name, obj): + self.children[name] = obj + + +class Function(_Object): + "Information about a Python function, including methods." + def __init__(self, module, name, file, lineno, parent=None): + _Object.__init__(self, module, name, file, lineno, parent) + + +class Class(_Object): + "Information about a Python class." + def __init__(self, module, name, super, file, lineno, parent=None): + _Object.__init__(self, module, name, file, lineno, parent) + self.super = [] if super is None else super + self.methods = {} def _addmethod(self, name, lineno): self.methods[name] = lineno -class Function: - '''Class to represent a top-level Python function''' - def __init__(self, module, name, file, lineno): - self.module = module - self.name = name - self.file = file - self.lineno = lineno + +def _nest_function(ob, func_name, lineno): + "Return a Function after nesting within ob." + newfunc = Function(ob.module, func_name, ob.file, lineno, ob) + ob._addchild(func_name, newfunc) + if isinstance(ob, Class): + ob._addmethod(func_name, lineno) + return newfunc + +def _nest_class(ob, class_name, lineno, super=None): + "Return a Class after nesting within ob." + newclass = Class(ob.module, class_name, super, ob.file, lineno, ob) + ob._addchild(class_name, newclass) + return newclass def readmodule(module, path=None): - '''Backwards compatible interface. + """Return Class objects for the top-level classes in module. - Call readmodule_ex() and then only keep Class objects from the - resulting dictionary.''' + This is the original interface, before Functions were added. 
+ """ res = {} for key, value in _readmodule(module, path or []).items(): @@ -86,41 +107,41 @@ def readmodule(module, path=None): return res def readmodule_ex(module, path=None): - '''Read a module file and return a dictionary of classes. + """Return a dictionary with all functions and classes in module. - Search for MODULE in PATH and sys.path, read and parse the - module and return a dictionary with one entry for each class - found in the module. - ''' + Search for module in PATH + sys.path. + If possible, include imported superclasses. + Do this by reading source, without importing (and executing) it. + """ return _readmodule(module, path or []) def _readmodule(module, path, inpackage=None): - '''Do the hard work for readmodule[_ex]. + """Do the hard work for readmodule[_ex]. - If INPACKAGE is given, it must be the dotted name of the package in + If inpackage is given, it must be the dotted name of the package in which we are searching for a submodule, and then PATH must be the package search path; otherwise, we are searching for a top-level - module, and PATH is combined with sys.path. - ''' - # Compute the full module name (prepending inpackage if set) + module, and path is combined with sys.path. + """ + # Compute the full module name (prepending inpackage if set). if inpackage is not None: fullmodule = "%s.%s" % (inpackage, module) else: fullmodule = module - # Check in the cache + # Check in the cache. if fullmodule in _modules: return _modules[fullmodule] - # Initialize the dict for this module's contents - dict = {} + # Initialize the dict for this module's contents. + tree = {} - # Check if it is a built-in module; we don't do much for these + # Check if it is a built-in module; we don't do much for these. if module in sys.builtin_module_names and inpackage is None: - _modules[module] = dict - return dict + _modules[module] = tree + return tree - # Check for a dotted module name + # Check for a dotted module name. 
i = module.rfind('.') if i >= 0: package = module[:i] @@ -132,88 +153,97 @@ def _readmodule(module, path, inpackage=None): raise ImportError('No package named {}'.format(package)) return _readmodule(submodule, parent['__path__'], package) - # Search the path for the module + # Search the path for the module. f = None if inpackage is not None: search_path = path else: search_path = path + sys.path - # XXX This will change once issue19944 lands. spec = importlib.util._find_spec_from_path(fullmodule, search_path) - _modules[fullmodule] = dict - # is module a package? + _modules[fullmodule] = tree + # Is module a package? if spec.submodule_search_locations is not None: - dict['__path__'] = spec.submodule_search_locations + tree['__path__'] = spec.submodule_search_locations try: source = spec.loader.get_source(fullmodule) if source is None: - return dict + return tree except (AttributeError, ImportError): - # not Python source, can't do anything with this module - return dict + # If module is not Python source, we cannot do anything. + return tree fname = spec.loader.get_filename(fullmodule) + return _create_tree(fullmodule, path, fname, source, tree, inpackage) + +def _create_tree(fullmodule, path, fname, source, tree, inpackage): + """Return the tree for a particular module. + + fullmodule (full module name), inpackage+module, becomes o.module. + path is passed to recursive calls of _readmodule. + fname becomes o.file. + source is tokenized. Imports cause recursive calls to _readmodule. + tree is {} or {'__path__': }. + inpackage, None or string, is passed to recursive calls of _readmodule. + + The effect of recursive calls is mutation of global _modules. + """ f = io.StringIO(source) - stack = [] # stack of (class, indent) pairs + stack = [] # Initialize stack of (class, indent) pairs. 
g = tokenize.generate_tokens(f.readline) try: for tokentype, token, start, _end, _line in g: if tokentype == DEDENT: lineno, thisindent = start - # close nested classes and defs + # Close previous nested classes and defs. while stack and stack[-1][1] >= thisindent: del stack[-1] elif token == 'def': lineno, thisindent = start - # close previous nested classes and defs + # Close previous nested classes and defs. while stack and stack[-1][1] >= thisindent: del stack[-1] - tokentype, meth_name, start = next(g)[0:3] + tokentype, func_name, start = next(g)[0:3] if tokentype != NAME: - continue # Syntax error + continue # Skip def with syntax error. + cur_func = None if stack: - cur_class = stack[-1][0] - if isinstance(cur_class, Class): - # it's a method - cur_class._addmethod(meth_name, lineno) - # else it's a nested def + cur_obj = stack[-1][0] + cur_func = _nest_function(cur_obj, func_name, lineno) else: - # it's a function - dict[meth_name] = Function(fullmodule, meth_name, - fname, lineno) - stack.append((None, thisindent)) # Marker for nested fns + # It is just a function. + cur_func = Function(fullmodule, func_name, fname, lineno) + tree[func_name] = cur_func + stack.append((cur_func, thisindent)) elif token == 'class': lineno, thisindent = start - # close previous nested classes and defs + # Close previous nested classes and defs. while stack and stack[-1][1] >= thisindent: del stack[-1] tokentype, class_name, start = next(g)[0:3] if tokentype != NAME: - continue # Syntax error - # parse what follows the class name + continue # Skip class with syntax error. + # Parse what follows the class name. tokentype, token, start = next(g)[0:3] inherit = None if token == '(': - names = [] # List of superclasses - # there's a list of superclasses + names = [] # Initialize list of superclasses. level = 1 - super = [] # Tokens making up current superclass + super = [] # Tokens making up current superclass. 
while True: tokentype, token, start = next(g)[0:3] if token in (')', ',') and level == 1: n = "".join(super) - if n in dict: - # we know this super class - n = dict[n] + if n in tree: + # We know this super class. + n = tree[n] else: c = n.split('.') if len(c) > 1: - # super class is of the form - # module.class: look in module for - # class + # Super class form is module.class: + # look in module for class. m = c[-2] c = c[-1] if m in _modules: @@ -230,21 +260,25 @@ def _readmodule(module, path, inpackage=None): break elif token == ',' and level == 1: pass - # only use NAME and OP (== dot) tokens for type name + # Only use NAME and OP (== dot) tokens for type name. elif tokentype in (NAME, OP) and level == 1: super.append(token) - # expressions in the base list are not supported + # Expressions in the base list are not supported. inherit = names - cur_class = Class(fullmodule, class_name, inherit, - fname, lineno) - if not stack: - dict[class_name] = cur_class + if stack: + cur_obj = stack[-1][0] + cur_class = _nest_class( + cur_obj, class_name, lineno, inherit) + else: + cur_class = Class(fullmodule, class_name, inherit, + fname, lineno) + tree[class_name] = cur_class stack.append((cur_class, thisindent)) elif token == 'import' and start[1] == 0: modules = _getnamelist(g) for mod, _mod2 in modules: try: - # Recursively read the imported module + # Recursively read the imported module. if inpackage is None: _readmodule(mod, path) else: @@ -262,32 +296,34 @@ def _readmodule(module, path, inpackage=None): continue names = _getnamelist(g) try: - # Recursively read the imported module + # Recursively read the imported module. d = _readmodule(mod, path, inpackage) except: # If we can't find or parse the imported module, # too bad -- don't die here. 
continue - # add any classes that were defined in the imported module - # to our name space if they were mentioned in the list + # Add any classes that were defined in the imported module + # to our name space if they were mentioned in the list. for n, n2 in names: if n in d: - dict[n2 or n] = d[n] + tree[n2 or n] = d[n] elif n == '*': - # don't add names that start with _ + # Don't add names that start with _. for n in d: if n[0] != '_': - dict[n] = d[n] + tree[n] = d[n] except StopIteration: pass f.close() - return dict + return tree + def _getnamelist(g): - # Helper to get a comma-separated list of dotted names plus 'as' - # clauses. Return a list of pairs (name, name2) where name2 is - # the 'as' name, or None if there is no 'as' clause. + """Return list of (dotted-name, as-name or None) tuples for token source g. + + An as-name is the name that follows 'as' in an as clause. + """ names = [] while True: name, token = _getname(g) @@ -304,10 +340,9 @@ def _getnamelist(g): break return names + def _getname(g): - # Helper to get a dotted name, return a pair (name, token) where - # name is the dotted name, or None if there was no dotted name, - # and token is the next input token. + "Return (dotted-name or None, next-token) tuple for token source g." parts = [] tokentype, token = next(g)[0:2] if tokentype != NAME and token != '*': @@ -323,11 +358,14 @@ def _getname(g): parts.append(token) return (".".join(parts), token) + def _main(): - # Main program for testing. + "Print module output (default this file) for quick visual check." 
import os - from operator import itemgetter - mod = sys.argv[1] + try: + mod = sys.argv[1] + except: + mod = __file__ if os.path.exists(mod): path = [os.path.dirname(mod)] mod = os.path.basename(mod) @@ -335,18 +373,29 @@ def _main(): mod = mod[:-3] else: path = [] - dict = readmodule_ex(mod, path) - objs = list(dict.values()) - objs.sort(key=lambda a: getattr(a, 'lineno', 0)) - for obj in objs: + tree = readmodule_ex(mod, path) + lineno_key = lambda a: getattr(a, 'lineno', 0) + objs = sorted(tree.values(), key=lineno_key, reverse=True) + indent_level = 2 + while objs: + obj = objs.pop() + if isinstance(obj, list): + # Value is a __path__ key. + continue + if not hasattr(obj, 'indent'): + obj.indent = 0 + + if isinstance(obj, _Object): + new_objs = sorted(obj.children.values(), + key=lineno_key, reverse=True) + for ob in new_objs: + ob.indent = obj.indent + indent_level + objs.extend(new_objs) if isinstance(obj, Class): - print("class", obj.name, obj.super, obj.lineno) - methods = sorted(obj.methods.items(), key=itemgetter(1)) - for name, lineno in methods: - if name != "__path__": - print(" def", name, lineno) + print("{}class {} {} {}" + .format(' ' * obj.indent, obj.name, obj.super, obj.lineno)) elif isinstance(obj, Function): - print("def", obj.name, obj.lineno) + print("{}def {} {}".format(' ' * obj.indent, obj.name, obj.lineno)) if __name__ == "__main__": _main() diff --git a/Lib/test/test_pyclbr.py b/Lib/test/test_pyclbr.py index 9c216d3..238eb71 100644 --- a/Lib/test/test_pyclbr.py +++ b/Lib/test/test_pyclbr.py @@ -2,10 +2,15 @@ Test cases for pyclbr.py Nick Mathewson ''' + +import os import sys +from textwrap import dedent from types import FunctionType, MethodType, BuiltinFunctionType import pyclbr from unittest import TestCase, main as unittest_main +from test import support +from functools import partial StaticMethodType = type(staticmethod(lambda: None)) ClassMethodType = type(classmethod(lambda c: None)) @@ -150,6 +155,67 @@ class 
PyclbrTest(TestCase): # self.checkModule('test.pyclbr_input', ignore=['om']) + def test_nested(self): + mb = pyclbr + # Set arguments for descriptor creation and _create_tree call. + m, p, f, t, i = 'test', '', 'test.py', {}, None + source = dedent("""\ + def f0: + def f1(a,b,c): + def f2(a=1, b=2, c=3): pass + return f1(a,b,d) + class c1: pass + class C0: + "Test class." + def F1(): + "Method." + return 'return' + class C1(): + class C2: + "Class nested within nested class." + def F3(): return 1+1 + + """) + actual = mb._create_tree(m, p, f, source, t, i) + + # Create descriptors, linked together, and expected dict. + f0 = mb.Function(m, 'f0', f, 1) + f1 = mb._nest_function(f0, 'f1', 2) + f2 = mb._nest_function(f1, 'f2', 3) + c1 = mb._nest_class(f0, 'c1', 5) + C0 = mb.Class(m, 'C0', None, f, 6) + F1 = mb._nest_function(C0, 'F1', 8) + C1 = mb._nest_class(C0, 'C1', 11) + C2 = mb._nest_class(C1, 'C2', 12) + F3 = mb._nest_function(C2, 'F3', 14) + expected = {'f0':f0, 'C0':C0} + + def compare(parent1, children1, parent2, children2): + """Return equality of tree pairs. + + Each parent,children pair defines a tree. The parents are + assumed equal. Comparing the children dictionaries as such + does not work due to comparison by identity and double + linkage. We separate comparing string and number attributes + from comparing the children of input children. 
+ """ + self.assertEqual(children1.keys(), children2.keys()) + for ob in children1.values(): + self.assertIs(ob.parent, parent1) + for ob in children2.values(): + self.assertIs(ob.parent, parent2) + for key in children1.keys(): + o1, o2 = children1[key], children2[key] + t1 = type(o1), o1.name, o1.file, o1.module, o1.lineno + t2 = type(o2), o2.name, o2.file, o2.module, o2.lineno + self.assertEqual(t1, t2) + if type(o1) is mb.Class: + self.assertEqual(o1.methods, o2.methods) + # Skip superclasses for now as not part of example + compare(o1, o1.children, o2, o2.children) + + compare(None, actual, None, expected) + def test_others(self): cm = self.checkModule -- cgit v0.12