summaryrefslogtreecommitdiffstats
path: root/Lib/pyclbr.py
diff options
context:
space:
mode:
author: csabella <cheryl.sabella@gmail.com> 2017-07-04 01:31:25 (GMT)
committer: terryjreedy <tjreedy@udel.edu> 2017-07-04 01:31:25 (GMT)
commit: 246ff3bd00f97658e567a7087645a6b76e056491 (patch)
tree: 08110a51b6b57a925494b76c25547251cf6f5dac /Lib/pyclbr.py
parent: 6969eaf4682beb01bc95eeb14f5ce6c01312e297 (diff)
downloadcpython-246ff3bd00f97658e567a7087645a6b76e056491.zip
cpython-246ff3bd00f97658e567a7087645a6b76e056491.tar.gz
cpython-246ff3bd00f97658e567a7087645a6b76e056491.tar.bz2
bpo-6691: Pyclbr now reports nested classes and functions. (#2503)
Original patch by Guilherme Polo. Revisions by Cheryl Sabella.
Diffstat (limited to 'Lib/pyclbr.py')
-rw-r--r--Lib/pyclbr.py311
1 files changed, 180 insertions, 131 deletions
diff --git a/Lib/pyclbr.py b/Lib/pyclbr.py
index d7dba97..2c798df 100644
--- a/Lib/pyclbr.py
+++ b/Lib/pyclbr.py
@@ -1,42 +1,41 @@
-"""Parse a Python module and describe its classes and methods.
+"""Parse a Python module and describe its classes and functions.
Parse enough of a Python file to recognize imports and class and
-method definitions, and to find out the superclasses of a class.
+function definitions, and to find out the superclasses of a class.
The interface consists of a single function:
- readmodule_ex(module [, path])
+ readmodule_ex(module, path=None)
where module is the name of a Python module, and path is an optional
list of directories in which to search for the module. If present,
-path is prepended to the system search path sys.path. The return
-value is a dictionary. The keys of the dictionary are the names of
-the classes defined in the module (including classes that are defined
-via the from XXX import YYY construct). The values are class
-instances of the class Class defined here. One special key/value pair
-is present for packages: the key '__path__' has a list as its value
-which contains the package search path.
-
-A class is described by the class Class in this module. Instances
-of this class have the following instance variables:
- module -- the module name
- name -- the name of the class
- super -- a list of super classes (Class instances)
- methods -- a dictionary of methods
- file -- the file in which the class was defined
- lineno -- the line in the file on which the class statement occurred
-The dictionary of methods uses the method names as keys and the line
-numbers on which the method was defined as values.
+path is prepended to the system search path sys.path. The return value
+is a dictionary. The keys of the dictionary are the names of the
+classes and functions defined in the module (including classes that are
+defined via the from XXX import YYY construct). The values are
+instances of classes Class and Function. One special key/value pair is
+present for packages: the key '__path__' has a list as its value which
+contains the package search path.
+
+Classes and Functions have a common superclass: _Object. Every instance
+has the following attributes:
+ module -- name of the module;
+ name -- name of the object;
+ file -- file in which the object is defined;
+ lineno -- line in the file where the object's definition starts;
+ parent -- parent of this object, if any;
+ children -- nested objects contained in this object.
+The 'children' attribute is a dictionary mapping names to objects.
+
+Instances of Function describe functions with the attributes from _Object.
+
+Instances of Class describe classes with the attributes from _Object,
+plus the following:
+ super -- list of super classes (Class instances if possible);
+ methods -- mapping of method names to beginning line numbers.
If the name of a super class is not recognized, the corresponding
entry in the list of super classes is not a class instance but a
string giving the name of the super class. Since import statements
are recognized and imported modules are scanned as well, this
shouldn't happen often.
-
-A function is described by the class Function in this module.
-Instances of this class have the following instance variables:
- module -- the module name
- name -- the name of the class
- file -- the file in which the class was defined
- lineno -- the line in the file on which the class statement occurred
"""
import io
@@ -47,37 +46,59 @@ from token import NAME, DEDENT, OP
__all__ = ["readmodule", "readmodule_ex", "Class", "Function"]
-_modules = {} # cache of modules we've seen
+_modules = {} # Initialize cache of modules we've seen.
-# each Python class is represented by an instance of this class
-class Class:
- '''Class to represent a Python class.'''
- def __init__(self, module, name, super, file, lineno):
+
+class _Object:
+ "Information about Python class or function."
+ def __init__(self, module, name, file, lineno, parent):
self.module = module
self.name = name
- if super is None:
- super = []
- self.super = super
- self.methods = {}
self.file = file
self.lineno = lineno
+ self.parent = parent
+ self.children = {}
+
+ def _addchild(self, name, obj):
+ self.children[name] = obj
+
+
+class Function(_Object):
+ "Information about a Python function, including methods."
+ def __init__(self, module, name, file, lineno, parent=None):
+ _Object.__init__(self, module, name, file, lineno, parent)
+
+
+class Class(_Object):
+ "Information about a Python class."
+ def __init__(self, module, name, super, file, lineno, parent=None):
+ _Object.__init__(self, module, name, file, lineno, parent)
+ self.super = [] if super is None else super
+ self.methods = {}
def _addmethod(self, name, lineno):
self.methods[name] = lineno
-class Function:
- '''Class to represent a top-level Python function'''
- def __init__(self, module, name, file, lineno):
- self.module = module
- self.name = name
- self.file = file
- self.lineno = lineno
+
+def _nest_function(ob, func_name, lineno):
+ "Return a Function after nesting within ob."
+ newfunc = Function(ob.module, func_name, ob.file, lineno, ob)
+ ob._addchild(func_name, newfunc)
+ if isinstance(ob, Class):
+ ob._addmethod(func_name, lineno)
+ return newfunc
+
+def _nest_class(ob, class_name, lineno, super=None):
+ "Return a Class after nesting within ob."
+ newclass = Class(ob.module, class_name, super, ob.file, lineno, ob)
+ ob._addchild(class_name, newclass)
+ return newclass
def readmodule(module, path=None):
- '''Backwards compatible interface.
+ """Return Class objects for the top-level classes in module.
- Call readmodule_ex() and then only keep Class objects from the
- resulting dictionary.'''
+ This is the original interface, before Functions were added.
+ """
res = {}
for key, value in _readmodule(module, path or []).items():
@@ -86,41 +107,41 @@ def readmodule(module, path=None):
return res
def readmodule_ex(module, path=None):
- '''Read a module file and return a dictionary of classes.
+ """Return a dictionary with all functions and classes in module.
- Search for MODULE in PATH and sys.path, read and parse the
- module and return a dictionary with one entry for each class
- found in the module.
- '''
+ Search for module in path + sys.path.
+ If possible, include imported superclasses.
+ Do this by reading source, without importing (and executing) it.
+ """
return _readmodule(module, path or [])
def _readmodule(module, path, inpackage=None):
- '''Do the hard work for readmodule[_ex].
+ """Do the hard work for readmodule[_ex].
- If INPACKAGE is given, it must be the dotted name of the package in
+ If inpackage is given, it must be the dotted name of the package in
which we are searching for a submodule, and then path must be the
package search path; otherwise, we are searching for a top-level
- module, and PATH is combined with sys.path.
- '''
- # Compute the full module name (prepending inpackage if set)
+ module, and path is combined with sys.path.
+ """
+ # Compute the full module name (prepending inpackage if set).
if inpackage is not None:
fullmodule = "%s.%s" % (inpackage, module)
else:
fullmodule = module
- # Check in the cache
+ # Check in the cache.
if fullmodule in _modules:
return _modules[fullmodule]
- # Initialize the dict for this module's contents
- dict = {}
+ # Initialize the dict for this module's contents.
+ tree = {}
- # Check if it is a built-in module; we don't do much for these
+ # Check if it is a built-in module; we don't do much for these.
if module in sys.builtin_module_names and inpackage is None:
- _modules[module] = dict
- return dict
+ _modules[module] = tree
+ return tree
- # Check for a dotted module name
+ # Check for a dotted module name.
i = module.rfind('.')
if i >= 0:
package = module[:i]
@@ -132,88 +153,97 @@ def _readmodule(module, path, inpackage=None):
raise ImportError('No package named {}'.format(package))
return _readmodule(submodule, parent['__path__'], package)
- # Search the path for the module
+ # Search the path for the module.
f = None
if inpackage is not None:
search_path = path
else:
search_path = path + sys.path
- # XXX This will change once issue19944 lands.
spec = importlib.util._find_spec_from_path(fullmodule, search_path)
- _modules[fullmodule] = dict
- # is module a package?
+ _modules[fullmodule] = tree
+ # Is module a package?
if spec.submodule_search_locations is not None:
- dict['__path__'] = spec.submodule_search_locations
+ tree['__path__'] = spec.submodule_search_locations
try:
source = spec.loader.get_source(fullmodule)
if source is None:
- return dict
+ return tree
except (AttributeError, ImportError):
- # not Python source, can't do anything with this module
- return dict
+ # If module is not Python source, we cannot do anything.
+ return tree
fname = spec.loader.get_filename(fullmodule)
+ return _create_tree(fullmodule, path, fname, source, tree, inpackage)
+
+def _create_tree(fullmodule, path, fname, source, tree, inpackage):
+ """Return the tree for a particular module.
+
+ fullmodule (full module name), inpackage+module, becomes o.module.
+ path is passed to recursive calls of _readmodule.
+ fname becomes o.file.
+ source is tokenized. Imports cause recursive calls to _readmodule.
+ tree is {} or {'__path__': <submodule search locations>}.
+ inpackage, None or string, is passed to recursive calls of _readmodule.
+
+ The effect of recursive calls is mutation of global _modules.
+ """
f = io.StringIO(source)
- stack = [] # stack of (class, indent) pairs
+ stack = [] # Initialize stack of (class, indent) pairs.
g = tokenize.generate_tokens(f.readline)
try:
for tokentype, token, start, _end, _line in g:
if tokentype == DEDENT:
lineno, thisindent = start
- # close nested classes and defs
+ # Close previous nested classes and defs.
while stack and stack[-1][1] >= thisindent:
del stack[-1]
elif token == 'def':
lineno, thisindent = start
- # close previous nested classes and defs
+ # Close previous nested classes and defs.
while stack and stack[-1][1] >= thisindent:
del stack[-1]
- tokentype, meth_name, start = next(g)[0:3]
+ tokentype, func_name, start = next(g)[0:3]
if tokentype != NAME:
- continue # Syntax error
+ continue # Skip def with syntax error.
+ cur_func = None
if stack:
- cur_class = stack[-1][0]
- if isinstance(cur_class, Class):
- # it's a method
- cur_class._addmethod(meth_name, lineno)
- # else it's a nested def
+ cur_obj = stack[-1][0]
+ cur_func = _nest_function(cur_obj, func_name, lineno)
else:
- # it's a function
- dict[meth_name] = Function(fullmodule, meth_name,
- fname, lineno)
- stack.append((None, thisindent)) # Marker for nested fns
+ # It is just a function.
+ cur_func = Function(fullmodule, func_name, fname, lineno)
+ tree[func_name] = cur_func
+ stack.append((cur_func, thisindent))
elif token == 'class':
lineno, thisindent = start
- # close previous nested classes and defs
+ # Close previous nested classes and defs.
while stack and stack[-1][1] >= thisindent:
del stack[-1]
tokentype, class_name, start = next(g)[0:3]
if tokentype != NAME:
- continue # Syntax error
- # parse what follows the class name
+ continue # Skip class with syntax error.
+ # Parse what follows the class name.
tokentype, token, start = next(g)[0:3]
inherit = None
if token == '(':
- names = [] # List of superclasses
- # there's a list of superclasses
+ names = [] # Initialize list of superclasses.
level = 1
- super = [] # Tokens making up current superclass
+ super = [] # Tokens making up current superclass.
while True:
tokentype, token, start = next(g)[0:3]
if token in (')', ',') and level == 1:
n = "".join(super)
- if n in dict:
- # we know this super class
- n = dict[n]
+ if n in tree:
+ # We know this super class.
+ n = tree[n]
else:
c = n.split('.')
if len(c) > 1:
- # super class is of the form
- # module.class: look in module for
- # class
+ # Super class form is module.class:
+ # look in module for class.
m = c[-2]
c = c[-1]
if m in _modules:
@@ -230,21 +260,25 @@ def _readmodule(module, path, inpackage=None):
break
elif token == ',' and level == 1:
pass
- # only use NAME and OP (== dot) tokens for type name
+ # Only use NAME and OP (== dot) tokens for type name.
elif tokentype in (NAME, OP) and level == 1:
super.append(token)
- # expressions in the base list are not supported
+ # Expressions in the base list are not supported.
inherit = names
- cur_class = Class(fullmodule, class_name, inherit,
- fname, lineno)
- if not stack:
- dict[class_name] = cur_class
+ if stack:
+ cur_obj = stack[-1][0]
+ cur_class = _nest_class(
+ cur_obj, class_name, lineno, inherit)
+ else:
+ cur_class = Class(fullmodule, class_name, inherit,
+ fname, lineno)
+ tree[class_name] = cur_class
stack.append((cur_class, thisindent))
elif token == 'import' and start[1] == 0:
modules = _getnamelist(g)
for mod, _mod2 in modules:
try:
- # Recursively read the imported module
+ # Recursively read the imported module.
if inpackage is None:
_readmodule(mod, path)
else:
@@ -262,32 +296,34 @@ def _readmodule(module, path, inpackage=None):
continue
names = _getnamelist(g)
try:
- # Recursively read the imported module
+ # Recursively read the imported module.
d = _readmodule(mod, path, inpackage)
except:
# If we can't find or parse the imported module,
# too bad -- don't die here.
continue
- # add any classes that were defined in the imported module
- # to our name space if they were mentioned in the list
+ # Add any classes that were defined in the imported module
+ # to our name space if they were mentioned in the list.
for n, n2 in names:
if n in d:
- dict[n2 or n] = d[n]
+ tree[n2 or n] = d[n]
elif n == '*':
- # don't add names that start with _
+ # Don't add names that start with _.
for n in d:
if n[0] != '_':
- dict[n] = d[n]
+ tree[n] = d[n]
except StopIteration:
pass
f.close()
- return dict
+ return tree
+
def _getnamelist(g):
- # Helper to get a comma-separated list of dotted names plus 'as'
- # clauses. Return a list of pairs (name, name2) where name2 is
- # the 'as' name, or None if there is no 'as' clause.
+ """Return list of (dotted-name, as-name or None) tuples for token source g.
+
+ An as-name is the name that follows 'as' in an as clause.
+ """
names = []
while True:
name, token = _getname(g)
@@ -304,10 +340,9 @@ def _getnamelist(g):
break
return names
+
def _getname(g):
- # Helper to get a dotted name, return a pair (name, token) where
- # name is the dotted name, or None if there was no dotted name,
- # and token is the next input token.
+ "Return (dotted-name or None, next-token) tuple for token source g."
parts = []
tokentype, token = next(g)[0:2]
if tokentype != NAME and token != '*':
@@ -323,11 +358,14 @@ def _getname(g):
parts.append(token)
return (".".join(parts), token)
+
def _main():
- # Main program for testing.
+ "Print module output (default this file) for quick visual check."
import os
- from operator import itemgetter
- mod = sys.argv[1]
+ try:
+ mod = sys.argv[1]
+ except:
+ mod = __file__
if os.path.exists(mod):
path = [os.path.dirname(mod)]
mod = os.path.basename(mod)
@@ -335,18 +373,29 @@ def _main():
mod = mod[:-3]
else:
path = []
- dict = readmodule_ex(mod, path)
- objs = list(dict.values())
- objs.sort(key=lambda a: getattr(a, 'lineno', 0))
- for obj in objs:
+ tree = readmodule_ex(mod, path)
+ lineno_key = lambda a: getattr(a, 'lineno', 0)
+ objs = sorted(tree.values(), key=lineno_key, reverse=True)
+ indent_level = 2
+ while objs:
+ obj = objs.pop()
+ if isinstance(obj, list):
+ # Skip the list stored under the '__path__' key.
+ continue
+ if not hasattr(obj, 'indent'):
+ obj.indent = 0
+
+ if isinstance(obj, _Object):
+ new_objs = sorted(obj.children.values(),
+ key=lineno_key, reverse=True)
+ for ob in new_objs:
+ ob.indent = obj.indent + indent_level
+ objs.extend(new_objs)
if isinstance(obj, Class):
- print("class", obj.name, obj.super, obj.lineno)
- methods = sorted(obj.methods.items(), key=itemgetter(1))
- for name, lineno in methods:
- if name != "__path__":
- print(" def", name, lineno)
+ print("{}class {} {} {}"
+ .format(' ' * obj.indent, obj.name, obj.super, obj.lineno))
elif isinstance(obj, Function):
- print("def", obj.name, obj.lineno)
+ print("{}def {} {}".format(' ' * obj.indent, obj.name, obj.lineno))
if __name__ == "__main__":
_main()