summaryrefslogtreecommitdiffstats
path: root/Lib/idlelib/pyclbr.py
diff options
context:
space:
mode:
Diffstat (limited to 'Lib/idlelib/pyclbr.py')
-rw-r--r--Lib/idlelib/pyclbr.py336
1 files changed, 336 insertions, 0 deletions
diff --git a/Lib/idlelib/pyclbr.py b/Lib/idlelib/pyclbr.py
new file mode 100644
index 0000000..74b7ff7
--- /dev/null
+++ b/Lib/idlelib/pyclbr.py
@@ -0,0 +1,336 @@
+"""Parse a Python file and retrieve classes and methods.
+
+Parse enough of a Python file to recognize class and method
+definitions and to find out the superclasses of a class.
+
+The interface consists of a single function:
+ readmodule(module, path)
+module is the name of a Python module, path is an optional list of
+directories where the module is to be searched. If present, path is
+prepended to the system search path sys.path.
+The return value is a dictionary. The keys of the dictionary are
+the names of the classes defined in the module (including classes
+that are defined via the from XXX import YYY construct). The values
+are class instances of the class Class defined here.
+
+A class is described by the class Class in this module. Instances
+of this class have the following instance variables:
+ name -- the name of the class
+ super -- a list of super classes (Class instances)
+ methods -- a dictionary of methods
+ file -- the file in which the class was defined
+ lineno -- the line in the file on which the class statement occurred
+The dictionary of methods uses the method names as keys and the line
+numbers on which the method was defined as values.
+If the name of a super class is not recognized, the corresponding
+entry in the list of super classes is not a class instance but a
+string giving the name of the super class. Since import statements
+are recognized and imported modules are scanned as well, this
+shouldn't happen often.
+
+BUGS
+- Continuation lines are not dealt with at all.
+- While triple-quoted strings won't confuse it, lines that look like
+ def, class, import or "from ... import" stmts inside backslash-continued
+ single-quoted strings are treated like code. The expense of stopping
+ that isn't worth it.
+- Code that doesn't pass tabnanny or python -t will confuse it, unless
+ you set the module TABWIDTH vrbl (default 8) to the correct tab width
+ for the file.
+
+PACKAGE RELATED BUGS
+- If you have a package and a module inside that or another package
+ with the same name, module caching doesn't work properly since the
+ key is the base name of the module/package.
+- The only entry that is returned when you readmodule a package is a
+ __path__ whose value is a list which confuses certain class browsers.
+- When code does:
+ from package import subpackage
+ class MyClass(subpackage.SuperClass):
+ ...
+ It can't locate the parent. It probably needs to have the same
+ hairy logic that the import locator already does. (This logic
+ exists coded in Python in the freeze package.)
+"""
+
+import os
+import sys
+import imp
+import re
+import string
+
+TABWIDTH = 8
+
+_getnext = re.compile(r"""
+ (?P<String>
+ \""" [^"\\]* (?:
+ (?: \\. | "(?!"") )
+ [^"\\]*
+ )*
+ \"""
+
+ | ''' [^'\\]* (?:
+ (?: \\. | '(?!'') )
+ [^'\\]*
+ )*
+ '''
+ )
+
+| (?P<Method>
+ ^
+ (?P<MethodIndent> [ \t]* )
+ def [ \t]+
+ (?P<MethodName> [a-zA-Z_] \w* )
+ [ \t]* \(
+ )
+
+| (?P<Class>
+ ^
+ (?P<ClassIndent> [ \t]* )
+ class [ \t]+
+ (?P<ClassName> [a-zA-Z_] \w* )
+ [ \t]*
+ (?P<ClassSupers> \( [^)\n]* \) )?
+ [ \t]* :
+ )
+
+| (?P<Import>
+ ^ import [ \t]+
+ (?P<ImportList> [^#;\n]+ )
+ )
+
+| (?P<ImportFrom>
+ ^ from [ \t]+
+ (?P<ImportFromPath>
+ [a-zA-Z_] \w*
+ (?:
+ [ \t]* \. [ \t]* [a-zA-Z_] \w*
+ )*
+ )
+ [ \t]+
+ import [ \t]+
+ (?P<ImportFromList> [^#;\n]+ )
+ )
+""", re.VERBOSE | re.DOTALL | re.MULTILINE).search
+
+_modules = {} # cache of modules we've seen
+
+# each Python class is represented by an instance of this class
+class Class:
+ '''Class to represent a Python class.'''
+ def __init__(self, module, name, super, file, lineno):
+ self.module = module
+ self.name = name
+ if super is None:
+ super = []
+ self.super = super
+ self.methods = {}
+ self.file = file
+ self.lineno = lineno
+
+ def _addmethod(self, name, lineno):
+ self.methods[name] = lineno
+
+class Function(Class):
+ '''Class to represent a top-level Python function'''
+ def __init__(self, module, name, file, lineno):
+ Class.__init__(self, module, name, None, file, lineno)
+ def _addmethod(self, name, lineno):
+ assert 0, "Function._addmethod() shouldn't be called"
+
+def readmodule(module, path=[], inpackage=0):
+ '''Backwards compatible interface.
+
+ Like readmodule_ex() but strips Function objects from the
+ resulting dictionary.'''
+
+ dict = readmodule_ex(module, path, inpackage)
+ res = {}
+ for key, value in dict.items():
+ if not isinstance(value, Function):
+ res[key] = value
+ return res
+
+def readmodule_ex(module, path=[], inpackage=0):
+ '''Read a module file and return a dictionary of classes.
+
+ Search for MODULE in PATH and sys.path, read and parse the
+ module and return a dictionary with one entry for each class
+ found in the module.'''
+
+ dict = {}
+
+ i = string.rfind(module, '.')
+ if i >= 0:
+ # Dotted module name
+ package = string.strip(module[:i])
+ submodule = string.strip(module[i+1:])
+ parent = readmodule(package, path, inpackage)
+ child = readmodule(submodule, parent['__path__'], 1)
+ return child
+
+ if _modules.has_key(module):
+ # we've seen this module before...
+ return _modules[module]
+ if module in sys.builtin_module_names:
+ # this is a built-in module
+ _modules[module] = dict
+ return dict
+
+ # search the path for the module
+ f = None
+ if inpackage:
+ try:
+ f, file, (suff, mode, type) = \
+ imp.find_module(module, path)
+ except ImportError:
+ f = None
+ if f is None:
+ fullpath = list(path) + sys.path
+ f, file, (suff, mode, type) = imp.find_module(module, fullpath)
+ if type == imp.PKG_DIRECTORY:
+ dict['__path__'] = [file]
+ _modules[module] = dict
+ path = [file] + path
+ f, file, (suff, mode, type) = \
+ imp.find_module('__init__', [file])
+ if type != imp.PY_SOURCE:
+ # not Python source, can't do anything with this module
+ f.close()
+ _modules[module] = dict
+ return dict
+
+ _modules[module] = dict
+ imports = []
+ classstack = [] # stack of (class, indent) pairs
+ src = f.read()
+ f.close()
+
+ # To avoid having to stop the regexp at each newline, instead
+ # when we need a line number we simply string.count the number of
+ # newlines in the string since the last time we did this; i.e.,
+ # lineno = lineno + \
+ # string.count(src, '\n', last_lineno_pos, here)
+ # last_lineno_pos = here
+ countnl = string.count
+ lineno, last_lineno_pos = 1, 0
+ i = 0
+ while 1:
+ m = _getnext(src, i)
+ if not m:
+ break
+ start, i = m.span()
+
+ if m.start("Method") >= 0:
+ # found a method definition or function
+ thisindent = _indent(m.group("MethodIndent"))
+ meth_name = m.group("MethodName")
+ lineno = lineno + \
+ countnl(src, '\n',
+ last_lineno_pos, start)
+ last_lineno_pos = start
+ # close all classes indented at least as much
+ while classstack and \
+ classstack[-1][1] >= thisindent:
+ del classstack[-1]
+ if classstack:
+ # it's a class method
+ cur_class = classstack[-1][0]
+ cur_class._addmethod(meth_name, lineno)
+ else:
+ # it's a function
+ f = Function(module, meth_name,
+ file, lineno)
+ dict[meth_name] = f
+
+ elif m.start("String") >= 0:
+ pass
+
+ elif m.start("Class") >= 0:
+ # we found a class definition
+ thisindent = _indent(m.group("ClassIndent"))
+ # close all classes indented at least as much
+ while classstack and \
+ classstack[-1][1] >= thisindent:
+ del classstack[-1]
+ lineno = lineno + \
+ countnl(src, '\n', last_lineno_pos, start)
+ last_lineno_pos = start
+ class_name = m.group("ClassName")
+ inherit = m.group("ClassSupers")
+ if inherit:
+ # the class inherits from other classes
+ inherit = string.strip(inherit[1:-1])
+ names = []
+ for n in string.splitfields(inherit, ','):
+ n = string.strip(n)
+ if dict.has_key(n):
+ # we know this super class
+ n = dict[n]
+ else:
+ c = string.splitfields(n, '.')
+ if len(c) > 1:
+ # super class
+ # is of the
+ # form module.class:
+ # look in
+ # module for class
+ m = c[-2]
+ c = c[-1]
+ if _modules.has_key(m):
+ d = _modules[m]
+ if d.has_key(c):
+ n = d[c]
+ names.append(n)
+ inherit = names
+ # remember this class
+ cur_class = Class(module, class_name, inherit,
+ file, lineno)
+ dict[class_name] = cur_class
+ classstack.append((cur_class, thisindent))
+
+ elif m.start("Import") >= 0:
+ # import module
+ for n in string.split(m.group("ImportList"), ','):
+ n = string.strip(n)
+ try:
+ # recursively read the imported module
+ d = readmodule(n, path, inpackage)
+ except:
+ ##print 'module', n, 'not found'
+ pass
+
+ elif m.start("ImportFrom") >= 0:
+ # from module import stuff
+ mod = m.group("ImportFromPath")
+ names = string.split(m.group("ImportFromList"), ',')
+ try:
+ # recursively read the imported module
+ d = readmodule(mod, path, inpackage)
+ except:
+ ##print 'module', mod, 'not found'
+ continue
+ # add any classes that were defined in the
+ # imported module to our name space if they
+ # were mentioned in the list
+ for n in names:
+ n = string.strip(n)
+ if d.has_key(n):
+ dict[n] = d[n]
+ elif n == '*':
+ # only add a name if not
+ # already there (to mimic what
+ # Python does internally)
+ # also don't add names that
+ # start with _
+ for n in d.keys():
+ if n[0] != '_' and \
+ not dict.has_key(n):
+ dict[n] = d[n]
+ else:
+ assert 0, "regexp _getnext found something unexpected"
+
+ return dict
+
+def _indent(ws, _expandtabs=string.expandtabs):
+ return len(_expandtabs(ws, TABWIDTH))