summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Lib/pyclbr.py231
1 files changed, 149 insertions, 82 deletions
diff --git a/Lib/pyclbr.py b/Lib/pyclbr.py
index 75b6a93..ad20c99 100644
--- a/Lib/pyclbr.py
+++ b/Lib/pyclbr.py
@@ -30,7 +30,9 @@ shouldn't happen often.
BUGS
Continuation lines are not dealt with at all and strings may confuse
-the hell out of the parser, but it usually works.''' # ' <-- bow to font lock
+the hell out of the parser, but it usually works.
+Nested classes are not recognized.
+Nested defs may be mistaken for class methods.''' # ' <-- bow to font lock
import os
import sys
@@ -38,16 +40,70 @@ import imp
import re
import string
-id = '[A-Za-z_][A-Za-z0-9_]*' # match identifier
-blank_line = re.compile('^[ \t]*($|#)')
-is_class = re.compile('^class[ \t]+(?P<id>'+id+')[ \t]*(?P<sup>\([^)]*\))?[ \t]*:')
-is_method = re.compile('^[ \t]+def[ \t]+(?P<id>'+id+')[ \t]*\(')
-is_import = re.compile('^import[ \t]*(?P<imp>[^#;]+)')
-is_from = re.compile('^from[ \t]+(?P<module>'+id+'([ \t]*\\.[ \t]*'+id+')*)[ \t]+import[ \t]+(?P<imp>[^#;]+)')
-dedent = re.compile('^[^ \t]')
-indent = re.compile('^[^ \t]*')
+_getnext = re.compile(r"""
+## String slows it down by more than a factor of 2 (not because the
+## string regexp is slow, but because there are often a lot of strings,
+## which means the regexp has to get called that many more times).
+## (?P<String>
+## " [^"\\\n]* (?: \\. [^"\\\n]* )* "
+##
+## | ' [^'\\\n]* (?: \\. [^'\\\n]* )* '
+##
+## | \""" [^"\\]* (?:
+## (?: \\. | "(?!"") )
+## [^"\\]*
+## )*
+## \"""
+##
+## | ''' [^'\\]* (?:
+## (?: \\. | '(?!'') )
+## [^'\\]*
+## )*
+## '''
+## )
+##
+##| (?P<Method>
+ (?P<Method>
+ # dicey trick: assume a def not at top level is a method
+ ^ [ \t]+ def [ \t]+
+ (?P<MethodName> [a-zA-Z_] \w* )
+ [ \t]* \(
+ )
-_modules = {} # cache of modules we've seen
+| (?P<Class>
+ # lightly questionable: assume only top-level classes count
+ ^ class [ \t]+
+ (?P<ClassName> [a-zA-Z_] \w* )
+ [ \t]*
+ (?P<ClassSupers> \( [^)\n]* \) )?
+ [ \t]* :
+ )
+
+| (?P<Import>
+ ^ import [ \t]+
+ (?P<ImportList> [^#;\n]+ )
+ )
+
+| (?P<ImportFrom>
+ ^ from [ \t]+
+ (?P<ImportFromPath>
+ [a-zA-Z_] \w*
+ (?:
+ [ \t]* \. [ \t]* [a-zA-Z_] \w*
+ )*
+ )
+ [ \t]+
+ import [ \t]+
+ (?P<ImportFromList> [^#;\n]+ )
+ )
+
+| (?P<AtTopLevel>
+ # cheap trick: anything other than ws in first column
+ ^ \S
+ )
+""", re.VERBOSE | re.DOTALL | re.MULTILINE).search
+
+_modules = {} # cache of modules we've seen
# each Python class is represented by an instance of this class
class Class:
@@ -117,66 +173,49 @@ def readmodule(module, path=[], inpackage=0):
dict = {}
_modules[module] = dict
imports = []
- lineno = 0
+ src = f.read()
+ f.close()
+
+ # To avoid having to stop the regexp at each newline, instead
+ # when we need a line number we simply string.count the number of
+ # newlines in the string since the last time we did this; i.e.,
+ # lineno = lineno + \
+ # string.count(src, '\n', last_lineno_pos, here)
+ # last_lineno_pos = here
+ countnl = string.count
+ lineno, last_lineno_pos = 1, 0
+ i = 0
while 1:
- line = f.readline()
- if not line:
+ m = _getnext(src, i)
+ if not m:
break
- lineno = lineno + 1 # count lines
- line = line[:-1] # remove line feed
- if blank_line.match(line):
- # ignore blank (and comment only) lines
- continue
-## res = indent.match(line)
-## if res:
-## indentation = len(string.expandtabs(res.group(0), 8))
- res = is_import.match(line)
- if res:
- # import module
- for n in string.splitfields(res.group('imp'), ','):
- n = string.strip(n)
- try:
- # recursively read the
- # imported module
- d = readmodule(n, path, inpackage)
- except:
- print 'module',n,'not found'
- pass
- continue
- res = is_from.match(line)
- if res:
- # from module import stuff
- mod = res.group('module')
- names = string.splitfields(res.group('imp'), ',')
- try:
- # recursively read the imported module
- d = readmodule(mod, path, inpackage)
- except:
- print 'module',mod,'not found'
- continue
- # add any classes that were defined in the
- # imported module to our name space if they
- # were mentioned in the list
- for n in names:
- n = string.strip(n)
- if d.has_key(n):
- dict[n] = d[n]
- elif n == '*':
- # only add a name if not
- # already there (to mimic what
- # Python does internally)
- # also don't add names that
- # start with _
- for n in d.keys():
- if n[0] != '_' and \
- not dict.has_key(n):
- dict[n] = d[n]
- continue
- res = is_class.match(line)
- if res:
+ start, i = m.span()
+
+ if m.start("AtTopLevel") >= 0:
+ # end of class definition
+ cur_class = None
+
+## elif m.start("String") >= 0:
+## pass
+
+ elif m.start("Method") >= 0:
+ # found a method definition
+ if cur_class:
+ # and we know the class it belongs to
+ meth_name = m.group("MethodName")
+ lineno = lineno + \
+ countnl(src, '\n',
+ last_lineno_pos, start)
+ last_lineno_pos = start
+ cur_class._addmethod(meth_name, lineno)
+
+ elif m.start("Class") >= 0:
# we found a class definition
- class_name = res.group('id')
- inherit = res.group('sup')
+ lineno = lineno + \
+ countnl(src, '\n', last_lineno_pos, start)
+ last_lineno_pos = start
+ class_name = m.group("ClassName")
+ inherit = m.group("ClassSupers")
if inherit:
# the class inherits from other classes
inherit = string.strip(inherit[1:-1])
@@ -203,20 +242,48 @@ def readmodule(module, path=[], inpackage=0):
names.append(n)
inherit = names
# remember this class
- cur_class = Class(module, class_name, inherit, file, lineno)
+ cur_class = Class(module, class_name, inherit,
+ file, lineno)
dict[class_name] = cur_class
- continue
- res = is_method.match(line)
- if res:
- # found a method definition
- if cur_class:
- # and we know the class it belongs to
- meth_name = res.group('id')
- cur_class._addmethod(meth_name, lineno)
- continue
- if dedent.match(line):
- # end of class definition
- cur_class = None
- f.close()
- return dict
+ elif m.start("Import") >= 0:
+ # import module
+ for n in string.split(m.group("ImportList"), ','):
+ n = string.strip(n)
+ try:
+ # recursively read the imported module
+ d = readmodule(n, path, inpackage)
+ except:
+ print 'module', n, 'not found'
+
+ elif m.start("ImportFrom") >= 0:
+ # from module import stuff
+ mod = m.group("ImportFromPath")
+ names = string.split(m.group("ImportFromList"), ',')
+ try:
+ # recursively read the imported module
+ d = readmodule(mod, path, inpackage)
+ except:
+ print 'module', mod, 'not found'
+ continue
+ # add any classes that were defined in the
+ # imported module to our name space if they
+ # were mentioned in the list
+ for n in names:
+ n = string.strip(n)
+ if d.has_key(n):
+ dict[n] = d[n]
+ elif n == '*':
+ # only add a name if not
+ # already there (to mimic what
+ # Python does internally)
+ # also don't add names that
+ # start with _
+ for n in d.keys():
+ if n[0] != '_' and \
+ not dict.has_key(n):
+ dict[n] = d[n]
+ else:
+ assert 0, "regexp _getnext found something unexpected"
+
+ return dict