summary | refs | log | tree | commit | diff | stats
path: root/Lib/pyclbr.py
diff options
context:
space:
mode:
author: Guido van Rossum <guido@python.org> 1999-06-08 12:53:21 (GMT)
committer: Guido van Rossum <guido@python.org> 1999-06-08 12:53:21 (GMT)
commit: df9f7a3e525ced6f8771ff651c536574b93c0af6 (patch)
tree: c5e0999f0885f8c014dd03d37b3b4affb91ac55e /Lib/pyclbr.py
parent: 9a33707be7b52ca59accca6cdd09ee218e672f04 (diff)
download: cpython-df9f7a3e525ced6f8771ff651c536574b93c0af6.zip
cpython-df9f7a3e525ced6f8771ff651c536574b93c0af6.tar.gz
cpython-df9f7a3e525ced6f8771ff651c536574b93c0af6.tar.bz2
Tim Peters: Taught it more "real Python" rules without slowing it
appreciably. Triple-quoted strings no longer confuse it, nor nested classes or defs, nor comments starting in column 1. Chews thru Tkinter.py in < 3 seconds for me; doctest.py no longer confuses it; no longer missing methods in PyShell.py; etc. Also captures defs starting in column 1 now, but ignores them; an interface should be added so that IDLE's class browser can show the top-level functions too.
Diffstat (limited to 'Lib/pyclbr.py')
-rw-r--r-- Lib/pyclbr.py 100
1 files changed, 53 insertions, 47 deletions
diff --git a/Lib/pyclbr.py b/Lib/pyclbr.py
index ad20c99..709a07b 100644
--- a/Lib/pyclbr.py
+++ b/Lib/pyclbr.py
@@ -29,10 +29,14 @@ are recognized and imported modules are scanned as well, this
shouldn't happen often.
BUGS
-Continuation lines are not dealt with at all and strings may confuse
-the hell out of the parser, but it usually works.
-Nested classes are not recognized.
-Nested defs may be mistaken for class methods.''' # ' <-- bow to font lock
+Continuation lines are not dealt with at all.
+While triple-quoted strings won't confuse it, lines that look like
+def, class, import or "from ... import" stmts inside backslash-continued
+single-quoted strings are treated like code. The expense of stopping
+that isn't worth it.
+Code that doesn't pass tabnanny or python -t will confuse it, unless
+you set the module TABWIDTH vrbl (default 8) to the correct tab width
+for the file.''' # ' <-- bow to font lock
import os
import sys
@@ -40,39 +44,35 @@ import imp
import re
import string
+TABWIDTH = 8
+
_getnext = re.compile(r"""
-## String slows it down by more than a factor of 2 (not because the
-## string regexp is slow, but because there are often a lot of strings,
-## which means the regexp has to get called that many more times).
-## (?P<String>
-## " [^"\\\n]* (?: \\. [^"\\\n]* )* "
-##
-## | ' [^'\\\n]* (?: \\. [^'\\\n]* )* '
-##
-## | \""" [^"\\]* (?:
-## (?: \\. | "(?!"") )
-## [^"\\]*
-## )*
-## \"""
-##
-## | ''' [^'\\]* (?:
-## (?: \\. | '(?!'') )
-## [^'\\]*
-## )*
-## '''
-## )
-##
-##| (?P<Method>
- (?P<Method>
- # dicey trick: assume a def not at top level is a method
- ^ [ \t]+ def [ \t]+
+ (?P<String>
+ \""" [^"\\]* (?:
+ (?: \\. | "(?!"") )
+ [^"\\]*
+ )*
+ \"""
+
+ | ''' [^'\\]* (?:
+ (?: \\. | '(?!'') )
+ [^'\\]*
+ )*
+ '''
+ )
+
+| (?P<Method>
+ ^
+ (?P<MethodIndent> [ \t]* )
+ def [ \t]+
(?P<MethodName> [a-zA-Z_] \w* )
[ \t]* \(
)
| (?P<Class>
- # lightly questionable: assume only top-level classes count
- ^ class [ \t]+
+ ^
+ (?P<ClassIndent> [ \t]* )
+ class [ \t]+
(?P<ClassName> [a-zA-Z_] \w* )
[ \t]*
(?P<ClassSupers> \( [^)\n]* \) )?
@@ -96,11 +96,6 @@ _getnext = re.compile(r"""
import [ \t]+
(?P<ImportFromList> [^#;\n]+ )
)
-
-| (?P<AtTopLevel>
- # cheap trick: anything other than ws in first column
- ^ \S
- )
""", re.VERBOSE | re.DOTALL | re.MULTILINE).search
_modules = {} # cache of modules we've seen
@@ -169,10 +164,10 @@ def readmodule(module, path=[], inpackage=0):
_modules[module] = dict
return dict
- cur_class = None
dict = {}
_modules[module] = dict
imports = []
+ classstack = [] # stack of (class, indent) pairs
src = f.read()
f.close()
@@ -191,26 +186,33 @@ def readmodule(module, path=[], inpackage=0):
break
start, i = m.span()
- if m.start("AtTopLevel") >= 0:
- # end of class definition
- cur_class = None
-
-## elif m.start("String") >= 0:
-## pass
-
- elif m.start("Method") >= 0:
- # found a method definition
- if cur_class:
+ if m.start("Method") >= 0:
+ # found a method definition or function
+ thisindent = _indent(m.group("MethodIndent"))
+ # close all classes indented at least as much
+ while classstack and \
+ classstack[-1][1] >= thisindent:
+ del classstack[-1]
+ if classstack:
# and we know the class it belongs to
meth_name = m.group("MethodName")
lineno = lineno + \
countnl(src, '\n',
last_lineno_pos, start)
last_lineno_pos = start
+ cur_class = classstack[-1][0]
cur_class._addmethod(meth_name, lineno)
+ elif m.start("String") >= 0:
+ pass
+
elif m.start("Class") >= 0:
# we found a class definition
+ thisindent = _indent(m.group("ClassIndent"))
+ # close all classes indented at least as much
+ while classstack and \
+ classstack[-1][1] >= thisindent:
+ del classstack[-1]
lineno = lineno + \
countnl(src, '\n', last_lineno_pos, start)
last_lineno_pos = start
@@ -245,6 +247,7 @@ def readmodule(module, path=[], inpackage=0):
cur_class = Class(module, class_name, inherit,
file, lineno)
dict[class_name] = cur_class
+ classstack.append((cur_class, thisindent))
elif m.start("Import") >= 0:
# import module
@@ -287,3 +290,6 @@ def readmodule(module, path=[], inpackage=0):
assert 0, "regexp _getnext found something unexpected"
return dict
+
+def _indent(ws, _expandtabs=string.expandtabs):
+ return len(_expandtabs(ws, TABWIDTH))