author    Tim Peters <tim.peters@gmail.com>  2001-06-18 22:08:13 (GMT)
committer Tim Peters <tim.peters@gmail.com>  2001-06-18 22:08:13 (GMT)
commit    5ca576ed0a0c697c7e7547adfd0b3af010fd2053 (patch)
tree      0b0db361191363b3c168a6c105030f53e181d3e5 /Lib
parent    1dad6a86de55c38da5c356c2c6d81be8ff7884b1 (diff)
Merging the gen-branch into the main line, at Guido's direction. Yay!
Bugfix candidate in inspect.py: it was referencing "self" outside of a method.
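For context, the feature this merge enables: a function whose body contains the new yield statement compiles to a generator. A minimal sketch, not part of this patch (in the 2.2 line the statement must be switched on with a __future__ import; the function name is only illustrative):

    from __future__ import generators

    def firstn(n):
        # Yield 0, 1, ..., n-1 one value at a time.
        i = 0
        while i < n:
            yield i
            i = i + 1

    g = firstn(3)
    print g.next()   # 0
    print g.next()   # 1
    print g.next()   # 2; a further next() raises StopIteration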
Diffstat (limited to 'Lib')
-rw-r--r--   Lib/dis.py       1
-rw-r--r--   Lib/inspect.py  42
-rwxr-xr-x   Lib/tabnanny.py 84
-rw-r--r--   Lib/tokenize.py 35
4 files changed, 49 insertions(+), 113 deletions(-)
diff --git a/Lib/dis.py b/Lib/dis.py
index fb97369..48ca176 100644
--- a/Lib/dis.py
+++ b/Lib/dis.py
@@ -223,6 +223,7 @@ def_op('LOAD_LOCALS', 82)
def_op('RETURN_VALUE', 83)
def_op('IMPORT_STAR', 84)
def_op('EXEC_STMT', 85)
+def_op('YIELD_STMT', 86)
def_op('POP_BLOCK', 87)
def_op('END_FINALLY', 88)
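The one-line dis.py change just names the new opcode the compiler emits for a yield statement. A hedged sketch of spotting it in a disassembly (this assumes an interpreter carrying this patch, and the exact listing format will vary):

    from __future__ import generators
    import dis

    def gen():
        yield 1

    dis.dis(gen)   # the listing should include YIELD_STMT for the yield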
diff --git a/Lib/inspect.py b/Lib/inspect.py
index 2d88bc1..eeb54d2 100644
--- a/Lib/inspect.py
+++ b/Lib/inspect.py
@@ -349,32 +349,28 @@ class ListReader:
return self.lines[i]
else: return ''
-class EndOfBlock(Exception): pass
-
-class BlockFinder:
- """Provide a tokeneater() method to detect the end of a code block."""
- def __init__(self):
- self.indent = 0
- self.started = 0
- self.last = 0
-
- def tokeneater(self, type, token, (srow, scol), (erow, ecol), line):
- if not self.started:
- if type == tokenize.NAME: self.started = 1
+def getblock(lines):
+ """Extract the block of code at the top of the given list of lines."""
+
+ indent = 0
+ started = 0
+ last = 0
+ tokens = tokenize.generate_tokens(ListReader(lines).readline)
+
+ for (type, token, (srow, scol), (erow, ecol), line) in tokens:
+ if not started:
+ if type == tokenize.NAME:
+ started = 1
elif type == tokenize.NEWLINE:
- self.last = srow
+ last = srow
elif type == tokenize.INDENT:
- self.indent = self.indent + 1
+ indent = indent + 1
elif type == tokenize.DEDENT:
- self.indent = self.indent - 1
- if self.indent == 0: raise EndOfBlock, self.last
-
-def getblock(lines):
- """Extract the block of code at the top of the given list of lines."""
- try:
- tokenize.tokenize(ListReader(lines).readline, BlockFinder().tokeneater)
- except EndOfBlock, eob:
- return lines[:eob.args[0]]
+ indent = indent - 1
+ if indent == 0:
+ return lines[:last]
+ else:
+ raise ValueError, "unable to find block"
def getsourcelines(object):
"""Return a list of source lines and starting line number for an object.
diff --git a/Lib/tabnanny.py b/Lib/tabnanny.py
index 30f2e4b..8323a33 100755
--- a/Lib/tabnanny.py
+++ b/Lib/tabnanny.py
@@ -77,9 +77,8 @@ def check(file):
if verbose > 1:
print "checking", `file`, "..."
- reset_globals()
try:
- tokenize.tokenize(f.readline, tokeneater)
+ process_tokens(tokenize.generate_tokens(f.readline))
except tokenize.TokenError, msg:
errprint("%s: Token Error: %s" % (`file`, str(msg)))
@@ -244,28 +243,19 @@ def format_witnesses(w):
prefix = prefix + "s"
return prefix + " " + string.join(firsts, ', ')
-# The collection of globals, the reset_globals() function, and the
-# tokeneater() function, depend on which version of tokenize is
-# in use.
+# Need Guido's enhancement
+assert hasattr(tokenize, 'NL'), "tokenize module too old"
-if hasattr(tokenize, 'NL'):
- # take advantage of Guido's patch!
-
- indents = []
- check_equal = 0
-
- def reset_globals():
- global indents, check_equal
- check_equal = 0
- indents = [Whitespace("")]
-
- def tokeneater(type, token, start, end, line,
+def process_tokens(tokens,
INDENT=tokenize.INDENT,
DEDENT=tokenize.DEDENT,
NEWLINE=tokenize.NEWLINE,
- JUNK=(tokenize.COMMENT, tokenize.NL) ):
- global indents, check_equal
+ JUNK=(tokenize.COMMENT, tokenize.NL)):
+ indents = [Whitespace("")]
+ check_equal = 0
+
+ for (type, token, start, end, line) in tokens:
if type == NEWLINE:
# a program statement, or ENDMARKER, will eventually follow,
# after some (possibly empty) run of tokens of the form
@@ -311,62 +301,6 @@ if hasattr(tokenize, 'NL'):
msg = "indent not equal e.g. " + format_witnesses(witness)
raise NannyNag(start[0], msg, line)
-else:
- # unpatched version of tokenize
-
- nesting_level = 0
- indents = []
- check_equal = 0
-
- def reset_globals():
- global nesting_level, indents, check_equal
- nesting_level = check_equal = 0
- indents = [Whitespace("")]
-
- def tokeneater(type, token, start, end, line,
- INDENT=tokenize.INDENT,
- DEDENT=tokenize.DEDENT,
- NEWLINE=tokenize.NEWLINE,
- COMMENT=tokenize.COMMENT,
- OP=tokenize.OP):
- global nesting_level, indents, check_equal
-
- if type == INDENT:
- check_equal = 0
- thisguy = Whitespace(token)
- if not indents[-1].less(thisguy):
- witness = indents[-1].not_less_witness(thisguy)
- msg = "indent not greater e.g. " + format_witnesses(witness)
- raise NannyNag(start[0], msg, line)
- indents.append(thisguy)
-
- elif type == DEDENT:
- del indents[-1]
-
- elif type == NEWLINE:
- if nesting_level == 0:
- check_equal = 1
-
- elif type == COMMENT:
- pass
-
- elif check_equal:
- check_equal = 0
- thisguy = Whitespace(line)
- if not indents[-1].equal(thisguy):
- witness = indents[-1].not_equal_witness(thisguy)
- msg = "indent not equal e.g. " + format_witnesses(witness)
- raise NannyNag(start[0], msg, line)
-
- if type == OP and token in ('{', '[', '('):
- nesting_level = nesting_level + 1
-
- elif type == OP and token in ('}', ']', ')'):
- if nesting_level == 0:
- raise NannyNag(start[0],
- "unbalanced bracket '" + token + "'",
- line)
- nesting_level = nesting_level - 1
if __name__ == '__main__':
main()
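process_tokens() now keeps its state in locals and consumes the token stream in an ordinary for loop, which is what lets the reset_globals() machinery and the fallback branch for the pre-NL tokenize disappear. check() drives it internally; a sketch of calling it by hand (the filename is illustrative, and the NannyNag accessors are the ones tabnanny defines):

    import tokenize, tabnanny

    f = open('suspect.py')
    try:
        tabnanny.process_tokens(tokenize.generate_tokens(f.readline))
    except tabnanny.NannyNag, nag:
        print "line", nag.get_lineno(), ":", nag.get_msg()
    f.close()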
diff --git a/Lib/tokenize.py b/Lib/tokenize.py
index 2af595d..b3ee4a8 100644
--- a/Lib/tokenize.py
+++ b/Lib/tokenize.py
@@ -111,7 +111,12 @@ def tokenize(readline, tokeneater=printtoken):
except StopTokenizing:
pass
+# backwards compatible interface, probably not used
def tokenize_loop(readline, tokeneater):
+ for token_info in generate_tokens(readline):
+ apply(tokeneater, token_info)
+
+def generate_tokens(readline):
lnum = parenlev = continued = 0
namechars, numchars = string.letters + '_', string.digits
contstr, needcont = '', 0
@@ -129,12 +134,12 @@ def tokenize_loop(readline, tokeneater):
endmatch = endprog.match(line)
if endmatch:
pos = end = endmatch.end(0)
- tokeneater(STRING, contstr + line[:end],
+ yield (STRING, contstr + line[:end],
strstart, (lnum, end), contline + line)
contstr, needcont = '', 0
contline = None
elif needcont and line[-2:] != '\\\n' and line[-3:] != '\\\r\n':
- tokeneater(ERRORTOKEN, contstr + line,
+ yield (ERRORTOKEN, contstr + line,
strstart, (lnum, len(line)), contline)
contstr = ''
contline = None
@@ -156,16 +161,16 @@ def tokenize_loop(readline, tokeneater):
if pos == max: break
if line[pos] in '#\r\n': # skip comments or blank lines
- tokeneater((NL, COMMENT)[line[pos] == '#'], line[pos:],
+ yield ((NL, COMMENT)[line[pos] == '#'], line[pos:],
(lnum, pos), (lnum, len(line)), line)
continue
if column > indents[-1]: # count indents or dedents
indents.append(column)
- tokeneater(INDENT, line[:pos], (lnum, 0), (lnum, pos), line)
+ yield (INDENT, line[:pos], (lnum, 0), (lnum, pos), line)
while column < indents[-1]:
indents = indents[:-1]
- tokeneater(DEDENT, '', (lnum, pos), (lnum, pos), line)
+ yield (DEDENT, '', (lnum, pos), (lnum, pos), line)
else: # continued statement
if not line:
@@ -181,12 +186,12 @@ def tokenize_loop(readline, tokeneater):
if initial in numchars or \
(initial == '.' and token != '.'): # ordinary number
- tokeneater(NUMBER, token, spos, epos, line)
+ yield (NUMBER, token, spos, epos, line)
elif initial in '\r\n':
- tokeneater(parenlev > 0 and NL or NEWLINE,
+ yield (parenlev > 0 and NL or NEWLINE,
token, spos, epos, line)
elif initial == '#':
- tokeneater(COMMENT, token, spos, epos, line)
+ yield (COMMENT, token, spos, epos, line)
elif token in ("'''", '"""', # triple-quoted
"r'''", 'r"""', "R'''", 'R"""',
"u'''", 'u"""', "U'''", 'U"""',
@@ -197,7 +202,7 @@ def tokenize_loop(readline, tokeneater):
if endmatch: # all on one line
pos = endmatch.end(0)
token = line[start:pos]
- tokeneater(STRING, token, spos, (lnum, pos), line)
+ yield (STRING, token, spos, (lnum, pos), line)
else:
strstart = (lnum, start) # multiple lines
contstr = line[start:]
@@ -216,23 +221,23 @@ def tokenize_loop(readline, tokeneater):
contline = line
break
else: # ordinary string
- tokeneater(STRING, token, spos, epos, line)
+ yield (STRING, token, spos, epos, line)
elif initial in namechars: # ordinary name
- tokeneater(NAME, token, spos, epos, line)
+ yield (NAME, token, spos, epos, line)
elif initial == '\\': # continued stmt
continued = 1
else:
if initial in '([{': parenlev = parenlev + 1
elif initial in ')]}': parenlev = parenlev - 1
- tokeneater(OP, token, spos, epos, line)
+ yield (OP, token, spos, epos, line)
else:
- tokeneater(ERRORTOKEN, line[pos],
+ yield (ERRORTOKEN, line[pos],
(lnum, pos), (lnum, pos+1), line)
pos = pos + 1
for indent in indents[1:]: # pop remaining indent levels
- tokeneater(DEDENT, '', (lnum, 0), (lnum, 0), '')
- tokeneater(ENDMARKER, '', (lnum, 0), (lnum, 0), '')
+ yield (DEDENT, '', (lnum, 0), (lnum, 0), '')
+ yield (ENDMARKER, '', (lnum, 0), (lnum, 0), '')
if __name__ == '__main__': # testing
import sys
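The tokenize change is the heart of the patch: every tokeneater(...) call becomes a yield, turning the tokenizer itself into a generator, while tokenize_loop() shrinks to a wrapper that replays the stream into the old callback for backward compatibility. A minimal sketch of consuming the new interface directly (StringIO stands in for a file):

    import tokenize
    from StringIO import StringIO

    source = "x = 1\n"
    readline = StringIO(source).readline
    for type, token, start, end, line in tokenize.generate_tokens(readline):
        print tokenize.tok_name[type], `token`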