author    Tim Peters <tim.peters@gmail.com>  2001-06-18 22:08:13 (GMT)
committer Tim Peters <tim.peters@gmail.com>  2001-06-18 22:08:13 (GMT)
commit    5ca576ed0a0c697c7e7547adfd0b3af010fd2053 (patch)
tree      0b0db361191363b3c168a6c105030f53e181d3e5 /Lib
parent    1dad6a86de55c38da5c356c2c6d81be8ff7884b1 (diff)
Merging the gen-branch into the main line, at Guido's direction. Yay!
Bugfix candidate in inspect.py: it was referencing "self" outside of a method.
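For context, the feature this merge enables: a function whose body contains the new yield statement compiles to a generator. A minimal sketch, not part of this patch (in the 2.2 line the statement must be switched on with a __future__ import; the function name is only illustrative):

    from __future__ import generators

    def firstn(n):
        # Yield 0, 1, ..., n-1 one value at a time.
        i = 0
        while i < n:
            yield i
            i = i + 1

    g = firstn(3)
    print g.next()   # 0
    print g.next()   # 1
    print g.next()   # 2; a further next() raises StopIteration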
Diffstat (limited to 'Lib')
-rw-r--r--   Lib/dis.py       1
-rw-r--r--   Lib/inspect.py  42
-rwxr-xr-x   Lib/tabnanny.py 84
-rw-r--r--   Lib/tokenize.py 35
4 files changed, 49 insertions(+), 113 deletions(-)
diff --git a/Lib/dis.py b/Lib/dis.py
index fb97369..48ca176 100644
--- a/Lib/dis.py
+++ b/Lib/dis.py
@@ -223,6 +223,7 @@ def_op('LOAD_LOCALS', 82)
def_op('RETURN_VALUE', 83)
def_op('IMPORT_STAR', 84)
def_op('EXEC_STMT', 85)
+def_op('YIELD_STMT', 86)
def_op('POP_BLOCK', 87)
def_op('END_FINALLY', 88)
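The one-line dis.py change just names the new opcode the compiler emits for a yield statement. A hedged sketch of spotting it in a disassembly (this assumes an interpreter carrying this patch, and the exact listing format will vary):

    from __future__ import generators
    import dis

    def gen():
        yield 1

    dis.dis(gen)   # the listing should include YIELD_STMT for the yield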
diff --git a/Lib/inspect.py b/Lib/inspect.py
index 2d88bc1..eeb54d2 100644
--- a/Lib/inspect.py
+++ b/Lib/inspect.py
@@ -349,32 +349,28 @@ class ListReader:
return self.lines[i]
else: return ''
-class EndOfBlock(Exception): pass
-
-class BlockFinder:
- """Provide a tokeneater() method to detect the end of a code block."""
- def __init__(self):
- self.indent = 0
- self.started = 0
- self.last = 0
-
- def tokeneater(self, type, token, (srow, scol), (erow, ecol), line):
- if not self.started:
- if type == tokenize.NAME: self.started = 1
+def getblock(lines):
+ """Extract the block of code at the top of the given list of lines."""
+
+ indent = 0
+ started = 0
+ last = 0
+ tokens = tokenize.generate_tokens(ListReader(lines).readline)
+
+ for (type, token, (srow, scol), (erow, ecol), line) in tokens:
+ if not started:
+ if type == tokenize.NAME:
+ started = 1
elif type == tokenize.NEWLINE:
- self.last = srow
+ last = srow
elif type == tokenize.INDENT:
- self.indent = self.indent + 1
+ indent = indent + 1
elif type == tokenize.DEDENT:
- self.indent = self.indent - 1
- if self.indent == 0: raise EndOfBlock, self.last
-
-def getblock(lines):
- """Extract the block of code at the top of the given list of lines."""
- try:
- tokenize.tokenize(ListReader(lines).readline, BlockFinder().tokeneater)
- except EndOfBlock, eob:
- return lines[:eob.args[0]]
+ indent = indent - 1
+ if indent == 0:
+ return lines[:last]
+ else:
+ raise ValueError, "unable to find block"
def getsourcelines(object):
"""Return a list of source lines and starting line number for an object.
diff --git a/Lib/tabnanny.py b/Lib/tabnanny.py
index 30f2e4b..8323a33 100755
--- a/Lib/tabnanny.py
+++ b/Lib/tabnanny.py
@@ -77,9 +77,8 @@ def check(file):
if verbose > 1:
print "checking", `file`, "..."
- reset_globals()
try:
- tokenize.tokenize(f.readline, tokeneater)
+ process_tokens(tokenize.generate_tokens(f.readline))
except tokenize.TokenError, msg:
errprint("%s: Token Error: %s" % (`file`, str(msg)))
@@ -244,28 +243,19 @@ def format_witnesses(w):
prefix = prefix + "s"
return prefix + " " + string.join(firsts, ', ')
-# The collection of globals, the reset_globals() function, and the
-# tokeneater() function, depend on which version of tokenize is
-# in use.
+# Need Guido's enhancement
+assert hasattr(tokenize, 'NL'), "tokenize module too old"
-if hasattr(tokenize, 'NL'):
- # take advantage of Guido's patch!
-
- indents = []
- check_equal = 0
-
- def reset_globals():
- global indents, check_equal
- check_equal = 0
- indents = [Whitespace("")]
-
- def tokeneater(type, token, start, end, line,
+def process_tokens(tokens,
INDENT=tokenize.INDENT,
DEDENT=tokenize.DEDENT,
NEWLINE=tokenize.NEWLINE,
- JUNK=(tokenize.COMMENT, tokenize.NL) ):
- global indents, check_equal
+ JUNK=(tokenize.COMMENT, tokenize.NL)):
+ indents = [Whitespace("")]
+ check_equal = 0
+
+ for (type, token, start, end, line) in tokens:
if type == NEWLINE:
# a program statement, or ENDMARKER, will eventually follow,
# after some (possibly empty) run of tokens of the form
@@ -311,62 +301,6 @@ if hasattr(tokenize, 'NL'):
msg = "indent not equal e.g. " + format_witnesses(witness)
raise NannyNag(start[0], msg, line)
-else:
- # unpatched version of tokenize
-
- nesting_level = 0
- indents = []
- check_equal = 0
-
- def reset_globals():
- global nesting_level, indents, check_equal
- nesting_level = check_equal = 0
- indents = [Whitespace("")]
-
- def tokeneater(type, token, start, end, line,
- INDENT=tokenize.INDENT,
- DEDENT=tokenize.DEDENT,
- NEWLINE=tokenize.NEWLINE,
- COMMENT=tokenize.COMMENT,
- OP=tokenize.OP):
- global nesting_level, indents, check_equal
-
- if type == INDENT:
- check_equal = 0
- thisguy = Whitespace(token)
- if not indents[-1].less(thisguy):
- witness = indents[-1].not_less_witness(thisguy)
- msg = "indent not greater e.g. " + format_witnesses(witness)
- raise NannyNag(start[0], msg, line)
- indents.append(thisguy)
-
- elif type == DEDENT:
- del indents[-1]
-
- elif type == NEWLINE:
- if nesting_level == 0:
- check_equal = 1
-
- elif type == COMMENT:
- pass
-
- elif check_equal:
- check_equal = 0
- thisguy = Whitespace(line)
- if not indents[-1].equal(thisguy):
- witness = indents[-1].not_equal_witness(thisguy)
- msg = "indent not equal e.g. " + format_witnesses(witness)
- raise NannyNag(start[0], msg, line)
-
- if type == OP and token in ('{', '[', '('):
- nesting_level = nesting_level + 1
-
- elif type == OP and token in ('}', ']', ')'):
- if nesting_level == 0:
- raise NannyNag(start[0],
- "unbalanced bracket '" + token + "'",
- line)
- nesting_level = nesting_level - 1
if __name__ == '__main__':
main()
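process_tokens() now keeps its state in locals and consumes the token stream in an ordinary for loop, which is what lets the reset_globals() machinery and the fallback branch for the pre-NL tokenize disappear. check() drives it internally; a sketch of calling it by hand (the filename is illustrative, and the NannyNag accessors are the ones tabnanny defines):

    import tokenize, tabnanny

    f = open('suspect.py')
    try:
        tabnanny.process_tokens(tokenize.generate_tokens(f.readline))
    except tabnanny.NannyNag, nag:
        print "line", nag.get_lineno(), ":", nag.get_msg()
    f.close()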
diff --git a/Lib/tokenize.py b/Lib/tokenize.py
index 2af595d..b3ee4a8 100644
--- a/Lib/tokenize.py
+++ b/Lib/tokenize.py
@@ -111,7 +111,12 @@ def tokenize(readline, tokeneater=printtoken):
except StopTokenizing:
pass
+# backwards compatible interface, probably not used
def tokenize_loop(readline, tokeneater):
+ for token_info in generate_tokens(readline):
+ apply(tokeneater, token_info)
+
+def generate_tokens(readline):
lnum = parenlev = continued = 0
namechars, numchars = string.letters + '_', string.digits
contstr, needcont = '', 0
@@ -129,12 +134,12 @@ def tokenize_loop(readline, tokeneater):
endmatch = endprog.match(line)
if endmatch:
pos = end = endmatch.end(0)
- tokeneater(STRING, contstr + line[:end],
+ yield (STRING, contstr + line[:end],
strstart, (lnum, end), contline + line)
contstr, needcont = '', 0
contline = None
elif needcont and line[-2:] != '\\\n' and line[-3:] != '\\\r\n':
- tokeneater(ERRORTOKEN, contstr + line,
+ yield (ERRORTOKEN, contstr + line,
strstart, (lnum, len(line)), contline)
contstr = ''
contline = None
@@ -156,16 +161,16 @@ def tokenize_loop(readline, tokeneater):
if pos == max: break
if line[pos] in '#\r\n': # skip comments or blank lines
- tokeneater((NL, COMMENT)[line[pos] == '#'], line[pos:],
+ yield ((NL, COMMENT)[line[pos] == '#'], line[pos:],
(lnum, pos), (lnum, len(line)), line)
continue
if column > indents[-1]: # count indents or dedents
indents.append(column)
- tokeneater(INDENT, line[:pos], (lnum, 0), (lnum, pos), line)
+ yield (INDENT, line[:pos], (lnum, 0), (lnum, pos), line)
while column < indents[-1]:
indents = indents[:-1]
- tokeneater(DEDENT, '', (lnum, pos), (lnum, pos), line)
+ yield (DEDENT, '', (lnum, pos), (lnum, pos), line)
else: # continued statement
if not line:
@@ -181,12 +186,12 @@ def tokenize_loop(readline, tokeneater):
if initial in numchars or \
(initial == '.' and token != '.'): # ordinary number
- tokeneater(NUMBER, token, spos, epos, line)
+ yield (NUMBER, token, spos, epos, line)
elif initial in '\r\n':
- tokeneater(parenlev > 0 and NL or NEWLINE,
+ yield (parenlev > 0 and NL or NEWLINE,
token, spos, epos, line)
elif initial == '#':
- tokeneater(COMMENT, token, spos, epos, line)
+ yield (COMMENT, token, spos, epos, line)
elif token in ("'''", '"""', # triple-quoted
"r'''", 'r"""', "R'''", 'R"""',
"u'''", 'u"""', "U'''", 'U"""',
@@ -197,7 +202,7 @@ def tokenize_loop(readline, tokeneater):
if endmatch: # all on one line
pos = endmatch.end(0)
token = line[start:pos]
- tokeneater(STRING, token, spos, (lnum, pos), line)
+ yield (STRING, token, spos, (lnum, pos), line)
else:
strstart = (lnum, start) # multiple lines
contstr = line[start:]
@@ -216,23 +221,23 @@ def tokenize_loop(readline, tokeneater):
contline = line
break
else: # ordinary string
- tokeneater(STRING, token, spos, epos, line)
+ yield (STRING, token, spos, epos, line)
elif initial in namechars: # ordinary name
- tokeneater(NAME, token, spos, epos, line)
+ yield (NAME, token, spos, epos, line)
elif initial == '\\': # continued stmt
continued = 1
else:
if initial in '([{': parenlev = parenlev + 1
elif initial in ')]}': parenlev = parenlev - 1
- tokeneater(OP, token, spos, epos, line)
+ yield (OP, token, spos, epos, line)
else:
- tokeneater(ERRORTOKEN, line[pos],
+ yield (ERRORTOKEN, line[pos],
(lnum, pos), (lnum, pos+1), line)
pos = pos + 1
for indent in indents[1:]: # pop remaining indent levels
- tokeneater(DEDENT, '', (lnum, 0), (lnum, 0), '')
- tokeneater(ENDMARKER, '', (lnum, 0), (lnum, 0), '')
+ yield (DEDENT, '', (lnum, 0), (lnum, 0), '')
+ yield (ENDMARKER, '', (lnum, 0), (lnum, 0), '')
if __name__ == '__main__': # testing
import sys
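The tokenize change is the heart of the patch: every tokeneater(...) call becomes a yield, turning the tokenizer itself into a generator, while tokenize_loop() shrinks to a wrapper that replays the stream into the old callback for backward compatibility. A minimal sketch of consuming the new interface directly (StringIO stands in for a file):

    import tokenize
    from StringIO import StringIO

    source = "x = 1\n"
    readline = StringIO(source).readline
    for type, token, start, end, line in tokenize.generate_tokens(readline):
        print tokenize.tok_name[type], `token`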