diff options
Diffstat (limited to 'Lib')
-rwxr-xr-x | Lib/test/regrtest.py | 4 | ||||
-rw-r--r-- | Lib/test/test_tokenize.py | 76 | ||||
-rw-r--r-- | Lib/tokenize.py | 52 |
3 files changed, 125 insertions, 7 deletions
diff --git a/Lib/test/regrtest.py b/Lib/test/regrtest.py index 6160b3d..85e784b 100755 --- a/Lib/test/regrtest.py +++ b/Lib/test/regrtest.py @@ -91,7 +91,9 @@ resources to test. Currently only the following are defined: compiler - Test the compiler package by compiling all the source in the standard library and test suite. This takes - a long time. + a long time. Enabling this resource also allows + test_tokenize to verify round-trip lexing on every + file in the test library. subprocess Run all tests for the subprocess module. diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py index d217404..2ce435f 100644 --- a/Lib/test/test_tokenize.py +++ b/Lib/test/test_tokenize.py @@ -1,12 +1,82 @@ -from test.test_support import verbose, findfile -import tokenize, os, sys +from test.test_support import verbose, findfile, is_resource_enabled +import os, glob, random +from tokenize import (tokenize, generate_tokens, untokenize, + NUMBER, NAME, OP, STRING) if verbose: print 'starting...' 
f = file(findfile('tokenize_tests' + os.extsep + 'txt')) -tokenize.tokenize(f.readline) +tokenize(f.readline) f.close() + + +###### Test roundtrip for untokenize ########################## + +def test_roundtrip(f): + ## print 'Testing:', f + f = file(f) + try: + fulltok = list(generate_tokens(f.readline)) + finally: + f.close() + + t1 = [tok[:2] for tok in fulltok] + newtext = untokenize(t1) + readline = iter(newtext.splitlines(1)).next + t2 = [tok[:2] for tok in generate_tokens(readline)] + assert t1 == t2 + + +f = findfile('tokenize_tests' + os.extsep + 'txt') +test_roundtrip(f) + +testdir = os.path.dirname(f) or os.curdir +testfiles = glob.glob(testdir + os.sep + 'test*.py') +if not is_resource_enabled('compiler'): + testfiles = random.sample(testfiles, 10) + +for f in testfiles: + test_roundtrip(f) + + + +###### Test example in the docs ############################### + +from decimal import Decimal +from cStringIO import StringIO + +def decistmt(s): + """Substitute Decimals for floats in a string of statements. + + >>> from decimal import Decimal + >>> s = 'print +21.3e-5*-.1234/81.7' + >>> decistmt(s) + "print +Decimal ('21.3e-5')*-Decimal ('.1234')/Decimal ('81.7')" + + >>> exec(s) + -3.21716034272e-007 + >>> exec(decistmt(s)) + -3.217160342717258261933904529E-7 + + """ + result = [] + g = generate_tokens(StringIO(s).readline) # tokenize the string + for toknum, tokval, _, _, _ in g: + if toknum == NUMBER and '.' 
in tokval: # replace NUMBER tokens + result.extend([ + (NAME, 'Decimal'), + (OP, '('), + (STRING, repr(tokval)), + (OP, ')') + ]) + else: + result.append((toknum, tokval)) + return untokenize(result) + +import doctest +doctest.testmod() + if verbose: print 'finished' diff --git a/Lib/tokenize.py b/Lib/tokenize.py index 9087e84..b29da6b 100644 --- a/Lib/tokenize.py +++ b/Lib/tokenize.py @@ -31,7 +31,7 @@ from token import * import token __all__ = [x for x in dir(token) if x[0] != '_'] + ["COMMENT", "tokenize", - "generate_tokens", "NL"] + "generate_tokens", "NL", "untokenize"] del x del token @@ -159,12 +159,55 @@ def tokenize_loop(readline, tokeneater): for token_info in generate_tokens(readline): tokeneater(*token_info) + +def untokenize(iterable): + """Transform tokens back into Python source code. + + Each element returned by the iterable must be a token sequence + with at least two elements, a token number and token value. + + Round-trip invariant: + # Output text will tokenize back to the input + t1 = [tok[:2] for tok in generate_tokens(f.readline)] + newcode = untokenize(t1) + readline = iter(newcode.splitlines(1)).next + t2 = [tok[:2] for tok in generate_tokens(readline)] + assert t1 == t2 + """ + + startline = False + indents = [] + toks = [] + toks_append = toks.append + for tok in iterable: + toknum, tokval = tok[:2] + + if toknum == NAME: + tokval += ' ' + + if toknum == INDENT: + indents.append(tokval) + continue + elif toknum == DEDENT: + indents.pop() + continue + elif toknum in (NEWLINE, COMMENT, NL): + startline = True + elif startline and indents: + toks_append(indents[-1]) + startline = False + toks_append(tokval) + return ''.join(toks) + + def generate_tokens(readline): """ The generate_tokens() generator requires one argument, readline, which must be a callable object which provides the same interface as the readline() method of built-in file objects. Each call to the function - should return one line of input as a string. 
+ should return one line of input as a string. Alternately, readline + can be a callable function terminating with StopIteration: + readline = open(myfile).next # Example of alternate readline The generator produces 5-tuples with these members: the token type; the token string; a 2-tuple (srow, scol) of ints specifying the row and @@ -180,7 +223,10 @@ def generate_tokens(readline): indents = [0] while 1: # loop over lines in stream - line = readline() + try: + line = readline() + except StopIteration: + line = '' lnum = lnum + 1 pos, max = 0, len(line) |