Diffstat (limited to 'Lib/test/test_tokenize.py')
-rw-r--r-- | Lib/test/test_tokenize.py | 117
1 file changed, 113 insertions, 4 deletions
diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py
index b064967..be6c18d 100644
--- a/Lib/test/test_tokenize.py
+++ b/Lib/test/test_tokenize.py
@@ -1,9 +1,93 @@
-import os, glob, random
+"""Tests for the tokenize module.
+
+The tests were originally written in the old Python style, where the
+test output was compared to a golden file. This docstring represents
+the first steps towards rewriting the entire test as a doctest.
+
+The tests can be really simple. Given a small fragment of source
+code, print out a table with the tokens. The ENDMARKER is omitted for
+brevity.
+
+>>> dump_tokens("1 + 1")
+NUMBER     '1'           (1, 0) (1, 1)
+OP         '+'           (1, 2) (1, 3)
+NUMBER     '1'           (1, 4) (1, 5)
+
+A comment generates a token here, unlike in the parser module. The
+comment token is followed by an NL or a NEWLINE token, depending on
+whether the line contains the completion of a statement.
+
+>>> dump_tokens("if False:\\n"
+...             "    # NL\\n"
+...             "    True = False # NEWLINE\\n")
+NAME       'if'          (1, 0) (1, 2)
+NAME       'False'       (1, 3) (1, 8)
+OP         ':'           (1, 8) (1, 9)
+NEWLINE    '\\n'          (1, 9) (1, 10)
+COMMENT    '# NL'        (2, 4) (2, 8)
+NL         '\\n'          (2, 8) (2, 9)
+INDENT     '    '        (3, 0) (3, 4)
+NAME       'True'        (3, 4) (3, 8)
+OP         '='           (3, 9) (3, 10)
+NAME       'False'       (3, 11) (3, 16)
+COMMENT    '# NEWLINE'   (3, 17) (3, 26)
+NEWLINE    '\\n'          (3, 26) (3, 27)
+DEDENT     ''            (4, 0) (4, 0)
+
+
+There will be a bunch more tests of specific source patterns.
+
+The tokenize module also defines an untokenize function that should
+regenerate the original program text from the tokens.
+
+There are some standard formatting practices that are easy to get right.
+
+>>> roundtrip("if x == 1:\\n"
+...           "    print x\\n")
+if x == 1:
+    print x
+
+Some people use different formatting conventions, which makes
+untokenize a little trickier. Note that this test involves trailing
+whitespace after the colon. Note that we use hex escapes to make the
+two trailing blanks apparent in the expected output.
+
+>>> roundtrip("if x == 1 : \\n"
+...           "  print x\\n")
+if x == 1 :\x20\x20
+  print x
+
+Comments need to go in the right place.
+
+>>> roundtrip("if x == 1:\\n"
+...           "    # A comment by itself.\\n"
+...           "    print x # Comment here, too.\\n"
+...           "    # Another comment.\\n"
+...           "after_if = True\\n")
+if x == 1:
+    # A comment by itself.
+    print x # Comment here, too.
+    # Another comment.
+after_if = True
+
+>>> roundtrip("if (x # The comments need to go in the right place\\n"
+...           "    == 1):\\n"
+...           "    print 'x == 1'\\n")
+if (x # The comments need to go in the right place
+    == 1):
+    print 'x == 1'
+
+"""
+
+import os, glob, random, time, sys
 from cStringIO import StringIO
 from test.test_support import (verbose, findfile, is_resource_enabled,
                                TestFailed)
-from tokenize import (tokenize, generate_tokens, untokenize,
-                      NUMBER, NAME, OP, STRING)
+from tokenize import (tokenize, generate_tokens, untokenize, tok_name,
+                      ENDMARKER, NUMBER, NAME, OP, STRING, COMMENT)
+
+# How much time in seconds can pass before we print a 'Still working' message.
+_PRINT_WORKING_MSG_INTERVAL = 5 * 60
 
 # Test roundtrip for `untokenize`. `f` is a file path. The source code in f
 # is tokenized, converted back to source code via tokenize.untokenize(),
@@ -24,6 +108,23 @@ def test_roundtrip(f):
     if t1 != t2:
         raise TestFailed("untokenize() roundtrip failed for %r" % f)
 
+def dump_tokens(s):
+    """Print out the tokens in s in a table format.
+
+    The ENDMARKER is omitted.
+    """
+    f = StringIO(s)
+    for type, token, start, end, line in generate_tokens(f.readline):
+        if type == ENDMARKER:
+            break
+        type = tok_name[type]
+        print "%(type)-10.10s %(token)-13.13r %(start)s %(end)s" % locals()
+
+def roundtrip(s):
+    f = StringIO(s)
+    source = untokenize(generate_tokens(f.readline))
+    print source,
+
 # This is an example from the docs, set up as a doctest.
 def decistmt(s):
     """Substitute Decimals for floats in a string of statements.
@@ -66,6 +167,8 @@ def test_main():
     if verbose:
         print 'starting...'
 
+    next_time = time.time() + _PRINT_WORKING_MSG_INTERVAL
+
     # This displays the tokenization of tokenize_tests.py to stdout, and
     # regrtest.py checks that this equals the expected output (in the
     # test/output/ directory).
@@ -85,6 +188,12 @@ def test_main():
         testfiles = random.sample(testfiles, 10)
 
     for f in testfiles:
+        # Print still working message since this test can be really slow
+        if next_time <= time.time():
+            next_time = time.time() + _PRINT_WORKING_MSG_INTERVAL
+            print >>sys.__stdout__, '  test_main still working, be patient...'
+            sys.__stdout__.flush()
+
         test_roundtrip(f)
 
     # Test detection of IndentationError.
@@ -105,7 +214,7 @@ def foo():
     # Run the doctests in this module.
     from test import test_tokenize  # i.e., this module
     from test.test_support import run_doctest
-    run_doctest(test_tokenize)
+    run_doctest(test_tokenize, verbose)
 
     if verbose:
         print 'finished'