summaryrefslogtreecommitdiffstats
path: root/Lib/test/test_tokenize.py
diff options
context:
space:
mode:
authorThomas Wouters <thomas@python.org>2006-04-21 10:40:58 (GMT)
committerThomas Wouters <thomas@python.org>2006-04-21 10:40:58 (GMT)
commit49fd7fa4431da299196d74087df4a04f99f9c46f (patch)
tree35ace5fe78d3d52c7a9ab356ab9f6dbf8d4b71f4 /Lib/test/test_tokenize.py
parent9ada3d6e29d5165dadacbe6be07bcd35cfbef59d (diff)
downloadcpython-49fd7fa4431da299196d74087df4a04f99f9c46f.zip
cpython-49fd7fa4431da299196d74087df4a04f99f9c46f.tar.gz
cpython-49fd7fa4431da299196d74087df4a04f99f9c46f.tar.bz2
Merge p3yk branch with the trunk up to revision 45595. This breaks a fair
number of tests, all because of the codecs/_multibytecodecs issue described here (it's not a Py3K issue, just something Py3K discovers): http://mail.python.org/pipermail/python-dev/2006-April/064051.html Hye-Shik Chang promised to look for a fix, so no need to fix it here. The tests that are expected to break are: test_codecencodings_cn test_codecencodings_hk test_codecencodings_jp test_codecencodings_kr test_codecencodings_tw test_codecs test_multibytecodec This merge fixes an actual test failure (test_weakref) in this branch, though, so I believe merging is the right thing to do anyway.
Diffstat (limited to 'Lib/test/test_tokenize.py')
-rw-r--r--Lib/test/test_tokenize.py134
1 files changed, 74 insertions, 60 deletions
diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py
index d3c1cc4..b064967 100644
--- a/Lib/test/test_tokenize.py
+++ b/Lib/test/test_tokenize.py
@@ -1,70 +1,30 @@
-from test.test_support import verbose, findfile, is_resource_enabled, TestFailed
import os, glob, random
+from cStringIO import StringIO
+from test.test_support import (verbose, findfile, is_resource_enabled,
+ TestFailed)
from tokenize import (tokenize, generate_tokens, untokenize,
NUMBER, NAME, OP, STRING)
-if verbose:
- print 'starting...'
-
-f = file(findfile('tokenize_tests' + os.extsep + 'txt'))
-tokenize(f.readline)
-f.close()
-
-
-
-###### Test roundtrip for untokenize ##########################
-
+# Test roundtrip for `untokenize`. `f` is a file path. The source code in f
+# is tokenized, converted back to source code via tokenize.untokenize(),
+# and tokenized again from the latter. The test fails if the second
+# tokenization doesn't match the first.
def test_roundtrip(f):
## print 'Testing:', f
- f = file(f)
+ fobj = open(f)
try:
- fulltok = list(generate_tokens(f.readline))
+ fulltok = list(generate_tokens(fobj.readline))
finally:
- f.close()
+ fobj.close()
t1 = [tok[:2] for tok in fulltok]
newtext = untokenize(t1)
readline = iter(newtext.splitlines(1)).next
t2 = [tok[:2] for tok in generate_tokens(readline)]
- assert t1 == t2
-
-
-f = findfile('tokenize_tests' + os.extsep + 'txt')
-test_roundtrip(f)
-
-testdir = os.path.dirname(f) or os.curdir
-testfiles = glob.glob(testdir + os.sep + 'test*.py')
-if not is_resource_enabled('compiler'):
- testfiles = random.sample(testfiles, 10)
-
-for f in testfiles:
- test_roundtrip(f)
-
-
-###### Test detecton of IndentationError ######################
-
-from cStringIO import StringIO
-
-sampleBadText = """
-def foo():
- bar
- baz
-"""
-
-try:
- for tok in generate_tokens(StringIO(sampleBadText).readline):
- pass
-except IndentationError:
- pass
-else:
- raise TestFailed("Did not detect IndentationError:")
-
-
-###### Test example in the docs ###############################
-
-from decimal import Decimal
-from cStringIO import StringIO
+ if t1 != t2:
+ raise TestFailed("untokenize() roundtrip failed for %r" % f)
+# This is an example from the docs, set up as a doctest.
def decistmt(s):
"""Substitute Decimals for floats in a string of statements.
@@ -73,12 +33,21 @@ def decistmt(s):
>>> decistmt(s)
"print +Decimal ('21.3e-5')*-Decimal ('.1234')/Decimal ('81.7')"
- >>> exec(s)
- -3.21716034272e-007
+ The format of the exponent is inherited from the platform C library.
+ Known cases are "e-007" (Windows) and "e-07" (not Windows). Since
+ we're only showing 12 digits, and the 13th isn't close to 5, the
+ rest of the output should be platform-independent.
+
+ >>> exec(s) #doctest: +ELLIPSIS
+ -3.21716034272e-0...7
+
+ Output from calculations with Decimal should be identical across all
+ platforms.
+
>>> exec(decistmt(s))
-3.217160342717258261933904529E-7
-
"""
+
result = []
g = generate_tokens(StringIO(s).readline) # tokenize the string
for toknum, tokval, _, _, _ in g:
@@ -93,8 +62,53 @@ def decistmt(s):
result.append((toknum, tokval))
return untokenize(result)
-import doctest
-doctest.testmod()
+def test_main():
+ if verbose:
+ print 'starting...'
+
+ # This displays the tokenization of tokenize_tests.py to stdout, and
+ # regrtest.py checks that this equals the expected output (in the
+ # test/output/ directory).
+ f = open(findfile('tokenize_tests' + os.extsep + 'txt'))
+ tokenize(f.readline)
+ f.close()
+
+    # Now run test_roundtrip() over tokenize_tests.txt too, and over all
+ # (if the "compiler" resource is enabled) or a small random sample (if
+ # "compiler" is not enabled) of the test*.py files.
+ f = findfile('tokenize_tests' + os.extsep + 'txt')
+ test_roundtrip(f)
+
+ testdir = os.path.dirname(f) or os.curdir
+ testfiles = glob.glob(testdir + os.sep + 'test*.py')
+ if not is_resource_enabled('compiler'):
+ testfiles = random.sample(testfiles, 10)
+
+ for f in testfiles:
+ test_roundtrip(f)
+
+    # Test detection of IndentationError.
+ sampleBadText = """\
+def foo():
+ bar
+ baz
+"""
+
+ try:
+ for tok in generate_tokens(StringIO(sampleBadText).readline):
+ pass
+ except IndentationError:
+ pass
+ else:
+ raise TestFailed("Did not detect IndentationError:")
+
+ # Run the doctests in this module.
+ from test import test_tokenize # i.e., this module
+ from test.test_support import run_doctest
+ run_doctest(test_tokenize)
+
+ if verbose:
+ print 'finished'
-if verbose:
- print 'finished'
+if __name__ == "__main__":
+ test_main()