author    | Jason R. Coombs <jaraco@jaraco.com> | 2015-06-28 15:10:29 (GMT)
committer | Jason R. Coombs <jaraco@jaraco.com> | 2015-06-28 15:10:29 (GMT)
commit    | 50373e6c21e933d2fee7039204bdc51c4475d634 (patch)
tree      | 9c0017ffce63f76148a467b7a256ff26d5e0379b /Lib
parent    | 449e2be12b654a9b892648ff5496c6d7dfbb85f9 (diff)
parent    | 80c29ac1ea1b3968a91d3328fab02084c59ca0e4 (diff)
Issue #20387: Merge
Diffstat (limited to 'Lib')
-rw-r--r-- | Lib/test/test_tokenize.py | 21
-rw-r--r-- | Lib/tokenize.py           | 17
2 files changed, 37 insertions(+), 1 deletion(-)
diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py
index 9842207..6506b67 100644
--- a/Lib/test/test_tokenize.py
+++ b/Lib/test/test_tokenize.py
@@ -5,6 +5,8 @@ The tests can be really simple. Given a small fragment of source
 code, print out a table with tokens. The ENDMARKER is omitted for
 brevity.
 
+    >>> import glob
+
     >>> dump_tokens("1 + 1")
     ENCODING   'utf-8'       (0, 0) (0, 0)
     NUMBER     '1'           (1, 0) (1, 1)
@@ -647,7 +649,7 @@ from tokenize import (tokenize, _tokenize, untokenize, NUMBER, NAME, OP,
                      open as tokenize_open, Untokenizer)
 from io import BytesIO
 from unittest import TestCase, mock
-import os, sys, glob
+import os
 import token
 
 def dump_tokens(s):
@@ -1227,6 +1229,22 @@ class UntokenizeTest(TestCase):
         self.assertEqual(untokenize(iter(tokens)), b'Hello ')
 
 
+class TestRoundtrip(TestCase):
+    def roundtrip(self, code):
+        if isinstance(code, str):
+            code = code.encode('utf-8')
+        return untokenize(tokenize(BytesIO(code).readline)).decode('utf-8')
+
+    def test_indentation_semantics_retained(self):
+        """
+        Ensure that although whitespace might be mutated in a roundtrip,
+        the semantic meaning of the indentation remains consistent.
+        """
+        code = "if False:\n\tx=3\n\tx=3\n"
+        codelines = self.roundtrip(code).split('\n')
+        self.assertEqual(codelines[1], codelines[2])
+
+
 __test__ = {"doctests" : doctests, 'decistmt': decistmt}
 
 def test_main():
@@ -1237,6 +1255,7 @@ def test_main():
     support.run_unittest(TestDetectEncoding)
     support.run_unittest(TestTokenize)
     support.run_unittest(UntokenizeTest)
+    support.run_unittest(TestRoundtrip)
 
 if __name__ == "__main__":
     test_main()
diff --git a/Lib/tokenize.py b/Lib/tokenize.py
index cf18bf9..4d93a83 100644
--- a/Lib/tokenize.py
+++ b/Lib/tokenize.py
@@ -244,6 +244,8 @@ class Untokenizer:
 
     def untokenize(self, iterable):
         it = iter(iterable)
+        indents = []
+        startline = False
         for t in it:
             if len(t) == 2:
                 self.compat(t, it)
@@ -254,6 +256,21 @@
                 continue
             if tok_type == ENDMARKER:
                 break
+            if tok_type == INDENT:
+                indents.append(token)
+                continue
+            elif tok_type == DEDENT:
+                indents.pop()
+                self.prev_row, self.prev_col = end
+                continue
+            elif tok_type in (NEWLINE, NL):
+                startline = True
+            elif startline and indents:
+                indent = indents[-1]
+                if start[1] >= len(indent):
+                    self.tokens.append(indent)
+                    self.prev_col = len(indent)
+                startline = False
             self.add_whitespace(start)
             self.tokens.append(token)
             self.prev_row, self.prev_col = end
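The Untokenizer change keeps a stack of INDENT token strings and re-applies the innermost one at the start of each logical line, so tab-based indentation is no longer collapsed into differing runs of spaces. As an illustrative sketch only (not part of the commit), the behaviour exercised by the new TestRoundtrip case can be reproduced from a standalone script; the roundtrip() helper below is a hypothetical copy of the test's helper, and only tokenize, untokenize, and BytesIO are standard-library names used by the patch.

    # Minimal sketch of the round-trip the patch preserves, assuming a
    # tokenize module that includes the Untokenizer fix above.
    from io import BytesIO
    from tokenize import tokenize, untokenize

    def roundtrip(code):
        # Encode, tokenize, then untokenize back to text (same idea as the
        # hypothetical test helper TestRoundtrip.roundtrip).
        if isinstance(code, str):
            code = code.encode('utf-8')
        return untokenize(tokenize(BytesIO(code).readline)).decode('utf-8')

    source = "if False:\n\tx=3\n\tx=3\n"
    lines = roundtrip(source).split('\n')
    # Both body lines come back with identical indentation, so the block
    # structure of the source is unchanged even if the exact whitespace is.
    assert lines[1] == lines[2]

Without the fix, the two body lines could round-trip with different leading whitespace, changing which statements belong to the if-block.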