From 7cf36387e4a4e7f9686274cdfaeaeddc76ff5902 Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Sat, 20 Jun 2015 19:13:50 -0400 Subject: Remove unused import and move doctest-only import into doctests. --- Lib/test/test_tokenize.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py index 9842207..3e8a654 100644 --- a/Lib/test/test_tokenize.py +++ b/Lib/test/test_tokenize.py @@ -5,6 +5,8 @@ The tests can be really simple. Given a small fragment of source code, print out a table with tokens. The ENDMARKER is omitted for brevity. + >>> import glob + >>> dump_tokens("1 + 1") ENCODING 'utf-8' (0, 0) (0, 0) NUMBER '1' (1, 0) (1, 1) @@ -647,7 +649,7 @@ from tokenize import (tokenize, _tokenize, untokenize, NUMBER, NAME, OP, open as tokenize_open, Untokenizer) from io import BytesIO from unittest import TestCase, mock -import os, sys, glob +import os import token def dump_tokens(s): -- cgit v0.12 From 5713b3c5bf0c27d5443e6d3a1cd2ce3495778597 Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Sat, 20 Jun 2015 19:52:22 -0400 Subject: Issue #20387: Add test capturing failure to roundtrip indented code in tokenize module. --- Lib/test/test_tokenize.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py index 3e8a654..00a2c2b 100644 --- a/Lib/test/test_tokenize.py +++ b/Lib/test/test_tokenize.py @@ -1229,6 +1229,22 @@ class UntokenizeTest(TestCase): self.assertEqual(untokenize(iter(tokens)), b'Hello ') +class TestRoundtrip(TestCase): + def roundtrip(self, code): + if isinstance(code, str): + code = code.encode('utf-8') + return untokenize(tokenize(BytesIO(code).readline)) + + def test_indentation_semantics_retained(self): + """ + Ensure that although whitespace might be mutated in a roundtrip, + the semantic meaning of the indentation remains consistent.
+ """ + code = "if False:\n\tx=3\n\tx=3\n" + codelines = roundtrip(code).split('\n') + self.assertEqual(codelines[1], codelines[2]) + + __test__ = {"doctests" : doctests, 'decistmt': decistmt} def test_main(): @@ -1239,6 +1255,7 @@ def test_main(): support.run_unittest(TestDetectEncoding) support.run_unittest(TestTokenize) support.run_unittest(UntokenizeTest) + support.run_unittest(TestRoundtrip) if __name__ == "__main__": test_main() -- cgit v0.12 From b6d1cdda8e2160ac647b39776198bf48dc7e656f Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Thu, 25 Jun 2015 22:42:24 -0400 Subject: Issue #20387: Correct test to properly capture expectation. --- Lib/test/test_tokenize.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py index 00a2c2b..6506b67 100644 --- a/Lib/test/test_tokenize.py +++ b/Lib/test/test_tokenize.py @@ -1233,7 +1233,7 @@ class TestRoundtrip(TestCase): def roundtrip(self, code): if isinstance(code, str): code = code.encode('utf-8') - return untokenize(tokenize(BytesIO(code).readline)) + return untokenize(tokenize(BytesIO(code).readline)).decode('utf-8') def test_indentation_semantics_retained(self): """ @@ -1241,7 +1241,7 @@ class TestRoundtrip(TestCase): the semantic meaning of the indentation remains consistent. """ code = "if False:\n\tx=3\n\tx=3\n" - codelines = roundtrip(code).split('\n') + codelines = self.roundtrip(code).split('\n') self.assertEqual(codelines[1], codelines[2]) -- cgit v0.12 From e411b6629fb5f7bc01bec89df75737875ce6d8f5 Mon Sep 17 00:00:00 2001 From: Dingyuan Wang Date: Mon, 22 Jun 2015 10:01:12 +0800 Subject: Issue #20387: Restore retention of indentation during untokenize. 
--- Lib/tokenize.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/Lib/tokenize.py b/Lib/tokenize.py index cf18bf9..4d93a83 100644 --- a/Lib/tokenize.py +++ b/Lib/tokenize.py @@ -244,6 +244,8 @@ class Untokenizer: def untokenize(self, iterable): it = iter(iterable) + indents = [] + startline = False for t in it: if len(t) == 2: self.compat(t, it) @@ -254,6 +256,21 @@ class Untokenizer: continue if tok_type == ENDMARKER: break + if tok_type == INDENT: + indents.append(token) + continue + elif tok_type == DEDENT: + indents.pop() + self.prev_row, self.prev_col = end + continue + elif tok_type in (NEWLINE, NL): + startline = True + elif startline and indents: + indent = indents[-1] + if start[1] >= len(indent): + self.tokens.append(indent) + self.prev_col = len(indent) + startline = False self.add_whitespace(start) self.tokens.append(token) self.prev_row, self.prev_col = end -- cgit v0.12 From d1d628d552128d73d8876d9af9d6f6ef0ec22857 Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Fri, 26 Jun 2015 17:45:09 -0400 Subject: Issue #20387: Update Misc/NEWS --- Misc/NEWS | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Misc/NEWS b/Misc/NEWS index b2f4960..496a92e 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -60,6 +60,9 @@ Core and Builtins Library ------- +- Issue #20387: Restore semantic round-trip correctness in tokenize/untokenize + for tab-indented blocks. + - Issue #5633: Fixed timeit when the statement is a string and the setup is not. - Issue #24326: Fixed audioop.ratecv() with non-default weightB argument. -- cgit v0.12