From 7cf36387e4a4e7f9686274cdfaeaeddc76ff5902 Mon Sep 17 00:00:00 2001
From: "Jason R. Coombs" <jaraco@jaraco.com>
Date: Sat, 20 Jun 2015 19:13:50 -0400
Subject: Remove unused import and move doctest-only import into doctests.

---
 Lib/test/test_tokenize.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py
index 9842207..3e8a654 100644
--- a/Lib/test/test_tokenize.py
+++ b/Lib/test/test_tokenize.py
@@ -5,6 +5,8 @@ The tests can be really simple. Given a small fragment of source
 code, print out a table with tokens. The ENDMARKER is omitted for
 brevity.
 
+    >>> import glob
+
     >>> dump_tokens("1 + 1")
     ENCODING   'utf-8'       (0, 0) (0, 0)
     NUMBER     '1'           (1, 0) (1, 1)
@@ -647,7 +649,7 @@ from tokenize import (tokenize, _tokenize, untokenize, NUMBER, NAME, OP,
                      open as tokenize_open, Untokenizer)
 from io import BytesIO
 from unittest import TestCase, mock
-import os, sys, glob
+import os
 import token
 
 def dump_tokens(s):
-- 
cgit v0.12


From 5713b3c5bf0c27d5443e6d3a1cd2ce3495778597 Mon Sep 17 00:00:00 2001
From: "Jason R. Coombs" <jaraco@jaraco.com>
Date: Sat, 20 Jun 2015 19:52:22 -0400
Subject: Issue #20387: Add test capturing failure to roundtrip indented code
 in tokenize module.

---
 Lib/test/test_tokenize.py | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py
index 3e8a654..00a2c2b 100644
--- a/Lib/test/test_tokenize.py
+++ b/Lib/test/test_tokenize.py
@@ -1229,6 +1229,22 @@ class UntokenizeTest(TestCase):
         self.assertEqual(untokenize(iter(tokens)), b'Hello ')
 
 
+class TestRoundtrip(TestCase):
+    def roundtrip(self, code):
+        if isinstance(code, str):
+            code = code.encode('utf-8')
+        return untokenize(tokenize(BytesIO(code).readline))
+
+    def test_indentation_semantics_retained(self):
+        """
+        Ensure that although whitespace might be mutated in a roundtrip,
+        the semantic meaning of the indentation remains consistent.
+        """
+        code = "if False:\n\tx=3\n\tx=3\n"
+        codelines = roundtrip(code).split('\n')
+        self.assertEqual(codelines[1], codelines[2])
+
+
 __test__ = {"doctests" : doctests, 'decistmt': decistmt}
 
 def test_main():
@@ -1239,6 +1255,7 @@ def test_main():
     support.run_unittest(TestDetectEncoding)
     support.run_unittest(TestTokenize)
     support.run_unittest(UntokenizeTest)
+    support.run_unittest(TestRoundtrip)
 
 if __name__ == "__main__":
     test_main()
-- 
cgit v0.12


From b6d1cdda8e2160ac647b39776198bf48dc7e656f Mon Sep 17 00:00:00 2001
From: "Jason R. Coombs" <jaraco@jaraco.com>
Date: Thu, 25 Jun 2015 22:42:24 -0400
Subject: Issue #20387: Correct test to properly capture expectation.

---
 Lib/test/test_tokenize.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py
index 00a2c2b..6506b67 100644
--- a/Lib/test/test_tokenize.py
+++ b/Lib/test/test_tokenize.py
@@ -1233,7 +1233,7 @@ class TestRoundtrip(TestCase):
     def roundtrip(self, code):
         if isinstance(code, str):
             code = code.encode('utf-8')
-        return untokenize(tokenize(BytesIO(code).readline))
+        return untokenize(tokenize(BytesIO(code).readline)).decode('utf-8')
 
     def test_indentation_semantics_retained(self):
         """
@@ -1241,7 +1241,7 @@ class TestRoundtrip(TestCase):
         the semantic meaning of the indentation remains consistent.
         """
         code = "if False:\n\tx=3\n\tx=3\n"
-        codelines = roundtrip(code).split('\n')
+        codelines = self.roundtrip(code).split('\n')
         self.assertEqual(codelines[1], codelines[2])
 
 
-- 
cgit v0.12


From e411b6629fb5f7bc01bec89df75737875ce6d8f5 Mon Sep 17 00:00:00 2001
From: Dingyuan Wang <abcdoyle888@gmail.com>
Date: Mon, 22 Jun 2015 10:01:12 +0800
Subject: Issue #20387: Restore retention of indentation during untokenize.

---
 Lib/tokenize.py | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/Lib/tokenize.py b/Lib/tokenize.py
index cf18bf9..4d93a83 100644
--- a/Lib/tokenize.py
+++ b/Lib/tokenize.py
@@ -244,6 +244,8 @@ class Untokenizer:
 
     def untokenize(self, iterable):
         it = iter(iterable)
+        indents = []
+        startline = False
         for t in it:
             if len(t) == 2:
                 self.compat(t, it)
@@ -254,6 +256,21 @@ class Untokenizer:
                 continue
             if tok_type == ENDMARKER:
                 break
+            if tok_type == INDENT:
+                indents.append(token)
+                continue
+            elif tok_type == DEDENT:
+                indents.pop()
+                self.prev_row, self.prev_col = end
+                continue
+            elif tok_type in (NEWLINE, NL):
+                startline = True
+            elif startline and indents:
+                indent = indents[-1]
+                if start[1] >= len(indent):
+                    self.tokens.append(indent)
+                    self.prev_col = len(indent)
+                startline = False
             self.add_whitespace(start)
             self.tokens.append(token)
             self.prev_row, self.prev_col = end
-- 
cgit v0.12


From d1d628d552128d73d8876d9af9d6f6ef0ec22857 Mon Sep 17 00:00:00 2001
From: "Jason R. Coombs" <jaraco@jaraco.com>
Date: Fri, 26 Jun 2015 17:45:09 -0400
Subject: Issue #20387: Update Misc/NEWS

---
 Misc/NEWS | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/Misc/NEWS b/Misc/NEWS
index b2f4960..496a92e 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -60,6 +60,9 @@ Core and Builtins
 Library
 -------
 
+- Issue #20387: Restore semantic round-trip correctness in tokenize/untokenize
+  for tab-indented blocks.
+
 - Issue #5633: Fixed timeit when the statement is a string and the setup is not.
 
 - Issue #24326: Fixed audioop.ratecv() with non-default weightB argument.
-- 
cgit v0.12