Issue #2495: tokenize.untokenize did not insert a space between two consecutive string literals:

the input "" "" was rendered as """", which is invalid code. Will backport.
author: Amaury Forgeot d'Arc <amauryfa@gmail.com> 2008-03-27 23:23:54 (GMT)
committer: Amaury Forgeot d'Arc <amauryfa@gmail.com> 2008-03-27 23:23:54 (GMT)
commit: da0c025a43bd1c7c9279475ebd8f9edee9e41a0b (patch)
tree: 4799462e1ad68f4ddaae2e7cc793db406a384b1e
parent: 853e44ca8c60318ed6189eedca21e28570d79e83 (diff)
download: cpython-da0c025a43bd1c7c9279475ebd8f9edee9e41a0b.zip
cpython-da0c025a43bd1c7c9279475ebd8f9edee9e41a0b.tar.gz
cpython-da0c025a43bd1c7c9279475ebd8f9edee9e41a0b.tar.bz2
3 files changed, 22 insertions, 4 deletions
diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py
index c29728f..ae5f410 100644
--- a/Lib/test/test_tokenize.py
+++ b/Lib/test/test_tokenize.py
@@ -487,13 +487,18 @@ Backslash means line continuation, except for comments
     >>> roundtrip("# Comment \\\\nx = 0")
     True
 
+Two string literals on the same line
+
+    >>> roundtrip("'' ''")
+    True
+
+Test roundtrip on random python modules.
+pass the '-ucompiler' option to process the full directory.
+
     >>>
     >>> tempdir = os.path.dirname(f) or os.curdir
     >>> testfiles = glob.glob(os.path.join(tempdir, "test*.py"))
 
-    XXX: tokenize doesn not support __future__.unicode_literals yet
-    >>> blacklist = ("test_future4.py",)
-    >>> testfiles = [f for f in testfiles if not f.endswith(blacklist)]
     >>> if not test_support.is_resource_enabled("compiler"):
     ...     testfiles = random.sample(testfiles, 10)
     ...
diff --git a/Lib/tokenize.py b/Lib/tokenize.py
index 76ab430..0f68b40 100644
--- a/Lib/tokenize.py
+++ b/Lib/tokenize.py
@@ -210,12 +210,21 @@ class Untokenizer:
             tokval += ' '
         if toknum in (NEWLINE, NL):
             startline = True
+        prevstring = False
         for tok in iterable:
             toknum, tokval = tok[:2]
 
             if toknum in (NAME, NUMBER):
                 tokval += ' '
 
+            # Insert a space between two consecutive strings
+            if toknum == STRING:
+                if prevstring:
+                    tokval = ' ' + tokval
+                prevstring = True
+            else:
+                prevstring = False
+
             if toknum == INDENT:
                 indents.append(tokval)
                 continue
@@ -244,7 +253,7 @@ def untokenize(iterable):
         t1 = [tok[:2] for tok in generate_tokens(f.readline)]
         newcode = untokenize(t1)
         readline = iter(newcode.splitlines(1)).next
-        t2 = [tok[:2] for tokin generate_tokens(readline)]
+        t2 = [tok[:2] for tok in generate_tokens(readline)]
         assert t1 == t2
     """
     ut = Untokenizer()
diff --git a/Misc/NEWS b/Misc/NEWS
index ec65705..7264dca 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -76,6 +76,10 @@ Extensions Modules
 Library
 -------
 
+- Issue #2495: tokenize.untokenize now inserts a space between two consecutive
+  string literals; previously, ["" ""] was rendered as [""""], which is
+  incorrect python code.
+
 - Issue #2248: return the result of the QUIT command. from SMTP.quit().
 
 - Backport of Python 3.0's io module.
author	Amaury Forgeot d'Arc <amauryfa@gmail.com>	2008-03-27 23:23:54 (GMT)
committer	Amaury Forgeot d'Arc <amauryfa@gmail.com>	2008-03-27 23:23:54 (GMT)
commit	da0c025a43bd1c7c9279475ebd8f9edee9e41a0b (patch)
tree	4799462e1ad68f4ddaae2e7cc793db406a384b1e
parent	853e44ca8c60318ed6189eedca21e28570d79e83 (diff)
download	cpython-da0c025a43bd1c7c9279475ebd8f9edee9e41a0b.zip cpython-da0c025a43bd1c7c9279475ebd8f9edee9e41a0b.tar.gz cpython-da0c025a43bd1c7c9279475ebd8f9edee9e41a0b.tar.bz2