From da0c025a43bd1c7c9279475ebd8f9edee9e41a0b Mon Sep 17 00:00:00 2001 From: Amaury Forgeot d'Arc Date: Thu, 27 Mar 2008 23:23:54 +0000 Subject: Issue2495: tokenize.untokenize did not insert space between two consecutive string literals: "" "" => """", which is invalid code. Will backport --- Lib/test/test_tokenize.py | 11 ++++++++--- Lib/tokenize.py | 11 ++++++++++- Misc/NEWS | 4 ++++ 3 files changed, 22 insertions(+), 4 deletions(-) diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py index c29728f..ae5f410 100644 --- a/Lib/test/test_tokenize.py +++ b/Lib/test/test_tokenize.py @@ -487,13 +487,18 @@ Backslash means line continuation, except for comments >>> roundtrip("# Comment \\\\nx = 0") True +Two string literals on the same line + + >>> roundtrip("'' ''") + True + +Test roundtrip on random python modules. +pass the '-ucompiler' option to process the full directory. + >>> >>> tempdir = os.path.dirname(f) or os.curdir >>> testfiles = glob.glob(os.path.join(tempdir, "test*.py")) - XXX: tokenize doesn not support __future__.unicode_literals yet - >>> blacklist = ("test_future4.py",) - >>> testfiles = [f for f in testfiles if not f.endswith(blacklist)] >>> if not test_support.is_resource_enabled("compiler"): ... testfiles = random.sample(testfiles, 10) ... diff --git a/Lib/tokenize.py b/Lib/tokenize.py index 76ab430..0f68b40 100644 --- a/Lib/tokenize.py +++ b/Lib/tokenize.py @@ -210,12 +210,21 @@ class Untokenizer: tokval += ' ' if toknum in (NEWLINE, NL): startline = True + prevstring = False for tok in iterable: toknum, tokval = tok[:2] if toknum in (NAME, NUMBER): tokval += ' ' + # Insert a space between two consecutive strings + if toknum == STRING: + if prevstring: + tokval = ' ' + tokval + prevstring = True + else: + prevstring = False + if toknum == INDENT: indents.append(tokval) continue @@ -244,7 +253,7 @@ def untokenize(iterable): t1 = [tok[:2] for tok in generate_tokens(f.readline)] newcode = untokenize(t1) readline = iter(newcode.splitlines(1)).next - t2 = [tok[:2] for tokin generate_tokens(readline)] + t2 = [tok[:2] for tok in generate_tokens(readline)] assert t1 == t2 """ ut = Untokenizer() diff --git a/Misc/NEWS b/Misc/NEWS index ec65705..7264dca 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -76,6 +76,10 @@ Extensions Modules Library ------- +- Issue #2495: tokenize.untokenize now inserts a space between two consecutive + string literals; previously, ["" ""] was rendered as [""""], which is + incorrect python code. + - Issue #2248: return the result of the QUIT command. from SMTP.quit(). - Backport of Python 3.0's io module. -- cgit v0.12