From 8d5c0b8c198374d0b88f30f04dd29d1f19c1c913 Mon Sep 17 00:00:00 2001
From: Meador Inge
Date: Sat, 16 Jun 2012 21:49:08 -0500
Subject: Issue #15054: Fix incorrect tokenization of 'b' string literals.

Patch by Serhiy Storchaka.
---
 Lib/test/test_tokenize.py | 76 +++++++++++++++++++++++++++++++++++++++++++++++
 Lib/tokenize.py           |  2 +-
 Misc/NEWS                 |  4 +++
 3 files changed, 81 insertions(+), 1 deletion(-)

diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py
index 915eda9..4c2e4e2 100644
--- a/Lib/test/test_tokenize.py
+++ b/Lib/test/test_tokenize.py
@@ -289,6 +289,82 @@ String literals
     OP         '+'           (1, 29) (1, 30)
     STRING     'R"ABC"'      (1, 31) (1, 37)
 
+    >>> dump_tokens("u'abc' + U'abc'")
+    ENCODING   'utf-8'       (0, 0) (0, 0)
+    STRING     "u'abc'"      (1, 0) (1, 6)
+    OP         '+'           (1, 7) (1, 8)
+    STRING     "U'abc'"      (1, 9) (1, 15)
+    >>> dump_tokens('u"abc" + U"abc"')
+    ENCODING   'utf-8'       (0, 0) (0, 0)
+    STRING     'u"abc"'      (1, 0) (1, 6)
+    OP         '+'           (1, 7) (1, 8)
+    STRING     'U"abc"'      (1, 9) (1, 15)
+    >>> dump_tokens("ur'abc' + uR'abc' + Ur'abc' + UR'abc'")
+    ENCODING   'utf-8'       (0, 0) (0, 0)
+    STRING     "ur'abc'"     (1, 0) (1, 7)
+    OP         '+'           (1, 8) (1, 9)
+    STRING     "uR'abc'"     (1, 10) (1, 17)
+    OP         '+'           (1, 18) (1, 19)
+    STRING     "Ur'abc'"     (1, 20) (1, 27)
+    OP         '+'           (1, 28) (1, 29)
+    STRING     "UR'abc'"     (1, 30) (1, 37)
+    >>> dump_tokens('ur"abc" + uR"abc" + Ur"abc" + UR"abc"')
+    ENCODING   'utf-8'       (0, 0) (0, 0)
+    STRING     'ur"abc"'     (1, 0) (1, 7)
+    OP         '+'           (1, 8) (1, 9)
+    STRING     'uR"abc"'     (1, 10) (1, 17)
+    OP         '+'           (1, 18) (1, 19)
+    STRING     'Ur"abc"'     (1, 20) (1, 27)
+    OP         '+'           (1, 28) (1, 29)
+    STRING     'UR"abc"'     (1, 30) (1, 37)
+
+    >>> dump_tokens("b'abc' + B'abc'")
+    ENCODING   'utf-8'       (0, 0) (0, 0)
+    STRING     "b'abc'"      (1, 0) (1, 6)
+    OP         '+'           (1, 7) (1, 8)
+    STRING     "B'abc'"      (1, 9) (1, 15)
+    >>> dump_tokens('b"abc" + B"abc"')
+    ENCODING   'utf-8'       (0, 0) (0, 0)
+    STRING     'b"abc"'      (1, 0) (1, 6)
+    OP         '+'           (1, 7) (1, 8)
+    STRING     'B"abc"'      (1, 9) (1, 15)
+    >>> dump_tokens("br'abc' + bR'abc' + Br'abc' + BR'abc'")
+    ENCODING   'utf-8'       (0, 0) (0, 0)
+    STRING     "br'abc'"     (1, 0) (1, 7)
+    OP         '+'           (1, 8) (1, 9)
+    STRING     "bR'abc'"     (1, 10) (1, 17)
+    OP         '+'           (1, 18) (1, 19)
+    STRING     "Br'abc'"     (1, 20) (1, 27)
+    OP         '+'           (1, 28) (1, 29)
+    STRING     "BR'abc'"     (1, 30) (1, 37)
+    >>> dump_tokens('br"abc" + bR"abc" + Br"abc" + BR"abc"')
+    ENCODING   'utf-8'       (0, 0) (0, 0)
+    STRING     'br"abc"'     (1, 0) (1, 7)
+    OP         '+'           (1, 8) (1, 9)
+    STRING     'bR"abc"'     (1, 10) (1, 17)
+    OP         '+'           (1, 18) (1, 19)
+    STRING     'Br"abc"'     (1, 20) (1, 27)
+    OP         '+'           (1, 28) (1, 29)
+    STRING     'BR"abc"'     (1, 30) (1, 37)
+    >>> dump_tokens("rb'abc' + rB'abc' + Rb'abc' + RB'abc'")
+    ENCODING   'utf-8'       (0, 0) (0, 0)
+    STRING     "rb'abc'"     (1, 0) (1, 7)
+    OP         '+'           (1, 8) (1, 9)
+    STRING     "rB'abc'"     (1, 10) (1, 17)
+    OP         '+'           (1, 18) (1, 19)
+    STRING     "Rb'abc'"     (1, 20) (1, 27)
+    OP         '+'           (1, 28) (1, 29)
+    STRING     "RB'abc'"     (1, 30) (1, 37)
+    >>> dump_tokens('rb"abc" + rB"abc" + Rb"abc" + RB"abc"')
+    ENCODING   'utf-8'       (0, 0) (0, 0)
+    STRING     'rb"abc"'     (1, 0) (1, 7)
+    OP         '+'           (1, 8) (1, 9)
+    STRING     'rB"abc"'     (1, 10) (1, 17)
+    OP         '+'           (1, 18) (1, 19)
+    STRING     'Rb"abc"'     (1, 20) (1, 27)
+    OP         '+'           (1, 28) (1, 29)
+    STRING     'RB"abc"'     (1, 30) (1, 37)
+
 Operators
 
     >>> dump_tokens("def d22(a, b, c=2, d=2, *k): pass")
diff --git a/Lib/tokenize.py b/Lib/tokenize.py
index e4c9d3c..e41cd6e 100644
--- a/Lib/tokenize.py
+++ b/Lib/tokenize.py
@@ -127,7 +127,7 @@ Floatnumber = group(Pointfloat, Expfloat)
 Imagnumber = group(r'[0-9]+[jJ]', Floatnumber + r'[jJ]')
 Number = group(Imagnumber, Floatnumber, Intnumber)
 
-StringPrefix = r'(?:[uU][rR]?|[bB][rR]|[rR][bB]|[rR]|[uU])?'
+StringPrefix = r'(?:[uUbB][rR]?|[rR][bB]?)?'
 
 # Tail end of ' string.
 Single = r"[^'\\]*(?:\\.[^'\\]*)*'"
diff --git a/Misc/NEWS b/Misc/NEWS
index 46f87bc..6c587af 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -27,6 +27,10 @@ Core and Builtins
 Library
 -------
 
+- Issue #15054: A bug in tokenize.tokenize that caused string literals
+  with 'b' prefixes to be incorrectly tokenized has been fixed.
+  Patch by Serhiy Storchaka.
+
 - Issue #15006: Allow equality comparison between naive and aware time
   or datetime objects.
 
-- 
cgit v0.12