diff options
author | Meador Inge <meadori@gmail.com> | 2012-06-17 02:49:08 (GMT) |
---|---|---|
committer | Meador Inge <meadori@gmail.com> | 2012-06-17 02:49:08 (GMT) |
commit | 8d5c0b8c198374d0b88f30f04dd29d1f19c1c913 (patch) | |
tree | 82c76c228a2074c074e58249dc57b05f0d2776ef | |
parent | 2d9db1dfceeb4e9b2c9572bd1abdbc3bc2b663e5 (diff) | |
download | cpython-8d5c0b8c198374d0b88f30f04dd29d1f19c1c913.zip cpython-8d5c0b8c198374d0b88f30f04dd29d1f19c1c913.tar.gz cpython-8d5c0b8c198374d0b88f30f04dd29d1f19c1c913.tar.bz2 |
Issue #15054: Fix incorrect tokenization of 'b' string literals.
Patch by Serhiy Storchaka.
-rw-r--r-- | Lib/test/test_tokenize.py | 76 | ||||
-rw-r--r-- | Lib/tokenize.py | 2 | ||||
-rw-r--r-- | Misc/NEWS | 4 |
3 files changed, 81 insertions, 1 deletion
diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py index 915eda9..4c2e4e2 100644 --- a/Lib/test/test_tokenize.py +++ b/Lib/test/test_tokenize.py @@ -289,6 +289,82 @@ String literals OP '+' (1, 29) (1, 30) STRING 'R"ABC"' (1, 31) (1, 37) + >>> dump_tokens("u'abc' + U'abc'") + ENCODING 'utf-8' (0, 0) (0, 0) + STRING "u'abc'" (1, 0) (1, 6) + OP '+' (1, 7) (1, 8) + STRING "U'abc'" (1, 9) (1, 15) + >>> dump_tokens('u"abc" + U"abc"') + ENCODING 'utf-8' (0, 0) (0, 0) + STRING 'u"abc"' (1, 0) (1, 6) + OP '+' (1, 7) (1, 8) + STRING 'U"abc"' (1, 9) (1, 15) + >>> dump_tokens("ur'abc' + uR'abc' + Ur'abc' + UR'abc'") + ENCODING 'utf-8' (0, 0) (0, 0) + STRING "ur'abc'" (1, 0) (1, 7) + OP '+' (1, 8) (1, 9) + STRING "uR'abc'" (1, 10) (1, 17) + OP '+' (1, 18) (1, 19) + STRING "Ur'abc'" (1, 20) (1, 27) + OP '+' (1, 28) (1, 29) + STRING "UR'abc'" (1, 30) (1, 37) + >>> dump_tokens('ur"abc" + uR"abc" + Ur"abc" + UR"abc"') + ENCODING 'utf-8' (0, 0) (0, 0) + STRING 'ur"abc"' (1, 0) (1, 7) + OP '+' (1, 8) (1, 9) + STRING 'uR"abc"' (1, 10) (1, 17) + OP '+' (1, 18) (1, 19) + STRING 'Ur"abc"' (1, 20) (1, 27) + OP '+' (1, 28) (1, 29) + STRING 'UR"abc"' (1, 30) (1, 37) + + >>> dump_tokens("b'abc' + B'abc'") + ENCODING 'utf-8' (0, 0) (0, 0) + STRING "b'abc'" (1, 0) (1, 6) + OP '+' (1, 7) (1, 8) + STRING "B'abc'" (1, 9) (1, 15) + >>> dump_tokens('b"abc" + B"abc"') + ENCODING 'utf-8' (0, 0) (0, 0) + STRING 'b"abc"' (1, 0) (1, 6) + OP '+' (1, 7) (1, 8) + STRING 'B"abc"' (1, 9) (1, 15) + >>> dump_tokens("br'abc' + bR'abc' + Br'abc' + BR'abc'") + ENCODING 'utf-8' (0, 0) (0, 0) + STRING "br'abc'" (1, 0) (1, 7) + OP '+' (1, 8) (1, 9) + STRING "bR'abc'" (1, 10) (1, 17) + OP '+' (1, 18) (1, 19) + STRING "Br'abc'" (1, 20) (1, 27) + OP '+' (1, 28) (1, 29) + STRING "BR'abc'" (1, 30) (1, 37) + >>> dump_tokens('br"abc" + bR"abc" + Br"abc" + BR"abc"') + ENCODING 'utf-8' (0, 0) (0, 0) + STRING 'br"abc"' (1, 0) (1, 7) + OP '+' (1, 8) (1, 9) + STRING 'bR"abc"' (1, 10) (1, 17) + OP '+' (1, 18) (1, 
19) + STRING 'Br"abc"' (1, 20) (1, 27) + OP '+' (1, 28) (1, 29) + STRING 'BR"abc"' (1, 30) (1, 37) + >>> dump_tokens("rb'abc' + rB'abc' + Rb'abc' + RB'abc'") + ENCODING 'utf-8' (0, 0) (0, 0) + STRING "rb'abc'" (1, 0) (1, 7) + OP '+' (1, 8) (1, 9) + STRING "rB'abc'" (1, 10) (1, 17) + OP '+' (1, 18) (1, 19) + STRING "Rb'abc'" (1, 20) (1, 27) + OP '+' (1, 28) (1, 29) + STRING "RB'abc'" (1, 30) (1, 37) + >>> dump_tokens('rb"abc" + rB"abc" + Rb"abc" + RB"abc"') + ENCODING 'utf-8' (0, 0) (0, 0) + STRING 'rb"abc"' (1, 0) (1, 7) + OP '+' (1, 8) (1, 9) + STRING 'rB"abc"' (1, 10) (1, 17) + OP '+' (1, 18) (1, 19) + STRING 'Rb"abc"' (1, 20) (1, 27) + OP '+' (1, 28) (1, 29) + STRING 'RB"abc"' (1, 30) (1, 37) + Operators >>> dump_tokens("def d22(a, b, c=2, d=2, *k): pass") diff --git a/Lib/tokenize.py b/Lib/tokenize.py index e4c9d3c..e41cd6e 100644 --- a/Lib/tokenize.py +++ b/Lib/tokenize.py @@ -127,7 +127,7 @@ Floatnumber = group(Pointfloat, Expfloat) Imagnumber = group(r'[0-9]+[jJ]', Floatnumber + r'[jJ]') Number = group(Imagnumber, Floatnumber, Intnumber) -StringPrefix = r'(?:[uU][rR]?|[bB][rR]|[rR][bB]|[rR]|[uU])?' +StringPrefix = r'(?:[uUbB][rR]?|[rR][bB]?)?' # Tail end of ' string. Single = r"[^'\\]*(?:\\.[^'\\]*)*'" @@ -27,6 +27,10 @@ Core and Builtins Library ------- +- Issue #15054: A bug in tokenize.tokenize that caused string literals + with 'b' prefixes to be incorrectly tokenized has been fixed. + Patch by Serhiy Storchaka. + - Issue #15006: Allow equality comparison between naive and aware time or datetime objects. |