author     Meador Inge <meadori@gmail.com>   2012-06-17 02:05:50 (GMT)
committer  Meador Inge <meadori@gmail.com>   2012-06-17 02:05:50 (GMT)
commit     43f42fc3cb67433c88e31268767c0cab36422351 (patch)
tree       5f378d62132769aa2c1c2fe4d5d9cf94784c3360 /Lib
parent     7cf66996992eeb7f3ad4c19f960b967e1beb5fa3 (diff)
Issue #15054: Fix incorrect tokenization of 'b' and 'br' string literals.
Patch by Serhiy Storchaka.
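
Before this fix, the 2.7 tokenize module's string regexes only allowed the u/U and r/R prefixes, so a literal such as b'abc' was split into a NAME token 'b' followed by a plain STRING; with the fix it comes back as a single STRING token. A quick way to check, sketched against a patched interpreter (generate_tokens and tok_name are real tokenize APIs; the show() helper and its output formatting are only illustrative, mirroring the test suite's dump_tokens()):

    import tokenize
    from StringIO import StringIO   # io.StringIO on Python 3

    def show(source):
        # Print each token roughly the way test_tokenize.dump_tokens() does.
        for tok_type, tok_str, start, end, _ in tokenize.generate_tokens(
                StringIO(source).readline):
            print("%-10s %-13r %s %s" %
                  (tokenize.tok_name[tok_type], tok_str, start, end))

    show("br'abc' + B'abc'")
    # With the fix applied (trailing NEWLINE/ENDMARKER tokens omitted):
    #   STRING     "br'abc'"     (1, 0) (1, 7)
    #   OP         '+'           (1, 8) (1, 9)
    #   STRING     "B'abc'"      (1, 10) (1, 16)
    # Without it, the br/B prefixes come out as separate NAME tokens.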
Diffstat (limited to 'Lib')
-rw-r--r--  Lib/test/test_tokenize.py  25
-rw-r--r--  Lib/tokenize.py            10
2 files changed, 30 insertions, 5 deletions
diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py
index 38da106..a51e781 100644
--- a/Lib/test/test_tokenize.py
+++ b/Lib/test/test_tokenize.py
@@ -278,6 +278,31 @@ String literals
     OP         '+'           (1, 32) (1, 33)
     STRING     'UR"ABC"'     (1, 34) (1, 41)
 
+    >>> dump_tokens("b'abc' + B'abc'")
+    STRING     "b'abc'"      (1, 0) (1, 6)
+    OP         '+'           (1, 7) (1, 8)
+    STRING     "B'abc'"      (1, 9) (1, 15)
+    >>> dump_tokens('b"abc" + B"abc"')
+    STRING     'b"abc"'      (1, 0) (1, 6)
+    OP         '+'           (1, 7) (1, 8)
+    STRING     'B"abc"'      (1, 9) (1, 15)
+    >>> dump_tokens("br'abc' + bR'abc' + Br'abc' + BR'abc'")
+    STRING     "br'abc'"     (1, 0) (1, 7)
+    OP         '+'           (1, 8) (1, 9)
+    STRING     "bR'abc'"     (1, 10) (1, 17)
+    OP         '+'           (1, 18) (1, 19)
+    STRING     "Br'abc'"     (1, 20) (1, 27)
+    OP         '+'           (1, 28) (1, 29)
+    STRING     "BR'abc'"     (1, 30) (1, 37)
+    >>> dump_tokens('br"abc" + bR"abc" + Br"abc" + BR"abc"')
+    STRING     'br"abc"'     (1, 0) (1, 7)
+    OP         '+'           (1, 8) (1, 9)
+    STRING     'bR"abc"'     (1, 10) (1, 17)
+    OP         '+'           (1, 18) (1, 19)
+    STRING     'Br"abc"'     (1, 20) (1, 27)
+    OP         '+'           (1, 28) (1, 29)
+    STRING     'BR"abc"'     (1, 30) (1, 37)
+
 Operators
 
     >>> dump_tokens("def d22(a, b, c=2, d=2, *k): pass")
diff --git a/Lib/tokenize.py b/Lib/tokenize.py
index ae3de54..1cba6e5 100644
--- a/Lib/tokenize.py
+++ b/Lib/tokenize.py
@@ -70,10 +70,10 @@ Double = r'[^"\\]*(?:\\.[^"\\]*)*"'
 Single3 = r"[^'\\]*(?:(?:\\.|'(?!''))[^'\\]*)*'''"
 # Tail end of """ string.
 Double3 = r'[^"\\]*(?:(?:\\.|"(?!""))[^"\\]*)*"""'
-Triple = group("[uU]?[rR]?'''", '[uU]?[rR]?"""')
+Triple = group("[uUbB]?[rR]?'''", '[uUbB]?[rR]?"""')
 # Single-line ' or " string.
-String = group(r"[uU]?[rR]?'[^\n'\\]*(?:\\.[^\n'\\]*)*'",
-               r'[uU]?[rR]?"[^\n"\\]*(?:\\.[^\n"\\]*)*"')
+String = group(r"[uUbB]?[rR]?'[^\n'\\]*(?:\\.[^\n'\\]*)*'",
+               r'[uUbB]?[rR]?"[^\n"\\]*(?:\\.[^\n"\\]*)*"')
 
 # Because of leftmost-then-longest match semantics, be sure to put the
 # longest operators first (e.g., if = came before ==, == would get
@@ -91,9 +91,9 @@ PlainToken = group(Number, Funny, String, Name)
 Token = Ignore + PlainToken
 
 # First (or only) line of ' or " string.
-ContStr = group(r"[uU]?[rR]?'[^\n'\\]*(?:\\.[^\n'\\]*)*" +
+ContStr = group(r"[uUbB]?[rR]?'[^\n'\\]*(?:\\.[^\n'\\]*)*" +
                 group("'", r'\\\r?\n'),
-                r'[uU]?[rR]?"[^\n"\\]*(?:\\.[^\n"\\]*)*' +
+                r'[uUbB]?[rR]?"[^\n"\\]*(?:\\.[^\n"\\]*)*' +
                 group('"', r'\\\r?\n'))
 PseudoExtras = group(r'\\\r?\n', Comment, Triple)
 PseudoToken = Whitespace + group(PseudoExtras, Number, Funny, ContStr, Name)
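
The tokenize.py half of the change is deliberately narrow: each of the three string patterns (Triple, String, ContStr) only widens its optional prefix character class from [uU] to [uUbB], so a b/B prefix, optionally followed by r/R, is accepted everywhere u/U already was. As a standalone sketch (compiling the pattern fragment on its own, outside the module), the effect of the widened class is easy to see:

    import re

    # First alternative of the patched String pattern, copied from the diff
    # above and compiled here purely for illustration.
    new_single = re.compile(r"[uUbB]?[rR]?'[^\n'\\]*(?:\\.[^\n'\\]*)*'")
    # The same alternative with the pre-patch [uU] prefix class.
    old_single = re.compile(r"[uU]?[rR]?'[^\n'\\]*(?:\\.[^\n'\\]*)*'")

    for literal in ("'abc'", "b'abc'", "B'abc'", "br'abc'",
                    "bR'abc'", "Br'abc'", "BR'abc'", "uR'abc'"):
        m = new_single.match(literal)
        assert m is not None and m.group() == literal, literal

    # Pre-patch, the prefix is not part of the match, so the tokenizer
    # falls back to scanning 'b' as a NAME token before the string.
    assert old_single.match("b'abc'") is None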