From 0c4aca54dcf0c54f299c78aa71fe8f48ff04f9d9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C5=81ukasz=20Langa?=
Date: Mon, 22 May 2017 15:19:09 -0700
Subject: Make rb'' strings work in lib2to3 (#1724)

This partially solves bpo-23894.
---
 Lib/lib2to3/pgen2/tokenize.py    | 23 ++++++++++++++++-------
 Lib/lib2to3/tests/test_parser.py | 21 +++++++++++++++++++++
 2 files changed, 37 insertions(+), 7 deletions(-)

diff --git a/Lib/lib2to3/pgen2/tokenize.py b/Lib/lib2to3/pgen2/tokenize.py
index fba0fa2..9a0cc1e 100644
--- a/Lib/lib2to3/pgen2/tokenize.py
+++ b/Lib/lib2to3/pgen2/tokenize.py
@@ -74,10 +74,11 @@ Double = r'[^"\\]*(?:\\.[^"\\]*)*"'
 Single3 = r"[^'\\]*(?:(?:\\.|'(?!''))[^'\\]*)*'''"
 # Tail end of """ string.
 Double3 = r'[^"\\]*(?:(?:\\.|"(?!""))[^"\\]*)*"""'
-Triple = group("[ubUB]?[rR]?'''", '[ubUB]?[rR]?"""')
+_litprefix = r"(?:[uUrRbB]|[rR][bB]|[bBuU][rR])?"
+Triple = group(_litprefix + "'''", _litprefix + '"""')
 # Single-line ' or " string.
-String = group(r"[uU]?[rR]?'[^\n'\\]*(?:\\.[^\n'\\]*)*'",
-               r'[uU]?[rR]?"[^\n"\\]*(?:\\.[^\n"\\]*)*"')
+String = group(_litprefix + r"'[^\n'\\]*(?:\\.[^\n'\\]*)*'",
+               _litprefix + r'"[^\n"\\]*(?:\\.[^\n"\\]*)*"')
 
 # Because of leftmost-then-longest match semantics, be sure to put the
 # longest operators first (e.g., if = came before ==, == would get
@@ -95,9 +96,9 @@ PlainToken = group(Number, Funny, String, Name)
 Token = Ignore + PlainToken
 
 # First (or only) line of ' or " string.
-ContStr = group(r"[uUbB]?[rR]?'[^\n'\\]*(?:\\.[^\n'\\]*)*" +
+ContStr = group(_litprefix + r"'[^\n'\\]*(?:\\.[^\n'\\]*)*" +
                 group("'", r'\\\r?\n'),
-                r'[uUbB]?[rR]?"[^\n"\\]*(?:\\.[^\n"\\]*)*' +
+                _litprefix + r'"[^\n"\\]*(?:\\.[^\n"\\]*)*' +
                 group('"', r'\\\r?\n'))
 PseudoExtras = group(r'\\\r?\n', Comment, Triple)
 PseudoToken = Whitespace + group(PseudoExtras, Number, Funny, ContStr, Name)
@@ -111,6 +112,7 @@ endprogs = {"'": re.compile(Single), '"': re.compile(Double),
             "b'''": single3prog, 'b"""': double3prog,
             "ur'''": single3prog, 'ur"""': double3prog,
             "br'''": single3prog, 'br"""': double3prog,
+            "rb'''": single3prog, 'rb"""': double3prog,
             "R'''": single3prog, 'R"""': double3prog,
             "U'''": single3prog, 'U"""': double3prog,
             "B'''": single3prog, 'B"""': double3prog,
@@ -120,6 +122,9 @@ endprogs = {"'": re.compile(Single), '"': re.compile(Double),
             "bR'''": single3prog, 'bR"""': double3prog,
             "Br'''": single3prog, 'Br"""': double3prog,
             "BR'''": single3prog, 'BR"""': double3prog,
+            "rB'''": single3prog, 'rB"""': double3prog,
+            "Rb'''": single3prog, 'Rb"""': double3prog,
+            "RB'''": single3prog, 'RB"""': double3prog,
             'r': None, 'R': None,
             'u': None, 'U': None,
             'b': None, 'B': None}
@@ -132,7 +137,9 @@ for t in ("'''", '"""',
           "ur'''", 'ur"""', "Ur'''", 'Ur"""',
           "uR'''", 'uR"""', "UR'''", 'UR"""',
           "br'''", 'br"""', "Br'''", 'Br"""',
-          "bR'''", 'bR"""', "BR'''", 'BR"""',):
+          "bR'''", 'bR"""', "BR'''", 'BR"""',
+          "rb'''", 'rb"""', "Rb'''", 'Rb"""',
+          "rB'''", 'rB"""', "RB'''", 'RB"""',):
     triple_quoted[t] = t
 single_quoted = {}
 for t in ("'", '"',
@@ -142,7 +149,9 @@ for t in ("'", '"',
           "ur'", 'ur"', "Ur'", 'Ur"',
           "uR'", 'uR"', "UR'", 'UR"',
           "br'", 'br"', "Br'", 'Br"',
-          "bR'", 'bR"', "BR'", 'BR"', ):
+          "bR'", 'bR"', "BR'", 'BR"',
+          "rb'", 'rb"', "Rb'", 'Rb"',
+          "rB'", 'rB"', "RB'", 'RB"',):
     single_quoted[t] = t
 
 tabsize = 8
diff --git a/Lib/lib2to3/tests/test_parser.py b/Lib/lib2to3/tests/test_parser.py
index 9a969e8..c79611d 100644
--- a/Lib/lib2to3/tests/test_parser.py
+++ b/Lib/lib2to3/tests/test_parser.py
@@ -320,6 +320,7 @@ class TestVarAnnotations(GrammarTest):
     def test_6(self):
         self.validate("lst: List[int] = []")
 
+
 class TestExcept(GrammarTest):
     def test_new(self):
         s = """
@@ -338,6 +339,26 @@ class TestExcept(GrammarTest):
         self.validate(s)
 
 
+class TestStringLiterals(GrammarTest):
+    prefixes = ("'", '"',
+        "r'", 'r"', "R'", 'R"',
+        "u'", 'u"', "U'", 'U"',
+        "b'", 'b"', "B'", 'B"',
+        "ur'", 'ur"', "Ur'", 'Ur"',
+        "uR'", 'uR"', "UR'", 'UR"',
+        "br'", 'br"', "Br'", 'Br"',
+        "bR'", 'bR"', "BR'", 'BR"',
+        "rb'", 'rb"', "Rb'", 'Rb"',
+        "rB'", 'rB"', "RB'", 'RB"',)
+
+    def test_lit(self):
+        for pre in self.prefixes:
+            single = "{p}spamspamspam{s}".format(p=pre, s=pre[-1])
+            self.validate(single)
+            triple = "{p}{s}{s}eggs{s}{s}{s}".format(p=pre, s=pre[-1])
+            self.validate(triple)
+
+
 # Adapted from Python 3's Lib/test/test_grammar.py:GrammarTests.testAtoms
 class TestSetLiteral(GrammarTest):
     def test_1(self):
-- 
cgit v0.12