summary refs log tree commit diff stats
path: root/Lib
diff options
context:
space:
mode:
Diffstat (limited to 'Lib')
-rw-r--r-- Lib/test/test_strlit.py 9
-rw-r--r-- Lib/test/test_tokenize.py 22
-rw-r--r-- Lib/tokenize.py 12
3 files changed, 14 insertions, 29 deletions
diff --git a/Lib/test/test_strlit.py b/Lib/test/test_strlit.py
index 1f041c8..07bc488 100644
--- a/Lib/test/test_strlit.py
+++ b/Lib/test/test_strlit.py
@@ -123,6 +123,15 @@ class TestLiterals(unittest.TestCase):
self.assertRaises(SyntaxError, eval, """ rrb'' """)
self.assertRaises(SyntaxError, eval, """ rbb'' """)
+ def test_eval_str_u(self):
+ self.assertEqual(eval(""" u'x' """), 'x')
+ self.assertEqual(eval(""" U'\u00e4' """), 'ä')
+ self.assertEqual(eval(""" u'\N{LATIN SMALL LETTER A WITH DIAERESIS}' """), 'ä')
+ self.assertRaises(SyntaxError, eval, """ ur'' """)
+ self.assertRaises(SyntaxError, eval, """ ru'' """)
+ self.assertRaises(SyntaxError, eval, """ bu'' """)
+ self.assertRaises(SyntaxError, eval, """ ub'' """)
+
def check_encoding(self, encoding, extra=""):
modname = "xx_" + encoding.replace("-", "_")
fn = os.path.join(self.tmpdir, modname + ".py")
diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py
index 4c2e4e2..4e798d7 100644
--- a/Lib/test/test_tokenize.py
+++ b/Lib/test/test_tokenize.py
@@ -299,24 +299,6 @@ String literals
STRING 'u"abc"' (1, 0) (1, 6)
OP '+' (1, 7) (1, 8)
STRING 'U"abc"' (1, 9) (1, 15)
- >>> dump_tokens("ur'abc' + uR'abc' + Ur'abc' + UR'abc'")
- ENCODING 'utf-8' (0, 0) (0, 0)
- STRING "ur'abc'" (1, 0) (1, 7)
- OP '+' (1, 8) (1, 9)
- STRING "uR'abc'" (1, 10) (1, 17)
- OP '+' (1, 18) (1, 19)
- STRING "Ur'abc'" (1, 20) (1, 27)
- OP '+' (1, 28) (1, 29)
- STRING "UR'abc'" (1, 30) (1, 37)
- >>> dump_tokens('ur"abc" + uR"abc" + Ur"abc" + UR"abc"')
- ENCODING 'utf-8' (0, 0) (0, 0)
- STRING 'ur"abc"' (1, 0) (1, 7)
- OP '+' (1, 8) (1, 9)
- STRING 'uR"abc"' (1, 10) (1, 17)
- OP '+' (1, 18) (1, 19)
- STRING 'Ur"abc"' (1, 20) (1, 27)
- OP '+' (1, 28) (1, 29)
- STRING 'UR"abc"' (1, 30) (1, 37)
>>> dump_tokens("b'abc' + B'abc'")
ENCODING 'utf-8' (0, 0) (0, 0)
@@ -642,7 +624,7 @@ Non-ascii identifiers
Legacy unicode literals:
- >>> dump_tokens("Örter = u'places'\\ngrün = UR'green'")
+ >>> dump_tokens("Örter = u'places'\\ngrün = U'green'")
ENCODING 'utf-8' (0, 0) (0, 0)
NAME 'Örter' (1, 0) (1, 5)
OP '=' (1, 6) (1, 7)
@@ -650,7 +632,7 @@ Legacy unicode literals:
NEWLINE '\\n' (1, 17) (1, 18)
NAME 'grün' (2, 0) (2, 4)
OP '=' (2, 5) (2, 6)
- STRING "UR'green'" (2, 7) (2, 16)
+ STRING "U'green'" (2, 7) (2, 15)
"""
from test import support
diff --git a/Lib/tokenize.py b/Lib/tokenize.py
index e41cd6e..0a53435 100644
--- a/Lib/tokenize.py
+++ b/Lib/tokenize.py
@@ -127,7 +127,7 @@ Floatnumber = group(Pointfloat, Expfloat)
Imagnumber = group(r'[0-9]+[jJ]', Floatnumber + r'[jJ]')
Number = group(Imagnumber, Floatnumber, Intnumber)
-StringPrefix = r'(?:[uUbB][rR]?|[rR][bB]?)?'
+StringPrefix = r'(?:[bB][rR]?|[rR][bB]?|[uU])?'
# Tail end of ' string.
Single = r"[^'\\]*(?:\\.[^'\\]*)*'"
@@ -183,12 +183,8 @@ endpats = {"'": Single, '"': Double,
"rB'''": Single3, 'rB"""': Double3,
"RB'''": Single3, 'RB"""': Double3,
"u'''": Single3, 'u"""': Double3,
- "ur'''": Single3, 'ur"""': Double3,
"R'''": Single3, 'R"""': Double3,
"U'''": Single3, 'U"""': Double3,
- "uR'''": Single3, 'uR"""': Double3,
- "Ur'''": Single3, 'Ur"""': Double3,
- "UR'''": Single3, 'UR"""': Double3,
'r': None, 'R': None, 'b': None, 'B': None,
'u': None, 'U': None}
@@ -201,8 +197,7 @@ for t in ("'''", '"""',
"rb'''", 'rb"""', "rB'''", 'rB"""',
"Rb'''", 'Rb"""', "RB'''", 'RB"""',
"u'''", 'u"""', "U'''", 'U"""',
- "ur'''", 'ur"""', "Ur'''", 'Ur"""',
- "uR'''", 'uR"""', "UR'''", 'UR"""'):
+ ):
triple_quoted[t] = t
single_quoted = {}
for t in ("'", '"',
@@ -213,8 +208,7 @@ for t in ("'", '"',
"rb'", 'rb"', "rB'", 'rB"',
"Rb'", 'Rb"', "RB'", 'RB"' ,
"u'", 'u"', "U'", 'U"',
- "ur'", 'ur"', "Ur'", 'Ur"',
- "uR'", 'uR"', "UR'", 'UR"' ):
+ ):
single_quoted[t] = t
tabsize = 8