diff options
author | Serhiy Storchaka <storchaka@gmail.com> | 2013-10-03 09:10:49 (GMT) |
---|---|---|
committer | Serhiy Storchaka <storchaka@gmail.com> | 2013-10-03 09:10:49 (GMT) |
commit | 026af2a597d94eb61310dd5360cf901529ef33a2 (patch) | |
tree | e51186f9ea496c40f7759ff160dd5aacb480a620 | |
parent | f55697ca6d430d85437767db9d4ee67e1e041b5c (diff) | |
parent | def0a4c298358c50b0b53a8113551e9aee3532e5 (diff) | |
download | cpython-026af2a597d94eb61310dd5360cf901529ef33a2.zip cpython-026af2a597d94eb61310dd5360cf901529ef33a2.tar.gz cpython-026af2a597d94eb61310dd5360cf901529ef33a2.tar.bz2 |
Issue #18037: 2to3 now escapes '\u' and '\U' in native strings.
-rw-r--r-- | Lib/lib2to3/fixes/fix_unicode.py | 32 |
-rw-r--r-- | Lib/lib2to3/tests/test_fixers.py | 37 |
-rw-r--r-- | Misc/NEWS | 2 |
3 files changed, 64 insertions, 7 deletions
diff --git a/Lib/lib2to3/fixes/fix_unicode.py b/Lib/lib2to3/fixes/fix_unicode.py index d2b3cee..6555397 100644 --- a/Lib/lib2to3/fixes/fix_unicode.py +++ b/Lib/lib2to3/fixes/fix_unicode.py @@ -1,25 +1,43 @@ -"""Fixer that changes unicode to str, unichr to chr, and u"..." into "...". +r"""Fixer for unicode. + +* Changes unicode to str and unichr to chr. + +* If "...\u..." is not unicode literal change it into "...\\u...". + +* Change u"..." into "...". """ -import re from ..pgen2 import token from .. import fixer_base _mapping = {"unichr" : "chr", "unicode" : "str"} -_literal_re = re.compile(r"[uU][rR]?[\'\"]") class FixUnicode(fixer_base.BaseFix): BM_compatible = True PATTERN = "STRING | 'unicode' | 'unichr'" + def start_tree(self, tree, filename): + super(FixUnicode, self).start_tree(tree, filename) + self.unicode_literals = 'unicode_literals' in tree.future_features + def transform(self, node, results): if node.type == token.NAME: new = node.clone() new.value = _mapping[node.value] return new elif node.type == token.STRING: - if _literal_re.match(node.value): - new = node.clone() - new.value = new.value[1:] - return new + val = node.value + if (not self.unicode_literals and val[0] in 'rR\'"' and + '\\' in val): + val = r'\\'.join([ + v.replace('\\u', r'\\u').replace('\\U', r'\\U') + for v in val.split(r'\\') + ]) + if val[0] in 'uU': + val = val[1:] + if val == node.value: + return node + new = node.clone() + new.value = val + return new diff --git a/Lib/lib2to3/tests/test_fixers.py b/Lib/lib2to3/tests/test_fixers.py index 8b19c0a..912bd4c 100644 --- a/Lib/lib2to3/tests/test_fixers.py +++ b/Lib/lib2to3/tests/test_fixers.py @@ -2883,6 +2883,43 @@ class Test_unicode(FixerTestCase): a = """R'''x''' """ self.check(b, a) + def test_native_literal_escape_u(self): + b = r"""'\\\u20ac\U0001d121\\u20ac'""" + a = r"""'\\\\u20ac\\U0001d121\\u20ac'""" + self.check(b, a) + + b = r"""r'\\\u20ac\U0001d121\\u20ac'""" + a = r"""r'\\\\u20ac\\U0001d121\\u20ac'""" + self.check(b, a) 
+ + def test_bytes_literal_escape_u(self): + b = r"""b'\\\u20ac\U0001d121\\u20ac'""" + a = r"""b'\\\u20ac\U0001d121\\u20ac'""" + self.check(b, a) + + b = r"""br'\\\u20ac\U0001d121\\u20ac'""" + a = r"""br'\\\u20ac\U0001d121\\u20ac'""" + self.check(b, a) + + def test_unicode_literal_escape_u(self): + b = r"""u'\\\u20ac\U0001d121\\u20ac'""" + a = r"""'\\\u20ac\U0001d121\\u20ac'""" + self.check(b, a) + + b = r"""ur'\\\u20ac\U0001d121\\u20ac'""" + a = r"""r'\\\u20ac\U0001d121\\u20ac'""" + self.check(b, a) + + def test_native_unicode_literal_escape_u(self): + f = 'from __future__ import unicode_literals\n' + b = f + r"""'\\\u20ac\U0001d121\\u20ac'""" + a = f + r"""'\\\u20ac\U0001d121\\u20ac'""" + self.check(b, a) + + b = f + r"""r'\\\u20ac\U0001d121\\u20ac'""" + a = f + r"""r'\\\u20ac\U0001d121\\u20ac'""" + self.check(b, a) + class Test_callable(FixerTestCase): fixer = "callable" @@ -20,6 +20,8 @@ Core and Builtins Library ------- +- Issue #18037: 2to3 now escapes '\u' and '\U' in native strings. + - Issue #17839: base64.decodebytes and base64.encodebytes now accept any object that exports a 1 dimensional array of bytes (this means the same is now also true for base64_codec) |