summary | refs | log | tree | commit | diff | stats
path: root/Lib/lib2to3/fixes/fix_unicode.py
diff options
context:
space:
mode:
author: Serhiy Storchaka <storchaka@gmail.com> 2013-10-03 09:08:38 (GMT)
committer: Serhiy Storchaka <storchaka@gmail.com> 2013-10-03 09:08:38 (GMT)
commit: def0a4c298358c50b0b53a8113551e9aee3532e5 (patch)
tree: 7fc497bed47259280aa94fc9d6fcd34388a93882 /Lib/lib2to3/fixes/fix_unicode.py
parent: 2a8b3f26b91d6f1774c73fe4f545d2eb94475525 (diff)
download: cpython-def0a4c298358c50b0b53a8113551e9aee3532e5.zip
cpython-def0a4c298358c50b0b53a8113551e9aee3532e5.tar.gz
cpython-def0a4c298358c50b0b53a8113551e9aee3532e5.tar.bz2
Issue #18037: 2to3 now escapes '\u' and '\U' in native strings.
Diffstat (limited to 'Lib/lib2to3/fixes/fix_unicode.py')
-rw-r--r-- Lib/lib2to3/fixes/fix_unicode.py 32
1 files changed, 25 insertions, 7 deletions
diff --git a/Lib/lib2to3/fixes/fix_unicode.py b/Lib/lib2to3/fixes/fix_unicode.py
index d2b3cee..6555397 100644
--- a/Lib/lib2to3/fixes/fix_unicode.py
+++ b/Lib/lib2to3/fixes/fix_unicode.py
@@ -1,25 +1,43 @@
-"""Fixer that changes unicode to str, unichr to chr, and u"..." into "...".
+r"""Fixer for unicode.
+
+* Changes unicode to str and unichr to chr.
+
+* If "...\u..." is not unicode literal change it into "...\\u...".
+
+* Change u"..." into "...".
"""
-import re
from ..pgen2 import token
from .. import fixer_base
_mapping = {"unichr" : "chr", "unicode" : "str"}
-_literal_re = re.compile(r"[uU][rR]?[\'\"]")
class FixUnicode(fixer_base.BaseFix):
BM_compatible = True
PATTERN = "STRING | 'unicode' | 'unichr'"
+ def start_tree(self, tree, filename):
+ super(FixUnicode, self).start_tree(tree, filename)
+ self.unicode_literals = 'unicode_literals' in tree.future_features
+
def transform(self, node, results):
if node.type == token.NAME:
new = node.clone()
new.value = _mapping[node.value]
return new
elif node.type == token.STRING:
- if _literal_re.match(node.value):
- new = node.clone()
- new.value = new.value[1:]
- return new
+ val = node.value
+ if (not self.unicode_literals and val[0] in 'rR\'"' and
+ '\\' in val):
+ val = r'\\'.join([
+ v.replace('\\u', r'\\u').replace('\\U', r'\\U')
+ for v in val.split(r'\\')
+ ])
+ if val[0] in 'uU':
+ val = val[1:]
+ if val == node.value:
+ return node
+ new = node.clone()
+ new.value = val
+ return new