diff options
author | Ezio Melotti <ezio.melotti@gmail.com> | 2010-03-06 15:27:04 (GMT) |
---|---|---|
committer | Ezio Melotti <ezio.melotti@gmail.com> | 2010-03-06 15:27:04 (GMT) |
commit | dab886ab0fc719dc990f5d06dc99f7ec41807827 (patch) | |
tree | 09c9b52438420a97ac82d37147a53f42aacdd06a | |
parent | c0ddee54b066e937b151a8598be5dba7f45f2405 (diff) | |
download | cpython-dab886ab0fc719dc990f5d06dc99f7ec41807827.zip cpython-dab886ab0fc719dc990f5d06dc99f7ec41807827.tar.gz cpython-dab886ab0fc719dc990f5d06dc99f7ec41807827.tar.bz2 |
Merged revisions 78729 via svnmerge from
svn+ssh://pythondev@svn.python.org/python/branches/py3k
........
r78729 | ezio.melotti | 2010-03-06 17:24:08 +0200 (Sat, 06 Mar 2010) | 1 line
#6509: fix re.sub to work properly when the pattern, the string, and the replacement were all bytes. Patch by Antoine Pitrou.
........
-rw-r--r-- | Lib/sre_parse.py | 8 | ||||
-rw-r--r-- | Lib/test/test_re.py | 18 | ||||
-rw-r--r-- | Misc/NEWS | 3 |
3 files changed, 28 insertions, 1 deletions
```diff
diff --git a/Lib/sre_parse.py b/Lib/sre_parse.py
index bc71b58..13737ca 100644
--- a/Lib/sre_parse.py
+++ b/Lib/sre_parse.py
@@ -786,12 +786,18 @@ def parse_template(source, pattern):
     groups = []
     groupsappend = groups.append
     literals = [None] * len(p)
+    if isinstance(source, str):
+        encode = lambda x: x
+    else:
+        # The tokenizer implicitly decodes bytes objects as latin-1, we must
+        # therefore re-encode the final representation.
+        encode = lambda x: x.encode('latin1')
     for c, s in p:
         if c is MARK:
             groupsappend((i, s))
             # literal[i] is already None
         else:
-            literals[i] = s
+            literals[i] = encode(s)
         i = i + 1
     return groups, literals
 
diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py
index 99cc47b..44b5dfe 100644
--- a/Lib/test/test_re.py
+++ b/Lib/test/test_re.py
@@ -696,6 +696,24 @@ class ReTests(unittest.TestCase):
         self.assertRaises(ValueError, re.compile, '(?a)\w', re.UNICODE)
         self.assertRaises(ValueError, re.compile, '(?au)\w')
 
+    def test_bug_6509(self):
+        # Replacement strings of both types must parse properly.
+        # all strings
+        pat = re.compile('a(\w)')
+        self.assertEqual(pat.sub('b\\1', 'ac'), 'bc')
+        pat = re.compile('a(.)')
+        self.assertEqual(pat.sub('b\\1', 'a\u1234'), 'b\u1234')
+        pat = re.compile('..')
+        self.assertEqual(pat.sub(lambda m: 'str', 'a5'), 'str')
+
+        # all bytes
+        pat = re.compile(b'a(\w)')
+        self.assertEqual(pat.sub(b'b\\1', b'ac'), b'bc')
+        pat = re.compile(b'a(.)')
+        self.assertEqual(pat.sub(b'b\\1', b'a\xCD'), b'b\xCD')
+        pat = re.compile(b'..')
+        self.assertEqual(pat.sub(lambda m: b'bytes', b'a5'), b'bytes')
+
     def test_dealloc(self):
         # issue 3299: check for segfault in debug build
         import _sre
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -100,6 +100,9 @@ Core and Builtins
 Library
 -------
 
+- Issue #6509: fix re.sub to work properly when the pattern, the string, and
+  the replacement were all bytes. Patch by Antoine Pitrou.
+
 - Issue #1054943: Fix unicodedata.normalize('NFC', text) for the Public Review
   Issue #29
 
```