diff options
author | Thomas Wouters <thomas@python.org> | 2008-03-18 20:19:54 (GMT) |
---|---|---|
committer | Thomas Wouters <thomas@python.org> | 2008-03-18 20:19:54 (GMT) |
commit | 40a088dc27865eb1236d6c728d2880ecd0022a65 (patch) | |
tree | 9489856463cd6b00e2f40ec200a2d640704d8b50 | |
parent | e8c3d266c8cea759a0eb1c17b9c5f989f87bb135 (diff) | |
download | cpython-40a088dc27865eb1236d6c728d2880ecd0022a65.zip cpython-40a088dc27865eb1236d6c728d2880ecd0022a65.tar.gz cpython-40a088dc27865eb1236d6c728d2880ecd0022a65.tar.bz2 |
Fix 're' to work on bytes. It could do with a few more tests, though.
-rw-r--r-- | Lib/sre_compile.py | 2 |
-rw-r--r-- | Lib/sre_parse.py | 4 |
-rw-r--r-- | Lib/test/test_re.py | 50 |
3 files changed, 22 insertions, 34 deletions
```diff
diff --git a/Lib/sre_compile.py b/Lib/sre_compile.py
index f3b415d..4f62416 100644
--- a/Lib/sre_compile.py
+++ b/Lib/sre_compile.py
@@ -472,7 +472,7 @@ def _compile_info(code, pattern, flags):
     code[skip] = len(code) - skip
 
 def isstring(obj):
-    return isinstance(obj, str)
+    return isinstance(obj, (str, bytes))
 
 def _code(p, flags):
 
diff --git a/Lib/sre_parse.py b/Lib/sre_parse.py
index a04c343..6e70024 100644
--- a/Lib/sre_parse.py
+++ b/Lib/sre_parse.py
@@ -192,8 +192,8 @@ class Tokenizer:
         char = self.string[self.index:self.index+1]
         # Special case for the str8, since indexing returns a integer
         # XXX This is only needed for test_bug_926075 in test_re.py
-        if isinstance(self.string, bytes):
-            char = chr(char)
+        if char and isinstance(char, bytes):
+            char = chr(char[0])
         if char == "\\":
             try:
                 c = self.string[self.index + 1]
diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py
index 28e508c..7aa6996 100644
--- a/Lib/test/test_re.py
+++ b/Lib/test/test_re.py
@@ -83,33 +83,22 @@ class ReTests(unittest.TestCase):
         self.assertEqual(re.sub('\r\n', '\n', 'abc\r\ndef\r\n'),
                          'abc\ndef\n')
 
-# This test makes no sense until re supports bytes, and should then probably
-# test for the *in*ability to mix bytes and str this way :)
-#
-#    def test_bug_1140(self):
-#        # re.sub(x, y, b'') should return b'', not '', and
-#        # re.sub(x, y, '') should return '', not b''.
-#        # Also:
-#        # re.sub(x, y, str(x)) should return str(y), and
-#        # re.sub(x, y, bytes(x)) should return
-#        #     str(y) if isinstance(y, str) else unicode(y).
-#        for x in 'x', u'x':
-#            for y in 'y', u'y':
-#                z = re.sub(x, y, u'')
-#                self.assertEqual(z, u'')
-#                self.assertEqual(type(z), unicode)
-#                #
-#                z = re.sub(x, y, '')
-#                self.assertEqual(z, '')
-#                self.assertEqual(type(z), str)
-#                #
-#                z = re.sub(x, y, unicode(x))
-#                self.assertEqual(z, y)
-#                self.assertEqual(type(z), unicode)
-#                #
-#                z = re.sub(x, y, str(x))
-#                self.assertEqual(z, y)
-#                self.assertEqual(type(z), type(y))
+    def test_bug_1140(self):
+        # re.sub(x, y, b'') should return b'', not '', and
+        # re.sub(x, y, '') should return '', not b''.
+        # Also:
+        # re.sub(x, y, str(x)) should return str(y), and
+        # re.sub(x, y, bytes(x)) should return
+        #     str(y) if isinstance(y, str) else unicode(y).
+        for x in 'x', b'x':
+            for y in 'y', b'y':
+                z = re.sub(x, y, b'')
+                self.assertEqual(z, b'')
+                self.assertEqual(type(z), bytes)
+                #
+                z = re.sub(x, y, '')
+                self.assertEqual(z, '')
+                self.assertEqual(type(z), str)
 
     def test_bug_1661(self):
         # Verify that flags do not get silently ignored with compiled patterns
@@ -599,10 +588,9 @@ class ReTests(unittest.TestCase):
         self.assertEqual([item.group(0) for item in iter],
                          [":", "::", ":::"])
 
-    # XXX This needs to be restored for str vs. bytes.
-##    def test_bug_926075(self):
-##        self.assert_(re.compile('bug_926075') is not
-##                     re.compile(str8('bug_926075')))
+    def test_bug_926075(self):
+        self.assert_(re.compile('bug_926075') is not
+                     re.compile(b'bug_926075'))
 
     def test_bug_931848(self):
         pattern = eval('"[\u002E\u3002\uFF0E\uFF61]"')
```