diff options
author | Guido van Rossum <guido@python.org> | 2008-09-10 17:44:35 (GMT) |
---|---|---|
committer | Guido van Rossum <guido@python.org> | 2008-09-10 17:44:35 (GMT) |
commit | 698280df7c321b9986ee000054bb9dbbb32625af (patch) | |
tree | 258427b62da42e27228b837314ace46cde063aa1 | |
parent | 92f8f3e013ccb5bb94b1c6133b9b226590587dba (diff) | |
download | cpython-698280df7c321b9986ee000054bb9dbbb32625af.zip cpython-698280df7c321b9986ee000054bb9dbbb32625af.tar.gz cpython-698280df7c321b9986ee000054bb9dbbb32625af.tar.bz2 |
Issue #3756: make re.escape() handle bytes as well as str.
Patch by Andrew McNamara, reviewed and tweaked by myself.
-rw-r--r-- | Lib/re.py | 46 | ||||
-rw-r--r-- | Lib/test/test_re.py | 14 | ||||
-rw-r--r-- | Misc/NEWS | 2 |
3 files changed, 47 insertions, 15 deletions
@@ -211,23 +211,38 @@ def template(pattern, flags=0):
     "Compile a template pattern, returning a pattern object"
     return _compile(pattern, flags|T)
 
-_alphanum = {}
-for c in 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ01234567890':
-    _alphanum[c] = 1
-del c
+_alphanum_str = frozenset(
+    "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ01234567890")
+_alphanum_bytes = frozenset(
+    b"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ01234567890")
 
 def escape(pattern):
     "Escape all non-alphanumeric characters in pattern."
-    s = list(pattern)
-    alphanum = _alphanum
-    for i in range(len(pattern)):
-        c = pattern[i]
-        if c not in alphanum:
-            if c == "\000":
-                s[i] = "\\000"
+    if isinstance(pattern, str):
+        alphanum = _alphanum_str
+        s = list(pattern)
+        for i in range(len(pattern)):
+            c = pattern[i]
+            if c not in alphanum:
+                if c == "\000":
+                    s[i] = "\\000"
+                else:
+                    s[i] = "\\" + c
+        return "".join(s)
+    else:
+        alphanum = _alphanum_bytes
+        s = []
+        esc = ord(b"\\")
+        for c in pattern:
+            if c in alphanum:
+                s.append(c)
             else:
-                s[i] = "\\" + c
-    return pattern[:0].join(s)
+                if c == 0:
+                    s.extend(b"\\000")
+                else:
+                    s.append(esc)
+                    s.append(c)
+        return bytes(s)
 
 # --------------------------------------------------------------------
 # internals
@@ -248,7 +263,8 @@ def _compile(*key):
     pattern, flags = key
     if isinstance(pattern, _pattern_type):
         if flags:
-            raise ValueError('Cannot process flags argument with a compiled pattern')
+            raise ValueError(
+                "Cannot process flags argument with a compiled pattern")
         return pattern
     if not sre_compile.isstring(pattern):
         raise TypeError("first argument must be string or compiled pattern")
@@ -325,7 +341,7 @@ class Scanner:
             if i == j:
                 break
             action = self.lexicon[m.lastindex-1][1]
-            if hasattr(action, '__call__'):
+            if hasattr(action, "__call__"):
                 self.match = m
                 action = action(self, m.group())
             if action is not None:
diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py
index 8229d4a..11fff78 100644
--- a/Lib/test/test_re.py
+++ b/Lib/test/test_re.py
@@ -416,6 +416,7 @@ class ReTests(unittest.TestCase):
 
     def test_re_escape(self):
         p=""
+        self.assertEqual(re.escape(p), p)
         for i in range(0, 256):
             p = p + chr(i)
             self.assertEqual(re.match(re.escape(chr(i)), chr(i)) is not None,
@@ -426,6 +427,19 @@ class ReTests(unittest.TestCase):
         self.assertEqual(pat.match(p) is not None, True)
         self.assertEqual(pat.match(p).span(), (0,256))
 
+    def test_re_escape_byte(self):
+        p=b""
+        self.assertEqual(re.escape(p), p)
+        for i in range(0, 256):
+            b = bytes([i])
+            p += b
+            self.assertEqual(re.match(re.escape(b), b) is not None, True)
+            self.assertEqual(re.match(re.escape(b), b).span(), (0,1))
+
+        pat=re.compile(re.escape(p))
+        self.assertEqual(pat.match(p) is not None, True)
+        self.assertEqual(pat.match(p).span(), (0,256))
+
     def pickle_test(self, pickle):
         oldpat = re.compile('a(?:b|(c|e){1,2}?|d)+?(.)')
         s = pickle.dumps(oldpat)
@@ -96,6 +96,8 @@ C API
 Library
 -------
 
+- Issue #3756: make re.escape() handle bytes as well as str.
+
 - Issue #3800: fix filter() related bug in formatter.py.
 
 - Issue #874900: fix behaviour of threading module after a fork.