summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Guido van Rossum <guido@python.org> 2008-09-10 17:44:35 (GMT)
committer: Guido van Rossum <guido@python.org> 2008-09-10 17:44:35 (GMT)
commit: 698280df7c321b9986ee000054bb9dbbb32625af (patch)
tree: 258427b62da42e27228b837314ace46cde063aa1
parent: 92f8f3e013ccb5bb94b1c6133b9b226590587dba (diff)
download: cpython-698280df7c321b9986ee000054bb9dbbb32625af.zip
download: cpython-698280df7c321b9986ee000054bb9dbbb32625af.tar.gz
download: cpython-698280df7c321b9986ee000054bb9dbbb32625af.tar.bz2
Issue #3756: make re.escape() handle bytes as well as str.
Patch by Andrew McNamara, reviewed and tweaked by myself.
-rw-r--r-- Lib/re.py 46
-rw-r--r-- Lib/test/test_re.py 14
-rw-r--r-- Misc/NEWS 2
3 files changed, 47 insertions, 15 deletions
diff --git a/Lib/re.py b/Lib/re.py
index 63a95fd..090ec8a 100644
--- a/Lib/re.py
+++ b/Lib/re.py
@@ -211,23 +211,38 @@ def template(pattern, flags=0):
"Compile a template pattern, returning a pattern object"
return _compile(pattern, flags|T)
-_alphanum = {}
-for c in 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ01234567890':
- _alphanum[c] = 1
-del c
+_alphanum_str = frozenset(
+ "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ01234567890")
+_alphanum_bytes = frozenset(
+ b"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ01234567890")
def escape(pattern):
"Escape all non-alphanumeric characters in pattern."
- s = list(pattern)
- alphanum = _alphanum
- for i in range(len(pattern)):
- c = pattern[i]
- if c not in alphanum:
- if c == "\000":
- s[i] = "\\000"
+ if isinstance(pattern, str):
+ alphanum = _alphanum_str
+ s = list(pattern)
+ for i in range(len(pattern)):
+ c = pattern[i]
+ if c not in alphanum:
+ if c == "\000":
+ s[i] = "\\000"
+ else:
+ s[i] = "\\" + c
+ return "".join(s)
+ else:
+ alphanum = _alphanum_bytes
+ s = []
+ esc = ord(b"\\")
+ for c in pattern:
+ if c in alphanum:
+ s.append(c)
else:
- s[i] = "\\" + c
- return pattern[:0].join(s)
+ if c == 0:
+ s.extend(b"\\000")
+ else:
+ s.append(esc)
+ s.append(c)
+ return bytes(s)
# --------------------------------------------------------------------
# internals
@@ -248,7 +263,8 @@ def _compile(*key):
pattern, flags = key
if isinstance(pattern, _pattern_type):
if flags:
- raise ValueError('Cannot process flags argument with a compiled pattern')
+ raise ValueError(
+ "Cannot process flags argument with a compiled pattern")
return pattern
if not sre_compile.isstring(pattern):
raise TypeError("first argument must be string or compiled pattern")
@@ -325,7 +341,7 @@ class Scanner:
if i == j:
break
action = self.lexicon[m.lastindex-1][1]
- if hasattr(action, '__call__'):
+ if hasattr(action, "__call__"):
self.match = m
action = action(self, m.group())
if action is not None:
diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py
index 8229d4a..11fff78 100644
--- a/Lib/test/test_re.py
+++ b/Lib/test/test_re.py
@@ -416,6 +416,7 @@ class ReTests(unittest.TestCase):
def test_re_escape(self):
p=""
+ self.assertEqual(re.escape(p), p)
for i in range(0, 256):
p = p + chr(i)
self.assertEqual(re.match(re.escape(chr(i)), chr(i)) is not None,
@@ -426,6 +427,19 @@ class ReTests(unittest.TestCase):
self.assertEqual(pat.match(p) is not None, True)
self.assertEqual(pat.match(p).span(), (0,256))
+ def test_re_escape_byte(self):
+ p=b""
+ self.assertEqual(re.escape(p), p)
+ for i in range(0, 256):
+ b = bytes([i])
+ p += b
+ self.assertEqual(re.match(re.escape(b), b) is not None, True)
+ self.assertEqual(re.match(re.escape(b), b).span(), (0,1))
+
+ pat=re.compile(re.escape(p))
+ self.assertEqual(pat.match(p) is not None, True)
+ self.assertEqual(pat.match(p).span(), (0,256))
+
def pickle_test(self, pickle):
oldpat = re.compile('a(?:b|(c|e){1,2}?|d)+?(.)')
s = pickle.dumps(oldpat)
diff --git a/Misc/NEWS b/Misc/NEWS
index d22d038..21d269b 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -96,6 +96,8 @@ C API
Library
-------
+- Issue #3756: make re.escape() handle bytes as well as str.
+
- Issue #3800: fix filter() related bug in formatter.py.
- Issue #874900: fix behaviour of threading module after a fork.