summary | refs | log | tree | commit | diff | stats
path: root/Lib
diff options
context:
space:
mode:
author Ezio Melotti <ezio.melotti@gmail.com> 2011-03-25 12:25:36 (GMT)
committer Ezio Melotti <ezio.melotti@gmail.com> 2011-03-25 12:25:36 (GMT)
commit 213eb96902387dd1f5af0f5f1b80f3d227fd0186 (patch)
tree 21440775038c38ab46b423af0f7ea6ff1bcac18b /Lib
parent 32a95a70c919be46f1fa65e0b4d98368cda768e6 (diff)
parent ebbf1e67a8c56a79ee62280d32517e77b103bf8e (diff)
download cpython-213eb96902387dd1f5af0f5f1b80f3d227fd0186.zip
download cpython-213eb96902387dd1f5af0f5f1b80f3d227fd0186.tar.gz
download cpython-213eb96902387dd1f5af0f5f1b80f3d227fd0186.tar.bz2
#2650: Merge with 3.1.
Diffstat (limited to 'Lib')
-rw-r--r-- Lib/re.py 3
-rw-r--r-- Lib/test/test_re.py 77
2 files changed, 56 insertions, 24 deletions
diff --git a/Lib/re.py b/Lib/re.py
index 92e4e4c..abd7ea2 100644
--- a/Lib/re.py
+++ b/Lib/re.py
@@ -224,8 +224,7 @@ def escape(pattern):
if isinstance(pattern, str):
alphanum = _alphanum_str
s = list(pattern)
- for i in range(len(pattern)):
- c = pattern[i]
+ for i, c in enumerate(pattern):
if c not in alphanum:
if c == "\000":
s[i] = "\\000"
diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py
index e4b33c9..fe8bc34 100644
--- a/Lib/test/test_re.py
+++ b/Lib/test/test_re.py
@@ -1,7 +1,9 @@
from test.support import verbose, run_unittest
import re
from re import Scanner
-import sys, traceback
+import sys
+import string
+import traceback
from weakref import proxy
# Misc tests from Tim Peters' re.doc
@@ -411,31 +413,62 @@ class ReTests(unittest.TestCase):
self.assertEqual(re.search("\s(b)", " b").group(1), "b")
self.assertEqual(re.search("a\s", "a ").group(0), "a ")
+ def assertMatch(self, pattern, text, match=None, span=None,
+ matcher=re.match):
+ if match is None and span is None:
+ # the pattern matches the whole text
+ match = text
+ span = (0, len(text))
+ elif match is None or span is None:
+ raise ValueError('If match is not None, span should be specified '
+ '(and vice versa).')
+ m = matcher(pattern, text)
+ self.assertTrue(m)
+ self.assertEqual(m.group(), match)
+ self.assertEqual(m.span(), span)
+
def test_re_escape(self):
- p=""
- self.assertEqual(re.escape(p), p)
- for i in range(0, 256):
- p = p + chr(i)
- self.assertEqual(re.match(re.escape(chr(i)), chr(i)) is not None,
- True)
- self.assertEqual(re.match(re.escape(chr(i)), chr(i)).span(), (0,1))
-
- pat=re.compile(re.escape(p))
- self.assertEqual(pat.match(p) is not None, True)
- self.assertEqual(pat.match(p).span(), (0,256))
+ alnum_chars = string.ascii_letters + string.digits
+ p = ''.join(chr(i) for i in range(256))
+ for c in p:
+ if c in alnum_chars:
+ self.assertEqual(re.escape(c), c)
+ elif c == '\x00':
+ self.assertEqual(re.escape(c), '\\000')
+ else:
+ self.assertEqual(re.escape(c), '\\' + c)
+ self.assertMatch(re.escape(c), c)
+ self.assertMatch(re.escape(p), p)
def test_re_escape_byte(self):
- p=b""
- self.assertEqual(re.escape(p), p)
- for i in range(0, 256):
+ alnum_chars = (string.ascii_letters + string.digits).encode('ascii')
+ p = bytes(range(256))
+ for i in p:
b = bytes([i])
- p += b
- self.assertEqual(re.match(re.escape(b), b) is not None, True)
- self.assertEqual(re.match(re.escape(b), b).span(), (0,1))
-
- pat=re.compile(re.escape(p))
- self.assertEqual(pat.match(p) is not None, True)
- self.assertEqual(pat.match(p).span(), (0,256))
+ if b in alnum_chars:
+ self.assertEqual(re.escape(b), b)
+ elif i == 0:
+ self.assertEqual(re.escape(b), b'\\000')
+ else:
+ self.assertEqual(re.escape(b), b'\\' + b)
+ self.assertMatch(re.escape(b), b)
+ self.assertMatch(re.escape(p), p)
+
+ def test_re_escape_non_ascii(self):
+ s = 'xxx\u2620\u2620\u2620xxx'
+ s_escaped = re.escape(s)
+ self.assertEqual(s_escaped, 'xxx\\\u2620\\\u2620\\\u2620xxx')
+ self.assertMatch(s_escaped, s)
+ self.assertMatch('.%s+.' % re.escape('\u2620'), s,
+ 'x\u2620\u2620\u2620x', (2, 7), re.search)
+
+ def test_re_escape_non_ascii_bytes(self):
+ b = 'y\u2620y\u2620y'.encode('utf-8')
+ b_escaped = re.escape(b)
+ self.assertEqual(b_escaped, b'y\\\xe2\\\x98\\\xa0y\\\xe2\\\x98\\\xa0y')
+ self.assertMatch(b_escaped, b)
+ res = re.findall(re.escape('\u2620'.encode('utf-8')), b)
+ self.assertEqual(len(res), 2)
def pickle_test(self, pickle):
oldpat = re.compile('a(?:b|(c|e){1,2}?|d)+?(.)')