From d2114ebd970ec2b9b704dfd6b665b3b2940209b7 Mon Sep 17 00:00:00 2001
From: Ezio Melotti
Date: Fri, 25 Mar 2011 14:08:44 +0200
Subject: #2650: Refactor the tests for re.escape.

---
 Lib/test/test_re.py | 62 ++++++++++++++++++++++++++++++++++-------------------
 1 file changed, 40 insertions(+), 22 deletions(-)

diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py
index 268d66d..86eda54 100644
--- a/Lib/test/test_re.py
+++ b/Lib/test/test_re.py
@@ -1,7 +1,10 @@
 from test.support import verbose, run_unittest
 import re
 from re import Scanner
-import sys, os, traceback
+import os
+import sys
+import string
+import traceback
 from weakref import proxy
 
 # Misc tests from Tim Peters' re.doc
@@ -411,31 +414,46 @@ class ReTests(unittest.TestCase):
         self.assertEqual(re.search("\s(b)", " b").group(1), "b")
         self.assertEqual(re.search("a\s", "a ").group(0), "a ")
 
+    def assertMatch(self, pattern, text, match=None, span=None,
+                    matcher=re.match):
+        if match is None and span is None:
+            # the pattern matches the whole text
+            match = text
+            span = (0, len(text))
+        elif match is None or span is None:
+            raise ValueError('If match is not None, span should be specified '
+                             '(and vice versa).')
+        m = matcher(pattern, text)
+        self.assertTrue(m)
+        self.assertEqual(m.group(), match)
+        self.assertEqual(m.span(), span)
+
     def test_re_escape(self):
-        p=""
-        self.assertEqual(re.escape(p), p)
-        for i in range(0, 256):
-            p = p + chr(i)
-            self.assertEqual(re.match(re.escape(chr(i)), chr(i)) is not None,
-                             True)
-            self.assertEqual(re.match(re.escape(chr(i)), chr(i)).span(), (0,1))
-
-        pat=re.compile(re.escape(p))
-        self.assertEqual(pat.match(p) is not None, True)
-        self.assertEqual(pat.match(p).span(), (0,256))
+        alnum_chars = string.ascii_letters + string.digits
+        p = ''.join(chr(i) for i in range(256))
+        for c in p:
+            if c in alnum_chars:
+                self.assertEqual(re.escape(c), c)
+            elif c == '\x00':
+                self.assertEqual(re.escape(c), '\\000')
+            else:
+                self.assertEqual(re.escape(c), '\\' + c)
+            self.assertMatch(re.escape(c), c)
+        self.assertMatch(re.escape(p), p)
 
     def test_re_escape_byte(self):
-        p=b""
-        self.assertEqual(re.escape(p), p)
-        for i in range(0, 256):
+        alnum_chars = (string.ascii_letters + string.digits).encode('ascii')
+        p = bytes(range(256))
+        for i in p:
             b = bytes([i])
-            p += b
-            self.assertEqual(re.match(re.escape(b), b) is not None, True)
-            self.assertEqual(re.match(re.escape(b), b).span(), (0,1))
-
-        pat=re.compile(re.escape(p))
-        self.assertEqual(pat.match(p) is not None, True)
-        self.assertEqual(pat.match(p).span(), (0,256))
+            if b in alnum_chars:
+                self.assertEqual(re.escape(b), b)
+            elif i == 0:
+                self.assertEqual(re.escape(b), b'\\000')
+            else:
+                self.assertEqual(re.escape(b), b'\\' + b)
+            self.assertMatch(re.escape(b), b)
+        self.assertMatch(re.escape(p), p)
 
     def pickle_test(self, pickle):
         oldpat = re.compile('a(?:b|(c|e){1,2}?|d)+?(.)')
-- 
cgit v0.12


From 7b9e97b48765780ec71db330022bc68ba73a4b19 Mon Sep 17 00:00:00 2001
From: Ezio Melotti
Date: Fri, 25 Mar 2011 14:09:33 +0200
Subject: #2650: Add tests with non-ascii chars for re.escape.

---
 Lib/test/test_re.py | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py
index 86eda54..5ad44dd 100644
--- a/Lib/test/test_re.py
+++ b/Lib/test/test_re.py
@@ -455,6 +455,22 @@ class ReTests(unittest.TestCase):
             self.assertMatch(re.escape(b), b)
         self.assertMatch(re.escape(p), p)
 
+    def test_re_escape_non_ascii(self):
+        s = 'xxx\u2620\u2620\u2620xxx'
+        s_escaped = re.escape(s)
+        self.assertEqual(s_escaped, 'xxx\\\u2620\\\u2620\\\u2620xxx')
+        self.assertMatch(s_escaped, s)
+        self.assertMatch('.%s+.' % re.escape('\u2620'), s,
+                         'x\u2620\u2620\u2620x', (2, 7), re.search)
+
+    def test_re_escape_non_ascii_bytes(self):
+        b = 'y\u2620y\u2620y'.encode('utf-8')
+        b_escaped = re.escape(b)
+        self.assertEqual(b_escaped, b'y\\\xe2\\\x98\\\xa0y\\\xe2\\\x98\\\xa0y')
+        self.assertMatch(b_escaped, b)
+        res = re.findall(re.escape('\u2620'.encode('utf-8')), b)
+        self.assertEqual(len(res), 2)
+
     def pickle_test(self, pickle):
         oldpat = re.compile('a(?:b|(c|e){1,2}?|d)+?(.)')
         s = pickle.dumps(oldpat)
-- 
cgit v0.12


From ebbf1e67a8c56a79ee62280d32517e77b103bf8e Mon Sep 17 00:00:00 2001
From: Ezio Melotti
Date: Fri, 25 Mar 2011 14:19:30 +0200
Subject: #2650: Refactor re.escape to use enumerate().

---
 Lib/re.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/Lib/re.py b/Lib/re.py
index 9bd913a..309afef 100644
--- a/Lib/re.py
+++ b/Lib/re.py
@@ -223,8 +223,7 @@ def escape(pattern):
     if isinstance(pattern, str):
         alphanum = _alphanum_str
         s = list(pattern)
-        for i in range(len(pattern)):
-            c = pattern[i]
+        for i, c in enumerate(pattern):
             if c not in alphanum:
                 if c == "\000":
                     s[i] = "\\000"
-- 
cgit v0.12