summaryrefslogtreecommitdiffstats
path: root/Lib/test/test_re.py
diff options
context:
space:
mode:
Diffstat (limited to 'Lib/test/test_re.py')
-rw-r--r--Lib/test/test_re.py147
1 files changed, 131 insertions, 16 deletions
diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py
index f093812..8d63fac 100644
--- a/Lib/test/test_re.py
+++ b/Lib/test/test_re.py
@@ -3,10 +3,12 @@ from test.support import verbose, run_unittest, gc_collect, bigmemtest, _2G, \
import io
import re
from re import Scanner
+import sre_compile
import sre_constants
import sys
import string
import traceback
+import unittest
from weakref import proxy
# Misc tests from Tim Peters' re.doc
@@ -15,10 +17,26 @@ from weakref import proxy
# what you're doing. Some of these tests were carefully modeled to
# cover most of the code.
-import unittest
+class S(str):
+ def __getitem__(self, index):
+ return S(super().__getitem__(index))
+
+class B(bytes):
+ def __getitem__(self, index):
+ return B(super().__getitem__(index))
class ReTests(unittest.TestCase):
+ def assertTypedEqual(self, actual, expect, msg=None):
+ self.assertEqual(actual, expect, msg)
+ def recurse(actual, expect):
+ if isinstance(expect, (tuple, list)):
+ for x, y in zip(actual, expect):
+ recurse(x, y)
+ else:
+ self.assertIs(type(actual), type(expect), msg)
+ recurse(actual, expect)
+
def test_keep_buffer(self):
# See bug 14212
b = bytearray(b'x')
@@ -53,6 +71,15 @@ class ReTests(unittest.TestCase):
return str(int_value + 1)
def test_basic_re_sub(self):
+ self.assertTypedEqual(re.sub('y', 'a', 'xyz'), 'xaz')
+ self.assertTypedEqual(re.sub('y', S('a'), S('xyz')), 'xaz')
+ self.assertTypedEqual(re.sub(b'y', b'a', b'xyz'), b'xaz')
+ self.assertTypedEqual(re.sub(b'y', B(b'a'), B(b'xyz')), b'xaz')
+ self.assertTypedEqual(re.sub(b'y', bytearray(b'a'), bytearray(b'xyz')), b'xaz')
+ self.assertTypedEqual(re.sub(b'y', memoryview(b'a'), memoryview(b'xyz')), b'xaz')
+ for y in ("\xe0", "\u0430", "\U0001d49c"):
+ self.assertEqual(re.sub(y, 'a', 'x%sz' % y), 'xaz')
+
self.assertEqual(re.sub("(?i)b+", "x", "bbbb BBBB"), 'x x')
self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y'),
'9.3 -3 24x100y')
@@ -210,10 +237,29 @@ class ReTests(unittest.TestCase):
self.assertEqual(re.subn("b*", "x", "xyz", 2), ('xxxyz', 2))
def test_re_split(self):
- self.assertEqual(re.split(":", ":a:b::c"), ['', 'a', 'b', '', 'c'])
- self.assertEqual(re.split(":*", ":a:b::c"), ['', 'a', 'b', 'c'])
- self.assertEqual(re.split("(:*)", ":a:b::c"),
- ['', ':', 'a', ':', 'b', '::', 'c'])
+ for string in ":a:b::c", S(":a:b::c"):
+ self.assertTypedEqual(re.split(":", string),
+ ['', 'a', 'b', '', 'c'])
+ self.assertTypedEqual(re.split(":*", string),
+ ['', 'a', 'b', 'c'])
+ self.assertTypedEqual(re.split("(:*)", string),
+ ['', ':', 'a', ':', 'b', '::', 'c'])
+ for string in (b":a:b::c", B(b":a:b::c"), bytearray(b":a:b::c"),
+ memoryview(b":a:b::c")):
+ self.assertTypedEqual(re.split(b":", string),
+ [b'', b'a', b'b', b'', b'c'])
+ self.assertTypedEqual(re.split(b":*", string),
+ [b'', b'a', b'b', b'c'])
+ self.assertTypedEqual(re.split(b"(:*)", string),
+ [b'', b':', b'a', b':', b'b', b'::', b'c'])
+ for a, b, c in ("\xe0\xdf\xe7", "\u0430\u0431\u0432",
+ "\U0001d49c\U0001d49e\U0001d4b5"):
+ string = ":%s:%s::%s" % (a, b, c)
+ self.assertEqual(re.split(":", string), ['', a, b, '', c])
+ self.assertEqual(re.split(":*", string), ['', a, b, c])
+ self.assertEqual(re.split("(:*)", string),
+ ['', ':', a, ':', b, '::', c])
+
self.assertEqual(re.split("(?::*)", ":a:b::c"), ['', 'a', 'b', 'c'])
self.assertEqual(re.split("(:)*", ":a:b::c"),
['', ':', 'a', ':', 'b', ':', 'c'])
@@ -235,22 +281,53 @@ class ReTests(unittest.TestCase):
def test_re_findall(self):
self.assertEqual(re.findall(":+", "abc"), [])
- self.assertEqual(re.findall(":+", "a:b::c:::d"), [":", "::", ":::"])
- self.assertEqual(re.findall("(:+)", "a:b::c:::d"), [":", "::", ":::"])
- self.assertEqual(re.findall("(:)(:*)", "a:b::c:::d"), [(":", ""),
- (":", ":"),
- (":", "::")])
+ for string in "a:b::c:::d", S("a:b::c:::d"):
+ self.assertTypedEqual(re.findall(":+", string),
+ [":", "::", ":::"])
+ self.assertTypedEqual(re.findall("(:+)", string),
+ [":", "::", ":::"])
+ self.assertTypedEqual(re.findall("(:)(:*)", string),
+ [(":", ""), (":", ":"), (":", "::")])
+ for string in (b"a:b::c:::d", B(b"a:b::c:::d"), bytearray(b"a:b::c:::d"),
+ memoryview(b"a:b::c:::d")):
+ self.assertTypedEqual(re.findall(b":+", string),
+ [b":", b"::", b":::"])
+ self.assertTypedEqual(re.findall(b"(:+)", string),
+ [b":", b"::", b":::"])
+ self.assertTypedEqual(re.findall(b"(:)(:*)", string),
+ [(b":", b""), (b":", b":"), (b":", b"::")])
+ for x in ("\xe0", "\u0430", "\U0001d49c"):
+ xx = x * 2
+ xxx = x * 3
+ string = "a%sb%sc%sd" % (x, xx, xxx)
+ self.assertEqual(re.findall("%s+" % x, string), [x, xx, xxx])
+ self.assertEqual(re.findall("(%s+)" % x, string), [x, xx, xxx])
+ self.assertEqual(re.findall("(%s)(%s*)" % (x, x), string),
+ [(x, ""), (x, x), (x, xx)])
def test_bug_117612(self):
self.assertEqual(re.findall(r"(a|(b))", "aba"),
[("a", ""),("b", "b"),("a", "")])
def test_re_match(self):
- self.assertEqual(re.match('a', 'a').groups(), ())
- self.assertEqual(re.match('(a)', 'a').groups(), ('a',))
- self.assertEqual(re.match(r'(a)', 'a').group(0), 'a')
- self.assertEqual(re.match(r'(a)', 'a').group(1), 'a')
- self.assertEqual(re.match(r'(a)', 'a').group(1, 1), ('a', 'a'))
+ for string in 'a', S('a'):
+ self.assertEqual(re.match('a', string).groups(), ())
+ self.assertEqual(re.match('(a)', string).groups(), ('a',))
+ self.assertEqual(re.match('(a)', string).group(0), 'a')
+ self.assertEqual(re.match('(a)', string).group(1), 'a')
+ self.assertEqual(re.match('(a)', string).group(1, 1), ('a', 'a'))
+ for string in b'a', B(b'a'), bytearray(b'a'), memoryview(b'a'):
+ self.assertEqual(re.match(b'a', string).groups(), ())
+ self.assertEqual(re.match(b'(a)', string).groups(), (b'a',))
+ self.assertEqual(re.match(b'(a)', string).group(0), b'a')
+ self.assertEqual(re.match(b'(a)', string).group(1), b'a')
+ self.assertEqual(re.match(b'(a)', string).group(1, 1), (b'a', b'a'))
+ for a in ("\xe0", "\u0430", "\U0001d49c"):
+ self.assertEqual(re.match(a, a).groups(), ())
+ self.assertEqual(re.match('(%s)' % a, a).groups(), (a,))
+ self.assertEqual(re.match('(%s)' % a, a).group(0), a)
+ self.assertEqual(re.match('(%s)' % a, a).group(1), a)
+ self.assertEqual(re.match('(%s)' % a, a).group(1, 1), (a, a))
pat = re.compile('((a)|(b))(c)?')
self.assertEqual(pat.match('a').groups(), ('a', 'a', None, None))
@@ -1053,6 +1130,28 @@ class ReTests(unittest.TestCase):
self.assertEqual(re.compile(pattern, re.S).findall(b'xyz'),
[b'xyz'], msg=pattern)
+ def test_match_repr(self):
+ for string in '[abracadabra]', S('[abracadabra]'):
+ m = re.search(r'(.+)(.*?)\1', string)
+ self.assertEqual(repr(m), "<%s.%s object; "
+ "span=(1, 12), match='abracadabra'>" %
+ (type(m).__module__, type(m).__qualname__))
+ for string in (b'[abracadabra]', B(b'[abracadabra]'),
+ bytearray(b'[abracadabra]'),
+ memoryview(b'[abracadabra]')):
+ m = re.search(rb'(.+)(.*?)\1', string)
+ self.assertEqual(repr(m), "<%s.%s object; "
+ "span=(1, 12), match=b'abracadabra'>" %
+ (type(m).__module__, type(m).__qualname__))
+
+ first, second = list(re.finditer("(aa)|(bb)", "aa bb"))
+ self.assertEqual(repr(first), "<%s.%s object; "
+ "span=(0, 2), match='aa'>" %
+ (type(second).__module__, type(first).__qualname__))
+ self.assertEqual(repr(second), "<%s.%s object; "
+ "span=(3, 5), match='bb'>" %
+ (type(second).__module__, type(second).__qualname__))
+
def test_bug_2537(self):
# issue 2537: empty submatches
@@ -1064,6 +1163,22 @@ class ReTests(unittest.TestCase):
self.assertEqual(m.group(1), "")
self.assertEqual(m.group(2), "y")
+
+class ImplementationTest(unittest.TestCase):
+ """
+ Test implementation details of the re module.
+ """
+
+ def test_overlap_table(self):
+ f = sre_compile._generate_overlap_table
+ self.assertEqual(f(""), [])
+ self.assertEqual(f("a"), [0])
+ self.assertEqual(f("abcd"), [0, 0, 0, 0])
+ self.assertEqual(f("aaaa"), [0, 1, 2, 3])
+ self.assertEqual(f("ababba"), [0, 0, 1, 2, 0, 1])
+ self.assertEqual(f("abcabdac"), [0, 0, 0, 1, 2, 0, 1, 0])
+
+
def run_re_tests():
from test.re_tests import tests, SUCCEED, FAIL, SYNTAX_ERROR
if verbose:
@@ -1193,7 +1308,7 @@ def run_re_tests():
def test_main():
- run_unittest(ReTests)
+ run_unittest(__name__)
run_re_tests()
if __name__ == "__main__":