summaryrefslogtreecommitdiffstats
path: root/Lib/test/test_re.py
diff options
context:
space:
mode:
Diffstat (limited to 'Lib/test/test_re.py')
-rw-r--r--Lib/test/test_re.py284
1 files changed, 218 insertions, 66 deletions
diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py
index 7348af3..0fbf8c5 100644
--- a/Lib/test/test_re.py
+++ b/Lib/test/test_re.py
@@ -84,7 +84,7 @@ class ReTests(unittest.TestCase):
self.assertEqual(re.sub("(?i)b+", "x", "bbbb BBBB"), 'x x')
self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y'),
'9.3 -3 24x100y')
- self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y', 3),
+ self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y', count=3),
'9.3 -3 23x99y')
self.assertEqual(re.sub('.', lambda m: r"\n", 'x'), '\\n')
@@ -155,8 +155,8 @@ class ReTests(unittest.TestCase):
self.assertEqual(re.sub('x', r'\09', 'x'), '\0' + '9')
self.assertEqual(re.sub('x', r'\0a', 'x'), '\0' + 'a')
- self.assertEqual(re.sub('x', r'\400', 'x'), '\0')
- self.assertEqual(re.sub('x', r'\777', 'x'), '\377')
+ self.assertRaises(re.error, re.sub, 'x', r'\400', 'x')
+ self.assertRaises(re.error, re.sub, 'x', r'\777', 'x')
self.assertRaises(re.error, re.sub, 'x', r'\1', 'x')
self.assertRaises(re.error, re.sub, 'x', r'\8', 'x')
@@ -180,7 +180,7 @@ class ReTests(unittest.TestCase):
def test_qualified_re_sub(self):
self.assertEqual(re.sub('a', 'b', 'aaaaa'), 'bbbbb')
- self.assertEqual(re.sub('a', 'b', 'aaaaa', 1), 'baaaa')
+ self.assertEqual(re.sub('a', 'b', 'aaaaa', count=1), 'baaaa')
def test_bug_114660(self):
self.assertEqual(re.sub(r'(\S)\s+(\S)', r'\1 \2', 'hello there'),
@@ -194,6 +194,7 @@ class ReTests(unittest.TestCase):
def test_symbolic_groups(self):
re.compile('(?P<a>x)(?P=a)(?(a)y)')
re.compile('(?P<a1>x)(?P=a1)(?(a1)y)')
+ re.compile('(?P<a1>x)\1(?(1)y)')
self.assertRaises(re.error, re.compile, '(?P<a>)(?P<a>)')
self.assertRaises(re.error, re.compile, '(?Px)')
self.assertRaises(re.error, re.compile, '(?P=)')
@@ -213,6 +214,10 @@ class ReTests(unittest.TestCase):
re.compile('(?P<µ>x)(?P=µ)(?(µ)y)')
re.compile('(?P<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>x)(?P=𝔘𝔫𝔦𝔠𝔬𝔡𝔢)(?(𝔘𝔫𝔦𝔠𝔬𝔡𝔢)y)')
self.assertRaises(re.error, re.compile, '(?P<©>x)')
+ # Support > 100 groups.
+ pat = '|'.join('x(?P<a%d>%x)y' % (i, i) for i in range(1, 200 + 1))
+ pat = '(?:%s)(?(200)z|t)' % pat
+ self.assertEqual(re.match(pat, 'xc8yz').span(), (0, 5))
def test_symbolic_refs(self):
self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a', 'xx')
@@ -221,48 +226,53 @@ class ReTests(unittest.TestCase):
self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a a>', 'xx')
self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<>', 'xx')
self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<1a1>', 'xx')
+ self.assertRaises(re.error, re.sub, '(?P<a>x)', r'\g<2>', 'xx')
+ self.assertRaises(re.error, re.sub, '(?P<a>x)', r'\2', 'xx')
self.assertRaises(IndexError, re.sub, '(?P<a>x)', '\g<ab>', 'xx')
- self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\g<b>', 'xx')
- self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\\2', 'xx')
+ self.assertEqual(re.sub('(?P<a>x)|(?P<b>y)', r'\g<b>', 'xx'), '')
+ self.assertEqual(re.sub('(?P<a>x)|(?P<b>y)', r'\2', 'xx'), '')
self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<-1>', 'xx')
# New valid/invalid identifiers in Python 3
self.assertEqual(re.sub('(?P<µ>x)', r'\g<µ>', 'xx'), 'xx')
self.assertEqual(re.sub('(?P<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>x)', r'\g<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>', 'xx'), 'xx')
self.assertRaises(re.error, re.sub, '(?P<a>x)', r'\g<©>', 'xx')
+ # Support > 100 groups.
+ pat = '|'.join('x(?P<a%d>%x)y' % (i, i) for i in range(1, 200 + 1))
+ self.assertEqual(re.sub(pat, '\g<200>', 'xc8yzxc8y'), 'c8zc8')
def test_re_subn(self):
self.assertEqual(re.subn("(?i)b+", "x", "bbbb BBBB"), ('x x', 2))
self.assertEqual(re.subn("b+", "x", "bbbb BBBB"), ('x BBBB', 1))
self.assertEqual(re.subn("b+", "x", "xyz"), ('xyz', 0))
self.assertEqual(re.subn("b*", "x", "xyz"), ('xxxyxzx', 4))
- self.assertEqual(re.subn("b*", "x", "xyz", 2), ('xxxyz', 2))
+ self.assertEqual(re.subn("b*", "x", "xyz", count=2), ('xxxyz', 2))
def test_re_split(self):
for string in ":a:b::c", S(":a:b::c"):
self.assertTypedEqual(re.split(":", string),
['', 'a', 'b', '', 'c'])
- self.assertTypedEqual(re.split(":*", string),
+ self.assertTypedEqual(re.split(":+", string),
['', 'a', 'b', 'c'])
- self.assertTypedEqual(re.split("(:*)", string),
+ self.assertTypedEqual(re.split("(:+)", string),
['', ':', 'a', ':', 'b', '::', 'c'])
for string in (b":a:b::c", B(b":a:b::c"), bytearray(b":a:b::c"),
memoryview(b":a:b::c")):
self.assertTypedEqual(re.split(b":", string),
[b'', b'a', b'b', b'', b'c'])
- self.assertTypedEqual(re.split(b":*", string),
+ self.assertTypedEqual(re.split(b":+", string),
[b'', b'a', b'b', b'c'])
- self.assertTypedEqual(re.split(b"(:*)", string),
+ self.assertTypedEqual(re.split(b"(:+)", string),
[b'', b':', b'a', b':', b'b', b'::', b'c'])
for a, b, c in ("\xe0\xdf\xe7", "\u0430\u0431\u0432",
"\U0001d49c\U0001d49e\U0001d4b5"):
string = ":%s:%s::%s" % (a, b, c)
self.assertEqual(re.split(":", string), ['', a, b, '', c])
- self.assertEqual(re.split(":*", string), ['', a, b, c])
- self.assertEqual(re.split("(:*)", string),
+ self.assertEqual(re.split(":+", string), ['', a, b, c])
+ self.assertEqual(re.split("(:+)", string),
['', ':', a, ':', b, '::', c])
- self.assertEqual(re.split("(?::*)", ":a:b::c"), ['', 'a', 'b', 'c'])
- self.assertEqual(re.split("(:)*", ":a:b::c"),
+ self.assertEqual(re.split("(?::+)", ":a:b::c"), ['', 'a', 'b', 'c'])
+ self.assertEqual(re.split("(:)+", ":a:b::c"),
['', ':', 'a', ':', 'b', ':', 'c'])
self.assertEqual(re.split("([b:]+)", ":a:b::c"),
['', ':', 'a', ':b::', 'c'])
@@ -272,13 +282,34 @@ class ReTests(unittest.TestCase):
self.assertEqual(re.split("(?:b)|(?::+)", ":a:b::c"),
['', 'a', '', '', 'c'])
+ for sep, expected in [
+ (':*', ['', 'a', 'b', 'c']),
+ ('(?::*)', ['', 'a', 'b', 'c']),
+ ('(:*)', ['', ':', 'a', ':', 'b', '::', 'c']),
+ ('(:)*', ['', ':', 'a', ':', 'b', ':', 'c']),
+ ]:
+ with self.subTest(sep=sep), self.assertWarns(FutureWarning):
+ self.assertTypedEqual(re.split(sep, ':a:b::c'), expected)
+
+ for sep, expected in [
+ ('', [':a:b::c']),
+ (r'\b', [':a:b::c']),
+ (r'(?=:)', [':a:b::c']),
+ (r'(?<=:)', [':a:b::c']),
+ ]:
+ with self.subTest(sep=sep), self.assertRaises(ValueError):
+ self.assertTypedEqual(re.split(sep, ':a:b::c'), expected)
+
def test_qualified_re_split(self):
- self.assertEqual(re.split(":", ":a:b::c", 2), ['', 'a', 'b::c'])
- self.assertEqual(re.split(':', 'a:b:c:d', 2), ['a', 'b', 'c:d'])
- self.assertEqual(re.split("(:)", ":a:b::c", 2),
+ self.assertEqual(re.split(":", ":a:b::c", maxsplit=2), ['', 'a', 'b::c'])
+ self.assertEqual(re.split(':', 'a:b:c:d', maxsplit=2), ['a', 'b', 'c:d'])
+ self.assertEqual(re.split("(:)", ":a:b::c", maxsplit=2),
['', ':', 'a', ':', 'b::c'])
- self.assertEqual(re.split("(:*)", ":a:b::c", 2),
+ self.assertEqual(re.split("(:+)", ":a:b::c", maxsplit=2),
['', ':', 'a', ':', 'b::c'])
+ with self.assertWarns(FutureWarning):
+ self.assertEqual(re.split("(:*)", ":a:b::c", maxsplit=2),
+ ['', ':', 'a', ':', 'b::c'])
def test_re_findall(self):
self.assertEqual(re.findall(":+", "abc"), [])
@@ -405,6 +436,10 @@ class ReTests(unittest.TestCase):
self.assertIsNone(p.match('abd'))
self.assertIsNone(p.match('ac'))
+ # Support > 100 groups.
+ pat = '|'.join('x(?P<a%d>%x)y' % (i, i) for i in range(1, 200 + 1))
+ pat = '(?:%s)(?(200)z)' % pat
+ self.assertEqual(re.match(pat, 'xc8yz').span(), (0, 5))
def test_re_groupref(self):
self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a|').groups(),
@@ -428,6 +463,10 @@ class ReTests(unittest.TestCase):
"first second")
.expand(r"\2 \1 \g<second> \g<first>"),
"second first second first")
+ self.assertEqual(re.match("(?P<first>first)|(?P<second>second)",
+ "first")
+ .expand(r"\2 \g<second>"),
+ " ")
def test_repeat_minmax(self):
self.assertIsNone(re.match("^(\w){1}$", "abc"))
@@ -484,10 +523,6 @@ class ReTests(unittest.TestCase):
"abcd abc bcd bx", re.ASCII).group(1), "bx")
self.assertEqual(re.search(r"\B(b.)\B",
"abc bcd bc abxd", re.ASCII).group(1), "bx")
- self.assertEqual(re.search(r"\b(b.)\b",
- "abcd abc bcd bx", re.LOCALE).group(1), "bx")
- self.assertEqual(re.search(r"\B(b.)\B",
- "abc bcd bc abxd", re.LOCALE).group(1), "bx")
self.assertEqual(re.search(r"^abc$", "\nabc\n", re.M).group(0), "abc")
self.assertEqual(re.search(r"^\Aabc\Z$", "abc", re.M).group(0), "abc")
self.assertIsNone(re.search(r"^\Aabc\Z$", "\nabc\n", re.M))
@@ -508,11 +543,23 @@ class ReTests(unittest.TestCase):
b"1aa! a").group(0), b"1aa! a")
self.assertEqual(re.search(r"\d\D\w\W\s\S",
"1aa! a", re.ASCII).group(0), "1aa! a")
- self.assertEqual(re.search(r"\d\D\w\W\s\S",
- "1aa! a", re.LOCALE).group(0), "1aa! a")
self.assertEqual(re.search(br"\d\D\w\W\s\S",
b"1aa! a", re.LOCALE).group(0), b"1aa! a")
+ def test_other_escapes(self):
+ self.assertRaises(re.error, re.compile, "\\")
+ self.assertEqual(re.match(r"\(", '(').group(), '(')
+ self.assertIsNone(re.match(r"\(", ')'))
+ self.assertEqual(re.match(r"\\", '\\').group(), '\\')
+ self.assertEqual(re.match(r"\y", 'y').group(), 'y')
+ self.assertIsNone(re.match(r"\y", 'z'))
+ self.assertEqual(re.match(r"[\]]", ']').group(), ']')
+ self.assertIsNone(re.match(r"[\]]", '['))
+ self.assertEqual(re.match(r"[a\-c]", '-').group(), '-')
+ self.assertIsNone(re.match(r"[a\-c]", 'b'))
+ self.assertEqual(re.match(r"[\^a]+", 'a^').group(), 'a^')
+ self.assertIsNone(re.match(r"[\^a]+", 'b'))
+
def test_string_boundaries(self):
# See http://bugs.python.org/issue10713
self.assertEqual(re.search(r"\b(abc)\b", "abc").group(1),
@@ -574,9 +621,6 @@ class ReTests(unittest.TestCase):
# Group reference.
self.assertTrue(re.match(r'(a)b(?=\1)a', 'aba'))
self.assertIsNone(re.match(r'(a)b(?=\1)c', 'abac'))
- # Named group reference.
- self.assertTrue(re.match(r'(?P<g>a)b(?=(?P=g))a', 'aba'))
- self.assertIsNone(re.match(r'(?P<g>a)b(?=(?P=g))c', 'abac'))
# Conditional group reference.
self.assertTrue(re.match(r'(?:(a)|(x))b(?=(?(2)x|c))c', 'abc'))
self.assertIsNone(re.match(r'(?:(a)|(x))b(?=(?(2)c|x))c', 'abc'))
@@ -594,13 +638,25 @@ class ReTests(unittest.TestCase):
self.assertIsNone(re.match(r'ab(?<!b)c', 'abc'))
self.assertTrue(re.match(r'ab(?<!c)c', 'abc'))
# Group reference.
- self.assertWarns(RuntimeWarning, re.compile, r'(a)a(?<=\1)c')
- # Named group reference.
- self.assertWarns(RuntimeWarning, re.compile, r'(?P<g>a)a(?<=(?P=g))c')
+ self.assertTrue(re.match(r'(a)a(?<=\1)c', 'aac'))
+ self.assertIsNone(re.match(r'(a)b(?<=\1)a', 'abaa'))
+ self.assertIsNone(re.match(r'(a)a(?<!\1)c', 'aac'))
+ self.assertTrue(re.match(r'(a)b(?<!\1)a', 'abaa'))
# Conditional group reference.
- self.assertWarns(RuntimeWarning, re.compile, r'(a)b(?<=(?(1)b|x))c')
+ self.assertIsNone(re.match(r'(?:(a)|(x))b(?<=(?(2)x|c))c', 'abc'))
+ self.assertIsNone(re.match(r'(?:(a)|(x))b(?<=(?(2)b|x))c', 'abc'))
+ self.assertTrue(re.match(r'(?:(a)|(x))b(?<=(?(2)x|b))c', 'abc'))
+ self.assertIsNone(re.match(r'(?:(a)|(x))b(?<=(?(1)c|x))c', 'abc'))
+ self.assertTrue(re.match(r'(?:(a)|(x))b(?<=(?(1)b|x))c', 'abc'))
# Group used before defined.
- self.assertWarns(RuntimeWarning, re.compile, r'(a)b(?<=(?(2)b|x))(c)')
+ self.assertRaises(re.error, re.compile, r'(a)b(?<=(?(2)b|x))(c)')
+ self.assertIsNone(re.match(r'(a)b(?<=(?(1)c|x))(c)', 'abc'))
+ self.assertTrue(re.match(r'(a)b(?<=(?(1)b|x))(c)', 'abc'))
+ # Group defined in the same lookbehind pattern
+ self.assertRaises(re.error, re.compile, r'(a)b(?<=(.)\2)(c)')
+ self.assertRaises(re.error, re.compile, r'(a)b(?<=(?P<a>.)(?P=a))(c)')
+ self.assertRaises(re.error, re.compile, r'(a)b(?<=(a)(?(2)b|x))(c)')
+ self.assertRaises(re.error, re.compile, r'(a)b(?<=(.)(?<=\2))(c)')
def test_ignore_case(self):
self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
@@ -692,9 +748,12 @@ class ReTests(unittest.TestCase):
self.assertEqual(_sre.getlower(ord('A'), 0), ord('a'))
self.assertEqual(_sre.getlower(ord('A'), re.LOCALE), ord('a'))
self.assertEqual(_sre.getlower(ord('A'), re.UNICODE), ord('a'))
+ self.assertEqual(_sre.getlower(ord('A'), re.ASCII), ord('a'))
self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
self.assertEqual(re.match(b"abc", b"ABC", re.I).group(0), b"ABC")
+ self.assertEqual(re.match("abc", "ABC", re.I|re.A).group(0), "ABC")
+ self.assertEqual(re.match(b"abc", b"ABC", re.I|re.L).group(0), b"ABC")
def test_not_literal(self):
self.assertEqual(re.search("\s([^a])", " b").group(1), "b")
@@ -779,8 +838,10 @@ class ReTests(unittest.TestCase):
self.assertEqual(re.X, re.VERBOSE)
def test_flags(self):
- for flag in [re.I, re.M, re.X, re.S, re.L]:
+ for flag in [re.I, re.M, re.X, re.S, re.A, re.U]:
self.assertTrue(re.compile('^pattern$', flag))
+ for flag in [re.I, re.M, re.X, re.S, re.A, re.L]:
+ self.assertTrue(re.compile(b'^pattern$', flag))
def test_sre_character_literals(self):
for i in [0, 8, 16, 32, 64, 127, 128, 255, 256, 0xFFFF, 0x10000, 0x10FFFF]:
@@ -802,7 +863,7 @@ class ReTests(unittest.TestCase):
self.assertTrue(re.match(r"\08", "\0008"))
self.assertTrue(re.match(r"\01", "\001"))
self.assertTrue(re.match(r"\018", "\0018"))
- self.assertTrue(re.match(r"\567", chr(0o167)))
+ self.assertRaises(re.error, re.match, r"\567", "")
self.assertRaises(re.error, re.match, r"\911", "")
self.assertRaises(re.error, re.match, r"\x1", "")
self.assertRaises(re.error, re.match, r"\x1z", "")
@@ -830,12 +891,13 @@ class ReTests(unittest.TestCase):
self.assertTrue(re.match(r"[\U%08x]" % i, chr(i)))
self.assertTrue(re.match(r"[\U%08x0]" % i, chr(i)+"0"))
self.assertTrue(re.match(r"[\U%08xz]" % i, chr(i)+"z"))
- self.assertTrue(re.match(r"[\U0001d49c-\U0001d4b5]", "\U0001d49e"))
+ self.assertRaises(re.error, re.match, r"[\567]", "")
self.assertRaises(re.error, re.match, r"[\911]", "")
self.assertRaises(re.error, re.match, r"[\x1z]", "")
self.assertRaises(re.error, re.match, r"[\u123z]", "")
self.assertRaises(re.error, re.match, r"[\U0001234z]", "")
self.assertRaises(re.error, re.match, r"[\U00110000]", "")
+ self.assertTrue(re.match(r"[\U0001d49c-\U0001d4b5]", "\U0001d49e"))
def test_sre_byte_literals(self):
for i in [0, 8, 16, 32, 64, 127, 128, 255]:
@@ -851,7 +913,7 @@ class ReTests(unittest.TestCase):
self.assertTrue(re.match(br"\08", b"\0008"))
self.assertTrue(re.match(br"\01", b"\001"))
self.assertTrue(re.match(br"\018", b"\0018"))
- self.assertTrue(re.match(br"\567", bytes([0o167])))
+ self.assertRaises(re.error, re.match, br"\567", b"")
self.assertRaises(re.error, re.match, br"\911", b"")
self.assertRaises(re.error, re.match, br"\x1", b"")
self.assertRaises(re.error, re.match, br"\x1z", b"")
@@ -868,6 +930,7 @@ class ReTests(unittest.TestCase):
self.assertTrue(re.match((r"[\x%02xz]" % i).encode(), bytes([i])))
self.assertTrue(re.match(br"[\u]", b'u'))
self.assertTrue(re.match(br"[\U]", b'U'))
+ self.assertRaises(re.error, re.match, br"[\567]", b"")
self.assertRaises(re.error, re.match, br"[\911]", b"")
self.assertRaises(re.error, re.match, br"[\x1z]", b"")
@@ -1062,8 +1125,8 @@ class ReTests(unittest.TestCase):
def test_inline_flags(self):
# Bug #1700
- upper_char = chr(0x1ea0) # Latin Capital Letter A with Dot Bellow
- lower_char = chr(0x1ea1) # Latin Small Letter A with Dot Bellow
+ upper_char = '\u1ea0' # Latin Capital Letter A with Dot Below
+ lower_char = '\u1ea1' # Latin Small Letter A with Dot Below
p = re.compile(upper_char, re.I | re.U)
q = p.match(lower_char)
@@ -1143,6 +1206,52 @@ class ReTests(unittest.TestCase):
self.assertRaises(ValueError, re.compile, '(?a)\w', re.UNICODE)
self.assertRaises(ValueError, re.compile, '(?au)\w')
+ def test_locale_flag(self):
+ import locale
+ _, enc = locale.getlocale(locale.LC_CTYPE)
+ # Search non-ASCII letter
+ for i in range(128, 256):
+ try:
+ c = bytes([i]).decode(enc)
+ sletter = c.lower()
+ if sletter == c: continue
+ bletter = sletter.encode(enc)
+ if len(bletter) != 1: continue
+ if bletter.decode(enc) != sletter: continue
+ bpat = re.escape(bytes([i]))
+ break
+ except (UnicodeError, TypeError):
+ pass
+ else:
+ bletter = None
+ bpat = b'A'
+ # Bytes patterns
+ pat = re.compile(bpat, re.LOCALE | re.IGNORECASE)
+ if bletter:
+ self.assertTrue(pat.match(bletter))
+ pat = re.compile(b'(?L)' + bpat, re.IGNORECASE)
+ if bletter:
+ self.assertTrue(pat.match(bletter))
+ pat = re.compile(bpat, re.IGNORECASE)
+ if bletter:
+ self.assertIsNone(pat.match(bletter))
+ pat = re.compile(b'\w', re.LOCALE)
+ if bletter:
+ self.assertTrue(pat.match(bletter))
+ pat = re.compile(b'(?L)\w')
+ if bletter:
+ self.assertTrue(pat.match(bletter))
+ pat = re.compile(b'\w')
+ if bletter:
+ self.assertIsNone(pat.match(bletter))
+ # Incompatibilities
+ self.assertWarns(DeprecationWarning, re.compile, '', re.LOCALE)
+ self.assertWarns(DeprecationWarning, re.compile, '(?L)')
+ self.assertWarns(DeprecationWarning, re.compile, b'', re.LOCALE | re.ASCII)
+ self.assertWarns(DeprecationWarning, re.compile, b'(?L)', re.ASCII)
+ self.assertWarns(DeprecationWarning, re.compile, b'(?a)', re.LOCALE)
+ self.assertWarns(DeprecationWarning, re.compile, b'(?aL)')
+
def test_bug_6509(self):
# Replacement strings of both types must parse properly.
# all strings
@@ -1170,8 +1279,10 @@ class ReTests(unittest.TestCase):
# a RuntimeError is raised instead of OverflowError.
long_overflow = 2**128
self.assertRaises(TypeError, re.finditer, "a", {})
- self.assertRaises(OverflowError, _sre.compile, "abc", 0, [long_overflow])
- self.assertRaises(TypeError, _sre.compile, {}, 0, [])
+ with self.assertRaises(OverflowError):
+ _sre.compile("abc", 0, [long_overflow], 0, [], [])
+ with self.assertRaises(TypeError):
+ _sre.compile({}, 0, [], 0, [], [])
def test_search_dot_unicode(self):
self.assertTrue(re.search("123.*-", '123abc-'))
@@ -1309,22 +1420,22 @@ class ReTests(unittest.TestCase):
with captured_stdout() as out:
re.compile(pat, re.DEBUG)
dump = '''\
-subpattern 1
- literal 46
-subpattern None
- branch
- in
- literal 99
- literal 104
- or
- literal 112
- literal 121
-subpattern None
- groupref_exists 1
- at at_end
- else
- literal 58
- literal 32
+SUBPATTERN 1
+ LITERAL 46
+SUBPATTERN None
+ BRANCH
+ IN
+ LITERAL 99
+ LITERAL 104
+ OR
+ LITERAL 112
+ LITERAL 121
+SUBPATTERN None
+ GROUPREF_EXISTS 1
+ AT AT_END
+ ELSE
+ LITERAL 58
+ LITERAL 32
'''
self.assertEqual(out.getvalue(), dump)
# Debug output is output again even a second time (bypassing
@@ -1392,6 +1503,42 @@ subpattern None
self.assertIsNone(re.match(b'(?Li)\xc5', b'\xe5'))
self.assertIsNone(re.match(b'(?Li)\xe5', b'\xc5'))
+ def test_error(self):
+ with self.assertRaises(re.error) as cm:
+ re.compile('(\u20ac))')
+ err = cm.exception
+ self.assertIsInstance(err.pattern, str)
+ self.assertEqual(err.pattern, '(\u20ac))')
+ self.assertEqual(err.pos, 3)
+ self.assertEqual(err.lineno, 1)
+ self.assertEqual(err.colno, 4)
+ self.assertIn(err.msg, str(err))
+ self.assertIn(' at position 3', str(err))
+ self.assertNotIn(' at position 3', err.msg)
+ # Bytes pattern
+ with self.assertRaises(re.error) as cm:
+ re.compile(b'(\xa4))')
+ err = cm.exception
+ self.assertIsInstance(err.pattern, bytes)
+ self.assertEqual(err.pattern, b'(\xa4))')
+ self.assertEqual(err.pos, 3)
+ # Multiline pattern
+ with self.assertRaises(re.error) as cm:
+ re.compile("""
+ (
+ abc
+ )
+ )
+ (
+ """, re.VERBOSE)
+ err = cm.exception
+ self.assertEqual(err.pos, 77)
+ self.assertEqual(err.lineno, 5)
+ self.assertEqual(err.colno, 17)
+ self.assertIn(err.msg, str(err))
+ self.assertIn(' at position 77', str(err))
+ self.assertIn('(line 5, column 17)', str(err))
+
class PatternReprTests(unittest.TestCase):
def check(self, pattern, expected):
@@ -1436,6 +1583,10 @@ class PatternReprTests(unittest.TestCase):
self.check_flags(b'bytes pattern', re.A,
"re.compile(b'bytes pattern', re.ASCII)")
+ def test_locale(self):
+ self.check_flags(b'bytes pattern', re.L,
+ "re.compile(b'bytes pattern', re.LOCALE)")
+
def test_quotes(self):
self.check('random "double quoted" pattern',
'''re.compile('random "double quoted" pattern')''')
@@ -1549,8 +1700,16 @@ class ExternalTests(unittest.TestCase):
pass
else:
with self.subTest('bytes pattern match'):
- bpat = re.compile(bpat)
- self.assertTrue(bpat.search(bs))
+ obj = re.compile(bpat)
+ self.assertTrue(obj.search(bs))
+
+ # Try the match with LOCALE enabled, and check that it
+ # still succeeds.
+ with self.subTest('locale-sensitive match'):
+ obj = re.compile(bpat, re.LOCALE)
+ result = obj.search(bs)
+ if result is None:
+ print('=== Fails on locale-sensitive match', t)
# Try the match with the search area limited to the extent
# of the match and see if it still succeeds. \B will
@@ -1568,13 +1727,6 @@ class ExternalTests(unittest.TestCase):
obj = re.compile(pattern, re.IGNORECASE)
self.assertTrue(obj.search(s))
- # Try the match with LOCALE enabled, and check that it
- # still succeeds.
- if '(?u)' not in pattern:
- with self.subTest('locale-sensitive match'):
- obj = re.compile(pattern, re.LOCALE)
- self.assertTrue(obj.search(s))
-
# Try the match with UNICODE locale enabled, and check
# that it still succeeds.
with self.subTest('unicode-sensitive match'):