diff options
author | Serhiy Storchaka <storchaka@gmail.com> | 2022-04-19 13:56:51 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-04-19 13:56:51 (GMT) |
commit | 74070085da5322ac83c954f101f2caa150655be2 (patch) | |
tree | 870aa77b196e0fae718d46dde7908399640d733a /Lib | |
parent | 16fc5733b737320e43fe3244bf4be4e6c3b794a5 (diff) | |
download | cpython-74070085da5322ac83c954f101f2caa150655be2.zip cpython-74070085da5322ac83c954f101f2caa150655be2.tar.gz cpython-74070085da5322ac83c954f101f2caa150655be2.tar.bz2 |
Add more tests for group names and refs in RE (GH-91695)
Diffstat (limited to 'Lib')
-rw-r--r-- | Lib/test/test_re.py | 56 |
1 files changed, 41 insertions, 15 deletions
diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py index a1c27c9..7bb8bfa 100644 --- a/Lib/test/test_re.py +++ b/Lib/test/test_re.py @@ -236,6 +236,16 @@ class ReTests(unittest.TestCase): re.compile(r'(?P<a>x)(?P=a)(?(a)y)') re.compile(r'(?P<a1>x)(?P=a1)(?(a1)y)') re.compile(r'(?P<a1>x)\1(?(1)y)') + re.compile(b'(?P<a1>x)(?P=a1)(?(a1)y)') + # New valid identifiers in Python 3 + re.compile('(?P<µ>x)(?P=µ)(?(µ)y)') + re.compile('(?P<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>x)(?P=𝔘𝔫𝔦𝔠𝔬𝔡𝔢)(?(𝔘𝔫𝔦𝔠𝔬𝔡𝔢)y)') + # Support > 100 groups. + pat = '|'.join('x(?P<a%d>%x)y' % (i, i) for i in range(1, 200 + 1)) + pat = '(?:%s)(?(200)z|t)' % pat + self.assertEqual(re.match(pat, 'xc8yz').span(), (0, 5)) + + def test_symbolic_groups_errors(self): self.checkPatternError(r'(?P<a>)(?P<a>)', "redefinition of group name 'a' as group 2; " "was group 1") @@ -261,16 +271,22 @@ class ReTests(unittest.TestCase): self.checkPatternError(r'(?(-1))', "bad character in group name '-1'", 3) self.checkPatternError(r'(?(1a))', "bad character in group name '1a'", 3) self.checkPatternError(r'(?(a.))', "bad character in group name 'a.'", 3) - # New valid/invalid identifiers in Python 3 - re.compile('(?P<µ>x)(?P=µ)(?(µ)y)') - re.compile('(?P<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>x)(?P=𝔘𝔫𝔦𝔠𝔬𝔡𝔢)(?(𝔘𝔫𝔦𝔠𝔬𝔡𝔢)y)') self.checkPatternError('(?P<©>x)', "bad character in group name '©'", 4) + self.checkPatternError('(?P=©)', "bad character in group name '©'", 4) + self.checkPatternError('(?(©)y)', "bad character in group name '©'", 3) + + def test_symbolic_refs(self): + self.assertEqual(re.sub('(?P<a>x)|(?P<b>y)', r'\g<b>', 'xx'), '') + self.assertEqual(re.sub('(?P<a>x)|(?P<b>y)', r'\2', 'xx'), '') + self.assertEqual(re.sub(b'(?P<a1>x)', br'\g<a1>', b'xx'), b'xx') + # New valid identifiers in Python 3 + self.assertEqual(re.sub('(?P<µ>x)', r'\g<µ>', 'xx'), 'xx') + self.assertEqual(re.sub('(?P<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>x)', r'\g<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>', 'xx'), 'xx') # Support > 100 groups. 
pat = '|'.join('x(?P<a%d>%x)y' % (i, i) for i in range(1, 200 + 1)) - pat = '(?:%s)(?(200)z|t)' % pat - self.assertEqual(re.match(pat, 'xc8yz').span(), (0, 5)) + self.assertEqual(re.sub(pat, r'\g<200>', 'xc8yzxc8y'), 'c8zc8') - def test_symbolic_refs(self): + def test_symbolic_refs_errors(self): self.checkTemplateError('(?P<a>x)', r'\g<a', 'xx', 'missing >, unterminated name', 3) self.checkTemplateError('(?P<a>x)', r'\g<', 'xx', @@ -288,18 +304,14 @@ class ReTests(unittest.TestCase): 'invalid group reference 2', 1) with self.assertRaisesRegex(IndexError, "unknown group name 'ab'"): re.sub('(?P<a>x)', r'\g<ab>', 'xx') - self.assertEqual(re.sub('(?P<a>x)|(?P<b>y)', r'\g<b>', 'xx'), '') - self.assertEqual(re.sub('(?P<a>x)|(?P<b>y)', r'\2', 'xx'), '') self.checkTemplateError('(?P<a>x)', r'\g<-1>', 'xx', "bad character in group name '-1'", 3) - # New valid/invalid identifiers in Python 3 - self.assertEqual(re.sub('(?P<ยต>x)', r'\g<ยต>', 'xx'), 'xx') - self.assertEqual(re.sub('(?P<๐๐ซ๐ฆ๐ ๐ฌ๐ก๐ข>x)', r'\g<๐๐ซ๐ฆ๐ ๐ฌ๐ก๐ข>', 'xx'), 'xx') self.checkTemplateError('(?P<a>x)', r'\g<ยฉ>', 'xx', "bad character in group name 'ยฉ'", 3) - # Support > 100 groups. 
- pat = '|'.join('x(?P<a%d>%x)y' % (i, i) for i in range(1, 200 + 1)) - self.assertEqual(re.sub(pat, r'\g<200>', 'xc8yzxc8y'), 'c8zc8') + self.checkTemplateError('(?P<a>x)', r'\g<㊀>', 'xx', + "bad character in group name '㊀'", 3) + self.checkTemplateError('(?P<a>x)', r'\g<¹>', 'xx', + "bad character in group name '¹'", 3) def test_re_subn(self): self.assertEqual(re.subn("(?i)b+", "x", "bbbb BBBB"), ('x x', 2)) @@ -561,9 +573,23 @@ class ReTests(unittest.TestCase): pat = '(?:%s)(?(200)z)' % pat self.assertEqual(re.match(pat, 'xc8yz').span(), (0, 5)) - self.checkPatternError(r'(?P<a>)(?(0))', 'bad group number', 10) + def test_re_groupref_exists_errors(self): + self.checkPatternError(r'(?P<a>)(?(0)a|b)', 'bad group number', 10) + self.checkPatternError(r'()(?(-1)a|b)', + "bad character in group name '-1'", 5) + self.checkPatternError(r'()(?(㊀)a|b)', + "bad character in group name '㊀'", 5) + self.checkPatternError(r'()(?(¹)a|b)', + "bad character in group name '¹'", 5) + self.checkPatternError(r'()(?(1', + "missing ), unterminated name", 5) + self.checkPatternError(r'()(?(1)a', + "missing ), unterminated subpattern", 2) self.checkPatternError(r'()(?(1)a|b', 'missing ), unterminated subpattern', 2) + self.checkPatternError(r'()(?(1)a|b|c', + 'conditional backref with more than ' + 'two branches', 10) self.checkPatternError(r'()(?(1)a|b|c)', 'conditional backref with more than ' 'two branches', 10) |