diff options
author | Serhiy Storchaka <storchaka@gmail.com> | 2024-06-20 07:19:32 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-06-20 07:19:32 (GMT) |
commit | 8bc76ae45f48bede7ce3191db08cf36d879e6e8d (patch) | |
tree | 56936a5de2d13bb77cbd0c6153af124dda90a8db /Lib/test | |
parent | 3846fcfb928753bd52265ba67a9b4ceae51d2ef3 (diff) | |
download | cpython-8bc76ae45f48bede7ce3191db08cf36d879e6e8d.zip cpython-8bc76ae45f48bede7ce3191db08cf36d879e6e8d.tar.gz cpython-8bc76ae45f48bede7ce3191db08cf36d879e6e8d.tar.bz2 |
gh-111259: Optimize complementary character sets in RE (GH-120742)
Patterns like "[\s\S]" or "\s|\S" which match any character are now compiled
to the same effective code as a dot with the DOTALL modifier ("(?s:.)").
Diffstat (limited to 'Lib/test')
-rw-r--r-- | Lib/test/test_re.py | 18 |
1 files changed, 18 insertions, 0 deletions
diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py index 1f2ab60..a93c2ae 100644 --- a/Lib/test/test_re.py +++ b/Lib/test/test_re.py @@ -2473,6 +2473,24 @@ class ReTests(unittest.TestCase): def test_fail(self): self.assertEqual(re.search(r'12(?!)|3', '123')[0], '3') + def test_character_set_any(self): + # The union of complementary character sets matches any character + # and is equivalent to "(?s:.)". + s = '1x\n' + for p in r'[\s\S]', r'[\d\D]', r'[\w\W]', r'[\S\s]', r'\s|\S': + with self.subTest(pattern=p): + self.assertEqual(re.findall(p, s), list(s)) + self.assertEqual(re.fullmatch('(?:' + p + ')+', s).group(), s) + + def test_character_set_none(self): + # Negation of the union of complementary character sets does not match + # any character. + s = '1x\n' + for p in r'[^\s\S]', r'[^\d\D]', r'[^\w\W]', r'[^\S\s]': + with self.subTest(pattern=p): + self.assertIsNone(re.search(p, s)) + self.assertIsNone(re.search('(?s:.)' + p, s)) + def get_debug_out(pat): with captured_stdout() as out: |