summaryrefslogtreecommitdiffstats
path: root/Lib/test
diff options
context:
space:
mode:
authorSerhiy Storchaka <storchaka@gmail.com>2024-06-20 07:19:32 (GMT)
committerGitHub <noreply@github.com>2024-06-20 07:19:32 (GMT)
commit8bc76ae45f48bede7ce3191db08cf36d879e6e8d (patch)
tree56936a5de2d13bb77cbd0c6153af124dda90a8db /Lib/test
parent3846fcfb928753bd52265ba67a9b4ceae51d2ef3 (diff)
downloadcpython-8bc76ae45f48bede7ce3191db08cf36d879e6e8d.zip
cpython-8bc76ae45f48bede7ce3191db08cf36d879e6e8d.tar.gz
cpython-8bc76ae45f48bede7ce3191db08cf36d879e6e8d.tar.bz2
gh-111259: Optimize complementary character sets in RE (GH-120742)
Patterns like "[\s\S]" or "\s|\S" which match any character are now compiled to the same effective code as a dot with the DOTALL modifier ("(?s:.)").
Diffstat (limited to 'Lib/test')
-rw-r--r--Lib/test/test_re.py18
1 files changed, 18 insertions, 0 deletions
diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py
index 1f2ab60..a93c2ae 100644
--- a/Lib/test/test_re.py
+++ b/Lib/test/test_re.py
@@ -2473,6 +2473,24 @@ class ReTests(unittest.TestCase):
def test_fail(self):
self.assertEqual(re.search(r'12(?!)|3', '123')[0], '3')
+ def test_character_set_any(self):
+ # The union of complementary character sets matches any character
+ # and is equivalent to "(?s:.)".
+ s = '1x\n'
+ for p in r'[\s\S]', r'[\d\D]', r'[\w\W]', r'[\S\s]', r'\s|\S':
+ with self.subTest(pattern=p):
+ self.assertEqual(re.findall(p, s), list(s))
+ self.assertEqual(re.fullmatch('(?:' + p + ')+', s).group(), s)
+
+ def test_character_set_none(self):
+ # Negation of the union of complementary character sets does not match
+ # any character.
+ s = '1x\n'
+ for p in r'[^\s\S]', r'[^\d\D]', r'[^\w\W]', r'[^\S\s]':
+ with self.subTest(pattern=p):
+ self.assertIsNone(re.search(p, s))
+ self.assertIsNone(re.search('(?s:.)' + p, s))
+
def get_debug_out(pat):
with captured_stdout() as out: