summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSerhiy Storchaka <storchaka@gmail.com>2023-08-09 10:26:51 (GMT)
committerGitHub <noreply@github.com>2023-08-09 10:26:51 (GMT)
commit3bb43b7b1b75154bc4e94b1fa81afe296a8150d0 (patch)
tree9a31a9314f4f6965c8662a102294b5f58ecde50e
parentaa2ecef22a66938cba072ea57c27c63c11f79c9a (diff)
downloadcpython-3bb43b7b1b75154bc4e94b1fa81afe296a8150d0.zip
cpython-3bb43b7b1b75154bc4e94b1fa81afe296a8150d0.tar.gz
cpython-3bb43b7b1b75154bc4e94b1fa81afe296a8150d0.tar.bz2
[3.12] gh-106052: Fix bug in the matching of possessive quantifiers (GH-106515) (#107796)
[3.12] gh-106052: Fix bug in the matching of possessive quantifiers (gh-106515) It did not work in the case of a subpattern containing backtracking. Temporarily implement possessive quantifiers as equivalent greedy quantifiers in atomic groups. (cherry picked from commit 7b6e34e5baeb4162815ffa4d943b09a58e3f6580)
-rw-r--r--Lib/re/_compiler.py7
-rw-r--r--Lib/test/test_re.py12
-rw-r--r--Misc/NEWS.d/next/Library/2023-07-07-14-52-31.gh-issue-106052.ak8nbs.rst2
3 files changed, 21 insertions, 0 deletions
diff --git a/Lib/re/_compiler.py b/Lib/re/_compiler.py
index d8e0d2f..e30740b 100644
--- a/Lib/re/_compiler.py
+++ b/Lib/re/_compiler.py
@@ -100,6 +100,13 @@ def _compile(code, pattern, flags):
emit(ANY_ALL)
else:
emit(ANY)
+ elif op is POSSESSIVE_REPEAT:
+ # gh-106052: Possessive quantifiers do not work when the
+ # subpattern contains backtracking, e.g. "(?:ab?c)*+".
+ # Implement it as an equivalent greedy quantifier in an atomic group.
+ p = [(MAX_REPEAT, av)]
+ p = [(ATOMIC_GROUP, p)]
+ _compile(code, p, flags)
elif op in REPEATING_CODES:
if flags & SRE_FLAG_TEMPLATE:
raise error("internal: unsupported template operator %r" % (op,))
diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py
index 50b9ad7..85541f4 100644
--- a/Lib/test/test_re.py
+++ b/Lib/test/test_re.py
@@ -2365,6 +2365,16 @@ class ReTests(unittest.TestCase):
self.assertTrue(template_re1.match('ahoy'))
self.assertFalse(template_re1.match('nope'))
+ def test_bug_gh106052(self):
+ self.assertEqual(re.match("(?>(?:ab?c)+)", "aca").span(), (0, 2))
+ self.assertEqual(re.match("(?:ab?c)++", "aca").span(), (0, 2))
+ self.assertEqual(re.match("(?>(?:ab?c)*)", "aca").span(), (0, 2))
+ self.assertEqual(re.match("(?:ab?c)*+", "aca").span(), (0, 2))
+ self.assertEqual(re.match("(?>(?:ab?c)?)", "a").span(), (0, 0))
+ self.assertEqual(re.match("(?:ab?c)?+", "a").span(), (0, 0))
+ self.assertEqual(re.match("(?>(?:ab?c){1,3})", "aca").span(), (0, 2))
+ self.assertEqual(re.match("(?:ab?c){1,3}+", "aca").span(), (0, 2))
+
@unittest.skipIf(multiprocessing is None, 'test requires multiprocessing')
def test_regression_gh94675(self):
pattern = re.compile(r'(?<=[({}])(((//[^\n]*)?[\n])([\000-\040])*)*'
@@ -2461,6 +2471,7 @@ ATOMIC_GROUP
17: SUCCESS
''')
+ @unittest.expectedFailure # gh-106052
def test_possesive_repeat_one(self):
self.assertEqual(get_debug_out(r'a?+'), '''\
POSSESSIVE_REPEAT 0 1
@@ -2473,6 +2484,7 @@ POSSESSIVE_REPEAT 0 1
12: SUCCESS
''')
+ @unittest.expectedFailure # gh-106052
def test_possesive_repeat(self):
self.assertEqual(get_debug_out(r'(?:ab)?+'), '''\
POSSESSIVE_REPEAT 0 1
diff --git a/Misc/NEWS.d/next/Library/2023-07-07-14-52-31.gh-issue-106052.ak8nbs.rst b/Misc/NEWS.d/next/Library/2023-07-07-14-52-31.gh-issue-106052.ak8nbs.rst
new file mode 100644
index 0000000..f2d4c2f
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2023-07-07-14-52-31.gh-issue-106052.ak8nbs.rst
@@ -0,0 +1,2 @@
+:mod:`re` module: fix the matching of possessive quantifiers in the case of
+a subpattern containing backtracking.