From bd2ef82a5010985abdeef2ca71bcbcc9a366993b Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Wed, 16 Aug 2023 13:00:55 +0300 Subject: [3.12] gh-100061: Proper fix of the bug in the matching of possessive quantifiers (GH-102612) (#108003) Restore the global Input Stream pointer after trying to match a sub-pattern. . (cherry picked from commit abd9cc52d94b8e2835322b62c29f09bb0e6fcfe9) Co-authored-by: SKO <41810398+uyw4687@users.noreply.github.com> --- Lib/re/_compiler.py | 7 ------- Lib/test/test_re.py | 12 ++++++++++-- .../Library/2023-03-14-01-19-57.gh-issue-100061.CiXJYn.rst | 2 ++ Modules/_sre/sre_lib.h | 4 ++++ 4 files changed, 16 insertions(+), 9 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2023-03-14-01-19-57.gh-issue-100061.CiXJYn.rst diff --git a/Lib/re/_compiler.py b/Lib/re/_compiler.py index e30740b..d8e0d2f 100644 --- a/Lib/re/_compiler.py +++ b/Lib/re/_compiler.py @@ -100,13 +100,6 @@ def _compile(code, pattern, flags): emit(ANY_ALL) else: emit(ANY) - elif op is POSSESSIVE_REPEAT: - # gh-106052: Possessive quantifiers do not work when the - # subpattern contains backtracking, i.e. "(?:ab?c)*+". - # Implement it as equivalent greedy qualifier in atomic group. - p = [(MAX_REPEAT, av)] - p = [(ATOMIC_GROUP, p)] - _compile(code, p, flags) elif op in REPEATING_CODES: if flags & SRE_FLAG_TEMPLATE: raise error("internal: unsupported template operator %r" % (op,)) diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py index 85541f4..5a5de52 100644 --- a/Lib/test/test_re.py +++ b/Lib/test/test_re.py @@ -2366,6 +2366,16 @@ class ReTests(unittest.TestCase): self.assertFalse(template_re1.match('nope')) def test_bug_gh106052(self): + # gh-100061 + self.assertEqual(re.match('(?>(?:.(?!D))+)', 'ABCDE').span(), (0, 2)) + self.assertEqual(re.match('(?:.(?!D))++', 'ABCDE').span(), (0, 2)) + self.assertEqual(re.match('(?>(?:.(?!D))*)', 'ABCDE').span(), (0, 2)) + self.assertEqual(re.match('(?:.(?!D))*+', 'ABCDE').span(), (0, 2)) + self.assertEqual(re.match('(?>(?:.(?!D))?)', 'CDE').span(), (0, 0)) + self.assertEqual(re.match('(?:.(?!D))?+', 'CDE').span(), (0, 0)) + self.assertEqual(re.match('(?>(?:.(?!D)){1,3})', 'ABCDE').span(), (0, 2)) + self.assertEqual(re.match('(?:.(?!D)){1,3}+', 'ABCDE').span(), (0, 2)) + # gh-106052 self.assertEqual(re.match("(?>(?:ab?c)+)", "aca").span(), (0, 2)) self.assertEqual(re.match("(?:ab?c)++", "aca").span(), (0, 2)) self.assertEqual(re.match("(?>(?:ab?c)*)", "aca").span(), (0, 2)) @@ -2471,7 +2481,6 @@ ATOMIC_GROUP 17: SUCCESS ''') - @unittest.expectedFailure # gh-106052 def test_possesive_repeat_one(self): self.assertEqual(get_debug_out(r'a?+'), '''\ POSSESSIVE_REPEAT 0 1 @@ -2484,7 +2493,6 @@ POSSESSIVE_REPEAT 0 1 12: SUCCESS ''') - @unittest.expectedFailure # gh-106052 def test_possesive_repeat(self): self.assertEqual(get_debug_out(r'(?:ab)?+'), '''\ POSSESSIVE_REPEAT 0 1 diff --git a/Misc/NEWS.d/next/Library/2023-03-14-01-19-57.gh-issue-100061.CiXJYn.rst b/Misc/NEWS.d/next/Library/2023-03-14-01-19-57.gh-issue-100061.CiXJYn.rst new file mode 100644 index 0000000..dfed34f --- /dev/null +++ b/Misc/NEWS.d/next/Library/2023-03-14-01-19-57.gh-issue-100061.CiXJYn.rst @@ -0,0 +1,2 @@ +Fix a bug that causes wrong matches for regular expressions with possessive +qualifier. diff --git a/Modules/_sre/sre_lib.h b/Modules/_sre/sre_lib.h index fb4c18b..e831498 100644 --- a/Modules/_sre/sre_lib.h +++ b/Modules/_sre/sre_lib.h @@ -1334,6 +1334,10 @@ dispatch: MARK_POP(ctx->lastmark); LASTMARK_RESTORE(); + /* Restore the global Input Stream pointer + since it can change after jumps. */ + state->ptr = ptr; + /* We have sufficient matches, so exit loop. */ break; } -- cgit v0.12