From 48ec61a89a959071206549819448405c2cea61b0 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Fri, 22 Apr 2022 19:53:10 +0300 Subject: gh-91700: Validate the group number in conditional expression in RE (GH-91702) In the expression (?(group)...) an appropriate re.error is now raised if the group number refers to an undefined group. Previously it raised RuntimeError: invalid SRE code. --- Lib/re/_parser.py | 10 ++++++++++ Lib/test/test_re.py | 2 ++ .../next/Library/2022-04-19-17-30-17.gh-issue-91700.MRJi6m.rst | 4 ++++ 3 files changed, 16 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2022-04-19-17-30-17.gh-issue-91700.MRJi6m.rst diff --git a/Lib/re/_parser.py b/Lib/re/_parser.py index 6588862..60ec3e8 100644 --- a/Lib/re/_parser.py +++ b/Lib/re/_parser.py @@ -77,6 +77,7 @@ class State: self.groupdict = {} self.groupwidths = [None] # group 0 self.lookbehindgroups = None + self.grouprefpos = {} @property def groups(self): return len(self.groupwidths) @@ -795,6 +796,10 @@ def _parse(source, state, verbose, nested, first=False): if condgroup >= MAXGROUPS: msg = "invalid group reference %d" % condgroup raise source.error(msg, len(condname) + 1) + if condgroup not in state.grouprefpos: + state.grouprefpos[condgroup] = ( + source.tell() - len(condname) - 1 + ) state.checklookbehindgroup(condgroup, source) item_yes = _parse(source, state, verbose, nested + 1) if source.match("|"): @@ -975,6 +980,11 @@ def parse(str, flags=0, state=None): assert source.next == ")" raise source.error("unbalanced parenthesis") + for g in p.state.grouprefpos: + if g >= p.state.groups: + msg = "invalid group reference %d" % g + raise error(msg, str, p.state.grouprefpos[g]) + if flags & SRE_FLAG_DEBUG: p.dump() diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py index 2d3fef8..7002750 100644 --- a/Lib/test/test_re.py +++ b/Lib/test/test_re.py @@ -593,6 +593,8 @@ class ReTests(unittest.TestCase): self.checkPatternError(r'()(?(1)a|b|c)', 'conditional backref with more than ' 'two 
branches', 10) + self.checkPatternError(r'()(?(2)a)', + "invalid group reference 2", 5) def test_re_groupref_overflow(self): from re._constants import MAXGROUPS diff --git a/Misc/NEWS.d/next/Library/2022-04-19-17-30-17.gh-issue-91700.MRJi6m.rst b/Misc/NEWS.d/next/Library/2022-04-19-17-30-17.gh-issue-91700.MRJi6m.rst new file mode 100644 index 0000000..73b1068 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2022-04-19-17-30-17.gh-issue-91700.MRJi6m.rst @@ -0,0 +1,4 @@ +Compilation of regular expression containing a conditional expression +``(?(group)...)`` now raises an appropriate :exc:`re.error` if the group +number refers to not defined group. Previously an internal RuntimeError was +raised. -- cgit v0.12