From 76ff68666fa946c44d16173ff52a127d3a9785db Mon Sep 17 00:00:00 2001 From: "Miss Islington (bot)" <31488909+miss-islington@users.noreply.github.com> Date: Fri, 22 Apr 2022 12:02:20 -0700 Subject: [3.9] gh-91700: Validate the group number in conditional expression in RE (GH-91702) (GH-91831) (GH-91836) In the expression (?(group)...) an appropriate re.error is now raised if the group number refers to an undefined group. Previously it raised RuntimeError: invalid SRE code. (cherry picked from commit 48ec61a89a959071206549819448405c2cea61b0) (cherry picked from commit 080781cd49b13da4a73db87b6f5e0c7aeec83e92) Co-authored-by: Serhiy Storchaka --- Lib/sre_parse.py | 10 ++++++++++ Lib/test/test_re.py | 2 ++ .../next/Library/2022-04-19-17-30-17.gh-issue-91700.MRJi6m.rst | 4 ++++ 3 files changed, 16 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2022-04-19-17-30-17.gh-issue-91700.MRJi6m.rst diff --git a/Lib/sre_parse.py b/Lib/sre_parse.py index d3ff196..20a6025 100644 --- a/Lib/sre_parse.py +++ b/Lib/sre_parse.py @@ -78,6 +78,7 @@ class State: self.groupdict = {} self.groupwidths = [None] # group 0 self.lookbehindgroups = None + self.grouprefpos = {} @property def groups(self): return len(self.groupwidths) @@ -786,6 +787,10 @@ def _parse(source, state, verbose, nested, first=False): if condgroup >= MAXGROUPS: msg = "invalid group reference %d" % condgroup raise source.error(msg, len(condname) + 1) + if condgroup not in state.grouprefpos: + state.grouprefpos[condgroup] = ( + source.tell() - len(condname) - 1 + ) state.checklookbehindgroup(condgroup, source) item_yes = _parse(source, state, verbose, nested + 1) if source.match("|"): @@ -963,6 +968,11 @@ def parse(str, flags=0, state=None): assert source.next == ")" raise source.error("unbalanced parenthesis") + for g in p.state.grouprefpos: + if g >= p.state.groups: + msg = "invalid group reference %d" % g + raise error(msg, str, p.state.grouprefpos[g]) + if flags & SRE_FLAG_DEBUG: p.dump() diff --git 
a/Lib/test/test_re.py b/Lib/test/test_re.py index 0070640..8eeddd6 100644 --- a/Lib/test/test_re.py +++ b/Lib/test/test_re.py @@ -574,6 +574,8 @@ class ReTests(unittest.TestCase): self.checkPatternError(r'()(?(1)a|b|c)', 'conditional backref with more than ' 'two branches', 10) + self.checkPatternError(r'()(?(2)a)', + "invalid group reference 2", 5) def test_re_groupref_overflow(self): from sre_constants import MAXGROUPS diff --git a/Misc/NEWS.d/next/Library/2022-04-19-17-30-17.gh-issue-91700.MRJi6m.rst b/Misc/NEWS.d/next/Library/2022-04-19-17-30-17.gh-issue-91700.MRJi6m.rst new file mode 100644 index 0000000..73b1068 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2022-04-19-17-30-17.gh-issue-91700.MRJi6m.rst @@ -0,0 +1,4 @@ +Compilation of a regular expression containing a conditional expression +``(?(group)...)`` now raises an appropriate :exc:`re.error` if the group +number refers to an undefined group. Previously an internal RuntimeError was +raised. -- cgit v0.12