From c34f2555bd414254f941d0659ba9c229b96ec728 Mon Sep 17 00:00:00 2001 From: Gustavo Niemeyer Date: Sun, 27 Apr 2003 12:34:14 +0000 Subject: Applied patch #725106, by Greg Chapman, fixing capturing groups within repeats of alternatives. The only change to the original patch was to convert the tests to the new test_re.py file. This patch fixes cases like: >>> re.match('((a)|b)*', 'abc').groups() ('b', '') This result is wrong (group 2, "(a)", can never match the empty string) and is incompatible with other regex systems, as the following examples show: % perl -e '"abc" =~ /^((a)|b)*/; print "$1 $2\n";' b a % echo "abc" | sed -r -e "s/^((a)|b)*/\1 \2|/" b a|c --- Lib/test/test_re.py | 19 +++++++++++++++++++ Modules/_sre.c | 10 ++++++++++ 2 files changed, 29 insertions(+) diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py index 2430790..7ba9a1b 100644 --- a/Lib/test/test_re.py +++ b/Lib/test/test_re.py @@ -276,6 +276,25 @@ class ReTests(unittest.TestCase): self.assertEqual(re.match(r'((.%s):)?z'%op, 'a:z').groups(), ('a:', 'a')) + def test_bug_725106(self): + # capturing groups in alternatives in repeats + self.assertEqual(re.match('^((a)|b)*', 'abc').groups(), + ('b', 'a')) + self.assertEqual(re.match('^(([ab])|c)*', 'abc').groups(), + ('c', 'b')) + self.assertEqual(re.match('^((d)|[ab])*', 'abc').groups(), + ('b', None)) + self.assertEqual(re.match('^((a)c|[ab])*', 'abc').groups(), + ('b', None)) + self.assertEqual(re.match('^((a)|b)*?c', 'abc').groups(), + ('b', 'a')) + self.assertEqual(re.match('^(([ab])|c)*?d', 'abcd').groups(), + ('c', 'b')) + self.assertEqual(re.match('^((d)|[ab])*?c', 'abc').groups(), + ('b', None)) + self.assertEqual(re.match('^((a)c|[ab])*?c', 'abc').groups(), + ('b', None)) + def test_finditer(self): iter = re.finditer(r":+", "a:b::c:::d") self.assertEqual([item.group(0) for item in iter], diff --git a/Modules/_sre.c b/Modules/_sre.c index 3f17d13..b9e1827 100644 --- a/Modules/_sre.c +++ b/Modules/_sre.c @@ -947,10 +947,20 @@ SRE_MATCH(SRE_STATE* 
state, SRE_CODE* pattern, int level) if (pattern[1] == SRE_OP_IN && (ptr >= end || !SRE_CHARSET(pattern + 3, (SRE_CODE) *ptr))) continue; + if (state->repeat) { + i = mark_save(state, 0, lastmark); + if (i < 0) + return i; + } state->ptr = ptr; i = SRE_MATCH(state, pattern + 1, level + 1); if (i) return i; + if (state->repeat) { + i = mark_restore(state, 0, lastmark); + if (i < 0) + return i; + } LASTMARK_RESTORE(); } return 0; -- cgit v0.12