From 4e7be06a652680611a81d6bb1fb03ce4227ac02e Mon Sep 17 00:00:00 2001 From: Gustavo Niemeyer Date: Wed, 6 Nov 2002 14:06:53 +0000 Subject: Fixed bug #470582, using a modified version of patch #527371, from Greg Chapman. * Modules/_sre.c (lastmark_restore): New function, implementing algorithm to restore a state to a given lastmark. In addition to the similar algorithm used in a few places of SRE_MATCH, restore lastindex when restoring lastmark. (SRE_MATCH): Replace lastmark inline restoring by lastmark_restore(), function. Also include it where missing. In SRE_OP_MARK, set lastindex only if i > lastmark. * Lib/test/re_tests.py * Lib/test/test_sre.py Included regression tests for the fixed bugs. * Misc/NEWS Mention fixes. --- Lib/test/re_tests.py | 2 ++ Lib/test/test_sre.py | 5 +++++ Misc/NEWS | 7 +++++++ Modules/_sre.c | 37 +++++++++++++++++++------------------ 4 files changed, 33 insertions(+), 18 deletions(-) diff --git a/Lib/test/re_tests.py b/Lib/test/re_tests.py index 953e4fd..d6f04f0 100755 --- a/Lib/test/re_tests.py +++ b/Lib/test/re_tests.py @@ -646,6 +646,8 @@ xyzabc (r'a[^>]*?b', 'a>b', FAIL), # bug 490573: minimizing repeat problem (r'^a*?$', 'foo', FAIL), + # bug 470582: nested groups problem + (r'^((a)c)?(ab)$', 'ab', SUCCEED, 'g1+"-"+g2+"-"+g3', 'None-None-ab'), ] try: diff --git a/Lib/test/test_sre.py b/Lib/test/test_sre.py index 284212c..6a00aff 100644 --- a/Lib/test/test_sre.py +++ b/Lib/test/test_sre.py @@ -78,6 +78,11 @@ test(r"""sre.match(r'(a)|(b)', 'b').start(1)""", -1) test(r"""sre.match(r'(a)|(b)', 'b').end(1)""", -1) test(r"""sre.match(r'(a)|(b)', 'b').span(1)""", (-1, -1)) +# bug described in patch 527371 +test(r"""sre.match(r'(a)?a','a').lastindex""", None) +test(r"""sre.match(r'(a)(b)?b','ab').lastindex""", 1) +test(r"""sre.match(r'(?P<a>a)(?P<b>b)?b','ab').lastgroup""", 'a') + if verbose: print 'Running tests on sre.sub' diff --git a/Misc/NEWS b/Misc/NEWS index c57ca1d..cccaff5 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -352,6 +352,13 @@ 
Extension modules to the value of the PYTHON_API_VERSION macro with which the interpreter was compiled. +- Fixed bug #470582: sre module would return a tuple (None, 'a', 'ab') + when applying the regular expression '^((a)c)?(ab)$' on 'ab'. It now + returns (None, None, 'ab'), as expected. Also fixed handling of + lastindex/lastgroup match attributes in similar cases. For example, + when running the expression r'(a)(b)?b' over 'ab', lastindex must be + 1, not 2. + Library ------- diff --git a/Modules/_sre.c b/Modules/_sre.c index f4dbef0..4440a6e 100644 --- a/Modules/_sre.c +++ b/Modules/_sre.c @@ -353,6 +353,18 @@ mark_restore(SRE_STATE* state, int lo, int hi) return 0; } +void lastmark_restore(SRE_STATE *state, int lastmark) +{ + if (state->lastmark > lastmark) { + memset( + state->mark + lastmark + 1, 0, + (state->lastmark - lastmark) * sizeof(void*) + ); + state->lastmark = lastmark; + state->lastindex = (lastmark == 0) ? -1 : (lastmark-1)/2+1; + } +} + /* generate 8-bit version */ #define SRE_CHAR unsigned char @@ -860,10 +872,11 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level) /* <MARK> <gid> */ TRACE(("|%p|%p|MARK %d\n", pattern, ptr, pattern[0])); i = pattern[0]; - if (i & 1) - state->lastindex = i/2 + 1; - if (i > state->lastmark) + if (i > state->lastmark) { state->lastmark = i; + if (i & 1) + state->lastindex = i/2 + 1; + } state->mark[i] = ptr; pattern++; break; @@ -920,13 +933,7 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level) i = SRE_MATCH(state, pattern + 1, level + 1); if (i) return i; - if (state->lastmark > lastmark) { - memset( - state->mark + lastmark + 1, 0, - (state->lastmark - lastmark) * sizeof(void*) - ); - state->lastmark = lastmark; - } + lastmark_restore(state, lastmark); } return 0; @@ -997,13 +1004,7 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level) return i; ptr--; count--; - if (state->lastmark > lastmark) { - memset( - state->mark + lastmark + 1, 0, - (state->lastmark - lastmark) * sizeof(void*) - ); - 
state->lastmark = lastmark; - } + lastmark_restore(state, lastmark); } } return 0; @@ -1071,9 +1072,9 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level) if (i) return i; i = mark_restore(state, 0, lastmark); - state->lastmark = lastmark; if (i < 0) return i; + lastmark_restore(state, lastmark); rp->count = count - 1; state->ptr = ptr; } -- cgit v0.12