From ebc37b28fa3fd66336116447b7c2b9b1c2614630 Mon Sep 17 00:00:00 2001 From: Fredrik Lundh Date: Sat, 28 Oct 2000 19:30:41 +0000 Subject: -- properly reset groups in findall (bug #117612) -- fixed negative lookbehind to work correctly at the beginning of the target string (bug #117242) -- improved syntax check; you can no longer refer to a group inside itself (bug #110866) --- Lib/sre_parse.py | 14 ++++++++++++-- Lib/test/test_sre.py | 3 +++ Modules/_sre.c | 33 ++++++++++++++++++--------------- 3 files changed, 33 insertions(+), 17 deletions(-) diff --git a/Lib/sre_parse.py b/Lib/sre_parse.py index 7c36d4f..5334e06 100644 --- a/Lib/sre_parse.py +++ b/Lib/sre_parse.py @@ -62,14 +62,20 @@ class Pattern: # master pattern object. keeps track of global attributes def __init__(self): self.flags = 0 + self.open = [] self.groups = 1 self.groupdict = {} - def getgroup(self, name=None): + def opengroup(self, name=None): gid = self.groups self.groups = gid + 1 if name: self.groupdict[name] = gid + self.open.append(gid) return gid + def closegroup(self, gid): + self.open.remove(gid) + def checkgroup(self, gid): + return gid < self.groups and gid not in self.open class SubPattern: # a subpattern, in intermediate form @@ -278,6 +284,8 @@ def _escape(source, escape, state): # got at least one decimal digit; this is a group reference group = _group(escape, state.groups) if group: + if not state.checkgroup(group): + raise error, "cannot refer to open group" return GROUPREF, group raise ValueError if len(escape) == 2: @@ -547,10 +555,12 @@ def _parse(source, state): # anonymous group group = None else: - group = state.getgroup(name) + group = state.opengroup(name) p = _parse_sub(source, state) if not source.match(")"): raise error, "unbalanced parenthesis" + if group is not None: + state.closegroup(group) subpattern.append((SUBPATTERN, (group, p))) else: while 1: diff --git a/Lib/test/test_sre.py b/Lib/test/test_sre.py index 373efa0..b9692a1 100644 --- a/Lib/test/test_sre.py +++ b/Lib/test/test_sre.py @@ -167,6 +167,9 @@ test(r"""sre.findall(r"(:)(:*)", "a:b::c:::d")""", [(":", ""), (":", ":"), (":", "::")]) test(r"""sre.findall(r"(a)|(b)", "abc")""", [("a", ""), ("", "b")]) +# bug 117612 +test(r"""sre.findall(r"(a|(b))", "aba")""", [("a", ""),("b", "b"),("a", "")]) + if verbose: print "Running tests on sre.match" diff --git a/Modules/_sre.c b/Modules/_sre.c index b72b8b2..954547f 100644 --- a/Modules/_sre.c +++ b/Modules/_sre.c @@ -5,14 +5,14 @@ * * partial history: * 1999-10-24 fl created (based on existing template matcher code) - * 2000-03-06 fl first alpha, sort of (0.5) - * 2000-06-30 fl added fast search optimization (0.9.3) - * 2000-06-30 fl added assert (lookahead) primitives, etc (0.9.4) - * 2000-07-02 fl added charset optimizations, etc (0.9.5) + * 2000-03-06 fl first alpha, sort of + * 2000-06-30 fl added fast search optimization + * 2000-06-30 fl added assert (lookahead) primitives, etc + * 2000-07-02 fl added charset optimizations, etc * 2000-07-03 fl store code in pattern object, lookbehind, etc * 2000-07-08 fl added regs attribute - * 2000-07-21 fl reset lastindex in scanner methods (0.9.6) - * 2000-08-01 fl fixes for 1.6b1 (0.9.8) + * 2000-07-21 fl reset lastindex in scanner methods + * 2000-08-01 fl fixes for 1.6b1 * 2000-08-03 fl added recursion limit * 2000-08-07 fl use PyOS_CheckStack() if available * 2000-08-08 fl changed findall to return empty strings instead of None @@ -21,6 +21,7 @@ * 2000-09-20 fl added expand method * 2000-09-21 fl don't use the buffer interface for unicode strings * 2000-10-03 fl fixed assert_not primitive; support keyword arguments + * 2000-10-24 fl really fixed assert_not; reset groups in findall * * Copyright (c) 1997-2000 by Secret Labs AB. All rights reserved. * @@ -35,7 +36,7 @@ #ifndef SRE_RECURSIVE -char copyright[] = " SRE 0.9.8 Copyright (c) 1997-2000 by Secret Labs AB "; +char copyright[] = " SRE 0.9.9 Copyright (c) 1997-2000 by Secret Labs AB "; #include "Python.h" @@ -783,13 +784,13 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level) /* */ TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern, ptr, pattern[1])); state->ptr = ptr - pattern[1]; - if (state->ptr < state->beginning) - return 0; - i = SRE_MATCH(state, pattern + 2, level + 1); - if (i < 0) - return i; - if (i) - return 0; + if (state->ptr >= state->beginning) { + i = SRE_MATCH(state, pattern + 2, level + 1); + if (i < 0) + return i; + if (i) + return 0; + } pattern += pattern[0]; break; @@ -1199,7 +1200,7 @@ _compile(PyObject* self_, PyObject* args) n = PySequence_Length(code); #endif - self = PyObject_NEW_VAR(PatternObject, &Pattern_Type, 100*n); + self = PyObject_NEW_VAR(PatternObject, &Pattern_Type, n); if (!self) { Py_DECREF(code); return NULL; @@ -1680,6 +1681,8 @@ pattern_findall(PatternObject* self, PyObject* args, PyObject* kw) PyObject* item; + state_reset(&state); + state.ptr = state.start; if (state.charsize == 1) { -- cgit v0.12