From 72b82ba16dea929b3fa9db5208b2353e8449c2d5 Mon Sep 17 00:00:00 2001 From: Fredrik Lundh Date: Mon, 3 Jul 2000 21:31:48 +0000 Subject: - fixed grouping error bug - changed "group" operator to "groupref" --- Lib/sre_compile.py | 19 +++++++++++++------ Lib/sre_constants.py | 8 ++++---- Lib/sre_parse.py | 4 ++-- Lib/test/output/test_sre | 3 --- Modules/_sre.c | 49 ++++++++++++++++++++++++++++++++---------------- Modules/sre.h | 3 +++ 6 files changed, 55 insertions(+), 31 deletions(-) diff --git a/Lib/sre_compile.py b/Lib/sre_compile.py index 701b267..828b170 100644 --- a/Lib/sre_compile.py +++ b/Lib/sre_compile.py @@ -148,18 +148,25 @@ def _compile(code, pattern, flags): skip = len(code); emit(0) emit(av[0]) emit(av[1]) + mark = MAXCODE + if av[2][0][0] == SUBPATTERN: + # repeated subpattern + gid, foo = av[2][0][1] + if gid: + mark = (gid-1)*2 + emit(mark) _compile(code, av[2], flags) emit(OPCODES[SUCCESS]) code[skip] = len(code) - skip elif op is SUBPATTERN: - group = av[0] - if group: + gid = av[0] + if gid: emit(OPCODES[MARK]) - emit((group-1)*2) + emit((gid-1)*2) _compile(code, av[1], flags) - if group: + if gid: emit(OPCODES[MARK]) - emit((group-1)*2+1) + emit((gid-1)*2+1) elif op in (SUCCESS, FAILURE): emit(OPCODES[op]) elif op in (ASSERT, ASSERT_NOT): @@ -207,7 +214,7 @@ def _compile(code, pattern, flags): emit(CHCODES[CH_UNICODE[av]]) else: emit(CHCODES[av]) - elif op is GROUP: + elif op is GROUPREF: if flags & SRE_FLAG_IGNORECASE: emit(OPCODES[OP_IGNORE[op]]) else: diff --git a/Lib/sre_constants.py b/Lib/sre_constants.py index 076637d..ef32c32 100644 --- a/Lib/sre_constants.py +++ b/Lib/sre_constants.py @@ -29,8 +29,8 @@ BRANCH = "branch" CALL = "call" CATEGORY = "category" CHARSET = "charset" -GROUP = "group" -GROUP_IGNORE = "group_ignore" +GROUPREF = "groupref" +GROUPREF_IGNORE = "groupref_ignore" IN = "in" IN_IGNORE = "in_ignore" INDEX = "index" @@ -90,7 +90,7 @@ OPCODES = [ CALL, CATEGORY, CHARSET, - GROUP, GROUP_IGNORE, + GROUPREF, GROUPREF_IGNORE, INDEX, IN, IN_IGNORE, INFO, @@ -136,7 +136,7 @@ CHCODES = makedict(CHCODES) # replacement operations for "ignore case" mode OP_IGNORE = { - GROUP: GROUP_IGNORE, + GROUPREF: GROUPREF_IGNORE, IN: IN_IGNORE, LITERAL: LITERAL_IGNORE, NOT_LITERAL: NOT_LITERAL_IGNORE diff --git a/Lib/sre_parse.py b/Lib/sre_parse.py index 07ab782..053335a 100644 --- a/Lib/sre_parse.py +++ b/Lib/sre_parse.py @@ -241,7 +241,7 @@ def _escape(source, escape, state): if group: if (not source.next or not _group(escape + source.next, state.groups)): - return GROUP, group + return GROUPREF, group escape = escape + source.get() elif source.next in OCTDIGITS: escape = escape + source.get() @@ -450,7 +450,7 @@ def _parse(source, state): gid = state.groupdict.get(name) if gid is None: raise error, "unknown group name" - subpattern.append((GROUP, gid)) + subpattern.append((GROUPREF, gid)) elif source.match("#"): index = "" while 1: diff --git a/Lib/test/output/test_sre b/Lib/test/output/test_sre index 3ba209d..d949f25 100644 --- a/Lib/test/output/test_sre +++ b/Lib/test/output/test_sre @@ -1,7 +1,4 @@ test_sre === Failed incorrectly ('^(.+)?B', 'AB', 0, 'g1', 'A') === Failed incorrectly ('(a+)+\\1', 'aa', 0, 'found+"-"+g1', 'aa-a') -=== grouping error ('([^/]*/)*sub1/', 'd:msgs/tdir/sub1/trial/away.cpp', 0, 'found+"-"+g1', 'd:msgs/tdir/sub1/-tdir/') 'd:msgs/tdir/sub1/-trial/' should be 'd:msgs/tdir/sub1/-tdir/' -=== grouping error ('([abc])*bcd', 'abcd', 0, 'found+"-"+g1', 'abcd-a') 'abcd-c' should be 'abcd-a' -=== grouping error ('(?i)([abc])*bcd', 'ABCD', 0, 'found+"-"+g1', 'ABCD-A') 'ABCD-C' should be 'ABCD-A' === Failed incorrectly ('^(.+)?B', 'AB', 0, 'g1', 'A') diff --git a/Modules/_sre.c b/Modules/_sre.c index 764e155..cac480d 100644 --- a/Modules/_sre.c +++ b/Modules/_sre.c @@ -406,6 +406,7 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern) int stackbase; int lastmark; int i, count; + SRE_STACK* sp; /* FIXME: this is a hack! */ void* mark_copy[SRE_MARK_SIZE]; @@ -571,8 +572,8 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern) /* set mark */ /* args: */ TRACE(("%8d: set mark %d\n", PTR(ptr), pattern[0])); - if (state->lastmark < pattern[0]) - state->lastmark = pattern[0]; + if (state->lastmark < pattern[0]+1) + state->lastmark = pattern[0]+1; if (!mark) { mark = mark_copy; memcpy(mark, state->mark, state->lastmark*sizeof(void*)); @@ -780,10 +781,8 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern) #endif case SRE_OP_MAX_REPEAT: - /* match repeated sequence (maximizing regexp). repeated - group should end with a MAX_UNTIL code */ - - /* args: */ + /* match repeated sequence (maximizing regexp) */ + /* args: <1=min> <2=max> <3=save> <4=item> */ TRACE(("%8d: max repeat (%d %d)\n", PTR(ptr), pattern[1], pattern[2])); @@ -793,7 +792,7 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern) /* match minimum number of items */ while (count < (int) pattern[1]) { - i = SRE_MATCH(state, pattern + 3); + i = SRE_MATCH(state, pattern + 4); if (i < 0) return i; if (!i) @@ -817,8 +816,13 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern) points to the stack */ while (pattern[2] == 65535 || count < (int) pattern[2]) { + void *mark0, *mark1; + if (pattern[3] != 65535) { + mark0 = state->mark[pattern[3]]; + mark1 = state->mark[pattern[3]+1]; + } state->stackbase = stack; - i = SRE_MATCH(state, pattern + 3); + i = SRE_MATCH(state, pattern + 4); state->stackbase = stackbase; /* rewind */ if (i < 0) return i; @@ -837,8 +841,14 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern) return i; /* out of memory */ } TRACE(("%8d: stack[%d] = %d\n", PTR(ptr), stack, PTR(ptr))); - state->stack[stack].ptr = ptr; - state->stack[stack].pattern = pattern + pattern[0]; + sp = state->stack + stack; + sp->ptr = ptr; + sp->pattern = pattern + pattern[0]; + sp->mark = pattern[3]; + if (pattern[3] != 65535) { + sp->mark0 = mark0; + sp->mark1 = mark1; + } stack++; /* move forward */ ptr = state->ptr; @@ -855,13 +865,15 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern) case SRE_OP_MIN_REPEAT: /* match repeated sequence (minimizing regexp) */ + /* args: <1=min> <2=max> <3=save> <4=item> */ + TRACE(("%8d: min repeat %d %d\n", PTR(ptr), pattern[1], pattern[2])); count = 0; state->ptr = ptr; /* match minimum number of items */ while (count < (int) pattern[1]) { - i = SRE_MATCH(state, pattern + 3); + i = SRE_MATCH(state, pattern + 4); if (i < 0) return i; if (!i) @@ -877,7 +889,7 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern) goto success; } state->ptr = ptr; /* backtrack */ - i = SRE_MATCH(state, pattern + 3); + i = SRE_MATCH(state, pattern + 4); if (i < 0) return i; if (!i) @@ -940,15 +952,20 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern) } failure: + TRACE(("%8d: leave (failure)\n", PTR(ptr))); if (stack-- > stackbase) { - ptr = state->stack[stack].ptr; - pattern = state->stack[stack].pattern; + sp = state->stack + stack; + ptr = sp->ptr; + pattern = sp->pattern; + if (sp->mark != 65535) { + state->mark[sp->mark] = sp->mark0; + state->mark[sp->mark+1] = sp->mark1; + } TRACE(("%8d: retry (%d)\n", PTR(ptr), stack)); goto retry; } - TRACE(("%8d: leave (failure)\n", PTR(ptr))); - state->stackbase = stackbase; state->lastmark = lastmark; + state->stackbase = stackbase; if (mark) memcpy(state->mark, mark, state->lastmark*sizeof(void*)); return 0; diff --git a/Modules/sre.h b/Modules/sre.h index d4e93da..1c4bf68 100644 --- a/Modules/sre.h +++ b/Modules/sre.h @@ -46,6 +46,9 @@ typedef struct { /* stack elements */ SRE_CODE* pattern; void* ptr; + int mark; + void* mark0; + void* mark1; } SRE_STACK; /* FIXME: shouldn't be a constant, really... */ -- cgit v0.12