diff options
author | Guido van Rossum <guido@python.org> | 2003-04-14 17:59:34 (GMT) |
---|---|---|
committer | Guido van Rossum <guido@python.org> | 2003-04-14 17:59:34 (GMT) |
commit | 41c99e7f96f7a0f192839801c568d8a80dcc7091 (patch) | |
tree | 5733d278d932546d0b642dce7e631512da483f76 /Modules | |
parent | 44c62ef5ee2d48cfa0bd024507f19e47d987e6b3 (diff) | |
download | cpython-41c99e7f96f7a0f192839801c568d8a80dcc7091.zip cpython-41c99e7f96f7a0f192839801c568d8a80dcc7091.tar.gz cpython-41c99e7f96f7a0f192839801c568d8a80dcc7091.tar.bz2 |
SF patch #720991 by Gary Herron:
A small fix for bug #545855 and Greg Chapman's
addition of op code SRE_OP_MIN_REPEAT_ONE for
eliminating recursion on simple uses of pattern '*?' on a
long string.
Diffstat (limited to 'Modules')
-rw-r--r-- | Modules/_sre.c | 60 | ||||
-rw-r--r-- | Modules/sre_constants.h | 1 |
2 files changed, 61 insertions, 0 deletions
diff --git a/Modules/_sre.c b/Modules/_sre.c index 619d39b..dde365b 100644 --- a/Modules/_sre.c +++ b/Modules/_sre.c @@ -993,6 +993,66 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level) } return 0; + case SRE_OP_MIN_REPEAT_ONE: + /* match repeated sequence (minimizing regexp) */ + + /* this operator only works if the repeated item is + exactly one character wide, and we're not already + collecting backtracking points. for other cases, + use the MIN_REPEAT operator */ + + /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */ + + TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr, + pattern[1], pattern[2])); + + if (ptr + pattern[1] > end) + return 0; /* cannot match */ + + state->ptr = ptr; + + if (pattern[1] == 0) + count = 0; + else { + /* count using pattern min as the maximum */ + count = SRE_COUNT(state, pattern + 3, pattern[1], level + 1); + + if (count < 0) + return count; /* exception */ + if (count < (int) pattern[1]) + return 0; /* did not match minimum number of times */ + ptr += count; /* advance past minimum matches of repeat */ + } + + if (pattern[pattern[0]] == SRE_OP_SUCCESS) { + /* tail is empty. we're finished */ + state->ptr = ptr; + return 1; + + } else { + /* general case */ + int matchmax = ((int)pattern[2] == 65535); + int c; + lastmark = state->lastmark; + while (matchmax || count <= (int) pattern[2]) { + state->ptr = ptr; + i = SRE_MATCH(state, pattern + pattern[0], level + 1); + if (i) + return i; + state->ptr = ptr; + c = SRE_COUNT(state, pattern+3, 1, level+1); + if (c < 0) + return c; + if (c == 0) + break; + assert(c == 1); + ptr++; + count++; + } + lastmark_restore(state, lastmark); + } + return 0; + case SRE_OP_REPEAT: /* create repeat context. all the hard work is done by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */ diff --git a/Modules/sre_constants.h b/Modules/sre_constants.h index ebb9fd0..540008e 100644 --- a/Modules/sre_constants.h +++ b/Modules/sre_constants.h @@ -42,6 +42,7 @@ #define SRE_OP_REPEAT 27 #define SRE_OP_REPEAT_ONE 28 #define SRE_OP_SUBPATTERN 29 +#define SRE_OP_MIN_REPEAT_ONE 30 #define SRE_AT_BEGINNING 0 #define SRE_AT_BEGINNING_LINE 1 #define SRE_AT_BEGINNING_STRING 2 |