summaryrefslogtreecommitdiffstats
path: root/Modules
diff options
context:
space:
mode:
authorGuido van Rossum <guido@python.org>2003-04-14 17:59:34 (GMT)
committerGuido van Rossum <guido@python.org>2003-04-14 17:59:34 (GMT)
commit41c99e7f96f7a0f192839801c568d8a80dcc7091 (patch)
tree5733d278d932546d0b642dce7e631512da483f76 /Modules
parent44c62ef5ee2d48cfa0bd024507f19e47d987e6b3 (diff)
downloadcpython-41c99e7f96f7a0f192839801c568d8a80dcc7091.zip
cpython-41c99e7f96f7a0f192839801c568d8a80dcc7091.tar.gz
cpython-41c99e7f96f7a0f192839801c568d8a80dcc7091.tar.bz2
SF patch #720991 by Gary Herron:
A small fix for bug #545855 and Greg Chapman's addition of op code SRE_OP_MIN_REPEAT_ONE for eliminating recursion on simple uses of pattern '*?' on a long string.
Diffstat (limited to 'Modules')
-rw-r--r--Modules/_sre.c60
-rw-r--r--Modules/sre_constants.h1
2 files changed, 61 insertions, 0 deletions
diff --git a/Modules/_sre.c b/Modules/_sre.c
index 619d39b..dde365b 100644
--- a/Modules/_sre.c
+++ b/Modules/_sre.c
@@ -993,6 +993,66 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level)
}
return 0;
+ case SRE_OP_MIN_REPEAT_ONE:
+ /* match repeated sequence (minimizing regexp) */
+
+ /* this operator only works if the repeated item is
+ exactly one character wide, and we're not already
+ collecting backtracking points. for other cases,
+ use the MIN_REPEAT operator */
+
+ /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
+
+ TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
+ pattern[1], pattern[2]));
+
+ if (ptr + pattern[1] > end)
+ return 0; /* cannot match */
+
+ state->ptr = ptr;
+
+ if (pattern[1] == 0)
+ count = 0;
+ else {
+ /* count using pattern min as the maximum */
+ count = SRE_COUNT(state, pattern + 3, pattern[1], level + 1);
+
+ if (count < 0)
+ return count; /* exception */
+ if (count < (int) pattern[1])
+ return 0; /* did not match minimum number of times */
+ ptr += count; /* advance past minimum matches of repeat */
+ }
+
+ if (pattern[pattern[0]] == SRE_OP_SUCCESS) {
+ /* tail is empty. we're finished */
+ state->ptr = ptr;
+ return 1;
+
+ } else {
+ /* general case */
+ int matchmax = ((int)pattern[2] == 65535);
+ int c;
+ lastmark = state->lastmark;
+ while (matchmax || count <= (int) pattern[2]) {
+ state->ptr = ptr;
+ i = SRE_MATCH(state, pattern + pattern[0], level + 1);
+ if (i)
+ return i;
+ state->ptr = ptr;
+ c = SRE_COUNT(state, pattern+3, 1, level+1);
+ if (c < 0)
+ return c;
+ if (c == 0)
+ break;
+ assert(c == 1);
+ ptr++;
+ count++;
+ }
+ lastmark_restore(state, lastmark);
+ }
+ return 0;
+
case SRE_OP_REPEAT:
/* create repeat context. all the hard work is done
by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
diff --git a/Modules/sre_constants.h b/Modules/sre_constants.h
index ebb9fd0..540008e 100644
--- a/Modules/sre_constants.h
+++ b/Modules/sre_constants.h
@@ -42,6 +42,7 @@
#define SRE_OP_REPEAT 27
#define SRE_OP_REPEAT_ONE 28
#define SRE_OP_SUBPATTERN 29
+#define SRE_OP_MIN_REPEAT_ONE 30
#define SRE_AT_BEGINNING 0
#define SRE_AT_BEGINNING_LINE 1
#define SRE_AT_BEGINNING_STRING 2