diff options
author | Fredrik Lundh <fredrik@pythonware.com> | 2000-08-01 22:47:49 (GMT) |
---|---|---|
committer | Fredrik Lundh <fredrik@pythonware.com> | 2000-08-01 22:47:49 (GMT) |
commit | e186983842f0b27606b141010513fa8e3d0cc5db (patch) | |
tree | 8160cdbd00dc449a79a25cfaa6a16069b4bd74b3 /Modules | |
parent | fb06539e999271ea9b07b754d461f2172d65978b (diff) | |
download | cpython-e186983842f0b27606b141010513fa8e3d0cc5db.zip cpython-e186983842f0b27606b141010513fa8e3d0cc5db.tar.gz cpython-e186983842f0b27606b141010513fa8e3d0cc5db.tar.bz2 |
final 0.9.8 updates:
-- added REPEAT_ONE operator
-- added ANY_ALL operator (used to represent "(?s).")
Diffstat (limited to 'Modules')
-rw-r--r-- | Modules/_sre.c | 61 |
-rw-r--r-- | Modules/sre_constants.h | 51 |
2 files changed, 72 insertions, 40 deletions
diff --git a/Modules/_sre.c b/Modules/_sre.c index 69bc171..677edb8 100644 --- a/Modules/_sre.c +++ b/Modules/_sre.c @@ -448,6 +448,7 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level) int i, count; SRE_REPEAT* rp; int lastmark; + SRE_CODE chr; SRE_REPEAT rep; /* FIXME: <fl> allocate in STATE instead */ @@ -525,8 +526,17 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level) break; case SRE_OP_ANY: - /* match anything */ + /* match anything (except a newline) */ /* <ANY> */ + TRACE(("%8d: anything (except newline)\n", PTR(ptr))); + if (ptr >= end || SRE_IS_LINEBREAK(ptr[0])) + return 0; + ptr++; + break; + + case SRE_OP_ANY_ALL: + /* match anything */ + /* <ANY_ALL> */ TRACE(("%8d: anything\n", PTR(ptr))); if (ptr >= end) return 0; @@ -695,60 +705,79 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level) TRACE(("%8d: max repeat one {%d,%d}\n", PTR(ptr), pattern[1], pattern[2])); + if (ptr + pattern[1] > end) + return 0; /* cannot match */ + count = 0; - if (pattern[3] == SRE_OP_ANY) { + switch (pattern[3]) { + + case SRE_OP_ANY: + /* repeated wildcard. */ + while (count < (int) pattern[2]) { + if (ptr >= end || SRE_IS_LINEBREAK(ptr[0])) + break; + ptr++; + count++; + } + break; + + case SRE_OP_ANY_ALL: /* repeated wildcard. 
skip to the end of the target string, and backtrack from there */ - /* FIXME: must look for line endings */ if (ptr + pattern[1] > end) return 0; /* cannot match */ count = pattern[2]; if (count > end - ptr) count = end - ptr; ptr += count; + break; - } else if (pattern[3] == SRE_OP_LITERAL) { + case SRE_OP_LITERAL: /* repeated literal */ - SRE_CODE chr = pattern[4]; + chr = pattern[4]; while (count < (int) pattern[2]) { if (ptr >= end || (SRE_CODE) ptr[0] != chr) break; ptr++; count++; } + break; - } else if (pattern[3] == SRE_OP_LITERAL_IGNORE) { + case SRE_OP_LITERAL_IGNORE: /* repeated literal */ - SRE_CODE chr = pattern[4]; + chr = pattern[4]; while (count < (int) pattern[2]) { if (ptr >= end || (SRE_CODE) state->lower(*ptr) != chr) break; ptr++; count++; } + break; - } else if (pattern[3] == SRE_OP_NOT_LITERAL) { + case SRE_OP_NOT_LITERAL: /* repeated non-literal */ - SRE_CODE chr = pattern[4]; + chr = pattern[4]; while (count < (int) pattern[2]) { if (ptr >= end || (SRE_CODE) ptr[0] == chr) break; ptr++; count++; } - - } else if (pattern[3] == SRE_OP_NOT_LITERAL_IGNORE) { + break; + + case SRE_OP_NOT_LITERAL_IGNORE: /* repeated non-literal */ - SRE_CODE chr = pattern[4]; + chr = pattern[4]; while (count < (int) pattern[2]) { if (ptr >= end || (SRE_CODE) state->lower(ptr[0]) == chr) break; ptr++; count++; } + break; - } else if (pattern[3] == SRE_OP_IN) { + case SRE_OP_IN: /* repeated set */ while (count < (int) pattern[2]) { if (ptr >= end || !SRE_MEMBER(pattern + 5, *ptr)) @@ -756,8 +785,9 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level) ptr++; count++; } + break; - } else { + default: /* repeated single character pattern */ state->ptr = ptr; while (count < (int) pattern[2]) { @@ -770,6 +800,7 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level) } state->ptr = ptr; ptr += count; + break; } /* when we arrive here, count contains the number of @@ -791,7 +822,7 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level) } else if 
(pattern[pattern[0]] == SRE_OP_LITERAL) { /* tail starts with a literal. skip positions where the rest of the pattern cannot possibly match */ - SRE_CODE chr = pattern[pattern[0]+1]; + chr = pattern[pattern[0]+1]; TRACE(("%8d: tail is literal %d\n", PTR(ptr), chr)); for (;;) { TRACE(("%8d: scan for tail match\n", PTR(ptr))); diff --git a/Modules/sre_constants.h b/Modules/sre_constants.h index 5cfe495..5c55c3d 100644 --- a/Modules/sre_constants.h +++ b/Modules/sre_constants.h @@ -14,31 +14,32 @@ #define SRE_OP_FAILURE 0 #define SRE_OP_SUCCESS 1 #define SRE_OP_ANY 2 -#define SRE_OP_ASSERT 3 -#define SRE_OP_ASSERT_NOT 4 -#define SRE_OP_AT 5 -#define SRE_OP_BRANCH 6 -#define SRE_OP_CALL 7 -#define SRE_OP_CATEGORY 8 -#define SRE_OP_CHARSET 9 -#define SRE_OP_GROUPREF 10 -#define SRE_OP_GROUPREF_IGNORE 11 -#define SRE_OP_IN 12 -#define SRE_OP_IN_IGNORE 13 -#define SRE_OP_INFO 14 -#define SRE_OP_JUMP 15 -#define SRE_OP_LITERAL 16 -#define SRE_OP_LITERAL_IGNORE 17 -#define SRE_OP_MARK 18 -#define SRE_OP_MAX_UNTIL 19 -#define SRE_OP_MIN_UNTIL 20 -#define SRE_OP_NOT_LITERAL 21 -#define SRE_OP_NOT_LITERAL_IGNORE 22 -#define SRE_OP_NEGATE 23 -#define SRE_OP_RANGE 24 -#define SRE_OP_REPEAT 25 -#define SRE_OP_REPEAT_ONE 26 -#define SRE_OP_SUBPATTERN 27 +#define SRE_OP_ANY_ALL 3 +#define SRE_OP_ASSERT 4 +#define SRE_OP_ASSERT_NOT 5 +#define SRE_OP_AT 6 +#define SRE_OP_BRANCH 7 +#define SRE_OP_CALL 8 +#define SRE_OP_CATEGORY 9 +#define SRE_OP_CHARSET 10 +#define SRE_OP_GROUPREF 11 +#define SRE_OP_GROUPREF_IGNORE 12 +#define SRE_OP_IN 13 +#define SRE_OP_IN_IGNORE 14 +#define SRE_OP_INFO 15 +#define SRE_OP_JUMP 16 +#define SRE_OP_LITERAL 17 +#define SRE_OP_LITERAL_IGNORE 18 +#define SRE_OP_MARK 19 +#define SRE_OP_MAX_UNTIL 20 +#define SRE_OP_MIN_UNTIL 21 +#define SRE_OP_NOT_LITERAL 22 +#define SRE_OP_NOT_LITERAL_IGNORE 23 +#define SRE_OP_NEGATE 24 +#define SRE_OP_RANGE 25 +#define SRE_OP_REPEAT 26 +#define SRE_OP_REPEAT_ONE 27 +#define SRE_OP_SUBPATTERN 28 #define SRE_AT_BEGINNING 0 
#define SRE_AT_BEGINNING_LINE 1 #define SRE_AT_BOUNDARY 2 |