diff options
author | Fredrik Lundh <fredrik@pythonware.com> | 2000-07-02 17:33:27 (GMT) |
---|---|---|
committer | Fredrik Lundh <fredrik@pythonware.com> | 2000-07-02 17:33:27 (GMT) |
commit | 7cafe4d7e466996d5fc32e871fe834e0e0c94282 (patch) | |
tree | dc3572d1d6bd95316c7a044cfd8639be014e3520 /Modules/_sre.c | |
parent | b19948b7fb96cfc2ed69bb58f2205d1399f1f9f5 (diff) | |
download | cpython-7cafe4d7e466996d5fc32e871fe834e0e0c94282.zip cpython-7cafe4d7e466996d5fc32e871fe834e0e0c94282.tar.gz cpython-7cafe4d7e466996d5fc32e871fe834e0e0c94282.tar.bz2 |
- actually enabled charset anchors in the engine (still not
used by the code generator)
- changed max repeat value in engine (to match earlier array fix)
- added experimental "which part matched?" mechanism to sre; see
http://hem.passagen.se/eff/2000_07_01_bot-archive.htm#416954
or python-dev for details.
Diffstat (limited to 'Modules/_sre.c')
-rw-r--r-- | Modules/_sre.c | 30 |
1 files changed, 26 insertions, 4 deletions
diff --git a/Modules/_sre.c b/Modules/_sre.c index 3bc0237..e11a892 100644 --- a/Modules/_sre.c +++ b/Modules/_sre.c @@ -21,6 +21,7 @@ * 00-06-29 fl fixed split, added more scanner features (0.9.2) * 00-06-30 fl added fast search optimization (0.9.3) * 00-06-30 fl added assert (lookahead) primitives, etc (0.9.4) + * 00-07-02 fl added charset optimizations, etc (0.9.5) * * Copyright (c) 1997-2000 by Secret Labs AB. All rights reserved. * @@ -31,7 +32,7 @@ #ifndef SRE_RECURSIVE -char copyright[] = " SRE 0.9.4 Copyright (c) 1997-2000 by Secret Labs AB "; +char copyright[] = " SRE 0.9.5 Copyright (c) 1997-2000 by Secret Labs AB "; #include "Python.h" @@ -587,6 +588,14 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern) pattern++; break; + case SRE_OP_INDEX: + /* set index */ + /* args: <index> */ + TRACE(("%8d: set index %d\n", PTR(ptr), pattern[0])); + state->index = pattern[0]; + pattern++; + break; + case SRE_OP_JUMP: case SRE_OP_INFO: /* jump forward */ @@ -810,7 +819,7 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern) /* match maximum number of items, pushing alternate end points to the stack */ - while (pattern[2] == 32767 || count < (int) pattern[2]) { + while (pattern[2] == 65535 || count < (int) pattern[2]) { state->stackbase = stack; i = SRE_MATCH(state, pattern + 3); state->stackbase = stackbase; /* rewind */ @@ -980,10 +989,12 @@ SRE_SEARCH(SRE_STATE* state, SRE_CODE* pattern) } if (flags & SRE_INFO_PREFIX) { + /* pattern starts with a known prefix */ prefix_len = pattern[5]; prefix = pattern + 6; overlap = prefix + prefix_len - 1; } else if (flags & SRE_INFO_CHARSET) + /* pattern starts with a character from a known set */ charset = pattern + 5; pattern += 1 + pattern[1]; @@ -1042,7 +1053,6 @@ SRE_SEARCH(SRE_STATE* state, SRE_CODE* pattern) if (status != 0) break; } -#if 0 } else if (charset) { /* pattern starts with a character from a known set */ for (;;) { @@ -1057,7 +1067,6 @@ SRE_SEARCH(SRE_STATE* state, SRE_CODE* pattern) if (status != 0) break; } -#endif } else /* general case */ while (ptr <= end) { @@ -1204,6 +1213,8 @@ state_init(SRE_STATE* state, PatternObject* pattern, PyObject* args) for (i = 0; i < SRE_MARK_SIZE; i++) state->mark[i] = NULL; + state->index = -1; + state->stack = NULL; state->stackbase = 0; state->stacksize = 0; @@ -1286,6 +1297,8 @@ pattern_new_match(PatternObject* pattern, SRE_STATE* state, } else match->mark[j+2] = match->mark[j+3] = -1; /* undefined */ + match->index = state->index; + return (PyObject*) match; } else if (status < 0) { @@ -1887,6 +1900,15 @@ match_getattr(MatchObject* self, char* name) if (!strcmp(name, "endpos")) return Py_BuildValue("i", 0); /* FIXME */ + if (!strcmp(name, "index")) { + /* experimental */ + if (self->index < 0) { + Py_INCREF(Py_None); + return Py_None; + } else + return Py_BuildValue("i", self->index); + } + PyErr_SetString(PyExc_AttributeError, name); return NULL; } |