diff options
Diffstat (limited to 'Modules/_sre.c')
-rw-r--r-- | Modules/_sre.c | 120 |
1 files changed, 34 insertions, 86 deletions
diff --git a/Modules/_sre.c b/Modules/_sre.c index 8d9cb98..99c3cd5 100644 --- a/Modules/_sre.c +++ b/Modules/_sre.c @@ -4,25 +4,24 @@ * regular expression matching engine * * partial history: - * 1999-10-24 fl created (based on existing template matcher code) - * 2000-03-06 fl first alpha, sort of - * 2000-08-01 fl fixes for 1.6b1 - * 2000-08-07 fl use PyOS_CheckStack() if available - * 2000-09-20 fl added expand method - * 2001-03-20 fl lots of fixes for 2.1b2 - * 2001-04-15 fl export copyright as Python attribute, not global - * 2001-04-28 fl added __copy__ methods (work in progress) - * 2001-05-14 fl fixes for 1.5.2 compatibility - * 2001-07-01 fl added BIGCHARSET support (from Martin von Loewis) - * 2001-10-18 fl fixed group reset issue (from Matthew Mueller) - * 2001-10-20 fl added split primitive; reenable unicode for 1.6/2.0/2.1 - * 2001-10-21 fl added sub/subn primitive - * 2001-10-24 fl added finditer primitive (for 2.2 only) - * 2001-12-07 fl fixed memory leak in sub/subn (Guido van Rossum) - * 2002-11-09 fl fixed empty sub/subn return type - * 2003-04-18 mvl fully support 4-byte codes - * 2003-10-17 gn implemented non recursive scheme - * 2013-02-04 mrab added fullmatch primitive + * 1999-10-24 fl created (based on existing template matcher code) + * 2000-03-06 fl first alpha, sort of + * 2000-08-01 fl fixes for 1.6b1 + * 2000-08-07 fl use PyOS_CheckStack() if available + * 2000-09-20 fl added expand method + * 2001-03-20 fl lots of fixes for 2.1b2 + * 2001-04-15 fl export copyright as Python attribute, not global + * 2001-04-28 fl added __copy__ methods (work in progress) + * 2001-05-14 fl fixes for 1.5.2 compatibility + * 2001-07-01 fl added BIGCHARSET support (from Martin von Loewis) + * 2001-10-18 fl fixed group reset issue (from Matthew Mueller) + * 2001-10-20 fl added split primitive; reenable unicode for 1.6/2.0/2.1 + * 2001-10-21 fl added sub/subn primitive + * 2001-10-24 fl added finditer primitive (for 2.2 only) + * 2001-12-07 fl fixed memory leak in sub/subn (Guido van Rossum) + * 2002-11-09 fl fixed empty sub/subn return type + * 2003-04-18 mvl fully support 4-byte codes + * 2003-10-17 gn implemented non recursive scheme * * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved. * @@ -747,12 +746,11 @@ do { \ #define JUMP_ASSERT 12 #define JUMP_ASSERT_NOT 13 -#define DO_JUMP(jumpvalue, jumplabel, nextpattern, matchall) \ +#define DO_JUMP(jumpvalue, jumplabel, nextpattern) \ DATA_ALLOC(SRE_MATCH_CONTEXT, nextctx); \ nextctx->last_ctx_pos = ctx_pos; \ nextctx->jump = jumpvalue; \ nextctx->pattern = nextpattern; \ - nextctx->match_all = matchall; \ ctx_pos = alloc_pos; \ ctx = nextctx; \ goto entrance; \ @@ -771,7 +769,6 @@ typedef struct { SRE_CODE chr; SRE_REPEAT* rep; } u; - int match_all; } SRE_MATCH_CONTEXT; /* check if string matches the given pattern. returns <0 for @@ -794,7 +791,6 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern) ctx->last_ctx_pos = -1; ctx->jump = JUMP_NONE; ctx->pattern = pattern; - ctx->match_all = state->match_all; ctx_pos = alloc_pos; entrance: @@ -868,8 +864,6 @@ entrance: case SRE_OP_SUCCESS: /* end of pattern */ TRACE(("|%p|%p|SUCCESS\n", ctx->pattern, ctx->ptr)); - if (ctx->match_all && ctx->ptr != state->end) - RETURN_FAILURE; state->ptr = ctx->ptr; RETURN_SUCCESS; @@ -978,7 +972,7 @@ entrance: !SRE_CHARSET(ctx->pattern + 3, (SRE_CODE) SRE_CHARGET(state, ctx->ptr, 0)))) continue; state->ptr = ctx->ptr; - DO_JUMP(JUMP_BRANCH, jump_branch, ctx->pattern+1, ctx->match_all); + DO_JUMP(JUMP_BRANCH, jump_branch, ctx->pattern+1); if (ret) { if (ctx->u.rep) MARK_POP_DISCARD(ctx->lastmark); @@ -1025,8 +1019,7 @@ entrance: if (ctx->count < (Py_ssize_t) ctx->pattern[1]) RETURN_FAILURE; - if (ctx->pattern[ctx->pattern[0]] == SRE_OP_SUCCESS && - (!ctx->match_all || ctx->ptr == state->end)) { + if (ctx->pattern[ctx->pattern[0]] == SRE_OP_SUCCESS) { /* tail is empty. we're finished */ state->ptr = ctx->ptr; RETURN_SUCCESS; @@ -1049,7 +1042,7 @@ entrance: break; state->ptr = ctx->ptr; DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1, - ctx->pattern+ctx->pattern[0], ctx->match_all); + ctx->pattern+ctx->pattern[0]); if (ret) { RETURN_ON_ERROR(ret); RETURN_SUCCESS; @@ -1066,7 +1059,7 @@ entrance: while (ctx->count >= (Py_ssize_t) ctx->pattern[1]) { state->ptr = ctx->ptr; DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2, - ctx->pattern+ctx->pattern[0], ctx->match_all); + ctx->pattern+ctx->pattern[0]); if (ret) { RETURN_ON_ERROR(ret); RETURN_SUCCESS; @@ -1111,8 +1104,7 @@ entrance: ctx->ptr += state->charsize * ctx->count; } - if (ctx->pattern[ctx->pattern[0]] == SRE_OP_SUCCESS && - (!ctx->match_all || ctx->ptr == state->end)) { + if (ctx->pattern[ctx->pattern[0]] == SRE_OP_SUCCESS) { /* tail is empty. we're finished */ state->ptr = ctx->ptr; RETURN_SUCCESS; @@ -1124,7 +1116,7 @@ entrance: || ctx->count <= (Py_ssize_t)ctx->pattern[2]) { state->ptr = ctx->ptr; DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one, - ctx->pattern+ctx->pattern[0], ctx->match_all); + ctx->pattern+ctx->pattern[0]); if (ret) { RETURN_ON_ERROR(ret); RETURN_SUCCESS; @@ -1163,7 +1155,7 @@ entrance: state->repeat = ctx->u.rep; state->ptr = ctx->ptr; - DO_JUMP(JUMP_REPEAT, jump_repeat, ctx->pattern+ctx->pattern[0], ctx->match_all); + DO_JUMP(JUMP_REPEAT, jump_repeat, ctx->pattern+ctx->pattern[0]); state->repeat = ctx->u.rep->prev; PyObject_FREE(ctx->u.rep); @@ -1195,7 +1187,7 @@ entrance: /* not enough matches */ ctx->u.rep->count = ctx->count; DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1, - ctx->u.rep->pattern+3, ctx->match_all); + ctx->u.rep->pattern+3); if (ret) { RETURN_ON_ERROR(ret); RETURN_SUCCESS; @@ -1217,7 +1209,7 @@ entrance: DATA_PUSH(&ctx->u.rep->last_ptr); ctx->u.rep->last_ptr = state->ptr; DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2, - ctx->u.rep->pattern+3, ctx->match_all); + ctx->u.rep->pattern+3); DATA_POP(&ctx->u.rep->last_ptr); if (ret) { MARK_POP_DISCARD(ctx->lastmark); @@ -1233,7 +1225,7 @@ entrance: /* cannot match more repeated items here. make sure the tail matches */ state->repeat = ctx->u.rep->prev; - DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, ctx->pattern, ctx->match_all); + DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, ctx->pattern); RETURN_ON_SUCCESS(ret); state->repeat = ctx->u.rep; state->ptr = ctx->ptr; @@ -1258,7 +1250,7 @@ entrance: /* not enough matches */ ctx->u.rep->count = ctx->count; DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1, - ctx->u.rep->pattern+3, ctx->match_all); + ctx->u.rep->pattern+3); if (ret) { RETURN_ON_ERROR(ret); RETURN_SUCCESS; @@ -1272,7 +1264,7 @@ entrance: /* see if the tail matches */ state->repeat = ctx->u.rep->prev; - DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, ctx->pattern, ctx->match_all); + DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, ctx->pattern); if (ret) { RETURN_ON_ERROR(ret); RETURN_SUCCESS; @@ -1293,7 +1285,7 @@ entrance: DATA_PUSH(&ctx->u.rep->last_ptr); ctx->u.rep->last_ptr = state->ptr; DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3, - ctx->u.rep->pattern+3, ctx->match_all); + ctx->u.rep->pattern+3); DATA_POP(&ctx->u.rep->last_ptr); if (ret) { RETURN_ON_ERROR(ret); @@ -1386,7 +1378,7 @@ entrance: state->ptr = ctx->ptr - state->charsize * ctx->pattern[1]; if (state->ptr < state->beginning) RETURN_FAILURE; - DO_JUMP(JUMP_ASSERT, jump_assert, ctx->pattern+2, 0); + DO_JUMP(JUMP_ASSERT, jump_assert, ctx->pattern+2); RETURN_ON_FAILURE(ret); ctx->pattern += ctx->pattern[0]; break; @@ -1398,7 +1390,7 @@ entrance: ctx->ptr, ctx->pattern[1])); state->ptr = ctx->ptr - state->charsize * ctx->pattern[1]; if (state->ptr >= state->beginning) { - DO_JUMP(JUMP_ASSERT_NOT, jump_assert_not, ctx->pattern+2, 0); + DO_JUMP(JUMP_ASSERT_NOT, jump_assert_not, ctx->pattern+2); if (ret) { RETURN_ON_ERROR(ret); RETURN_FAILURE; @@ -1918,44 +1910,6 @@ pattern_match(PatternObject* self, PyObject* args, PyObject* kw) } static PyObject* -pattern_fullmatch(PatternObject* self, PyObject* args, PyObject* kw) -{ - SRE_STATE state; - Py_ssize_t status; - - PyObject* string; - Py_ssize_t start = 0; - Py_ssize_t end = PY_SSIZE_T_MAX; - static char* kwlist[] = { "pattern", "pos", "endpos", NULL }; - if (!PyArg_ParseTupleAndKeywords(args, kw, "O|nn:fullmatch", kwlist, - &string, &start, &end)) - return NULL; - - string = state_init(&state, self, string, start, end); - if (!string) - return NULL; - - state.match_all = 1; - state.ptr = state.start; - - TRACE(("|%p|%p|FULLMATCH\n", PatternObject_GetCode(self), state.ptr)); - - if (state.logical_charsize == 1) { - status = sre_match(&state, PatternObject_GetCode(self)); - } else { - status = sre_umatch(&state, PatternObject_GetCode(self)); - } - - TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr)); - if (PyErr_Occurred()) - return NULL; - - state_fini(&state); - - return pattern_new_match(self, &state, status); -} - -static PyObject* pattern_search(PatternObject* self, PyObject* args, PyObject* kw) { SRE_STATE state; @@ -2576,10 +2530,6 @@ PyDoc_STRVAR(pattern_match_doc, "match(string[, pos[, endpos]]) -> match object or None.\n\ Matches zero or more characters at the beginning of the string"); -PyDoc_STRVAR(pattern_fullmatch_doc, -"fullmatch(string[, pos[, endpos]]) -> match object or None.\n\ - Matches against all of the string"); - PyDoc_STRVAR(pattern_search_doc, "search(string[, pos[, endpos]]) -> match object or None.\n\ Scan through string looking for a match, and return a corresponding\n\ @@ -2615,8 +2565,6 @@ PyDoc_STRVAR(pattern_doc, "Compiled regular expression objects"); static PyMethodDef pattern_methods[] = { {"match", (PyCFunction) pattern_match, METH_VARARGS|METH_KEYWORDS, pattern_match_doc}, - {"fullmatch", (PyCFunction) pattern_fullmatch, METH_VARARGS|METH_KEYWORDS, - pattern_fullmatch_doc}, {"search", (PyCFunction) pattern_search, METH_VARARGS|METH_KEYWORDS, pattern_search_doc}, {"sub", (PyCFunction) pattern_sub, METH_VARARGS|METH_KEYWORDS, |