diff options
author | Serhiy Storchaka <storchaka@gmail.com> | 2014-09-29 19:49:23 (GMT) |
---|---|---|
committer | Serhiy Storchaka <storchaka@gmail.com> | 2014-09-29 19:49:23 (GMT) |
commit | 9baa5b2de2e1bd4d56791de8144f737f65b89c74 (patch) | |
tree | 59cff121a4d2becfe389a825f1e2b66e8a839c45 /Modules | |
parent | c31e6227f94c2bb0290336c739873173672a8991 (diff) | |
download | cpython-9baa5b2de2e1bd4d56791de8144f737f65b89c74.zip cpython-9baa5b2de2e1bd4d56791de8144f737f65b89c74.tar.gz cpython-9baa5b2de2e1bd4d56791de8144f737f65b89c74.tar.bz2 |
Issue #22437: The number of capturing groups in a regular expression is no longer
limited to 100.
Diffstat (limited to 'Modules')
-rw-r--r-- | Modules/_sre.c | 51 | ||||
-rw-r--r-- | Modules/sre.h | 7 |
2 files changed, 40 insertions, 18 deletions
diff --git a/Modules/_sre.c b/Modules/_sre.c index 13479ba..5c3d105 100644 --- a/Modules/_sre.c +++ b/Modules/_sre.c @@ -357,6 +357,11 @@ state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string, memset(state, 0, sizeof(SRE_STATE)); + state->mark = PyMem_New(void *, pattern->groups * 2); + if (!state->mark) { + PyErr_NoMemory(); + goto err; + } state->lastmark = -1; state->lastindex = -1; @@ -409,6 +414,8 @@ state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string, return string; err: + PyMem_Del(state->mark); + state->mark = NULL; if (state->buffer.buf) PyBuffer_Release(&state->buffer); return NULL; @@ -421,6 +428,8 @@ state_fini(SRE_STATE* state) PyBuffer_Release(&state->buffer); Py_XDECREF(state->string); data_stack_dealloc(state); + PyMem_Del(state->mark); + state->mark = NULL; } /* calculate offset from start of string */ @@ -560,6 +569,7 @@ pattern_match(PatternObject *self, PyObject *args, PyObject *kwargs) PyObject *pattern = NULL; SRE_STATE state; Py_ssize_t status; + PyObject *match; if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|Onn$O:match", _keywords, @@ -579,12 +589,14 @@ pattern_match(PatternObject *self, PyObject *args, PyObject *kwargs) status = sre_match(&state, PatternObject_GetCode(self), 0); TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr)); - if (PyErr_Occurred()) + if (PyErr_Occurred()) { + state_fini(&state); return NULL; + } + match = pattern_new_match(self, &state, status); state_fini(&state); - - return (PyObject *)pattern_new_match(self, &state, status); + return match; } static PyObject* @@ -592,6 +604,7 @@ pattern_fullmatch(PatternObject* self, PyObject* args, PyObject* kw) { SRE_STATE state; Py_ssize_t status; + PyObject *match; PyObject *string = NULL, *string2 = NULL; Py_ssize_t start = 0; @@ -616,12 +629,14 @@ pattern_fullmatch(PatternObject* self, PyObject* args, PyObject* kw) status = sre_match(&state, PatternObject_GetCode(self), 1); TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), 
state.ptr)); - if (PyErr_Occurred()) + if (PyErr_Occurred()) { + state_fini(&state); return NULL; + } + match = pattern_new_match(self, &state, status); state_fini(&state); - - return pattern_new_match(self, &state, status); + return match; } static PyObject* @@ -629,6 +644,7 @@ pattern_search(PatternObject* self, PyObject* args, PyObject* kw) { SRE_STATE state; Py_ssize_t status; + PyObject *match; PyObject *string = NULL, *string2 = NULL; Py_ssize_t start = 0; @@ -652,12 +668,14 @@ pattern_search(PatternObject* self, PyObject* args, PyObject* kw) TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr)); - state_fini(&state); - - if (PyErr_Occurred()) + if (PyErr_Occurred()) { + state_fini(&state); return NULL; + } - return pattern_new_match(self, &state, status); + match = pattern_new_match(self, &state, status); + state_fini(&state); + return match; } static PyObject* @@ -1417,7 +1435,7 @@ _compile(PyObject* self_, PyObject* args) PyObject* groupindex = NULL; PyObject* indexgroup = NULL; - if (!PyArg_ParseTuple(args, "OiO!|nOO", &pattern, &flags, + if (!PyArg_ParseTuple(args, "OiO!nOO", &pattern, &flags, &PyList_Type, &code, &groups, &groupindex, &indexgroup)) return NULL; @@ -1933,10 +1951,9 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups) static int _validate_outer(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups) { - if (groups < 0 || groups > 100 || code >= end || end[-1] != SRE_OP_SUCCESS) + if (groups < 0 || (size_t)groups > SRE_MAXGROUPS || + code >= end || end[-1] != SRE_OP_SUCCESS) FAIL; - if (groups == 0) /* fix for simplejson */ - groups = 100; /* 100 groups should always be safe */ return _validate_inner(code, end-1, groups); } @@ -2747,6 +2764,12 @@ PyMODINIT_FUNC PyInit__sre(void) Py_DECREF(x); } + x = PyLong_FromUnsignedLong(SRE_MAXGROUPS); + if (x) { + PyDict_SetItemString(d, "MAXGROUPS", x); + Py_DECREF(x); + } + x = PyUnicode_FromString(copyright); if (x) { PyDict_SetItemString(d, "copyright", x); diff --git 
a/Modules/sre.h b/Modules/sre.h index 42fe28d..35d198f 100644 --- a/Modules/sre.h +++ b/Modules/sre.h @@ -18,8 +18,10 @@ #define SRE_CODE Py_UCS4 #if SIZEOF_SIZE_T > 4 # define SRE_MAXREPEAT (~(SRE_CODE)0) +# define SRE_MAXGROUPS ((~(SRE_CODE)0) / 2) #else # define SRE_MAXREPEAT ((SRE_CODE)PY_SSIZE_T_MAX) +# define SRE_MAXGROUPS ((SRE_CODE)PY_SSIZE_T_MAX / SIZEOF_SIZE_T / 2) #endif typedef struct { @@ -52,9 +54,6 @@ typedef struct { typedef unsigned int (*SRE_TOLOWER_HOOK)(unsigned int ch); -/* FIXME: <fl> shouldn't be a constant, really... */ -#define SRE_MARK_SIZE 200 - typedef struct SRE_REPEAT_T { Py_ssize_t count; SRE_CODE* pattern; /* points to REPEAT operator arguments */ @@ -76,7 +75,7 @@ typedef struct { /* registers */ Py_ssize_t lastindex; Py_ssize_t lastmark; - void* mark[SRE_MARK_SIZE]; + void** mark; /* dynamically allocated stuff */ char* data_stack; size_t data_stack_size; |