diff options
Diffstat (limited to 'Modules/_sre.c')
-rw-r--r-- | Modules/_sre.c | 65 |
1 files changed, 43 insertions, 22 deletions
diff --git a/Modules/_sre.c b/Modules/_sre.c index 48193f8..506363d 100644 --- a/Modules/_sre.c +++ b/Modules/_sre.c @@ -427,6 +427,12 @@ state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string, state->lastmark = -1; state->lastindex = -1; + state->repeats_array = PyMem_New(SRE_REPEAT, pattern->repeat_count); + if (!state->repeats_array) { + PyErr_NoMemory(); + goto err; + } + state->buffer.buf = NULL; ptr = getstring(string, &length, &isbytes, &charsize, &state->buffer); if (!ptr) @@ -476,6 +482,9 @@ state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string, safely casted to `void*`, see bpo-39943 for details. */ PyMem_Free((void*) state->mark); state->mark = NULL; + PyMem_Free(state->repeats_array); + state->repeats_array = NULL; + if (state->buffer.buf) PyBuffer_Release(&state->buffer); return NULL; @@ -491,6 +500,8 @@ state_fini(SRE_STATE* state) /* See above PyMem_Del for why we explicitly cast here. */ PyMem_Free((void*) state->mark); state->mark = NULL; + PyMem_Free(state->repeats_array); + state->repeats_array = NULL; } /* calculate offset from start of string */ @@ -1407,14 +1418,15 @@ _sre.compile groups: Py_ssize_t groupindex: object(subclass_of='&PyDict_Type') indexgroup: object(subclass_of='&PyTuple_Type') + repeat_count: Py_ssize_t [clinic start generated code]*/ static PyObject * _sre_compile_impl(PyObject *module, PyObject *pattern, int flags, PyObject *code, Py_ssize_t groups, PyObject *groupindex, - PyObject *indexgroup) -/*[clinic end generated code: output=ef9c2b3693776404 input=0a68476dbbe5db30]*/ + PyObject *indexgroup, Py_ssize_t repeat_count) +/*[clinic end generated code: output=922af562d51b1657 input=77e39c322501ec2a]*/ { /* "compile" pattern descriptor to pattern object */ @@ -1472,8 +1484,8 @@ _sre_compile_impl(PyObject *module, PyObject *pattern, int flags, self->pattern = pattern; self->flags = flags; - self->groups = groups; + self->repeat_count = repeat_count; if (PyDict_GET_SIZE(groupindex) > 0) { Py_INCREF(groupindex); @@ -1645,7 +1657,7 @@ _validate_charset(SRE_CODE *code, SRE_CODE *end) } static int -_validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups) +_validate_inner(SRE_CODE *code, SRE_CODE *end, PatternObject *self) { /* Some variables are manipulated by the macros above */ SRE_CODE op; @@ -1666,8 +1678,8 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups) sre_match() code is robust even if they don't, and the worst you can get is nonsensical match results. */ GET_ARG; - if (arg > 2 * (size_t)groups + 1) { - VTRACE(("arg=%d, groups=%d\n", (int)arg, (int)groups)); + if (arg > 2 * (size_t)self->groups + 1) { + VTRACE(("arg=%d, groups=%d\n", (int)arg, (int)self->groups)); FAIL; } break; @@ -1796,7 +1808,7 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups) if (skip == 0) break; /* Stop 2 before the end; we check the JUMP below */ - if (!_validate_inner(code, code+skip-3, groups)) + if (!_validate_inner(code, code+skip-3, self)) FAIL; code += skip-3; /* Check that it ends with a JUMP, and that each JUMP @@ -1825,7 +1837,7 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups) FAIL; if (max > SRE_MAXREPEAT) FAIL; - if (!_validate_inner(code, code+skip-4, groups)) + if (!_validate_inner(code, code+skip-4, self)) FAIL; code += skip-4; GET_OP; @@ -1837,7 +1849,7 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups) case SRE_OP_REPEAT: case SRE_OP_POSSESSIVE_REPEAT: { - SRE_CODE op1 = op, min, max; + SRE_CODE op1 = op, min, max, repeat_index; GET_SKIP; GET_ARG; min = arg; GET_ARG; max = arg; @@ -1845,9 +1857,17 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups) FAIL; if (max > SRE_MAXREPEAT) FAIL; - if (!_validate_inner(code, code+skip-3, groups)) + if (op1 == SRE_OP_REPEAT) { + GET_ARG; repeat_index = arg; + if (repeat_index >= (size_t)self->repeat_count) + FAIL; + skip -= 4; + } else { + skip -= 3; + } + if (!_validate_inner(code, code+skip, self)) FAIL; - code += skip-3; + code += skip; GET_OP; if (op1 == SRE_OP_POSSESSIVE_REPEAT) { if (op != SRE_OP_SUCCESS) @@ -1863,7 +1883,7 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups) case SRE_OP_ATOMIC_GROUP: { GET_SKIP; - if (!_validate_inner(code, code+skip-2, groups)) + if (!_validate_inner(code, code+skip-2, self)) FAIL; code += skip-2; GET_OP; @@ -1877,7 +1897,7 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups) case SRE_OP_GROUPREF_UNI_IGNORE: case SRE_OP_GROUPREF_LOC_IGNORE: GET_ARG; - if (arg >= (size_t)groups) + if (arg >= (size_t)self->groups) FAIL; break; @@ -1886,7 +1906,7 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups) 'group' is either an integer group number or a group name, 'then' and 'else' are sub-regexes, and 'else' is optional. */ GET_ARG; - if (arg >= (size_t)groups) + if (arg >= (size_t)self->groups) FAIL; GET_SKIP_ADJ(1); code--; /* The skip is relative to the first arg! */ @@ -1919,17 +1939,17 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups) code[skip-3] == SRE_OP_JUMP) { VTRACE(("both then and else parts present\n")); - if (!_validate_inner(code+1, code+skip-3, groups)) + if (!_validate_inner(code+1, code+skip-3, self)) FAIL; code += skip-2; /* Position after JUMP, at <skipno> */ GET_SKIP; - if (!_validate_inner(code, code+skip-1, groups)) + if (!_validate_inner(code, code+skip-1, self)) FAIL; code += skip-1; } else { VTRACE(("only a then part present\n")); - if (!_validate_inner(code+1, code+skip-1, groups)) + if (!_validate_inner(code+1, code+skip-1, self)) FAIL; code += skip-1; } @@ -1943,7 +1963,7 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups) if (arg & 0x80000000) FAIL; /* Width too large */ /* Stop 1 before the end; we check the SUCCESS below */ - if (!_validate_inner(code+1, code+skip-2, groups)) + if (!_validate_inner(code+1, code+skip-2, self)) FAIL; code += skip-2; GET_OP; @@ -1962,18 +1982,19 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups) } static int -_validate_outer(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups) +_validate_outer(SRE_CODE *code, SRE_CODE *end, PatternObject *self) { - if (groups < 0 || (size_t)groups > SRE_MAXGROUPS || + if (self->groups < 0 || (size_t)self->groups > SRE_MAXGROUPS || + self->repeat_count < 0 || code >= end || end[-1] != SRE_OP_SUCCESS) FAIL; - return _validate_inner(code, end-1, groups); + return _validate_inner(code, end-1, self); } static int _validate(PatternObject *self) { - if (!_validate_outer(self->code, self->code+self->codesize, self->groups)) + if (!_validate_outer(self->code, self->code+self->codesize, self)) { PyErr_SetString(PyExc_RuntimeError, "invalid SRE code"); return 0; |