summary | refs | log | tree | commit | diff | stats
path: root/Modules/_sre
diff options
context:
space:
mode:
author: Miss Islington (bot) <31488909+miss-islington@users.noreply.github.com> 2022-06-17 08:43:56 (GMT)
committer: GitHub <noreply@github.com> 2022-06-17 08:43:56 (GMT)
commit: 029835d9d4073d34d95bde9f969a6b0f969fda00 (patch)
tree: 3675e1fae9fc9df5a16f61b40a8afc881e0d6993 /Modules/_sre
parent: 5ee86d43067f46d8a99134bfaf01ebb98a71e295 (diff)
download: cpython-029835d9d4073d34d95bde9f969a6b0f969fda00.zip
cpython-029835d9d4073d34d95bde9f969a6b0f969fda00.tar.gz
cpython-029835d9d4073d34d95bde9f969a6b0f969fda00.tar.bz2
gh-91404: Revert "bpo-23689: re module, fix memory leak when a match is terminated by a signal or allocation failure (GH-32283)" (GH-93882)
Revert "bpo-23689: re module, fix memory leak when a match is terminated by a signal or memory allocation failure (GH-32283)" This reverts commit 6e3eee5c11b539e9aab39cff783acf57838c355a. Manual fixups to increase the MAGIC number and to handle conflicts with a couple of changes that landed after that. Thanks for reviews by Ma Lin and Serhiy Storchaka. (cherry picked from commit 4beee0c7b0c2cc78a893dde88fd8e34099dcf877) Co-authored-by: Gregory P. Smith <greg@krypto.org>
Diffstat (limited to 'Modules/_sre')
-rw-r--r-- Modules/_sre/clinic/sre.c.h 27
-rw-r--r-- Modules/_sre/sre.c 65
-rw-r--r-- Modules/_sre/sre.h 4
-rw-r--r-- Modules/_sre/sre_constants.h 2
-rw-r--r-- Modules/_sre/sre_lib.h 30
5 files changed, 47 insertions, 81 deletions
diff --git a/Modules/_sre/clinic/sre.c.h b/Modules/_sre/clinic/sre.c.h
index e243c75..048a494 100644
--- a/Modules/_sre/clinic/sre.c.h
+++ b/Modules/_sre/clinic/sre.c.h
@@ -764,7 +764,7 @@ PyDoc_STRVAR(_sre_SRE_Pattern___deepcopy____doc__,
PyDoc_STRVAR(_sre_compile__doc__,
"compile($module, /, pattern, flags, code, groups, groupindex,\n"
-" indexgroup, repeat_count)\n"
+" indexgroup)\n"
"--\n"
"\n");
@@ -774,24 +774,23 @@ PyDoc_STRVAR(_sre_compile__doc__,
static PyObject *
_sre_compile_impl(PyObject *module, PyObject *pattern, int flags,
PyObject *code, Py_ssize_t groups, PyObject *groupindex,
- PyObject *indexgroup, Py_ssize_t repeat_count);
+ PyObject *indexgroup);
static PyObject *
_sre_compile(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
{
PyObject *return_value = NULL;
- static const char * const _keywords[] = {"pattern", "flags", "code", "groups", "groupindex", "indexgroup", "repeat_count", NULL};
+ static const char * const _keywords[] = {"pattern", "flags", "code", "groups", "groupindex", "indexgroup", NULL};
static _PyArg_Parser _parser = {NULL, _keywords, "compile", 0};
- PyObject *argsbuf[7];
+ PyObject *argsbuf[6];
PyObject *pattern;
int flags;
PyObject *code;
Py_ssize_t groups;
PyObject *groupindex;
PyObject *indexgroup;
- Py_ssize_t repeat_count;
- args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 7, 7, 0, argsbuf);
+ args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 6, 6, 0, argsbuf);
if (!args) {
goto exit;
}
@@ -827,19 +826,7 @@ _sre_compile(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject
goto exit;
}
indexgroup = args[5];
- {
- Py_ssize_t ival = -1;
- PyObject *iobj = _PyNumber_Index(args[6]);
- if (iobj != NULL) {
- ival = PyLong_AsSsize_t(iobj);
- Py_DECREF(iobj);
- }
- if (ival == -1 && PyErr_Occurred()) {
- goto exit;
- }
- repeat_count = ival;
- }
- return_value = _sre_compile_impl(module, pattern, flags, code, groups, groupindex, indexgroup, repeat_count);
+ return_value = _sre_compile_impl(module, pattern, flags, code, groups, groupindex, indexgroup);
exit:
return return_value;
@@ -1129,4 +1116,4 @@ _sre_SRE_Scanner_search(ScannerObject *self, PyTypeObject *cls, PyObject *const
}
return _sre_SRE_Scanner_search_impl(self, cls);
}
-/*[clinic end generated code: output=97e7ce058366760b input=a9049054013a1b77]*/
+/*[clinic end generated code: output=fd2f45c941620e6e input=a9049054013a1b77]*/
diff --git a/Modules/_sre/sre.c b/Modules/_sre/sre.c
index 491734f..0a7019a 100644
--- a/Modules/_sre/sre.c
+++ b/Modules/_sre/sre.c
@@ -427,12 +427,6 @@ state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
state->lastmark = -1;
state->lastindex = -1;
- state->repeats_array = PyMem_New(SRE_REPEAT, pattern->repeat_count);
- if (!state->repeats_array) {
- PyErr_NoMemory();
- goto err;
- }
-
state->buffer.buf = NULL;
ptr = getstring(string, &length, &isbytes, &charsize, &state->buffer);
if (!ptr)
@@ -482,9 +476,6 @@ state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
safely casted to `void*`, see bpo-39943 for details. */
PyMem_Free((void*) state->mark);
state->mark = NULL;
- PyMem_Free(state->repeats_array);
- state->repeats_array = NULL;
-
if (state->buffer.buf)
PyBuffer_Release(&state->buffer);
return NULL;
@@ -500,8 +491,6 @@ state_fini(SRE_STATE* state)
/* See above PyMem_Del for why we explicitly cast here. */
PyMem_Free((void*) state->mark);
state->mark = NULL;
- PyMem_Free(state->repeats_array);
- state->repeats_array = NULL;
}
/* calculate offset from start of string */
@@ -1418,15 +1407,14 @@ _sre.compile
groups: Py_ssize_t
groupindex: object(subclass_of='&PyDict_Type')
indexgroup: object(subclass_of='&PyTuple_Type')
- repeat_count: Py_ssize_t
[clinic start generated code]*/
static PyObject *
_sre_compile_impl(PyObject *module, PyObject *pattern, int flags,
PyObject *code, Py_ssize_t groups, PyObject *groupindex,
- PyObject *indexgroup, Py_ssize_t repeat_count)
-/*[clinic end generated code: output=922af562d51b1657 input=77e39c322501ec2a]*/
+ PyObject *indexgroup)
+/*[clinic end generated code: output=ef9c2b3693776404 input=0a68476dbbe5db30]*/
{
/* "compile" pattern descriptor to pattern object */
@@ -1484,8 +1472,8 @@ _sre_compile_impl(PyObject *module, PyObject *pattern, int flags,
self->pattern = pattern;
self->flags = flags;
+
self->groups = groups;
- self->repeat_count = repeat_count;
if (PyDict_GET_SIZE(groupindex) > 0) {
Py_INCREF(groupindex);
@@ -1657,7 +1645,7 @@ _validate_charset(SRE_CODE *code, SRE_CODE *end)
}
static int
-_validate_inner(SRE_CODE *code, SRE_CODE *end, PatternObject *self)
+_validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
{
/* Some variables are manipulated by the macros above */
SRE_CODE op;
@@ -1678,8 +1666,8 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, PatternObject *self)
sre_match() code is robust even if they don't, and the worst
you can get is nonsensical match results. */
GET_ARG;
- if (arg > 2 * (size_t)self->groups + 1) {
- VTRACE(("arg=%d, groups=%d\n", (int)arg, (int)self->groups));
+ if (arg > 2 * (size_t)groups + 1) {
+ VTRACE(("arg=%d, groups=%d\n", (int)arg, (int)groups));
FAIL;
}
break;
@@ -1808,7 +1796,7 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, PatternObject *self)
if (skip == 0)
break;
/* Stop 2 before the end; we check the JUMP below */
- if (!_validate_inner(code, code+skip-3, self))
+ if (!_validate_inner(code, code+skip-3, groups))
FAIL;
code += skip-3;
/* Check that it ends with a JUMP, and that each JUMP
@@ -1837,7 +1825,7 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, PatternObject *self)
FAIL;
if (max > SRE_MAXREPEAT)
FAIL;
- if (!_validate_inner(code, code+skip-4, self))
+ if (!_validate_inner(code, code+skip-4, groups))
FAIL;
code += skip-4;
GET_OP;
@@ -1849,7 +1837,7 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, PatternObject *self)
case SRE_OP_REPEAT:
case SRE_OP_POSSESSIVE_REPEAT:
{
- SRE_CODE op1 = op, min, max, repeat_index;
+ SRE_CODE op1 = op, min, max;
GET_SKIP;
GET_ARG; min = arg;
GET_ARG; max = arg;
@@ -1857,17 +1845,9 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, PatternObject *self)
FAIL;
if (max > SRE_MAXREPEAT)
FAIL;
- if (op1 == SRE_OP_REPEAT) {
- GET_ARG; repeat_index = arg;
- if (repeat_index >= (size_t)self->repeat_count)
- FAIL;
- skip -= 4;
- } else {
- skip -= 3;
- }
- if (!_validate_inner(code, code+skip, self))
+ if (!_validate_inner(code, code+skip-3, groups))
FAIL;
- code += skip;
+ code += skip-3;
GET_OP;
if (op1 == SRE_OP_POSSESSIVE_REPEAT) {
if (op != SRE_OP_SUCCESS)
@@ -1883,7 +1863,7 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, PatternObject *self)
case SRE_OP_ATOMIC_GROUP:
{
GET_SKIP;
- if (!_validate_inner(code, code+skip-2, self))
+ if (!_validate_inner(code, code+skip-2, groups))
FAIL;
code += skip-2;
GET_OP;
@@ -1897,7 +1877,7 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, PatternObject *self)
case SRE_OP_GROUPREF_UNI_IGNORE:
case SRE_OP_GROUPREF_LOC_IGNORE:
GET_ARG;
- if (arg >= (size_t)self->groups)
+ if (arg >= (size_t)groups)
FAIL;
break;
@@ -1906,7 +1886,7 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, PatternObject *self)
'group' is either an integer group number or a group name,
'then' and 'else' are sub-regexes, and 'else' is optional. */
GET_ARG;
- if (arg >= (size_t)self->groups)
+ if (arg >= (size_t)groups)
FAIL;
GET_SKIP_ADJ(1);
code--; /* The skip is relative to the first arg! */
@@ -1939,17 +1919,17 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, PatternObject *self)
code[skip-3] == SRE_OP_JUMP)
{
VTRACE(("both then and else parts present\n"));
- if (!_validate_inner(code+1, code+skip-3, self))
+ if (!_validate_inner(code+1, code+skip-3, groups))
FAIL;
code += skip-2; /* Position after JUMP, at <skipno> */
GET_SKIP;
- if (!_validate_inner(code, code+skip-1, self))
+ if (!_validate_inner(code, code+skip-1, groups))
FAIL;
code += skip-1;
}
else {
VTRACE(("only a then part present\n"));
- if (!_validate_inner(code+1, code+skip-1, self))
+ if (!_validate_inner(code+1, code+skip-1, groups))
FAIL;
code += skip-1;
}
@@ -1963,7 +1943,7 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, PatternObject *self)
if (arg & 0x80000000)
FAIL; /* Width too large */
/* Stop 1 before the end; we check the SUCCESS below */
- if (!_validate_inner(code+1, code+skip-2, self))
+ if (!_validate_inner(code+1, code+skip-2, groups))
FAIL;
code += skip-2;
GET_OP;
@@ -1982,19 +1962,18 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, PatternObject *self)
}
static int
-_validate_outer(SRE_CODE *code, SRE_CODE *end, PatternObject *self)
+_validate_outer(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
{
- if (self->groups < 0 || (size_t)self->groups > SRE_MAXGROUPS ||
- self->repeat_count < 0 ||
+ if (groups < 0 || (size_t)groups > SRE_MAXGROUPS ||
code >= end || end[-1] != SRE_OP_SUCCESS)
FAIL;
- return _validate_inner(code, end-1, self);
+ return _validate_inner(code, end-1, groups);
}
static int
_validate(PatternObject *self)
{
- if (!_validate_outer(self->code, self->code+self->codesize, self))
+ if (!_validate_outer(self->code, self->code+self->codesize, self->groups))
{
PyErr_SetString(PyExc_RuntimeError, "invalid SRE code");
return 0;
diff --git a/Modules/_sre/sre.h b/Modules/_sre/sre.h
index aff064d..52ae3e1 100644
--- a/Modules/_sre/sre.h
+++ b/Modules/_sre/sre.h
@@ -29,8 +29,6 @@ typedef struct {
Py_ssize_t groups; /* must be first! */
PyObject* groupindex; /* dict */
PyObject* indexgroup; /* tuple */
- /* the number of REPEATs */
- Py_ssize_t repeat_count;
/* compatibility */
PyObject* pattern; /* pattern source (or None) */
int flags; /* flags used when compiling pattern source */
@@ -85,8 +83,6 @@ typedef struct {
size_t data_stack_base;
/* current repeat context */
SRE_REPEAT *repeat;
- /* repeat contexts array */
- SRE_REPEAT *repeats_array;
} SRE_STATE;
typedef struct {
diff --git a/Modules/_sre/sre_constants.h b/Modules/_sre/sre_constants.h
index 590d5be..c633514 100644
--- a/Modules/_sre/sre_constants.h
+++ b/Modules/_sre/sre_constants.h
@@ -11,7 +11,7 @@
* See the sre.c file for information on usage and redistribution.
*/
-#define SRE_MAGIC 20220423
+#define SRE_MAGIC 20220615
#define SRE_OP_FAILURE 0
#define SRE_OP_SUCCESS 1
#define SRE_OP_ANY 2
diff --git a/Modules/_sre/sre_lib.h b/Modules/_sre/sre_lib.h
index 1e5b501..fb4c18b 100644
--- a/Modules/_sre/sre_lib.h
+++ b/Modules/_sre/sre_lib.h
@@ -1079,12 +1079,17 @@ dispatch:
by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
/* <REPEAT> <skip> <1=min> <2=max> item <UNTIL> tail */
- TRACE(("|%p|%p|REPEAT %d %d %d\n", pattern, ptr,
- pattern[1], pattern[2], pattern[3]));
-
- /* install repeat context */
- ctx->u.rep = &state->repeats_array[pattern[3]];
+ TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
+ pattern[1], pattern[2]));
+ /* install new repeat context */
+ /* TODO(https://github.com/python/cpython/issues/67877): Fix this
+ * potential memory leak. */
+ ctx->u.rep = (SRE_REPEAT*) PyObject_Malloc(sizeof(*ctx->u.rep));
+ if (!ctx->u.rep) {
+ PyErr_NoMemory();
+ RETURN_FAILURE;
+ }
ctx->u.rep->count = -1;
ctx->u.rep->pattern = pattern;
ctx->u.rep->prev = state->repeat;
@@ -1094,6 +1099,7 @@ dispatch:
state->ptr = ptr;
DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
state->repeat = ctx->u.rep->prev;
+ PyObject_Free(ctx->u.rep);
if (ret) {
RETURN_ON_ERROR(ret);
@@ -1103,8 +1109,7 @@ dispatch:
TARGET(SRE_OP_MAX_UNTIL):
/* maximizing repeat */
- /* <REPEAT> <skip> <1=min> <2=max>
- <3=repeat_index> item <MAX_UNTIL> tail */
+ /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
/* FIXME: we probably need to deal with zero-width
matches in here... */
@@ -1124,7 +1129,7 @@ dispatch:
/* not enough matches */
ctx->u.rep->count = ctx->count;
DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
- ctx->u.rep->pattern+4);
+ ctx->u.rep->pattern+3);
if (ret) {
RETURN_ON_ERROR(ret);
RETURN_SUCCESS;
@@ -1146,7 +1151,7 @@ dispatch:
DATA_PUSH(&ctx->u.rep->last_ptr);
ctx->u.rep->last_ptr = state->ptr;
DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
- ctx->u.rep->pattern+4);
+ ctx->u.rep->pattern+3);
DATA_POP(&ctx->u.rep->last_ptr);
if (ret) {
MARK_POP_DISCARD(ctx->lastmark);
@@ -1171,8 +1176,7 @@ dispatch:
TARGET(SRE_OP_MIN_UNTIL):
/* minimizing repeat */
- /* <REPEAT> <skip> <1=min> <2=max>
- <3=repeat_index> item <MIN_UNTIL> tail */
+ /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
ctx->u.rep = state->repeat;
if (!ctx->u.rep)
@@ -1189,7 +1193,7 @@ dispatch:
/* not enough matches */
ctx->u.rep->count = ctx->count;
DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
- ctx->u.rep->pattern+4);
+ ctx->u.rep->pattern+3);
if (ret) {
RETURN_ON_ERROR(ret);
RETURN_SUCCESS;
@@ -1232,7 +1236,7 @@ dispatch:
DATA_PUSH(&ctx->u.rep->last_ptr);
ctx->u.rep->last_ptr = state->ptr;
DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
- ctx->u.rep->pattern+4);
+ ctx->u.rep->pattern+3);
DATA_POP(&ctx->u.rep->last_ptr);
if (ret) {
RETURN_ON_ERROR(ret);