summaryrefslogtreecommitdiffstats
path: root/Modules
diff options
context:
space:
mode:
authorSerhiy Storchaka <storchaka@gmail.com>2024-11-18 09:53:45 (GMT)
committerGitHub <noreply@github.com>2024-11-18 09:53:45 (GMT)
commit7538e7f5696408fa0aa02fce8a413a7dfac76a04 (patch)
tree80987d19e109f6f6f544972cfdfd415b3248372b /Modules
parent3938fd60c0c88891b213097380aeea91a45bcd77 (diff)
downloadcpython-7538e7f5696408fa0aa02fce8a413a7dfac76a04.zip
cpython-7538e7f5696408fa0aa02fce8a413a7dfac76a04.tar.gz
cpython-7538e7f5696408fa0aa02fce8a413a7dfac76a04.tar.bz2
gh-67877: Fix memory leaks in terminated RE matching (GH-126840)
If the SRE(match) function terminates abruptly, either because of a signal or because a memory allocation fails, the allocated SRE_REPEAT blocks might never be released. Co-authored-by: <wjssz@users.noreply.github.com>
Diffstat (limited to 'Modules')
-rw-r--r--Modules/_sre/clinic/sre.c.h44
-rw-r--r--Modules/_sre/sre.c132
-rw-r--r--Modules/_sre/sre.h17
-rw-r--r--Modules/_sre/sre_lib.h26
4 files changed, 205 insertions, 14 deletions
diff --git a/Modules/_sre/clinic/sre.c.h b/Modules/_sre/clinic/sre.c.h
index e287f3d..87e4785 100644
--- a/Modules/_sre/clinic/sre.c.h
+++ b/Modules/_sre/clinic/sre.c.h
@@ -985,6 +985,44 @@ PyDoc_STRVAR(_sre_SRE_Pattern___deepcopy____doc__,
#define _SRE_SRE_PATTERN___DEEPCOPY___METHODDEF \
{"__deepcopy__", (PyCFunction)_sre_SRE_Pattern___deepcopy__, METH_O, _sre_SRE_Pattern___deepcopy____doc__},
+#if defined(Py_DEBUG)
+
+PyDoc_STRVAR(_sre_SRE_Pattern__fail_after__doc__,
+"_fail_after($self, count, exception, /)\n"
+"--\n"
+"\n"
+"For debugging.");
+
+#define _SRE_SRE_PATTERN__FAIL_AFTER_METHODDEF \
+ {"_fail_after", _PyCFunction_CAST(_sre_SRE_Pattern__fail_after), METH_FASTCALL, _sre_SRE_Pattern__fail_after__doc__},
+
+static PyObject *
+_sre_SRE_Pattern__fail_after_impl(PatternObject *self, int count,
+ PyObject *exception);
+
+static PyObject *
+_sre_SRE_Pattern__fail_after(PatternObject *self, PyObject *const *args, Py_ssize_t nargs)
+{
+ PyObject *return_value = NULL;
+ int count;
+ PyObject *exception;
+
+ if (!_PyArg_CheckPositional("_fail_after", nargs, 2, 2)) {
+ goto exit;
+ }
+ count = PyLong_AsInt(args[0]);
+ if (count == -1 && PyErr_Occurred()) {
+ goto exit;
+ }
+ exception = args[1];
+ return_value = _sre_SRE_Pattern__fail_after_impl(self, count, exception);
+
+exit:
+ return return_value;
+}
+
+#endif /* defined(Py_DEBUG) */
+
PyDoc_STRVAR(_sre_compile__doc__,
"compile($module, /, pattern, flags, code, groups, groupindex,\n"
" indexgroup)\n"
@@ -1474,4 +1512,8 @@ _sre_SRE_Scanner_search(ScannerObject *self, PyTypeObject *cls, PyObject *const
}
return _sre_SRE_Scanner_search_impl(self, cls);
}
-/*[clinic end generated code: output=afaa301d55957cb0 input=a9049054013a1b77]*/
+
+#ifndef _SRE_SRE_PATTERN__FAIL_AFTER_METHODDEF
+ #define _SRE_SRE_PATTERN__FAIL_AFTER_METHODDEF
+#endif /* !defined(_SRE_SRE_PATTERN__FAIL_AFTER_METHODDEF) */
+/*[clinic end generated code: output=f8cb77f2261f0b2e input=a9049054013a1b77]*/
diff --git a/Modules/_sre/sre.c b/Modules/_sre/sre.c
index 2c86f88..36f542d 100644
--- a/Modules/_sre/sre.c
+++ b/Modules/_sre/sre.c
@@ -267,6 +267,85 @@ data_stack_grow(SRE_STATE* state, Py_ssize_t size)
return 0;
}
+/* Memory pool functions for SRE_REPEAT. Pooling avoids memory
+ leaks when the SRE(match) function terminates abruptly.
+ state->repeat_pool_used is a doubly-linked list, so that we
+ can remove an SRE_REPEAT node from it.
+ state->repeat_pool_unused is a singly-linked list; we put/get
+ nodes at its head. */
+static SRE_REPEAT *
+repeat_pool_malloc(SRE_STATE *state)
+{
+ SRE_REPEAT *repeat;
+
+ if (state->repeat_pool_unused) {
+ /* reuse a recycled node: pop the head of the unused pool (singly-linked list) */
+ repeat = state->repeat_pool_unused;
+ state->repeat_pool_unused = repeat->pool_next;
+ }
+ else {
+ repeat = PyMem_Malloc(sizeof(SRE_REPEAT)); /* nothing to reuse: allocate a fresh node */
+ if (!repeat) {
+ return NULL; /* NULL tells the caller the allocation failed; no error is raised here */
+ }
+ }
+
+ /* track the node: push it onto the head of the used pool (doubly-linked list) */
+ SRE_REPEAT *temp = state->repeat_pool_used;
+ if (temp) {
+ temp->pool_prev = repeat;
+ }
+ repeat->pool_prev = NULL;
+ repeat->pool_next = temp;
+ state->repeat_pool_used = repeat;
+
+ return repeat; /* only the pool links are initialized; the caller sets the other fields */
+}
+
+static void
+repeat_pool_free(SRE_STATE *state, SRE_REPEAT *repeat)
+{
+ SRE_REPEAT *prev = repeat->pool_prev;
+ SRE_REPEAT *next = repeat->pool_next;
+
+ /* unlink the node from the used pool (doubly-linked list) */
+ if (prev) {
+ prev->pool_next = next;
+ }
+ else {
+ state->repeat_pool_used = next; /* the node was the head of the used pool */
+ }
+ if (next) {
+ next->pool_prev = prev;
+ }
+
+ /* recycle instead of freeing: push onto the head of the unused pool (singly-linked list) */
+ repeat->pool_next = state->repeat_pool_unused;
+ state->repeat_pool_unused = repeat;
+}
+
+static void
+repeat_pool_clear(SRE_STATE *state)
+{
+ /* free the nodes still in the used pool, i.e. never handed back via repeat_pool_free() */
+ SRE_REPEAT *next = state->repeat_pool_used;
+ state->repeat_pool_used = NULL;
+ while (next) {
+ SRE_REPEAT *temp = next;
+ next = temp->pool_next; /* read the link before the node is freed */
+ PyMem_Free(temp);
+ }
+
+ /* free the recycled nodes waiting in the unused pool */
+ next = state->repeat_pool_unused;
+ state->repeat_pool_unused = NULL;
+ while (next) {
+ SRE_REPEAT *temp = next;
+ next = temp->pool_next; /* read the link before the node is freed */
+ PyMem_Free(temp);
+ }
+}
+
/* generate 8-bit version */
#define SRE_CHAR Py_UCS1
@@ -511,6 +590,11 @@ state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
state->pos = start;
state->endpos = end;
+#ifdef Py_DEBUG
+ state->fail_after_count = pattern->fail_after_count;
+ state->fail_after_exc = pattern->fail_after_exc; // borrowed ref
+#endif
+
return string;
err:
/* We add an explicit cast here because MSVC has a bug when
@@ -533,6 +617,8 @@ state_fini(SRE_STATE* state)
/* See above PyMem_Free() for why we explicitly cast here. */
PyMem_Free((void*) state->mark);
state->mark = NULL;
+ /* SRE_REPEAT pool */
+ repeat_pool_clear(state);
}
/* calculate offset from start of string */
@@ -619,6 +705,9 @@ pattern_traverse(PatternObject *self, visitproc visit, void *arg)
Py_VISIT(self->groupindex);
Py_VISIT(self->indexgroup);
Py_VISIT(self->pattern);
+#ifdef Py_DEBUG
+ Py_VISIT(self->fail_after_exc);
+#endif
return 0;
}
@@ -628,6 +717,9 @@ pattern_clear(PatternObject *self)
Py_CLEAR(self->groupindex);
Py_CLEAR(self->indexgroup);
Py_CLEAR(self->pattern);
+#ifdef Py_DEBUG
+ Py_CLEAR(self->fail_after_exc);
+#endif
return 0;
}
@@ -690,7 +782,7 @@ _sre_SRE_Pattern_match_impl(PatternObject *self, PyTypeObject *cls,
Py_ssize_t status;
PyObject *match;
- if (!state_init(&state, (PatternObject *)self, string, pos, endpos))
+ if (!state_init(&state, self, string, pos, endpos))
return NULL;
INIT_TRACE(&state);
@@ -1381,6 +1473,29 @@ _sre_SRE_Pattern___deepcopy__(PatternObject *self, PyObject *memo)
return Py_NewRef(self);
}
+#ifdef Py_DEBUG
+/*[clinic input]
+_sre.SRE_Pattern._fail_after
+
+ count: int
+ exception: object
+ /
+
+For debugging.
+[clinic start generated code]*/
+
+static PyObject *
+_sre_SRE_Pattern__fail_after_impl(PatternObject *self, int count,
+ PyObject *exception)
+/*[clinic end generated code: output=9a6bf12135ac50c2 input=ef80a45c66c5499d]*/
+{
+ self->fail_after_count = count; /* copied into SRE_STATE by state_init(); a negative value disables the simulated failure */
+ Py_INCREF(exception); /* take our own reference before storing the object */
+ Py_XSETREF(self->fail_after_exc, exception); /* store the new exception and drop the previously stored one, if any */
+ Py_RETURN_NONE;
+}
+#endif /* Py_DEBUG */
+
static PyObject *
pattern_repr(PatternObject *obj)
{
@@ -1506,6 +1621,10 @@ _sre_compile_impl(PyObject *module, PyObject *pattern, int flags,
self->pattern = NULL;
self->groupindex = NULL;
self->indexgroup = NULL;
+#ifdef Py_DEBUG
+ self->fail_after_count = -1;
+ self->fail_after_exc = NULL;
+#endif
self->codesize = n;
@@ -2604,7 +2723,8 @@ pattern_new_match(_sremodulestate* module_state,
if (!match)
return NULL;
- match->pattern = (PatternObject*)Py_NewRef(pattern);
+ Py_INCREF(pattern);
+ match->pattern = pattern;
match->string = Py_NewRef(state->string);
@@ -2740,7 +2860,7 @@ _sre_SRE_Scanner_match_impl(ScannerObject *self, PyTypeObject *cls)
return NULL;
}
- match = pattern_new_match(module_state, (PatternObject*) self->pattern,
+ match = pattern_new_match(module_state, self->pattern,
state, status);
if (status == 0)
@@ -2790,7 +2910,7 @@ _sre_SRE_Scanner_search_impl(ScannerObject *self, PyTypeObject *cls)
return NULL;
}
- match = pattern_new_match(module_state, (PatternObject*) self->pattern,
+ match = pattern_new_match(module_state, self->pattern,
state, status);
if (status == 0)
@@ -2826,7 +2946,8 @@ pattern_scanner(_sremodulestate *module_state,
return NULL;
}
- scanner->pattern = Py_NewRef(self);
+ Py_INCREF(self);
+ scanner->pattern = self;
PyObject_GC_Track(scanner);
return (PyObject*) scanner;
@@ -3020,6 +3141,7 @@ static PyMethodDef pattern_methods[] = {
_SRE_SRE_PATTERN_SCANNER_METHODDEF
_SRE_SRE_PATTERN___COPY___METHODDEF
_SRE_SRE_PATTERN___DEEPCOPY___METHODDEF
+ _SRE_SRE_PATTERN__FAIL_AFTER_METHODDEF
{"__class_getitem__", Py_GenericAlias, METH_O|METH_CLASS,
PyDoc_STR("See PEP 585")},
{NULL, NULL}
diff --git a/Modules/_sre/sre.h b/Modules/_sre/sre.h
index 83d89d5..42681c2 100644
--- a/Modules/_sre/sre.h
+++ b/Modules/_sre/sre.h
@@ -34,6 +34,11 @@ typedef struct {
int flags; /* flags used when compiling pattern source */
PyObject *weakreflist; /* List of weak references */
int isbytes; /* pattern type (1 - bytes, 0 - string, -1 - None) */
+#ifdef Py_DEBUG
+ /* for simulation of user interruption */
+ int fail_after_count;
+ PyObject *fail_after_exc;
+#endif
/* pattern code */
Py_ssize_t codesize;
SRE_CODE code[1];
@@ -68,6 +73,9 @@ typedef struct SRE_REPEAT_T {
const SRE_CODE* pattern; /* points to REPEAT operator arguments */
const void* last_ptr; /* helper to check for infinite loops */
struct SRE_REPEAT_T *prev; /* points to previous repeat context */
+ /* for SRE_REPEAT pool */
+ struct SRE_REPEAT_T *pool_prev;
+ struct SRE_REPEAT_T *pool_next;
} SRE_REPEAT;
typedef struct {
@@ -95,12 +103,19 @@ typedef struct {
size_t data_stack_base;
/* current repeat context */
SRE_REPEAT *repeat;
+ /* SRE_REPEAT pool */
+ SRE_REPEAT *repeat_pool_used;
+ SRE_REPEAT *repeat_pool_unused;
unsigned int sigcount;
+#ifdef Py_DEBUG
+ int fail_after_count;
+ PyObject *fail_after_exc;
+#endif
} SRE_STATE;
typedef struct {
PyObject_HEAD
- PyObject* pattern;
+ PatternObject* pattern;
SRE_STATE state;
int executing;
} ScannerObject;
diff --git a/Modules/_sre/sre_lib.h b/Modules/_sre/sre_lib.h
index 97fbb0a..0c93f51 100644
--- a/Modules/_sre/sre_lib.h
+++ b/Modules/_sre/sre_lib.h
@@ -560,13 +560,28 @@ typedef struct {
Py_ssize_t last_ctx_pos;
} SRE(match_context);
-#define MAYBE_CHECK_SIGNALS \
+#define _MAYBE_CHECK_SIGNALS \
do { \
if ((0 == (++sigcount & 0xfff)) && PyErr_CheckSignals()) { \
RETURN_ERROR(SRE_ERROR_INTERRUPTED); \
} \
} while (0)
+#ifdef Py_DEBUG
+# define MAYBE_CHECK_SIGNALS \
+ do { \
+ _MAYBE_CHECK_SIGNALS; \
+ if (state->fail_after_count >= 0) { /* negative count: no simulated failure */ \
+ if (state->fail_after_count-- == 0) { /* fires once: the post-decrement leaves the count at -1 */ \
+ PyErr_SetNone(state->fail_after_exc); \
+ RETURN_ERROR(SRE_ERROR_INTERRUPTED); \
+ } \
+ } \
+ } while (0)
+#else
+# define MAYBE_CHECK_SIGNALS _MAYBE_CHECK_SIGNALS
+#endif /* Py_DEBUG */
+
#ifdef HAVE_COMPUTED_GOTOS
#ifndef USE_COMPUTED_GOTOS
#define USE_COMPUTED_GOTOS 1
@@ -1120,12 +1135,9 @@ dispatch:
pattern[1], pattern[2]));
/* install new repeat context */
- /* TODO(https://github.com/python/cpython/issues/67877): Fix this
- * potential memory leak. */
- ctx->u.rep = (SRE_REPEAT*) PyMem_Malloc(sizeof(*ctx->u.rep));
+ ctx->u.rep = repeat_pool_malloc(state);
if (!ctx->u.rep) {
- PyErr_NoMemory();
- RETURN_FAILURE;
+ RETURN_ERROR(SRE_ERROR_MEMORY);
}
ctx->u.rep->count = -1;
ctx->u.rep->pattern = pattern;
@@ -1136,7 +1148,7 @@ dispatch:
state->ptr = ptr;
DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
state->repeat = ctx->u.rep->prev;
- PyMem_Free(ctx->u.rep);
+ repeat_pool_free(state, ctx->u.rep);
if (ret) {
RETURN_ON_ERROR(ret);