summaryrefslogtreecommitdiffstats
path: root/Objects
diff options
context:
space:
mode:
authorMiss Islington (bot) <31488909+miss-islington@users.noreply.github.com>2024-12-03 17:26:25 (GMT)
committerGitHub <noreply@github.com>2024-12-03 17:26:25 (GMT)
commit49da170709dd47c49bfc1e5d4b5d46122a049a3b (patch)
treeffe9d0289f8a1098ec779d9c97371ab0e38771bd /Objects
parentb49e902b81a0ead84e2002f94a2a2b2ae9b09ada (diff)
downloadcpython-49da170709dd47c49bfc1e5d4b5d46122a049a3b.zip
cpython-49da170709dd47c49bfc1e5d4b5d46122a049a3b.tar.gz
cpython-49da170709dd47c49bfc1e5d4b5d46122a049a3b.tar.bz2
[3.12] gh-116510: Fix a Crash Due to Shared Immortal Interned Strings (gh-125205)
Fix a crash caused by immortal interned strings being shared between sub-interpreters that use basic single-phase init. In that case, the string can be used by an interpreter that outlives the interpreter that created and interned it. For interpreters that share obmalloc state, also share the interned dict with the main interpreter. This is an un-revert of gh-124646 that then addresses the Py_TRACE_REFS failures identified by gh-124785 (i.e. backporting gh-125709 too). (cherry picked from commit f2cb39947093feda3ff85b8dc820922cc5e5f954, AKA gh-124865) Co-authored-by: Eric Snow <ericsnowcurrently@gmail.com>
Diffstat (limited to 'Objects')
-rw-r--r--Objects/object.c22
-rw-r--r--Objects/unicodeobject.c48
2 files changed, 60 insertions, 10 deletions
diff --git a/Objects/object.c b/Objects/object.c
index 6b2e0ae..7067191 100644
--- a/Objects/object.c
+++ b/Objects/object.c
@@ -159,11 +159,27 @@ _PyDebug_PrintTotalRefs(void) {
#ifdef Py_TRACE_REFS
-#define REFCHAIN(interp) &interp->object_state.refchain
+#define REFCHAIN(interp) interp->object_state.refchain
+
+static inline int
+has_own_refchain(PyInterpreterState *interp)
+{
+ if (interp->feature_flags & Py_RTFLAGS_USE_MAIN_OBMALLOC) {
+ return (_Py_IsMainInterpreter(interp)
+ || _PyInterpreterState_Main() == NULL);
+ }
+ return 1;
+}
static inline void
init_refchain(PyInterpreterState *interp)
{
+ if (!has_own_refchain(interp)) {
+ // Legacy subinterpreters share a refchain with the main interpreter.
+ REFCHAIN(interp) = REFCHAIN(_PyInterpreterState_Main());
+ return;
+ }
+ REFCHAIN(interp) = &interp->object_state._refchain_obj;
PyObject *refchain = REFCHAIN(interp);
refchain->_ob_prev = refchain;
refchain->_ob_next = refchain;
@@ -2010,9 +2026,7 @@ void
_PyObject_InitState(PyInterpreterState *interp)
{
#ifdef Py_TRACE_REFS
- if (!_Py_IsMainInterpreter(interp)) {
- init_refchain(interp);
- }
+ init_refchain(interp);
#endif
}
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index ba11351..c885be5 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -287,13 +287,37 @@ hashtable_unicode_compare(const void *key1, const void *key2)
}
}
+/* Return true if this interpreter should share the main interpreter's
+ intern_dict. That's important for interpreters which load basic
+ single-phase init extension modules (m_size == -1). There could be interned
+ immortal strings that are shared between interpreters, due to the
+ PyDict_Update(mdict, m_copy) call in import_find_extension().
+
+ It's not safe to deallocate those strings until all interpreters that
+ potentially use them are freed. By storing them in the main interpreter, we
+ ensure they get freed after all other interpreters are freed.
+*/
+static bool
+has_shared_intern_dict(PyInterpreterState *interp)
+{
+ PyInterpreterState *main_interp = _PyInterpreterState_Main();
+ return interp != main_interp && interp->feature_flags & Py_RTFLAGS_USE_MAIN_OBMALLOC;
+}
+
static int
init_interned_dict(PyInterpreterState *interp)
{
assert(get_interned_dict(interp) == NULL);
- PyObject *interned = interned = PyDict_New();
- if (interned == NULL) {
- return -1;
+ PyObject *interned;
+ if (has_shared_intern_dict(interp)) {
+ interned = get_interned_dict(_PyInterpreterState_Main());
+ Py_INCREF(interned);
+ }
+ else {
+ interned = PyDict_New();
+ if (interned == NULL) {
+ return -1;
+ }
}
_Py_INTERP_CACHED_OBJECT(interp, interned_strings) = interned;
return 0;
@@ -304,7 +328,10 @@ clear_interned_dict(PyInterpreterState *interp)
{
PyObject *interned = get_interned_dict(interp);
if (interned != NULL) {
- PyDict_Clear(interned);
+ if (!has_shared_intern_dict(interp)) {
+ // only clear if the dict belongs to this interpreter
+ PyDict_Clear(interned);
+ }
Py_DECREF(interned);
_Py_INTERP_CACHED_OBJECT(interp, interned_strings) = NULL;
}
@@ -15152,6 +15179,13 @@ _PyUnicode_ClearInterned(PyInterpreterState *interp)
}
assert(PyDict_CheckExact(interned));
+ if (has_shared_intern_dict(interp)) {
+ // the dict doesn't belong to this interpreter, skip the debug
+ // checks on it and just clear the pointer to it
+ clear_interned_dict(interp);
+ return;
+ }
+
#ifdef INTERNED_STATS
fprintf(stderr, "releasing %zd interned strings\n",
PyDict_GET_SIZE(interned));
@@ -15670,8 +15704,10 @@ _PyUnicode_Fini(PyInterpreterState *interp)
{
struct _Py_unicode_state *state = &interp->unicode;
- // _PyUnicode_ClearInterned() must be called before _PyUnicode_Fini()
- assert(get_interned_dict(interp) == NULL);
+ if (!has_shared_intern_dict(interp)) {
+ // _PyUnicode_ClearInterned() must be called before _PyUnicode_Fini()
+ assert(get_interned_dict(interp) == NULL);
+ }
_PyUnicode_FiniEncodings(&state->fs_codec);