From 87e7cb09e4258c4deb01a07dc52c1021907195d7 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Fri, 21 Jul 2023 08:32:42 -0600 Subject: gh-105699: Fix an Interned Strings Crasher (gh-106930) A static (process-global) str object must only have its "interned" state cleared when no longer interned in any interpreters. They are the only ones that can be shared by interpreters so we don't have to worry about any other str objects. We trigger clearing the state with the main interpreter, since no other interpreters may exist at that point and _PyUnicode_ClearInterned() is only called during interpreter finalization. We do not address here the fact that a string will only be interned in the first interpreter that interns it. In any subsequent interpreters str.state.interned is already set so _PyUnicode_InternInPlace() will skip it. That needs to be addressed separately from fixing the crasher. --- .../2023-07-20-15-15-57.gh-issue-105699.DdqHFg.rst | 3 +++ Objects/unicodeobject.c | 13 ++++++++++++- 2 files changed, 15 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2023-07-20-15-15-57.gh-issue-105699.DdqHFg.rst diff --git a/Misc/NEWS.d/next/Core and Builtins/2023-07-20-15-15-57.gh-issue-105699.DdqHFg.rst b/Misc/NEWS.d/next/Core and Builtins/2023-07-20-15-15-57.gh-issue-105699.DdqHFg.rst new file mode 100644 index 0000000..4a257c6 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2023-07-20-15-15-57.gh-issue-105699.DdqHFg.rst @@ -0,0 +1,3 @@ +Python no longer crashes due an infrequent race when initialzing +per-interpreter interned strings. The crash would manifest when the +interpreter was finalized. diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index f543c0a..e8e8cab 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -14818,6 +14818,7 @@ _PyUnicode_ClearInterned(PyInterpreterState *interp) PyObject *s, *ignored_value; while (PyDict_Next(interned, &pos, &s, &ignored_value)) { assert(PyUnicode_IS_READY(s)); + int shared = 0; switch (PyUnicode_CHECK_INTERNED(s)) { case SSTATE_INTERNED_IMMORTAL: // Skip the Immortal Instance check and restore @@ -14829,6 +14830,14 @@ _PyUnicode_ClearInterned(PyInterpreterState *interp) #endif break; case SSTATE_INTERNED_IMMORTAL_STATIC: + /* It is shared between interpreters, so we should unmark it + only when this is the last interpreter in which it's + interned. We immortalize all the statically initialized + strings during startup, so we can rely on the + main interpreter to be the last one. */ + if (!_Py_IsMainInterpreter(interp)) { + shared = 1; + } break; case SSTATE_INTERNED_MORTAL: /* fall through */ @@ -14837,7 +14846,9 @@ _PyUnicode_ClearInterned(PyInterpreterState *interp) default: Py_UNREACHABLE(); } - _PyUnicode_STATE(s).interned = SSTATE_NOT_INTERNED; + if (!shared) { + _PyUnicode_STATE(s).interned = SSTATE_NOT_INTERNED; + } } #ifdef INTERNED_STATS fprintf(stderr, -- cgit v0.12