From e728303532168efab7694c55c82ea19b18bf8385 Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Thu, 21 Mar 2024 10:01:16 -0400 Subject: gh-116522: Stop the world before fork() and during shutdown (#116607) This changes the free-threaded build to perform a stop-the-world pause before deleting other thread states when forking and during shutdown. This fixes some crashes when using multiprocessing and during shutdown when running with `PYTHON_GIL=0`. This also changes `PyOS_BeforeFork` to acquire the runtime lock (i.e., `HEAD_LOCK(&_PyRuntime)`) before forking to ensure that data protected by the runtime lock (and not just the GIL or stop-the-world) is in a consistent state before forking. --- Modules/posixmodule.c | 28 +++++++++++++++++++++------- Python/pylifecycle.c | 3 +++ Python/pystate.c | 6 ++++++ 3 files changed, 30 insertions(+), 7 deletions(-) diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c index 2498b61..644d4ba 100644 --- a/Modules/posixmodule.c +++ b/Modules/posixmodule.c @@ -613,11 +613,16 @@ PyOS_BeforeFork(void) run_at_forkers(interp->before_forkers, 1); _PyImport_AcquireLock(interp); + _PyEval_StopTheWorldAll(&_PyRuntime); + HEAD_LOCK(&_PyRuntime); } void PyOS_AfterFork_Parent(void) { + HEAD_UNLOCK(&_PyRuntime); + _PyEval_StartTheWorldAll(&_PyRuntime); + PyInterpreterState *interp = _PyInterpreterState_GET(); if (_PyImport_ReleaseLock(interp) <= 0) { Py_FatalError("failed releasing import lock after fork"); @@ -632,6 +637,7 @@ PyOS_AfterFork_Child(void) PyStatus status; _PyRuntimeState *runtime = &_PyRuntime; + // re-creates runtime->interpreters.mutex (HEAD_UNLOCK) status = _PyRuntimeState_ReInitThreads(runtime); if (_PyStatus_EXCEPTION(status)) { goto fatal_error; @@ -7731,10 +7737,15 @@ os_register_at_fork_impl(PyObject *module, PyObject *before, // running in the process. Best effort, silent if unable to count threads. // Constraint: Quick. Never overcounts. Never leaves an error set. // -// This code might do an import, thus acquiring the import lock, which -// PyOS_BeforeFork() also does. As this should only be called from -// the parent process, it is in the same thread so that works. -static void warn_about_fork_with_threads(const char* name) { +// This should only be called from the parent process after +// PyOS_AfterFork_Parent(). +static void +warn_about_fork_with_threads(const char* name) +{ + // It's not safe to issue the warning while the world is stopped, because + // other threads might be holding locks that we need, which would deadlock. + assert(!_PyRuntime.stoptheworld.world_stopped); + // TODO: Consider making an `os` module API to return the current number // of threads in the process. That'd presumably use this platform code but // raise an error rather than using the inaccurate fallback. @@ -7858,9 +7869,10 @@ os_fork1_impl(PyObject *module) /* child: this clobbers and resets the import lock. */ PyOS_AfterFork_Child(); } else { - warn_about_fork_with_threads("fork1"); /* parent: release the import lock. */ PyOS_AfterFork_Parent(); + // After PyOS_AfterFork_Parent() starts the world to avoid deadlock. + warn_about_fork_with_threads("fork1"); } if (pid == -1) { errno = saved_errno; @@ -7906,9 +7918,10 @@ os_fork_impl(PyObject *module) /* child: this clobbers and resets the import lock. */ PyOS_AfterFork_Child(); } else { - warn_about_fork_with_threads("fork"); /* parent: release the import lock. */ PyOS_AfterFork_Parent(); + // After PyOS_AfterFork_Parent() starts the world to avoid deadlock. + warn_about_fork_with_threads("fork"); } if (pid == -1) { errno = saved_errno; @@ -8737,9 +8750,10 @@ os_forkpty_impl(PyObject *module) /* child: this clobbers and resets the import lock. */ PyOS_AfterFork_Child(); } else { - warn_about_fork_with_threads("forkpty"); /* parent: release the import lock. */ PyOS_AfterFork_Parent(); + // After PyOS_AfterFork_Parent() starts the world to avoid deadlock. + warn_about_fork_with_threads("forkpty"); } if (pid == -1) { return posix_error(); diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c index 3a2c0a4..bc76822 100644 --- a/Python/pylifecycle.c +++ b/Python/pylifecycle.c @@ -1911,6 +1911,9 @@ Py_FinalizeEx(void) int malloc_stats = tstate->interp->config.malloc_stats; #endif + /* Ensure that remaining threads are detached */ + _PyEval_StopTheWorldAll(runtime); + /* Remaining daemon threads will automatically exit when they attempt to take the GIL (ex: PyEval_RestoreThread()). */ _PyInterpreterState_SetFinalizing(tstate->interp, tstate); diff --git a/Python/pystate.c b/Python/pystate.c index eedcb92..5a334e8 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -1692,6 +1692,10 @@ _PyThreadState_DeleteExcept(PyThreadState *tstate) PyInterpreterState *interp = tstate->interp; _PyRuntimeState *runtime = interp->runtime; +#ifdef Py_GIL_DISABLED + assert(runtime->stoptheworld.world_stopped); +#endif + HEAD_LOCK(runtime); /* Remove all thread states, except tstate, from the linked list of thread states. This will allow calling PyThreadState_Clear() @@ -1710,6 +1714,8 @@ _PyThreadState_DeleteExcept(PyThreadState *tstate) interp->threads.head = tstate; HEAD_UNLOCK(runtime); + _PyEval_StartTheWorldAll(runtime); + /* Clear and deallocate all stale thread states. Even if this executes Python code, we should be safe since it executes in the current thread, not one of the stale threads. */ -- cgit v0.12