diff options
| author | Brett Simmers <swtaarrs@users.noreply.github.com> | 2024-02-20 14:57:48 (GMT) |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-02-20 14:57:48 (GMT) |
| commit | 0749244d13412d7cb5b53d834f586f2198f5b9a6 (patch) | |
| tree | 06387c5b41cd14cdf230f71eab6fd92873c3d5a9 /Python/ceval_gil.c | |
| parent | e71468ba4f5fb2da0cefe9e923b01811cb53fb5f (diff) | |
| download | cpython-0749244d13412d7cb5b53d834f586f2198f5b9a6.zip cpython-0749244d13412d7cb5b53d834f586f2198f5b9a6.tar.gz cpython-0749244d13412d7cb5b53d834f586f2198f5b9a6.tar.bz2 | |
gh-112175: Add `eval_breaker` to `PyThreadState` (#115194)
This change adds an `eval_breaker` field to `PyThreadState`. The primary
motivation is for performance in free-threaded builds: with thread-local eval
breakers, we can stop a specific thread (e.g., for an async exception) without
interrupting other threads.
The source of truth for the global instrumentation version is stored in the
`instrumentation_version` field in PyInterpreterState. Threads usually read the
version from their local `eval_breaker`, where it continues to be colocated
with the eval breaker bits.
Diffstat (limited to 'Python/ceval_gil.c')
| -rw-r--r-- | Python/ceval_gil.c | 243 |
1 files changed, 150 insertions, 93 deletions
diff --git a/Python/ceval_gil.c b/Python/ceval_gil.c index deb9741..f5c4430 100644 --- a/Python/ceval_gil.c +++ b/Python/ceval_gil.c @@ -56,60 +56,52 @@ #define _Py_atomic_load_relaxed_int32(ATOMIC_VAL) _Py_atomic_load_relaxed(ATOMIC_VAL) #endif -/* bpo-40010: eval_breaker should be recomputed if there - is a pending signal: signal received by another thread which cannot - handle signals. - Similarly, we set CALLS_TO_DO and ASYNC_EXCEPTION to match the thread. -*/ +// Atomically copy the bits indicated by mask between two values. static inline void -update_eval_breaker_from_thread(PyInterpreterState *interp, PyThreadState *tstate) +copy_eval_breaker_bits(uintptr_t *from, uintptr_t *to, uintptr_t mask) { - if (tstate == NULL) { + uintptr_t from_bits = _Py_atomic_load_uintptr_relaxed(from) & mask; + uintptr_t old_value = _Py_atomic_load_uintptr_relaxed(to); + uintptr_t to_bits = old_value & mask; + if (from_bits == to_bits) { return; } - if (_Py_IsMainThread()) { - int32_t calls_to_do = _Py_atomic_load_int32_relaxed( - &_PyRuntime.ceval.pending_mainthread.calls_to_do); - if (calls_to_do) { - _Py_set_eval_breaker_bit(interp, _PY_CALLS_TO_DO_BIT, 1); - } - if (_Py_ThreadCanHandleSignals(interp)) { - if (_Py_atomic_load_int(&_PyRuntime.signals.is_tripped)) { - _Py_set_eval_breaker_bit(interp, _PY_SIGNALS_PENDING_BIT, 1); - } - } - } - if (tstate->async_exc != NULL) { - _Py_set_eval_breaker_bit(interp, _PY_ASYNC_EXCEPTION_BIT, 1); - } + uintptr_t new_value; + do { + new_value = (old_value & ~mask) | from_bits; + } while (!_Py_atomic_compare_exchange_uintptr(to, &old_value, new_value)); } +// When attaching a thread, set the global instrumentation version and +// _PY_CALLS_TO_DO_BIT from the current state of the interpreter. static inline void -SET_GIL_DROP_REQUEST(PyInterpreterState *interp) +update_eval_breaker_for_thread(PyInterpreterState *interp, PyThreadState *tstate) { - _Py_set_eval_breaker_bit(interp, _PY_GIL_DROP_REQUEST_BIT, 1); -} - - -static inline void -RESET_GIL_DROP_REQUEST(PyInterpreterState *interp) -{ - _Py_set_eval_breaker_bit(interp, _PY_GIL_DROP_REQUEST_BIT, 0); -} - - -static inline void -SIGNAL_PENDING_CALLS(PyInterpreterState *interp) -{ - _Py_set_eval_breaker_bit(interp, _PY_CALLS_TO_DO_BIT, 1); -} +#ifdef Py_GIL_DISABLED + // Free-threaded builds eagerly update the eval_breaker on *all* threads as + // needed, so this function doesn't apply. + return; +#endif + int32_t calls_to_do = _Py_atomic_load_int32_relaxed( + &interp->ceval.pending.calls_to_do); + if (calls_to_do) { + _Py_set_eval_breaker_bit(tstate, _PY_CALLS_TO_DO_BIT); + } + else if (_Py_IsMainThread()) { + calls_to_do = _Py_atomic_load_int32_relaxed( + &_PyRuntime.ceval.pending_mainthread.calls_to_do); + if (calls_to_do) { + _Py_set_eval_breaker_bit(tstate, _PY_CALLS_TO_DO_BIT); + } + } -static inline void -UNSIGNAL_PENDING_CALLS(PyInterpreterState *interp) -{ - _Py_set_eval_breaker_bit(interp, _PY_CALLS_TO_DO_BIT, 0); + // _PY_CALLS_TO_DO_BIT was derived from other state above, so the only bits + // we copy from our interpreter's state are the instrumentation version. + copy_eval_breaker_bits(&interp->ceval.instrumentation_version, + &tstate->eval_breaker, + ~_PY_EVAL_EVENTS_MASK); } /* @@ -254,13 +246,14 @@ drop_gil(PyInterpreterState *interp, PyThreadState *tstate) the GIL, and that's the only time we might delete the interpreter, so checking tstate first prevents the crash. See https://github.com/python/cpython/issues/104341. */ - if (tstate != NULL && _Py_eval_breaker_bit_is_set(interp, _PY_GIL_DROP_REQUEST_BIT)) { + if (tstate != NULL && + _Py_eval_breaker_bit_is_set(tstate, _PY_GIL_DROP_REQUEST_BIT)) { MUTEX_LOCK(gil->switch_mutex); /* Not switched yet => wait */ if (((PyThreadState*)_Py_atomic_load_ptr_relaxed(&gil->last_holder)) == tstate) { assert(_PyThreadState_CheckConsistency(tstate)); - RESET_GIL_DROP_REQUEST(tstate->interp); + _Py_unset_eval_breaker_bit(tstate, _PY_GIL_DROP_REQUEST_BIT); /* NOTE: if COND_WAIT does not atomically start waiting when releasing the mutex, another thread can run through, take the GIL and drop it again, and reset the condition @@ -321,6 +314,8 @@ take_gil(PyThreadState *tstate) _Py_atomic_load_int_relaxed(&gil->locked) && gil->switch_number == saved_switchnum) { + PyThreadState *holder_tstate = + (PyThreadState*)_Py_atomic_load_ptr_relaxed(&gil->last_holder); if (_PyThreadState_MustExit(tstate)) { MUTEX_UNLOCK(gil->mutex); // gh-96387: If the loop requested a drop request in a previous @@ -330,13 +325,13 @@ take_gil(PyThreadState *tstate) // may have to request again a drop request (iterate one more // time). if (drop_requested) { - RESET_GIL_DROP_REQUEST(interp); + _Py_unset_eval_breaker_bit(holder_tstate, _PY_GIL_DROP_REQUEST_BIT); } PyThread_exit_thread(); } assert(_PyThreadState_CheckConsistency(tstate)); - SET_GIL_DROP_REQUEST(interp); + _Py_set_eval_breaker_bit(holder_tstate, _PY_GIL_DROP_REQUEST_BIT); drop_requested = 1; } } @@ -369,13 +364,15 @@ take_gil(PyThreadState *tstate) in take_gil() while the main thread called wait_for_thread_shutdown() from Py_Finalize(). */ MUTEX_UNLOCK(gil->mutex); - drop_gil(interp, tstate); + /* Passing NULL to drop_gil() indicates that this thread is about to + terminate and will never hold the GIL again. */ + drop_gil(interp, NULL); PyThread_exit_thread(); } assert(_PyThreadState_CheckConsistency(tstate)); - RESET_GIL_DROP_REQUEST(interp); - update_eval_breaker_from_thread(interp, tstate); + _Py_unset_eval_breaker_bit(tstate, _PY_GIL_DROP_REQUEST_BIT); + update_eval_breaker_for_thread(interp, tstate); MUTEX_UNLOCK(gil->mutex); @@ -590,15 +587,6 @@ _PyEval_ReInitThreads(PyThreadState *tstate) } #endif -/* This function is used to signal that async exceptions are waiting to be - raised. */ - -void -_PyEval_SignalAsyncExc(PyInterpreterState *interp) -{ - _Py_set_eval_breaker_bit(interp, _PY_ASYNC_EXCEPTION_BIT, 1); -} - PyThreadState * PyEval_SaveThread(void) { @@ -646,11 +634,9 @@ PyEval_RestoreThread(PyThreadState *tstate) */ void -_PyEval_SignalReceived(PyInterpreterState *interp) +_PyEval_SignalReceived(void) { - if (_Py_ThreadCanHandleSignals(interp)) { - _Py_set_eval_breaker_bit(interp, _PY_SIGNALS_PENDING_BIT, 1); - } + _Py_set_eval_breaker_bit(_PyRuntime.main_tstate, _PY_SIGNALS_PENDING_BIT); } /* Push one item onto the queue while holding the lock. */ @@ -702,6 +688,26 @@ _pop_pending_call(struct _pending_calls *pending, } } +#ifndef Py_GIL_DISABLED +static void +signal_active_thread(PyInterpreterState *interp, uintptr_t bit) +{ + struct _gil_runtime_state *gil = interp->ceval.gil; + + // If a thread from the targeted interpreter is holding the GIL, signal + // that thread. Otherwise, the next thread to run from the targeted + // interpreter will have its bit set as part of taking the GIL. + MUTEX_LOCK(gil->mutex); + if (_Py_atomic_load_int_relaxed(&gil->locked)) { + PyThreadState *holder = (PyThreadState*)_Py_atomic_load_ptr_relaxed(&gil->last_holder); + if (holder->interp == interp) { + _Py_set_eval_breaker_bit(holder, bit); + } + } + MUTEX_UNLOCK(gil->mutex); +} +#endif + /* This implementation is thread-safe. It allows scheduling to be made from any thread, and even from an executing callback. @@ -711,10 +717,9 @@ int _PyEval_AddPendingCall(PyInterpreterState *interp, _Py_pending_call_func func, void *arg, int flags) { - assert(!(flags & _Py_PENDING_MAINTHREADONLY) - || _Py_IsMainInterpreter(interp)); struct _pending_calls *pending = &interp->ceval.pending; - if (flags & _Py_PENDING_MAINTHREADONLY) { + int main_only = (flags & _Py_PENDING_MAINTHREADONLY) != 0; + if (main_only) { /* The main thread only exists in the main interpreter. */ assert(_Py_IsMainInterpreter(interp)); pending = &_PyRuntime.ceval.pending_mainthread; @@ -724,8 +729,17 @@ _PyEval_AddPendingCall(PyInterpreterState *interp, int result = _push_pending_call(pending, func, arg, flags); PyMutex_Unlock(&pending->mutex); - /* signal main loop */ - SIGNAL_PENDING_CALLS(interp); + if (main_only) { + _Py_set_eval_breaker_bit(_PyRuntime.main_tstate, _PY_CALLS_TO_DO_BIT); + } + else { +#ifdef Py_GIL_DISABLED + _Py_set_eval_breaker_bit_all(interp, _PY_CALLS_TO_DO_BIT); +#else + signal_active_thread(interp, _PY_CALLS_TO_DO_BIT); +#endif + } + return result; } @@ -742,13 +756,13 @@ static int handle_signals(PyThreadState *tstate) { assert(_PyThreadState_CheckConsistency(tstate)); - _Py_set_eval_breaker_bit(tstate->interp, _PY_SIGNALS_PENDING_BIT, 0); + _Py_unset_eval_breaker_bit(tstate, _PY_SIGNALS_PENDING_BIT); if (!_Py_ThreadCanHandleSignals(tstate->interp)) { return 0; } if (_PyErr_CheckSignalsTstate(tstate) < 0) { /* On failure, re-schedule a call to handle_signals(). */ - _Py_set_eval_breaker_bit(tstate->interp, _PY_SIGNALS_PENDING_BIT, 1); + _Py_set_eval_breaker_bit(tstate, _PY_SIGNALS_PENDING_BIT); return -1; } return 0; @@ -783,9 +797,30 @@ _make_pending_calls(struct _pending_calls *pending) return 0; } +static void +signal_pending_calls(PyThreadState *tstate, PyInterpreterState *interp) +{ +#ifdef Py_GIL_DISABLED + _Py_set_eval_breaker_bit_all(interp, _PY_CALLS_TO_DO_BIT); +#else + _Py_set_eval_breaker_bit(tstate, _PY_CALLS_TO_DO_BIT); +#endif +} + +static void +unsignal_pending_calls(PyThreadState *tstate, PyInterpreterState *interp) +{ +#ifdef Py_GIL_DISABLED + _Py_unset_eval_breaker_bit_all(interp, _PY_CALLS_TO_DO_BIT); +#else + _Py_unset_eval_breaker_bit(tstate, _PY_CALLS_TO_DO_BIT); +#endif +} + static int -make_pending_calls(PyInterpreterState *interp) +make_pending_calls(PyThreadState *tstate) { + PyInterpreterState *interp = tstate->interp; struct _pending_calls *pending = &interp->ceval.pending; struct _pending_calls *pending_main = &_PyRuntime.ceval.pending_mainthread; @@ -811,12 +846,12 @@ make_pending_calls(PyInterpreterState *interp) /* unsignal before starting to call callbacks, so that any callback added in-between re-signals */ - UNSIGNAL_PENDING_CALLS(interp); + unsignal_pending_calls(tstate, interp); if (_make_pending_calls(pending) != 0) { pending->busy = 0; /* There might not be more calls to make, but we play it safe. */ - SIGNAL_PENDING_CALLS(interp); + signal_pending_calls(tstate, interp); return -1; } @@ -824,7 +859,7 @@ make_pending_calls(PyInterpreterState *interp) if (_make_pending_calls(pending_main) != 0) { pending->busy = 0; /* There might not be more calls to make, but we play it safe. */ - SIGNAL_PENDING_CALLS(interp); + signal_pending_calls(tstate, interp); return -1; } } @@ -834,12 +869,36 @@ make_pending_calls(PyInterpreterState *interp) } void +_Py_set_eval_breaker_bit_all(PyInterpreterState *interp, uintptr_t bit) +{ + _PyRuntimeState *runtime = &_PyRuntime; + + HEAD_LOCK(runtime); + for (PyThreadState *tstate = interp->threads.head; tstate != NULL; tstate = tstate->next) { + _Py_set_eval_breaker_bit(tstate, bit); + } + HEAD_UNLOCK(runtime); +} + +void +_Py_unset_eval_breaker_bit_all(PyInterpreterState *interp, uintptr_t bit) +{ + _PyRuntimeState *runtime = &_PyRuntime; + + HEAD_LOCK(runtime); + for (PyThreadState *tstate = interp->threads.head; tstate != NULL; tstate = tstate->next) { + _Py_unset_eval_breaker_bit(tstate, bit); + } + HEAD_UNLOCK(runtime); +} + +void _Py_FinishPendingCalls(PyThreadState *tstate) { assert(PyGILState_Check()); assert(_PyThreadState_CheckConsistency(tstate)); - if (make_pending_calls(tstate->interp) < 0) { + if (make_pending_calls(tstate) < 0) { PyObject *exc = _PyErr_GetRaisedException(tstate); PyErr_BadInternalCall(); _PyErr_ChainExceptions1(exc); @@ -862,7 +921,7 @@ _PyEval_MakePendingCalls(PyThreadState *tstate) } } - res = make_pending_calls(tstate->interp); + res = make_pending_calls(tstate); if (res != 0) { return res; } @@ -955,11 +1014,11 @@ _PyEval_InitState(PyInterpreterState *interp) int _Py_HandlePending(PyThreadState *tstate) { - PyInterpreterState *interp = tstate->interp; + uintptr_t breaker = _Py_atomic_load_uintptr_relaxed(&tstate->eval_breaker); /* Stop-the-world */ - if (_Py_eval_breaker_bit_is_set(interp, _PY_EVAL_PLEASE_STOP_BIT)) { - _Py_set_eval_breaker_bit(interp, _PY_EVAL_PLEASE_STOP_BIT, 0); + if ((breaker & _PY_EVAL_PLEASE_STOP_BIT) != 0) { + _Py_unset_eval_breaker_bit(tstate, _PY_EVAL_PLEASE_STOP_BIT); _PyThreadState_Suspend(tstate); /* The attach blocks until the stop-the-world event is complete. */ @@ -967,35 +1026,35 @@ _Py_HandlePending(PyThreadState *tstate) } /* Pending signals */ - if (_Py_eval_breaker_bit_is_set(interp, _PY_SIGNALS_PENDING_BIT)) { + if ((breaker & _PY_SIGNALS_PENDING_BIT) != 0) { if (handle_signals(tstate) != 0) { return -1; } } /* Pending calls */ - if (_Py_eval_breaker_bit_is_set(interp, _PY_CALLS_TO_DO_BIT)) { - if (make_pending_calls(interp) != 0) { + if ((breaker & _PY_CALLS_TO_DO_BIT) != 0) { + if (make_pending_calls(tstate) != 0) { return -1; } } #ifdef Py_GIL_DISABLED /* Objects with refcounts to merge */ - if (_Py_eval_breaker_bit_is_set(interp, _PY_EVAL_EXPLICIT_MERGE_BIT)) { - _Py_set_eval_breaker_bit(interp, _PY_EVAL_EXPLICIT_MERGE_BIT, 0); + if ((breaker & _PY_EVAL_EXPLICIT_MERGE_BIT) != 0) { + _Py_unset_eval_breaker_bit(tstate, _PY_EVAL_EXPLICIT_MERGE_BIT); _Py_brc_merge_refcounts(tstate); } #endif /* GC scheduled to run */ - if (_Py_eval_breaker_bit_is_set(interp, _PY_GC_SCHEDULED_BIT)) { - _Py_set_eval_breaker_bit(interp, _PY_GC_SCHEDULED_BIT, 0); + if ((breaker & _PY_GC_SCHEDULED_BIT) != 0) { + _Py_unset_eval_breaker_bit(tstate, _PY_GC_SCHEDULED_BIT); _Py_RunGC(tstate); } /* GIL drop request */ - if (_Py_eval_breaker_bit_is_set(interp, _PY_GIL_DROP_REQUEST_BIT)) { + if ((breaker & _PY_GIL_DROP_REQUEST_BIT) != 0) { /* Give another thread a chance */ _PyThreadState_Detach(tstate); @@ -1005,11 +1064,10 @@ _Py_HandlePending(PyThreadState *tstate) } /* Check for asynchronous exception. */ - if (_Py_eval_breaker_bit_is_set(interp, _PY_ASYNC_EXCEPTION_BIT)) { - _Py_set_eval_breaker_bit(interp, _PY_ASYNC_EXCEPTION_BIT, 0); - if (tstate->async_exc != NULL) { - PyObject *exc = tstate->async_exc; - tstate->async_exc = NULL; + if ((breaker & _PY_ASYNC_EXCEPTION_BIT) != 0) { + _Py_unset_eval_breaker_bit(tstate, _PY_ASYNC_EXCEPTION_BIT); + PyObject *exc = _Py_atomic_exchange_ptr(&tstate->async_exc, NULL); + if (exc != NULL) { _PyErr_SetNone(tstate, exc); Py_DECREF(exc); return -1; @@ -1017,4 +1075,3 @@ _Py_HandlePending(PyThreadState *tstate) } return 0; } - |
