diff options
Diffstat (limited to 'Python')
-rw-r--r-- | Python/ceval_macros.h | 7 | ||||
-rw-r--r-- | Python/pystate.c | 30 | ||||
-rw-r--r-- | Python/qsbr.c | 286 |
3 files changed, 323 insertions, 0 deletions
diff --git a/Python/ceval_macros.h b/Python/ceval_macros.h index c2550f5..1043966 100644 --- a/Python/ceval_macros.h +++ b/Python/ceval_macros.h @@ -86,6 +86,12 @@ #define PRE_DISPATCH_GOTO() ((void)0) #endif +#ifdef Py_GIL_DISABLED +#define QSBR_QUIESCENT_STATE(tstate) _Py_qsbr_quiescent_state(((_PyThreadStateImpl *)tstate)->qsbr) +#else +#define QSBR_QUIESCENT_STATE(tstate) +#endif + /* Do interpreter dispatch accounting for tracing and instrumentation */ #define DISPATCH() \ @@ -117,6 +123,7 @@ #define CHECK_EVAL_BREAKER() \ _Py_CHECK_EMSCRIPTEN_SIGNALS_PERIODICALLY(); \ + QSBR_QUIESCENT_STATE(tstate); \ if (_Py_atomic_load_uintptr_relaxed(&tstate->interp->ceval.eval_breaker) & _PY_EVAL_EVENTS_MASK) { \ if (_Py_HandlePending(tstate) != 0) { \ GOTO_ERROR(error); \ diff --git a/Python/pystate.c b/Python/pystate.c index 24f9b77..c2ccc27 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -953,6 +953,8 @@ PyInterpreterState_Delete(PyInterpreterState *interp) PyThread_free_lock(interp->id_mutex); } + _Py_qsbr_fini(interp); + _PyObject_FiniState(interp); free_interpreter(interp); @@ -1386,6 +1388,14 @@ new_threadstate(PyInterpreterState *interp, int whence) if (new_tstate == NULL) { return NULL; } +#ifdef Py_GIL_DISABLED + Py_ssize_t qsbr_idx = _Py_qsbr_reserve(interp); + if (qsbr_idx < 0) { + PyMem_RawFree(new_tstate); + return NULL; + } +#endif + /* We serialize concurrent creation to protect global state. */ HEAD_LOCK(runtime); @@ -1420,6 +1430,12 @@ new_threadstate(PyInterpreterState *interp, int whence) // Must be called with lock unlocked to avoid re-entrancy deadlock. PyMem_RawFree(new_tstate); } + +#ifdef Py_GIL_DISABLED + // Must be called with lock unlocked to avoid lock ordering deadlocks. + _Py_qsbr_register(tstate, interp, qsbr_idx); +#endif + return (PyThreadState *)tstate; } @@ -1611,6 +1627,10 @@ tstate_delete_common(PyThreadState *tstate) } HEAD_UNLOCK(runtime); +#ifdef Py_GIL_DISABLED + _Py_qsbr_unregister((_PyThreadStateImpl *)tstate); +#endif + // XXX Unbind in PyThreadState_Clear(), or earlier // (and assert not-equal here)? if (tstate->_status.bound_gilstate) { @@ -1652,6 +1672,9 @@ void _PyThreadState_DeleteCurrent(PyThreadState *tstate) { _Py_EnsureTstateNotNULL(tstate); +#ifdef Py_GIL_DISABLED + _Py_qsbr_detach(((_PyThreadStateImpl *)tstate)->qsbr); +#endif tstate_set_detached(tstate); tstate_delete_common(tstate); current_fast_clear(tstate->interp->runtime); @@ -1873,6 +1896,10 @@ _PyThreadState_Attach(PyThreadState *tstate) tstate_wait_attach(tstate); } +#ifdef Py_GIL_DISABLED + _Py_qsbr_attach(((_PyThreadStateImpl *)tstate)->qsbr); +#endif + // Resume previous critical section. This acquires the lock(s) from the // top-most critical section. if (tstate->critical_section != 0) { @@ -1893,6 +1920,9 @@ detach_thread(PyThreadState *tstate, int detached_state) if (tstate->critical_section != 0) { _PyCriticalSection_SuspendAll(tstate); } +#ifdef Py_GIL_DISABLED + _Py_qsbr_detach(((_PyThreadStateImpl *)tstate)->qsbr); +#endif tstate_deactivate(tstate); tstate_set_detached(tstate); current_fast_clear(&_PyRuntime); diff --git a/Python/qsbr.c b/Python/qsbr.c new file mode 100644 index 0000000..7f7ae03 --- /dev/null +++ b/Python/qsbr.c @@ -0,0 +1,286 @@ +/* + * Implementation of safe memory reclamation scheme using + * quiescent states. + * + * This is dervied from the "GUS" safe memory reclamation technique + * in FreeBSD written by Jeffrey Roberson. It is heavily modified. Any bugs + * in this code are likely due to the modifications. + * + * The original copyright is preserved below. + * + * Copyright (c) 2019,2020 Jeffrey Roberson <jeff@FreeBSD.org> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice unmodified, this list of conditions, and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#include "Python.h" +#include "pycore_initconfig.h" // _PyStatus_NO_MEMORY() +#include "pycore_lock.h" // PyMutex_Lock() +#include "pycore_qsbr.h" +#include "pycore_pystate.h" // _PyThreadState_GET() + + +// Wrap-around safe comparison. This is a holdover from the FreeBSD +// implementation, which uses 32-bit sequence numbers. We currently use 64-bit +// sequence numbers, so wrap-around is unlikely. +#define QSBR_LT(a, b) ((int64_t)((a)-(b)) < 0) +#define QSBR_LEQ(a, b) ((int64_t)((a)-(b)) <= 0) + +// Starting size of the array of qsbr thread states +#define MIN_ARRAY_SIZE 8 + +// For _Py_qsbr_deferred_advance(): the number of deferrals before advancing +// the write sequence. +#define QSBR_DEFERRED_LIMIT 10 + +// Allocate a QSBR thread state from the freelist +static struct _qsbr_thread_state * +qsbr_allocate(struct _qsbr_shared *shared) +{ + struct _qsbr_thread_state *qsbr = shared->freelist; + if (qsbr == NULL) { + return NULL; + } + shared->freelist = qsbr->freelist_next; + qsbr->freelist_next = NULL; + qsbr->shared = shared; + qsbr->allocated = true; + return qsbr; +} + +// Initialize (or reintialize) the freelist of QSBR thread states +static void +initialize_new_array(struct _qsbr_shared *shared) +{ + for (Py_ssize_t i = 0; i != shared->size; i++) { + struct _qsbr_thread_state *qsbr = &shared->array[i].qsbr; + if (qsbr->tstate != NULL) { + // Update the thread state pointer to its QSBR state + _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)qsbr->tstate; + tstate->qsbr = qsbr; + } + if (!qsbr->allocated) { + // Push to freelist + qsbr->freelist_next = shared->freelist; + shared->freelist = qsbr; + } + } +} + +// Grow the array of QSBR thread states. Returns 0 on success, -1 on failure. +static int +grow_thread_array(struct _qsbr_shared *shared) +{ + Py_ssize_t new_size = shared->size * 2; + if (new_size < MIN_ARRAY_SIZE) { + new_size = MIN_ARRAY_SIZE; + } + + struct _qsbr_pad *array = PyMem_RawCalloc(new_size, sizeof(*array)); + if (array == NULL) { + return -1; + } + + struct _qsbr_pad *old = shared->array; + if (old != NULL) { + memcpy(array, shared->array, shared->size * sizeof(*array)); + } + + shared->array = array; + shared->size = new_size; + shared->freelist = NULL; + initialize_new_array(shared); + + PyMem_RawFree(old); + return 0; +} + +uint64_t +_Py_qsbr_advance(struct _qsbr_shared *shared) +{ + // NOTE: with 64-bit sequence numbers, we don't have to worry too much + // about the wr_seq getting too far ahead of rd_seq, but if we ever use + // 32-bit sequence numbers, we'll need to be more careful. + return _Py_atomic_add_uint64(&shared->wr_seq, QSBR_INCR) + QSBR_INCR; +} + +uint64_t +_Py_qsbr_deferred_advance(struct _qsbr_thread_state *qsbr) +{ + if (++qsbr->deferrals < QSBR_DEFERRED_LIMIT) { + return _Py_qsbr_shared_current(qsbr->shared) + QSBR_INCR; + } + qsbr->deferrals = 0; + return _Py_qsbr_advance(qsbr->shared); +} + +static uint64_t +qsbr_poll_scan(struct _qsbr_shared *shared) +{ + // Synchronize with store in _Py_qsbr_attach(). We need to ensure that + // the reads from each thread's sequence number are not reordered to see + // earlier "offline" states. + _Py_atomic_fence_seq_cst(); + + // Compute the minimum sequence number of all attached threads + uint64_t min_seq = _Py_atomic_load_uint64(&shared->wr_seq); + struct _qsbr_pad *array = shared->array; + for (Py_ssize_t i = 0, size = shared->size; i != size; i++) { + struct _qsbr_thread_state *qsbr = &array[i].qsbr; + + uint64_t seq = _Py_atomic_load_uint64(&qsbr->seq); + if (seq != QSBR_OFFLINE && QSBR_LT(seq, min_seq)) { + min_seq = seq; + } + } + + // Update the shared read sequence + uint64_t rd_seq = _Py_atomic_load_uint64(&shared->rd_seq); + if (QSBR_LT(rd_seq, min_seq)) { + // It's okay if the compare-exchange failed: another thread updated it + (void)_Py_atomic_compare_exchange_uint64(&shared->rd_seq, &rd_seq, min_seq); + rd_seq = min_seq; + } + + return rd_seq; +} + +bool +_Py_qsbr_poll(struct _qsbr_thread_state *qsbr, uint64_t goal) +{ + assert(_PyThreadState_GET()->state == _Py_THREAD_ATTACHED); + + uint64_t rd_seq = _Py_atomic_load_uint64(&qsbr->shared->rd_seq); + if (QSBR_LEQ(goal, rd_seq)) { + return true; + } + + rd_seq = qsbr_poll_scan(qsbr->shared); + return QSBR_LEQ(goal, rd_seq); +} + +void +_Py_qsbr_attach(struct _qsbr_thread_state *qsbr) +{ + assert(qsbr->seq == 0 && "already attached"); + + uint64_t seq = _Py_qsbr_shared_current(qsbr->shared); + _Py_atomic_store_uint64(&qsbr->seq, seq); // needs seq_cst +} + +void +_Py_qsbr_detach(struct _qsbr_thread_state *qsbr) +{ + assert(qsbr->seq != 0 && "already detached"); + + _Py_atomic_store_uint64_release(&qsbr->seq, QSBR_OFFLINE); +} + +Py_ssize_t +_Py_qsbr_reserve(PyInterpreterState *interp) +{ + struct _qsbr_shared *shared = &interp->qsbr; + + PyMutex_Lock(&shared->mutex); + // Try allocating from our internal freelist + struct _qsbr_thread_state *qsbr = qsbr_allocate(shared); + + // If there are no free entries, we pause all threads, grow the array, + // and update the pointers in PyThreadState to entries in the new array. + if (qsbr == NULL) { + _PyEval_StopTheWorld(interp); + if (grow_thread_array(shared) == 0) { + qsbr = qsbr_allocate(shared); + } + _PyEval_StartTheWorld(interp); + } + PyMutex_Unlock(&shared->mutex); + + if (qsbr == NULL) { + return -1; + } + + // Return an index rather than the pointer because the array may be + // resized and the pointer invalidated. + return (struct _qsbr_pad *)qsbr - shared->array; +} + +void +_Py_qsbr_register(_PyThreadStateImpl *tstate, PyInterpreterState *interp, + Py_ssize_t index) +{ + // Associate the QSBR state with the thread state + struct _qsbr_shared *shared = &interp->qsbr; + + PyMutex_Lock(&shared->mutex); + struct _qsbr_thread_state *qsbr = &interp->qsbr.array[index].qsbr; + assert(qsbr->allocated && qsbr->tstate == NULL); + qsbr->tstate = (PyThreadState *)tstate; + tstate->qsbr = qsbr; + PyMutex_Unlock(&shared->mutex); +} + +void +_Py_qsbr_unregister(_PyThreadStateImpl *tstate) +{ + struct _qsbr_thread_state *qsbr = tstate->qsbr; + struct _qsbr_shared *shared = qsbr->shared; + + assert(qsbr->seq == 0 && "thread state must be detached"); + + PyMutex_Lock(&shared->mutex); + assert(qsbr->allocated && qsbr->tstate == (PyThreadState *)tstate); + tstate->qsbr = NULL; + qsbr->tstate = NULL; + qsbr->allocated = false; + qsbr->freelist_next = shared->freelist; + shared->freelist = qsbr; + PyMutex_Unlock(&shared->mutex); +} + +void +_Py_qsbr_fini(PyInterpreterState *interp) +{ + struct _qsbr_shared *shared = &interp->qsbr; + PyMem_RawFree(shared->array); + shared->array = NULL; + shared->size = 0; + shared->freelist = NULL; +} + +void +_Py_qsbr_after_fork(_PyThreadStateImpl *tstate) +{ + struct _qsbr_thread_state *this_qsbr = tstate->qsbr; + struct _qsbr_shared *shared = this_qsbr->shared; + + _PyMutex_at_fork_reinit(&shared->mutex); + + for (Py_ssize_t i = 0; i != shared->size; i++) { + struct _qsbr_thread_state *qsbr = &shared->array[i].qsbr; + if (qsbr != this_qsbr && qsbr->allocated) { + qsbr->tstate = NULL; + qsbr->allocated = false; + qsbr->freelist_next = shared->freelist; + shared->freelist = qsbr; + } + } +} |