3 files changed, 323 insertions, 0 deletions
diff --git a/Python/ceval_macros.h b/Python/ceval_macros.h
index c2550f5..1043966 100644
--- a/Python/ceval_macros.h
+++ b/Python/ceval_macros.h
@@ -86,6 +86,12 @@
 #define PRE_DISPATCH_GOTO() ((void)0)
 #endif
 
+#ifdef Py_GIL_DISABLED
+#define QSBR_QUIESCENT_STATE(tstate) _Py_qsbr_quiescent_state(((_PyThreadStateImpl *)tstate)->qsbr)
+#else
+#define QSBR_QUIESCENT_STATE(tstate)
+#endif
+
 
 /* Do interpreter dispatch accounting for tracing and instrumentation */
 #define DISPATCH() \
@@ -117,6 +123,7 @@
 
 #define CHECK_EVAL_BREAKER() \
     _Py_CHECK_EMSCRIPTEN_SIGNALS_PERIODICALLY(); \
+    QSBR_QUIESCENT_STATE(tstate); \
     if (_Py_atomic_load_uintptr_relaxed(&tstate->interp->ceval.eval_breaker) & _PY_EVAL_EVENTS_MASK) { \
         if (_Py_HandlePending(tstate) != 0) { \
             GOTO_ERROR(error); \
diff --git a/Python/pystate.c b/Python/pystate.c
index 24f9b77..c2ccc27 100644
--- a/Python/pystate.c
+++ b/Python/pystate.c
@@ -953,6 +953,8 @@ PyInterpreterState_Delete(PyInterpreterState *interp)
         PyThread_free_lock(interp->id_mutex);
     }
 
+    _Py_qsbr_fini(interp);
+
     _PyObject_FiniState(interp);
 
     free_interpreter(interp);
@@ -1386,6 +1388,14 @@ new_threadstate(PyInterpreterState *interp, int whence)
     if (new_tstate == NULL) {
         return NULL;
     }
+#ifdef Py_GIL_DISABLED
+    Py_ssize_t qsbr_idx = _Py_qsbr_reserve(interp);
+    if (qsbr_idx < 0) {
+        PyMem_RawFree(new_tstate);
+        return NULL;
+    }
+#endif
+
     /* We serialize concurrent creation to protect global state. */
     HEAD_LOCK(runtime);
 
@@ -1420,6 +1430,12 @@ new_threadstate(PyInterpreterState *interp, int whence)
         // Must be called with lock unlocked to avoid re-entrancy deadlock.
         PyMem_RawFree(new_tstate);
     }
+
+#ifdef Py_GIL_DISABLED
+    // Must be called with lock unlocked to avoid lock ordering deadlocks.
+    _Py_qsbr_register(tstate, interp, qsbr_idx);
+#endif
+
     return (PyThreadState *)tstate;
 }
 
@@ -1611,6 +1627,10 @@ tstate_delete_common(PyThreadState *tstate)
     }
     HEAD_UNLOCK(runtime);
 
+#ifdef Py_GIL_DISABLED
+    _Py_qsbr_unregister((_PyThreadStateImpl *)tstate);
+#endif
+
     // XXX Unbind in PyThreadState_Clear(), or earlier
     // (and assert not-equal here)?
     if (tstate->_status.bound_gilstate) {
@@ -1652,6 +1672,9 @@ void
 _PyThreadState_DeleteCurrent(PyThreadState *tstate)
 {
     _Py_EnsureTstateNotNULL(tstate);
+#ifdef Py_GIL_DISABLED
+    _Py_qsbr_detach(((_PyThreadStateImpl *)tstate)->qsbr);
+#endif
     tstate_set_detached(tstate);
     tstate_delete_common(tstate);
     current_fast_clear(tstate->interp->runtime);
@@ -1873,6 +1896,10 @@ _PyThreadState_Attach(PyThreadState *tstate)
         tstate_wait_attach(tstate);
     }
 
+#ifdef Py_GIL_DISABLED
+    _Py_qsbr_attach(((_PyThreadStateImpl *)tstate)->qsbr);
+#endif
+
     // Resume previous critical section. This acquires the lock(s) from the
     // top-most critical section.
     if (tstate->critical_section != 0) {
@@ -1893,6 +1920,9 @@ detach_thread(PyThreadState *tstate, int detached_state)
     if (tstate->critical_section != 0) {
         _PyCriticalSection_SuspendAll(tstate);
     }
+#ifdef Py_GIL_DISABLED
+    _Py_qsbr_detach(((_PyThreadStateImpl *)tstate)->qsbr);
+#endif
     tstate_deactivate(tstate);
     tstate_set_detached(tstate);
     current_fast_clear(&_PyRuntime);
diff --git a/Python/qsbr.c b/Python/qsbr.c
new file mode 100644
index 0000000..7f7ae03
--- /dev/null
+++ b/Python/qsbr.c
@@ -0,0 +1,286 @@
+/*
+ * Implementation of safe memory reclamation scheme using
+ * quiescent states.
+ *
+ * This is dervied from the "GUS" safe memory reclamation technique
+ * in FreeBSD written by Jeffrey Roberson. It is heavily modified. Any bugs
+ * in this code are likely due to the modifications.
+ *
+ * The original copyright is preserved below.
+ *
+ * Copyright (c) 2019,2020 Jeffrey Roberson <jeff@FreeBSD.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice unmodified, this list of conditions, and the following
+ *    disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include "Python.h"
+#include "pycore_initconfig.h"      // _PyStatus_NO_MEMORY()
+#include "pycore_lock.h"            // PyMutex_Lock()
+#include "pycore_qsbr.h"
+#include "pycore_pystate.h"         // _PyThreadState_GET()
+
+
+// Wrap-around safe comparison. This is a holdover from the FreeBSD
+// implementation, which uses 32-bit sequence numbers. We currently use 64-bit
+// sequence numbers, so wrap-around is unlikely.
+#define QSBR_LT(a, b) ((int64_t)((a)-(b)) < 0)
+#define QSBR_LEQ(a, b) ((int64_t)((a)-(b)) <= 0)
+
+// Starting size of the array of qsbr thread states
+#define MIN_ARRAY_SIZE 8
+
+// For _Py_qsbr_deferred_advance(): the number of deferrals before advancing
+// the write sequence.
+#define QSBR_DEFERRED_LIMIT 10
+
+// Allocate a QSBR thread state from the freelist
+static struct _qsbr_thread_state *
+qsbr_allocate(struct _qsbr_shared *shared)
+{
+    struct _qsbr_thread_state *qsbr = shared->freelist;
+    if (qsbr == NULL) {
+        return NULL;
+    }
+    shared->freelist = qsbr->freelist_next;
+    qsbr->freelist_next = NULL;
+    qsbr->shared = shared;
+    qsbr->allocated = true;
+    return qsbr;
+}
+
+// Initialize (or reintialize) the freelist of QSBR thread states
+static void
+initialize_new_array(struct _qsbr_shared *shared)
+{
+    for (Py_ssize_t i = 0; i != shared->size; i++) {
+        struct _qsbr_thread_state *qsbr = &shared->array[i].qsbr;
+        if (qsbr->tstate != NULL) {
+            // Update the thread state pointer to its QSBR state
+            _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)qsbr->tstate;
+            tstate->qsbr = qsbr;
+        }
+        if (!qsbr->allocated) {
+            // Push to freelist
+            qsbr->freelist_next = shared->freelist;
+            shared->freelist = qsbr;
+        }
+    }
+}
+
+// Grow the array of QSBR thread states. Returns 0 on success, -1 on failure.
+static int
+grow_thread_array(struct _qsbr_shared *shared)
+{
+    Py_ssize_t new_size = shared->size * 2;
+    if (new_size < MIN_ARRAY_SIZE) {
+        new_size = MIN_ARRAY_SIZE;
+    }
+
+    struct _qsbr_pad *array = PyMem_RawCalloc(new_size, sizeof(*array));
+    if (array == NULL) {
+        return -1;
+    }
+
+    struct _qsbr_pad *old = shared->array;
+    if (old != NULL) {
+        memcpy(array, shared->array, shared->size * sizeof(*array));
+    }
+
+    shared->array = array;
+    shared->size = new_size;
+    shared->freelist = NULL;
+    initialize_new_array(shared);
+
+    PyMem_RawFree(old);
+    return 0;
+}
+
+uint64_t
+_Py_qsbr_advance(struct _qsbr_shared *shared)
+{
+    // NOTE: with 64-bit sequence numbers, we don't have to worry too much
+    // about the wr_seq getting too far ahead of rd_seq, but if we ever use
+    // 32-bit sequence numbers, we'll need to be more careful.
+    return _Py_atomic_add_uint64(&shared->wr_seq, QSBR_INCR) + QSBR_INCR;
+}
+
+uint64_t
+_Py_qsbr_deferred_advance(struct _qsbr_thread_state *qsbr)
+{
+    if (++qsbr->deferrals < QSBR_DEFERRED_LIMIT) {
+        return _Py_qsbr_shared_current(qsbr->shared) + QSBR_INCR;
+    }
+    qsbr->deferrals = 0;
+    return _Py_qsbr_advance(qsbr->shared);
+}
+
+static uint64_t
+qsbr_poll_scan(struct _qsbr_shared *shared)
+{
+    // Synchronize with store in _Py_qsbr_attach(). We need to ensure that
+    // the reads from each thread's sequence number are not reordered to see
+    // earlier "offline" states.
+    _Py_atomic_fence_seq_cst();
+
+    // Compute the minimum sequence number of all attached threads
+    uint64_t min_seq = _Py_atomic_load_uint64(&shared->wr_seq);
+    struct _qsbr_pad *array = shared->array;
+    for (Py_ssize_t i = 0, size = shared->size; i != size; i++) {
+        struct _qsbr_thread_state *qsbr = &array[i].qsbr;
+
+        uint64_t seq = _Py_atomic_load_uint64(&qsbr->seq);
+        if (seq != QSBR_OFFLINE && QSBR_LT(seq, min_seq)) {
+            min_seq = seq;
+        }
+    }
+
+    // Update the shared read sequence
+    uint64_t rd_seq = _Py_atomic_load_uint64(&shared->rd_seq);
+    if (QSBR_LT(rd_seq, min_seq)) {
+        // It's okay if the compare-exchange failed: another thread updated it
+        (void)_Py_atomic_compare_exchange_uint64(&shared->rd_seq, &rd_seq, min_seq);
+        rd_seq = min_seq;
+    }
+
+    return rd_seq;
+}
+
+bool
+_Py_qsbr_poll(struct _qsbr_thread_state *qsbr, uint64_t goal)
+{
+    assert(_PyThreadState_GET()->state == _Py_THREAD_ATTACHED);
+
+    uint64_t rd_seq = _Py_atomic_load_uint64(&qsbr->shared->rd_seq);
+    if (QSBR_LEQ(goal, rd_seq)) {
+        return true;
+    }
+
+    rd_seq = qsbr_poll_scan(qsbr->shared);
+    return QSBR_LEQ(goal, rd_seq);
+}
+
+void
+_Py_qsbr_attach(struct _qsbr_thread_state *qsbr)
+{
+    assert(qsbr->seq == 0 && "already attached");
+
+    uint64_t seq = _Py_qsbr_shared_current(qsbr->shared);
+    _Py_atomic_store_uint64(&qsbr->seq, seq);  // needs seq_cst
+}
+
+void
+_Py_qsbr_detach(struct _qsbr_thread_state *qsbr)
+{
+    assert(qsbr->seq != 0 && "already detached");
+
+    _Py_atomic_store_uint64_release(&qsbr->seq, QSBR_OFFLINE);
+}
+
+Py_ssize_t
+_Py_qsbr_reserve(PyInterpreterState *interp)
+{
+    struct _qsbr_shared *shared = &interp->qsbr;
+
+    PyMutex_Lock(&shared->mutex);
+    // Try allocating from our internal freelist
+    struct _qsbr_thread_state *qsbr = qsbr_allocate(shared);
+
+    // If there are no free entries, we pause all threads, grow the array,
+    // and update the pointers in PyThreadState to entries in the new array.
+    if (qsbr == NULL) {
+        _PyEval_StopTheWorld(interp);
+        if (grow_thread_array(shared) == 0) {
+            qsbr = qsbr_allocate(shared);
+        }
+        _PyEval_StartTheWorld(interp);
+    }
+    PyMutex_Unlock(&shared->mutex);
+
+    if (qsbr == NULL) {
+        return -1;
+    }
+
+    // Return an index rather than the pointer because the array may be
+    // resized and the pointer invalidated.
+    return (struct _qsbr_pad *)qsbr - shared->array;
+}
+
+void
+_Py_qsbr_register(_PyThreadStateImpl *tstate, PyInterpreterState *interp,
+                  Py_ssize_t index)
+{
+    // Associate the QSBR state with the thread state
+    struct _qsbr_shared *shared = &interp->qsbr;
+
+    PyMutex_Lock(&shared->mutex);
+    struct _qsbr_thread_state *qsbr = &interp->qsbr.array[index].qsbr;
+    assert(qsbr->allocated && qsbr->tstate == NULL);
+    qsbr->tstate = (PyThreadState *)tstate;
+    tstate->qsbr = qsbr;
+    PyMutex_Unlock(&shared->mutex);
+}
+
+void
+_Py_qsbr_unregister(_PyThreadStateImpl *tstate)
+{
+    struct _qsbr_thread_state *qsbr = tstate->qsbr;
+    struct _qsbr_shared *shared = qsbr->shared;
+
+    assert(qsbr->seq == 0 && "thread state must be detached");
+
+    PyMutex_Lock(&shared->mutex);
+    assert(qsbr->allocated && qsbr->tstate == (PyThreadState *)tstate);
+    tstate->qsbr = NULL;
+    qsbr->tstate = NULL;
+    qsbr->allocated = false;
+    qsbr->freelist_next = shared->freelist;
+    shared->freelist = qsbr;
+    PyMutex_Unlock(&shared->mutex);
+}
+
+void
+_Py_qsbr_fini(PyInterpreterState *interp)
+{
+    struct _qsbr_shared *shared = &interp->qsbr;
+    PyMem_RawFree(shared->array);
+    shared->array = NULL;
+    shared->size = 0;
+    shared->freelist = NULL;
+}
+
+void
+_Py_qsbr_after_fork(_PyThreadStateImpl *tstate)
+{
+    struct _qsbr_thread_state *this_qsbr = tstate->qsbr;
+    struct _qsbr_shared *shared = this_qsbr->shared;
+
+    _PyMutex_at_fork_reinit(&shared->mutex);
+
+    for (Py_ssize_t i = 0; i != shared->size; i++) {
+        struct _qsbr_thread_state *qsbr = &shared->array[i].qsbr;
+        if (qsbr != this_qsbr && qsbr->allocated) {
+            qsbr->tstate = NULL;
+            qsbr->allocated = false;
+            qsbr->freelist_next = shared->freelist;
+            shared->freelist = qsbr;
+        }
+    }
+}