diff options
author | Sam Gross <colesbury@gmail.com> | 2024-08-06 18:36:57 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-08-06 18:36:57 (GMT) |
commit | dc093010672207176857a747c61da9c046ad9d3e (patch) | |
tree | 3eeff2ad72faa7781255016cc0f54c45aecd41d7 /Python | |
parent | 1429651a06611a9dbcb1928b746faf52934c12e2 (diff) | |
download | cpython-dc093010672207176857a747c61da9c046ad9d3e.zip cpython-dc093010672207176857a747c61da9c046ad9d3e.tar.gz cpython-dc093010672207176857a747c61da9c046ad9d3e.tar.bz2 |
gh-122417: Implement per-thread heap type refcounts (#122418)
The free-threaded build partially stores heap type reference counts in
a distributed manner in per-thread arrays. This avoids reference count
contention when creating or destroying instances.
Co-authored-by: Ken Jin <kenjin@python.org>
Diffstat (limited to 'Python')
-rw-r--r-- | Python/gc_free_threading.c | 69 | ||||
-rw-r--r-- | Python/pystate.c | 13 | ||||
-rw-r--r-- | Python/typeid.c | 200 |
3 files changed, 231 insertions, 51 deletions
diff --git a/Python/gc_free_threading.c b/Python/gc_free_threading.c index 53f0416..1e02db0 100644 --- a/Python/gc_free_threading.c +++ b/Python/gc_free_threading.c @@ -15,6 +15,7 @@ #include "pycore_tstate.h" // _PyThreadStateImpl #include "pycore_weakref.h" // _PyWeakref_ClearRef() #include "pydtrace.h" +#include "pycore_typeid.h" // _PyType_MergeThreadLocalRefcounts #ifdef Py_GIL_DISABLED @@ -164,7 +165,15 @@ disable_deferred_refcounting(PyObject *op) { if (_PyObject_HasDeferredRefcount(op)) { op->ob_gc_bits &= ~_PyGC_BITS_DEFERRED; - op->ob_ref_shared -= (1 << _Py_REF_SHARED_SHIFT); + op->ob_ref_shared -= _Py_REF_SHARED(_Py_REF_DEFERRED, 0); + + if (PyType_Check(op)) { + // Disable thread-local refcounting for heap types + PyTypeObject *type = (PyTypeObject *)op; + if (PyType_HasFeature(type, Py_TPFLAGS_HEAPTYPE)) { + _PyType_ReleaseId((PyHeapTypeObject *)op); + } + } } } @@ -329,16 +338,6 @@ merge_queued_objects(_PyThreadStateImpl *tstate, struct collection_state *state) } static void -merge_all_queued_objects(PyInterpreterState *interp, struct collection_state *state) -{ - HEAD_LOCK(&_PyRuntime); - for (PyThreadState *p = interp->threads.head; p != NULL; p = p->next) { - merge_queued_objects((_PyThreadStateImpl *)p, state); - } - HEAD_UNLOCK(&_PyRuntime); -} - -static void process_delayed_frees(PyInterpreterState *interp) { // In STW status, we can observe the latest write sequence by @@ -389,7 +388,9 @@ update_refs(const mi_heap_t *heap, const mi_heap_area_t *area, } Py_ssize_t refcount = Py_REFCNT(op); - refcount -= _PyObject_HasDeferredRefcount(op); + if (_PyObject_HasDeferredRefcount(op)) { + refcount -= _Py_REF_DEFERRED; + } _PyObject_ASSERT(op, refcount >= 0); if (refcount > 0 && !_PyObject_HasDeferredRefcount(op)) { @@ -754,10 +755,6 @@ _PyGC_Init(PyInterpreterState *interp) { GCState *gcstate = &interp->gc; - // gh-117783: immortalize objects that would use deferred refcounting - // once the first non-main thread is created (but not in 
subinterpreters). - gcstate->immortalize = _Py_IsMainInterpreter(interp) ? 0 : -1; - gcstate->garbage = PyList_New(0); if (gcstate->garbage == NULL) { return _PyStatus_NO_MEMORY(); @@ -1105,8 +1102,18 @@ gc_collect_internal(PyInterpreterState *interp, struct collection_state *state, state->gcstate->old[i-1].count = 0; } - // merge refcounts for all queued objects - merge_all_queued_objects(interp, state); + HEAD_LOCK(&_PyRuntime); + for (PyThreadState *p = interp->threads.head; p != NULL; p = p->next) { + _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)p; + + // merge per-thread refcount for types into the type's actual refcount + _PyType_MergeThreadLocalRefcounts(tstate); + + // merge refcounts for all queued objects + merge_queued_objects(tstate, state); + } + HEAD_UNLOCK(&_PyRuntime); + process_delayed_frees(interp); // Find unreachable objects @@ -1835,32 +1842,6 @@ custom_visitor_wrapper(const mi_heap_t *heap, const mi_heap_area_t *area, return true; } -// gh-117783: Immortalize objects that use deferred reference counting to -// temporarily work around scaling bottlenecks. 
-static bool -immortalize_visitor(const mi_heap_t *heap, const mi_heap_area_t *area, - void *block, size_t block_size, void *args) -{ - PyObject *op = op_from_block(block, args, false); - if (op != NULL && _PyObject_HasDeferredRefcount(op)) { - _Py_SetImmortal(op); - op->ob_gc_bits &= ~_PyGC_BITS_DEFERRED; - } - return true; -} - -void -_PyGC_ImmortalizeDeferredObjects(PyInterpreterState *interp) -{ - struct visitor_args args; - _PyEval_StopTheWorld(interp); - if (interp->gc.immortalize == 0) { - gc_visit_heaps(interp, &immortalize_visitor, &args); - interp->gc.immortalize = 1; - } - _PyEval_StartTheWorld(interp); -} - void PyUnstable_GC_VisitObjects(gcvisitobjects_t callback, void *arg) { diff --git a/Python/pystate.c b/Python/pystate.c index 6fbd17f..8f4818c 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -20,6 +20,7 @@ #include "pycore_runtime_init.h" // _PyRuntimeState_INIT #include "pycore_sysmodule.h" // _PySys_Audit() #include "pycore_obmalloc.h" // _PyMem_obmalloc_state_on_heap() +#include "pycore_typeid.h" // _PyType_FinalizeIdPool /* -------------------------------------------------------------------------- CAUTION @@ -1584,13 +1585,6 @@ new_threadstate(PyInterpreterState *interp, int whence) PyMem_RawFree(new_tstate); } else { -#ifdef Py_GIL_DISABLED - if (_Py_atomic_load_int(&interp->gc.immortalize) == 0) { - // Immortalize objects marked as using deferred reference counting - // the first time a non-main thread is created. - _PyGC_ImmortalizeDeferredObjects(interp); - } -#endif } #ifdef Py_GIL_DISABLED @@ -1741,6 +1735,10 @@ PyThreadState_Clear(PyThreadState *tstate) struct _Py_freelists *freelists = _Py_freelists_GET(); _PyObject_ClearFreeLists(freelists, 1); + // Merge our thread-local refcounts into the type's own refcount and + // free our local refcount array. + _PyType_FinalizeThreadLocalRefcounts((_PyThreadStateImpl *)tstate); + // Remove ourself from the biased reference counting table of threads. 
_Py_brc_remove_thread(tstate); #endif @@ -1799,6 +1797,7 @@ tstate_delete_common(PyThreadState *tstate, int release_gil) _PyThreadStateImpl *tstate_impl = (_PyThreadStateImpl *)tstate; tstate->interp->object_state.reftotal += tstate_impl->reftotal; tstate_impl->reftotal = 0; + assert(tstate_impl->types.refcounts == NULL); #endif HEAD_UNLOCK(runtime); diff --git a/Python/typeid.c b/Python/typeid.c new file mode 100644 index 0000000..83a6872 --- /dev/null +++ b/Python/typeid.c @@ -0,0 +1,200 @@ +#include "Python.h" + +#include "pycore_lock.h" // PyMutex_LockFlags() +#include "pycore_pystate.h" // _PyThreadState_GET() +#include "pycore_object.h" // _Py_IncRefTotal +#include "pycore_typeid.h" + +// This contains code for allocating unique ids to heap type objects +// and re-using those ids when the type is deallocated. +// +// See Include/internal/pycore_typeid.h for more details. + +#ifdef Py_GIL_DISABLED + +#define POOL_MIN_SIZE 8 + +#define LOCK_POOL(pool) PyMutex_LockFlags(&pool->mutex, _Py_LOCK_DONT_DETACH) +#define UNLOCK_POOL(pool) PyMutex_Unlock(&pool->mutex) + +static int +resize_interp_type_id_pool(struct _Py_type_id_pool *pool) +{ + if ((size_t)pool->size > PY_SSIZE_T_MAX / (2 * sizeof(*pool->table))) { + return -1; + } + + Py_ssize_t new_size = pool->size * 2; + if (new_size < POOL_MIN_SIZE) { + new_size = POOL_MIN_SIZE; + } + + _Py_type_id_entry *table = PyMem_Realloc(pool->table, + new_size * sizeof(*pool->table)); + if (table == NULL) { + return -1; + } + + Py_ssize_t start = pool->size; + for (Py_ssize_t i = start; i < new_size - 1; i++) { + table[i].next = &table[i + 1]; + } + table[new_size - 1].next = NULL; + + pool->table = table; + pool->freelist = &table[start]; + _Py_atomic_store_ssize(&pool->size, new_size); + return 0; +} + +static int +resize_local_refcounts(_PyThreadStateImpl *tstate) +{ + if (tstate->types.is_finalized) { + return -1; + } + + struct _Py_type_id_pool *pool = &tstate->base.interp->type_ids; + Py_ssize_t size = 
_Py_atomic_load_ssize(&pool->size); + + Py_ssize_t *refcnts = PyMem_Realloc(tstate->types.refcounts, + size * sizeof(Py_ssize_t)); + if (refcnts == NULL) { + return -1; + } + + Py_ssize_t old_size = tstate->types.size; + if (old_size < size) { + memset(refcnts + old_size, 0, (size - old_size) * sizeof(Py_ssize_t)); + } + + tstate->types.refcounts = refcnts; + tstate->types.size = size; + return 0; +} + +void +_PyType_AssignId(PyHeapTypeObject *type) +{ + PyInterpreterState *interp = _PyInterpreterState_GET(); + struct _Py_type_id_pool *pool = &interp->type_ids; + + LOCK_POOL(pool); + if (pool->freelist == NULL) { + if (resize_interp_type_id_pool(pool) < 0) { + type->unique_id = -1; + UNLOCK_POOL(pool); + return; + } + } + + _Py_type_id_entry *entry = pool->freelist; + pool->freelist = entry->next; + entry->type = type; + _PyObject_SetDeferredRefcount((PyObject *)type); + type->unique_id = (entry - pool->table); + UNLOCK_POOL(pool); +} + +void +_PyType_ReleaseId(PyHeapTypeObject *type) +{ + PyInterpreterState *interp = _PyInterpreterState_GET(); + struct _Py_type_id_pool *pool = &interp->type_ids; + + if (type->unique_id < 0) { + // The type doesn't have an id assigned. + return; + } + + LOCK_POOL(pool); + _Py_type_id_entry *entry = &pool->table[type->unique_id]; + assert(entry->type == type); + entry->next = pool->freelist; + pool->freelist = entry; + + type->unique_id = -1; + UNLOCK_POOL(pool); +} + +void +_PyType_IncrefSlow(PyHeapTypeObject *type) +{ + _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET(); + if (type->unique_id < 0 || resize_local_refcounts(tstate) < 0) { + // just incref the type directly. 
+ Py_INCREF(type); + return; + } + + assert(type->unique_id < tstate->types.size); + tstate->types.refcounts[type->unique_id]++; +#ifdef Py_REF_DEBUG + _Py_IncRefTotal((PyThreadState *)tstate); +#endif + _Py_INCREF_STAT_INC(); +} + +void +_PyType_MergeThreadLocalRefcounts(_PyThreadStateImpl *tstate) +{ + if (tstate->types.refcounts == NULL) { + return; + } + + struct _Py_type_id_pool *pool = &tstate->base.interp->type_ids; + + LOCK_POOL(pool); + for (Py_ssize_t i = 0, n = tstate->types.size; i < n; i++) { + Py_ssize_t refcnt = tstate->types.refcounts[i]; + if (refcnt != 0) { + PyObject *type = (PyObject *)pool->table[i].type; + assert(PyType_Check(type)); + + _Py_atomic_add_ssize(&type->ob_ref_shared, + refcnt << _Py_REF_SHARED_SHIFT); + tstate->types.refcounts[i] = 0; + } + } + UNLOCK_POOL(pool); +} + +void +_PyType_FinalizeThreadLocalRefcounts(_PyThreadStateImpl *tstate) +{ + _PyType_MergeThreadLocalRefcounts(tstate); + + PyMem_Free(tstate->types.refcounts); + tstate->types.refcounts = NULL; + tstate->types.size = 0; + tstate->types.is_finalized = 1; +} + +void +_PyType_FinalizeIdPool(PyInterpreterState *interp) +{ + struct _Py_type_id_pool *pool = &interp->type_ids; + + // First, set the free-list to NULL values + while (pool->freelist) { + _Py_type_id_entry *next = pool->freelist->next; + pool->freelist->type = NULL; + pool->freelist = next; + } + + // Now everything non-NULL is a type. Set the type's id to -1 in case it + // outlives the interpreter. + for (Py_ssize_t i = 0; i < pool->size; i++) { + PyHeapTypeObject *ht = pool->table[i].type; + if (ht) { + ht->unique_id = -1; + pool->table[i].type = NULL; + } + } + PyMem_Free(pool->table); + pool->table = NULL; + pool->freelist = NULL; + pool->size = 0; +} + +#endif /* Py_GIL_DISABLED */ |