summaryrefslogtreecommitdiffstats
path: root/Python
diff options
context:
space:
mode:
author: Sam Gross <colesbury@gmail.com>, 2024-08-06 18:36:57 (GMT)
committer: GitHub <noreply@github.com>, 2024-08-06 18:36:57 (GMT)
commit: dc093010672207176857a747c61da9c046ad9d3e (patch)
tree: 3eeff2ad72faa7781255016cc0f54c45aecd41d7 /Python
parent: 1429651a06611a9dbcb1928b746faf52934c12e2 (diff)
download: cpython-dc093010672207176857a747c61da9c046ad9d3e.zip
cpython-dc093010672207176857a747c61da9c046ad9d3e.tar.gz
cpython-dc093010672207176857a747c61da9c046ad9d3e.tar.bz2
gh-122417: Implement per-thread heap type refcounts (#122418)
The free-threaded build partially stores heap type reference counts in a distributed manner, in per-thread arrays. This avoids reference count contention when creating or destroying instances. (Co-authored-by: Ken Jin <kenjin@python.org>)
Diffstat (limited to 'Python')
-rw-r--r-- Python/gc_free_threading.c | 69
-rw-r--r-- Python/pystate.c | 13
-rw-r--r-- Python/typeid.c | 200
3 files changed, 231 insertions, 51 deletions
diff --git a/Python/gc_free_threading.c b/Python/gc_free_threading.c
index 53f0416..1e02db0 100644
--- a/Python/gc_free_threading.c
+++ b/Python/gc_free_threading.c
@@ -15,6 +15,7 @@
#include "pycore_tstate.h" // _PyThreadStateImpl
#include "pycore_weakref.h" // _PyWeakref_ClearRef()
#include "pydtrace.h"
+#include "pycore_typeid.h" // _PyType_MergeThreadLocalRefcounts
#ifdef Py_GIL_DISABLED
@@ -164,7 +165,15 @@ disable_deferred_refcounting(PyObject *op)
{
if (_PyObject_HasDeferredRefcount(op)) {
op->ob_gc_bits &= ~_PyGC_BITS_DEFERRED;
- op->ob_ref_shared -= (1 << _Py_REF_SHARED_SHIFT);
+ op->ob_ref_shared -= _Py_REF_SHARED(_Py_REF_DEFERRED, 0);
+
+ if (PyType_Check(op)) {
+ // Disable thread-local refcounting for heap types
+ PyTypeObject *type = (PyTypeObject *)op;
+ if (PyType_HasFeature(type, Py_TPFLAGS_HEAPTYPE)) {
+ _PyType_ReleaseId((PyHeapTypeObject *)op);
+ }
+ }
}
}
@@ -329,16 +338,6 @@ merge_queued_objects(_PyThreadStateImpl *tstate, struct collection_state *state)
}
static void
-merge_all_queued_objects(PyInterpreterState *interp, struct collection_state *state)
-{
- HEAD_LOCK(&_PyRuntime);
- for (PyThreadState *p = interp->threads.head; p != NULL; p = p->next) {
- merge_queued_objects((_PyThreadStateImpl *)p, state);
- }
- HEAD_UNLOCK(&_PyRuntime);
-}
-
-static void
process_delayed_frees(PyInterpreterState *interp)
{
// In STW status, we can observe the latest write sequence by
@@ -389,7 +388,9 @@ update_refs(const mi_heap_t *heap, const mi_heap_area_t *area,
}
Py_ssize_t refcount = Py_REFCNT(op);
- refcount -= _PyObject_HasDeferredRefcount(op);
+ if (_PyObject_HasDeferredRefcount(op)) {
+ refcount -= _Py_REF_DEFERRED;
+ }
_PyObject_ASSERT(op, refcount >= 0);
if (refcount > 0 && !_PyObject_HasDeferredRefcount(op)) {
@@ -754,10 +755,6 @@ _PyGC_Init(PyInterpreterState *interp)
{
GCState *gcstate = &interp->gc;
- // gh-117783: immortalize objects that would use deferred refcounting
- // once the first non-main thread is created (but not in subinterpreters).
- gcstate->immortalize = _Py_IsMainInterpreter(interp) ? 0 : -1;
-
gcstate->garbage = PyList_New(0);
if (gcstate->garbage == NULL) {
return _PyStatus_NO_MEMORY();
@@ -1105,8 +1102,18 @@ gc_collect_internal(PyInterpreterState *interp, struct collection_state *state,
state->gcstate->old[i-1].count = 0;
}
- // merge refcounts for all queued objects
- merge_all_queued_objects(interp, state);
+ HEAD_LOCK(&_PyRuntime);
+ for (PyThreadState *p = interp->threads.head; p != NULL; p = p->next) {
+ _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)p;
+
+ // merge per-thread refcount for types into the type's actual refcount
+ _PyType_MergeThreadLocalRefcounts(tstate);
+
+ // merge refcounts for all queued objects
+ merge_queued_objects(tstate, state);
+ }
+ HEAD_UNLOCK(&_PyRuntime);
+
process_delayed_frees(interp);
// Find unreachable objects
@@ -1835,32 +1842,6 @@ custom_visitor_wrapper(const mi_heap_t *heap, const mi_heap_area_t *area,
return true;
}
-// gh-117783: Immortalize objects that use deferred reference counting to
-// temporarily work around scaling bottlenecks.
-static bool
-immortalize_visitor(const mi_heap_t *heap, const mi_heap_area_t *area,
- void *block, size_t block_size, void *args)
-{
- PyObject *op = op_from_block(block, args, false);
- if (op != NULL && _PyObject_HasDeferredRefcount(op)) {
- _Py_SetImmortal(op);
- op->ob_gc_bits &= ~_PyGC_BITS_DEFERRED;
- }
- return true;
-}
-
-void
-_PyGC_ImmortalizeDeferredObjects(PyInterpreterState *interp)
-{
- struct visitor_args args;
- _PyEval_StopTheWorld(interp);
- if (interp->gc.immortalize == 0) {
- gc_visit_heaps(interp, &immortalize_visitor, &args);
- interp->gc.immortalize = 1;
- }
- _PyEval_StartTheWorld(interp);
-}
-
void
PyUnstable_GC_VisitObjects(gcvisitobjects_t callback, void *arg)
{
diff --git a/Python/pystate.c b/Python/pystate.c
index 6fbd17f..8f4818c 100644
--- a/Python/pystate.c
+++ b/Python/pystate.c
@@ -20,6 +20,7 @@
#include "pycore_runtime_init.h" // _PyRuntimeState_INIT
#include "pycore_sysmodule.h" // _PySys_Audit()
#include "pycore_obmalloc.h" // _PyMem_obmalloc_state_on_heap()
+#include "pycore_typeid.h" // _PyType_FinalizeIdPool
/* --------------------------------------------------------------------------
CAUTION
@@ -1584,13 +1585,6 @@ new_threadstate(PyInterpreterState *interp, int whence)
PyMem_RawFree(new_tstate);
}
else {
-#ifdef Py_GIL_DISABLED
- if (_Py_atomic_load_int(&interp->gc.immortalize) == 0) {
- // Immortalize objects marked as using deferred reference counting
- // the first time a non-main thread is created.
- _PyGC_ImmortalizeDeferredObjects(interp);
- }
-#endif
}
#ifdef Py_GIL_DISABLED
@@ -1741,6 +1735,10 @@ PyThreadState_Clear(PyThreadState *tstate)
struct _Py_freelists *freelists = _Py_freelists_GET();
_PyObject_ClearFreeLists(freelists, 1);
+ // Merge our thread-local refcounts into the type's own refcount and
+ // free our local refcount array.
+ _PyType_FinalizeThreadLocalRefcounts((_PyThreadStateImpl *)tstate);
+
// Remove ourself from the biased reference counting table of threads.
_Py_brc_remove_thread(tstate);
#endif
@@ -1799,6 +1797,7 @@ tstate_delete_common(PyThreadState *tstate, int release_gil)
_PyThreadStateImpl *tstate_impl = (_PyThreadStateImpl *)tstate;
tstate->interp->object_state.reftotal += tstate_impl->reftotal;
tstate_impl->reftotal = 0;
+ assert(tstate_impl->types.refcounts == NULL);
#endif
HEAD_UNLOCK(runtime);
diff --git a/Python/typeid.c b/Python/typeid.c
new file mode 100644
index 0000000..83a6872
--- /dev/null
+++ b/Python/typeid.c
@@ -0,0 +1,200 @@
+#include "Python.h"
+
+#include "pycore_lock.h" // PyMutex_LockFlags()
+#include "pycore_pystate.h" // _PyThreadState_GET()
+#include "pycore_object.h" // _Py_IncRefTotal
+#include "pycore_typeid.h"
+
+// This contains code for allocating unique ids to heap type objects
+// and re-using those ids when the type is deallocated.
+//
+// See Include/internal/pycore_typeid.h for more details.
+
+#ifdef Py_GIL_DISABLED
+
+// Minimum number of entries the interpreter's type id table is grown to.
+#define POOL_MIN_SIZE 8
+
+// Lock / unlock the mutex embedded in a type id pool.  The lock is taken with
+// the _Py_LOCK_DONT_DETACH flag.  The macro argument is parenthesized so
+// that any pointer-valued expression can be passed safely.
+#define LOCK_POOL(pool) PyMutex_LockFlags(&(pool)->mutex, _Py_LOCK_DONT_DETACH)
+#define UNLOCK_POOL(pool) PyMutex_Unlock(&(pool)->mutex)
+
+// Grow the interpreter's type id table to twice its current size (but never
+// to fewer than POOL_MIN_SIZE entries) and chain the newly added entries
+// together as the pool's free list.
+//
+// The previous free list pointer is overwritten; the only caller visible in
+// this file invokes this function when the free list is empty.
+//
+// Returns 0 on success.  Returns -1 without modifying any pool field if the
+// doubled size would be too large or if the reallocation fails.
+static int
+resize_interp_type_id_pool(struct _Py_type_id_pool *pool)
+{
+    const size_t entry_size = sizeof(*pool->table);
+
+    // Refuse to grow if the doubled table would exceed PY_SSIZE_T_MAX bytes.
+    if ((size_t)pool->size > PY_SSIZE_T_MAX / (2 * entry_size)) {
+        return -1;
+    }
+
+    Py_ssize_t old_count = pool->size;
+    Py_ssize_t new_count = (old_count * 2 < POOL_MIN_SIZE) ? POOL_MIN_SIZE
+                                                          : old_count * 2;
+
+    _Py_type_id_entry *grown = PyMem_Realloc(pool->table,
+                                             new_count * entry_size);
+    if (grown == NULL) {
+        return -1;
+    }
+
+    // Link every new entry to its successor; the last one ends the list.
+    _Py_type_id_entry *last = &grown[new_count - 1];
+    for (_Py_type_id_entry *e = &grown[old_count]; e < last; e++) {
+        e->next = e + 1;
+    }
+    last->next = NULL;
+
+    // Publish the table and free list first, then the size (atomic store).
+    pool->table = grown;
+    pool->freelist = &grown[old_count];
+    _Py_atomic_store_ssize(&pool->size, new_count);
+    return 0;
+}
+
+// Resize `tstate`'s local type refcount array to the current size of the
+// interpreter's type id table, zero-filling any slots that were added.
+//
+// Returns 0 on success.  Returns -1, leaving the thread state unchanged, if
+// the thread's local refcounts have already been finalized or if the
+// reallocation fails.
+static int
+resize_local_refcounts(_PyThreadStateImpl *tstate)
+{
+    if (tstate->types.is_finalized) {
+        return -1;
+    }
+
+    // The pool size is read with an atomic load; the pool mutex is not taken.
+    struct _Py_type_id_pool *ids = &tstate->base.interp->type_ids;
+    Py_ssize_t new_len = _Py_atomic_load_ssize(&ids->size);
+
+    Py_ssize_t *counts = PyMem_Realloc(tstate->types.refcounts,
+                                       new_len * sizeof(Py_ssize_t));
+    if (counts == NULL) {
+        return -1;
+    }
+
+    // Newly added slots start out with a local count of zero.
+    for (Py_ssize_t i = tstate->types.size; i < new_len; i++) {
+        counts[i] = 0;
+    }
+
+    tstate->types.refcounts = counts;
+    tstate->types.size = new_len;
+    return 0;
+}
+
+// Assign a unique id to a heap type, taken from the current interpreter's
+// type id pool.
+//
+// With the pool locked, an entry is popped from the free list (growing the
+// table first if the list is empty), the entry is pointed at `type`, the type
+// is marked as using deferred reference counting, and the entry's index in
+// the table is stored in `type->unique_id`.  If the table cannot be grown,
+// `type->unique_id` is set to -1 and nothing else is changed.
+void
+_PyType_AssignId(PyHeapTypeObject *type)
+{
+    struct _Py_type_id_pool *ids = &_PyInterpreterState_GET()->type_ids;
+
+    LOCK_POOL(ids);
+    if (ids->freelist != NULL || resize_interp_type_id_pool(ids) >= 0) {
+        _Py_type_id_entry *slot = ids->freelist;
+        // Read the successor before the entry is reused to hold the type.
+        ids->freelist = slot->next;
+        slot->type = type;
+        _PyObject_SetDeferredRefcount((PyObject *)type);
+        type->unique_id = (slot - ids->table);
+    }
+    else {
+        // No free entry and the pool could not be grown.
+        type->unique_id = -1;
+    }
+    UNLOCK_POOL(ids);
+}
+
+// Return a heap type's unique id to the current interpreter's pool.
+//
+// Does nothing when the type has no id (`unique_id` is negative).  Otherwise,
+// with the pool locked, the type's table entry is pushed onto the free list
+// and `type->unique_id` is reset to -1.
+void
+_PyType_ReleaseId(PyHeapTypeObject *type)
+{
+    PyInterpreterState *interp = _PyInterpreterState_GET();
+    struct _Py_type_id_pool *ids = &interp->type_ids;
+
+    if (type->unique_id >= 0) {
+        LOCK_POOL(ids);
+        _Py_type_id_entry *slot = ids->table + type->unique_id;
+        assert(slot->type == type);
+
+        // Push the entry onto the front of the free list.
+        slot->next = ids->freelist;
+        ids->freelist = slot;
+
+        type->unique_id = -1;
+        UNLOCK_POOL(ids);
+    }
+}
+
+// Slow path for taking a reference to a heap type.
+//
+// If the type has a unique id and the current thread's local refcount array
+// can be resized to the pool's size, the reference is recorded in the
+// thread-local slot for that id.  Otherwise (no id, or the resize failed) the
+// type is increfed directly with Py_INCREF.
+void
+_PyType_IncrefSlow(PyHeapTypeObject *type)
+{
+    _PyThreadStateImpl *ts = (_PyThreadStateImpl *)_PyThreadState_GET();
+
+    // The resize is only attempted for types that actually have an id.
+    if (type->unique_id >= 0 && resize_local_refcounts(ts) >= 0) {
+        assert(type->unique_id < ts->types.size);
+        ts->types.refcounts[type->unique_id]++;
+#ifdef Py_REF_DEBUG
+        _Py_IncRefTotal((PyThreadState *)ts);
+#endif
+        _Py_INCREF_STAT_INC();
+    }
+    else {
+        // just incref the type directly.
+        Py_INCREF(type);
+    }
+}
+
+// Merge `tstate`'s per-thread type refcounts into the types themselves.
+//
+// For every slot of the thread-local array holding a non-zero count, the
+// count (scaled into the shared-refcount representation) is atomically added
+// to the `ob_ref_shared` field of the type registered under that id, and the
+// local slot is reset to zero.  The pool mutex is held while the table is
+// read.  Does nothing if the thread has no local refcount array.
+void
+_PyType_MergeThreadLocalRefcounts(_PyThreadStateImpl *tstate)
+{
+    if (tstate->types.refcounts == NULL) {
+        return;
+    }
+
+    struct _Py_type_id_pool *pool = &tstate->base.interp->type_ids;
+
+    LOCK_POOL(pool);
+    for (Py_ssize_t i = 0, n = tstate->types.size; i < n; i++) {
+        Py_ssize_t refcnt = tstate->types.refcounts[i];
+        if (refcnt != 0) {
+            PyObject *type = (PyObject *)pool->table[i].type;
+            assert(PyType_Check(type));
+
+            // Scale by multiplication instead of `refcnt << SHIFT`: the local
+            // count is signed and only checked for being non-zero, so it may
+            // be negative (NOTE(review): presumably when this thread released
+            // more references than it acquired), and left-shifting a negative
+            // value is undefined behavior in C.
+            _Py_atomic_add_ssize(
+                &type->ob_ref_shared,
+                refcnt * ((Py_ssize_t)1 << _Py_REF_SHARED_SHIFT));
+            tstate->types.refcounts[i] = 0;
+        }
+    }
+    UNLOCK_POOL(pool);
+}
+
+// Tear down `tstate`'s per-thread type refcounts: merge any outstanding local
+// counts into the types, free the local array, and mark the thread state as
+// finalized so that resize_local_refcounts() refuses to allocate it again.
+void
+_PyType_FinalizeThreadLocalRefcounts(_PyThreadStateImpl *tstate)
+{
+    // Flush outstanding local counts before the array goes away.
+    _PyType_MergeThreadLocalRefcounts(tstate);
+
+    PyMem_Free(tstate->types.refcounts);
+
+    // Reset the bookkeeping and flag the array as permanently gone.
+    tstate->types.is_finalized = 1;
+    tstate->types.size = 0;
+    tstate->types.refcounts = NULL;
+}
+
+// Release the interpreter's type id pool.
+//
+// Every type still registered in the table has its `unique_id` reset to -1
+// (in case it outlives the interpreter); the table is then freed and the pool
+// is returned to an empty state.  The pool mutex is not taken here.
+void
+_PyType_FinalizeIdPool(PyInterpreterState *interp)
+{
+    struct _Py_type_id_pool *ids = &interp->type_ids;
+
+    // Clear the `type` field of every entry on the free list, so that
+    // afterwards any non-NULL `type` in the table is a registered type.
+    for (_Py_type_id_entry *e = ids->freelist; e != NULL; ) {
+        _Py_type_id_entry *following = e->next;
+        e->type = NULL;
+        e = following;
+    }
+    ids->freelist = NULL;
+
+    // Detach the remaining types from the pool.
+    Py_ssize_t count = ids->size;
+    for (Py_ssize_t idx = 0; idx < count; idx++) {
+        PyHeapTypeObject *live = ids->table[idx].type;
+        if (live == NULL) {
+            continue;
+        }
+        live->unique_id = -1;
+        ids->table[idx].type = NULL;
+    }
+
+    PyMem_Free(ids->table);
+    ids->table = NULL;
+    ids->size = 0;
+}
+
+#endif /* Py_GIL_DISABLED */