diff options
author | Sam Gross <colesbury@gmail.com> | 2024-02-16 16:22:27 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-02-16 16:22:27 (GMT) |
commit | b24c9161a651f549ed48f4b4dba8996fe9cc4e09 (patch) | |
tree | 549acacced5ca0b1923e0df94b40ef633c6cdf8f /Python | |
parent | f92857a93016aa26ba93959d2bdb690ef52e7f07 (diff) | |
download | cpython-b24c9161a651f549ed48f4b4dba8996fe9cc4e09.zip cpython-b24c9161a651f549ed48f4b4dba8996fe9cc4e09.tar.gz cpython-b24c9161a651f549ed48f4b4dba8996fe9cc4e09.tar.bz2 |
gh-112529: Make the GC scheduling thread-safe (#114880)
The GC keeps track of the number of allocations (minus deallocations)
since the last GC. This change buffers the count in thread-local state and
uses atomic operations to modify the per-interpreter count. The thread-local
buffering avoids contention on shared state.
A consequence is that GC scheduling is less precise, so
"test_sneaky_frame_object" is skipped because it requires that the GC
run immediately after a frame object is allocated.
Diffstat (limited to 'Python')
-rw-r--r-- | Python/gc_free_threading.c | 63 |
1 file changed, 48 insertions, 15 deletions
diff --git a/Python/gc_free_threading.c b/Python/gc_free_threading.c index 3dc1dc1..a758c99 100644 --- a/Python/gc_free_threading.c +++ b/Python/gc_free_threading.c @@ -23,6 +23,11 @@ typedef struct _gc_runtime_state GCState; # define GC_DEBUG #endif +// Each thread buffers the count of allocated objects in a thread-local +// variable up to +/- this amount to reduce the overhead of updating +// the global count. +#define LOCAL_ALLOC_COUNT_THRESHOLD 512 + // Automatically choose the generation that needs collecting. #define GENERATION_AUTO (-1) @@ -960,6 +965,41 @@ gc_should_collect(GCState *gcstate) } static void +record_allocation(PyThreadState *tstate) +{ + struct _gc_thread_state *gc = &((_PyThreadStateImpl *)tstate)->gc; + + // We buffer the allocation count to avoid the overhead of atomic + // operations for every allocation. + gc->alloc_count++; + if (gc->alloc_count >= LOCAL_ALLOC_COUNT_THRESHOLD) { + // TODO: Use Py_ssize_t for the generation count. + GCState *gcstate = &tstate->interp->gc; + _Py_atomic_add_int(&gcstate->generations[0].count, (int)gc->alloc_count); + gc->alloc_count = 0; + + if (gc_should_collect(gcstate) && + !_Py_atomic_load_int_relaxed(&gcstate->collecting)) + { + _Py_ScheduleGC(tstate->interp); + } + } +} + +static void +record_deallocation(PyThreadState *tstate) +{ + struct _gc_thread_state *gc = &((_PyThreadStateImpl *)tstate)->gc; + + gc->alloc_count--; + if (gc->alloc_count <= -LOCAL_ALLOC_COUNT_THRESHOLD) { + GCState *gcstate = &tstate->interp->gc; + _Py_atomic_add_int(&gcstate->generations[0].count, (int)gc->alloc_count); + gc->alloc_count = 0; + } +} + +static void gc_collect_internal(PyInterpreterState *interp, struct collection_state *state) { _PyEval_StopTheWorld(interp); @@ -981,6 +1021,9 @@ gc_collect_internal(PyInterpreterState *interp, struct collection_state *state) } } + // Record the number of live GC objects + interp->gc.long_lived_total = state->long_lived_total; + // Clear weakrefs and enqueue callbacks (but do not 
call them). clear_weakrefs(state); _PyEval_StartTheWorld(interp); @@ -1090,7 +1133,6 @@ gc_collect_main(PyThreadState *tstate, int generation, _PyGC_Reason reason) m = state.collected; n = state.uncollectable; - gcstate->long_lived_total = state.long_lived_total; if (gcstate->debug & _PyGC_DEBUG_STATS) { double d = _PyTime_AsSecondsDouble(_PyTime_GetPerfCounter() - t1); @@ -1530,15 +1572,7 @@ _Py_ScheduleGC(PyInterpreterState *interp) void _PyObject_GC_Link(PyObject *op) { - PyThreadState *tstate = _PyThreadState_GET(); - GCState *gcstate = &tstate->interp->gc; - gcstate->generations[0].count++; - - if (gc_should_collect(gcstate) && - !_Py_atomic_load_int_relaxed(&gcstate->collecting)) - { - _Py_ScheduleGC(tstate->interp); - } + record_allocation(_PyThreadState_GET()); } void @@ -1564,7 +1598,7 @@ gc_alloc(PyTypeObject *tp, size_t basicsize, size_t presize) ((PyObject **)mem)[1] = NULL; } PyObject *op = (PyObject *)(mem + presize); - _PyObject_GC_Link(op); + record_allocation(tstate); return op; } @@ -1646,10 +1680,9 @@ PyObject_GC_Del(void *op) PyErr_SetRaisedException(exc); #endif } - GCState *gcstate = get_gc_state(); - if (gcstate->generations[0].count > 0) { - gcstate->generations[0].count--; - } + + record_deallocation(_PyThreadState_GET()); + PyObject_Free(((char *)op)-presize); } |