diff options
author | T. Wouters <thomas@python.org> | 2024-09-30 21:27:29 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-09-30 21:27:29 (GMT) |
commit | e0eb44ad49926dd131dc639f5506c6769e45b4eb (patch) | |
tree | 200d0b3773a61d1001f4775dada2afd67aa0f0c6 /Python/gc.c | |
parent | bc1fae89af9df3888fab670f83b7aed8afe5a9f5 (diff) | |
download | cpython-e0eb44ad49926dd131dc639f5506c6769e45b4eb.zip cpython-e0eb44ad49926dd131dc639f5506c6769e45b4eb.tar.gz cpython-e0eb44ad49926dd131dc639f5506c6769e45b4eb.tar.bz2 |
[3.13] GH-124567: Revert the Incremental GC in 3.13 (#124770)
Revert the incremental GC in 3.13, since it's not clear that without further turning, the benefits outweigh the costs.
Co-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com>
Diffstat (limited to 'Python/gc.c')
-rw-r--r-- | Python/gc.c | 890 |
1 files changed, 326 insertions, 564 deletions
diff --git a/Python/gc.c b/Python/gc.c index b79d4b5..8dbcb34 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -45,7 +45,7 @@ typedef struct _gc_runtime_state GCState; // move_legacy_finalizers() removes this flag instead. // Between them, unreachable list is not normal list and we can not use // most gc_list_* functions for it. -#define NEXT_MASK_UNREACHABLE 2 +#define NEXT_MASK_UNREACHABLE (1) #define AS_GC(op) _Py_AS_GC(op) #define FROM_GC(gc) _Py_FROM_GC(gc) @@ -95,48 +95,9 @@ gc_decref(PyGC_Head *g) g->_gc_prev -= 1 << _PyGC_PREV_SHIFT; } -static inline int -gc_old_space(PyGC_Head *g) -{ - return g->_gc_next & _PyGC_NEXT_MASK_OLD_SPACE_1; -} -static inline int -flip_old_space(int space) -{ - assert(space == 0 || space == 1); - return space ^ _PyGC_NEXT_MASK_OLD_SPACE_1; -} +#define GEN_HEAD(gcstate, n) (&(gcstate)->generations[n].head) -static inline void -gc_flip_old_space(PyGC_Head *g) -{ - g->_gc_next ^= _PyGC_NEXT_MASK_OLD_SPACE_1; -} - -static inline void -gc_set_old_space(PyGC_Head *g, int space) -{ - assert(space == 0 || space == _PyGC_NEXT_MASK_OLD_SPACE_1); - g->_gc_next &= ~_PyGC_NEXT_MASK_OLD_SPACE_1; - g->_gc_next |= space; -} - -static PyGC_Head * -GEN_HEAD(GCState *gcstate, int n) -{ - assert((gcstate->visited_space & (~1)) == 0); - switch(n) { - case 0: - return &gcstate->young.head; - case 1: - return &gcstate->old[gcstate->visited_space].head; - case 2: - return &gcstate->old[gcstate->visited_space^1].head; - default: - Py_UNREACHABLE(); - } -} static GCState * get_gc_state(void) @@ -155,12 +116,11 @@ _PyGC_InitState(GCState *gcstate) GEN.head._gc_prev = (uintptr_t)&GEN.head; \ } while (0) - assert(gcstate->young.count == 0); - assert(gcstate->old[0].count == 0); - assert(gcstate->old[1].count == 0); - INIT_HEAD(gcstate->young); - INIT_HEAD(gcstate->old[0]); - INIT_HEAD(gcstate->old[1]); + for (int i = 0; i < NUM_GENERATIONS; i++) { + assert(gcstate->generations[i].count == 0); + INIT_HEAD(gcstate->generations[i]); + }; + gcstate->generation0 = GEN_HEAD(gcstate, 0); INIT_HEAD(gcstate->permanent_generation); #undef INIT_HEAD @@ -181,7 +141,6 @@ _PyGC_Init(PyInterpreterState *interp) if (gcstate->callbacks == NULL) { return _PyStatus_NO_MEMORY(); } - gcstate->heap_size = 0; return _PyStatus_OK(); } @@ -259,7 +218,6 @@ gc_list_is_empty(PyGC_Head *list) static inline void gc_list_append(PyGC_Head *node, PyGC_Head *list) { - assert((list->_gc_prev & ~_PyGC_PREV_MASK) == 0); PyGC_Head *last = (PyGC_Head *)list->_gc_prev; // last <-> node @@ -317,8 +275,6 @@ gc_list_merge(PyGC_Head *from, PyGC_Head *to) PyGC_Head *from_tail = GC_PREV(from); assert(from_head != from); assert(from_tail != from); - assert(gc_list_is_empty(to) || - gc_old_space(to_tail) == gc_old_space(from_tail)); _PyGCHead_SET_NEXT(to_tail, from_head); _PyGCHead_SET_PREV(from_head, to_tail); @@ -387,8 +343,8 @@ enum flagstates {collecting_clear_unreachable_clear, static void validate_list(PyGC_Head *head, enum flagstates flags) { - assert((head->_gc_prev & ~_PyGC_PREV_MASK) == 0); - assert((head->_gc_next & ~_PyGC_PREV_MASK) == 0); + assert((head->_gc_prev & PREV_MASK_COLLECTING) == 0); + assert((head->_gc_next & NEXT_MASK_UNREACHABLE) == 0); uintptr_t prev_value = 0, next_value = 0; switch (flags) { case collecting_clear_unreachable_clear: @@ -410,7 +366,7 @@ validate_list(PyGC_Head *head, enum flagstates flags) PyGC_Head *gc = GC_NEXT(head); while (gc != head) { PyGC_Head *trueprev = GC_PREV(gc); - PyGC_Head *truenext = GC_NEXT(gc); + PyGC_Head *truenext = (PyGC_Head *)(gc->_gc_next & ~NEXT_MASK_UNREACHABLE); assert(truenext != NULL); assert(trueprev == prev); assert((gc->_gc_prev & PREV_MASK_COLLECTING) == prev_value); @@ -420,54 +376,8 @@ validate_list(PyGC_Head *head, enum flagstates flags) } assert(prev == GC_PREV(head)); } - -static void -validate_old(GCState *gcstate) -{ - for (int space = 0; space < 2; space++) { - PyGC_Head *head = &gcstate->old[space].head; - PyGC_Head *gc = GC_NEXT(head); - while (gc != head) { - PyGC_Head *next = GC_NEXT(gc); - assert(gc_old_space(gc) == space); - gc = next; - } - } -} - -static void -validate_consistent_old_space(PyGC_Head *head) -{ - PyGC_Head *prev = head; - PyGC_Head *gc = GC_NEXT(head); - if (gc == head) { - return; - } - int old_space = gc_old_space(gc); - while (gc != head) { - PyGC_Head *truenext = GC_NEXT(gc); - assert(truenext != NULL); - assert(gc_old_space(gc) == old_space); - prev = gc; - gc = truenext; - } - assert(prev == GC_PREV(head)); -} - -static void -gc_list_validate_space(PyGC_Head *head, int space) { - PyGC_Head *gc = GC_NEXT(head); - while (gc != head) { - assert(gc_old_space(gc) == space); - gc = GC_NEXT(gc); - } -} - #else #define validate_list(x, y) do{}while(0) -#define validate_old(g) do{}while(0) -#define validate_consistent_old_space(l) do{}while(0) -#define gc_list_validate_space(l, s) do{}while(0) #endif /*** end of list stuff ***/ @@ -485,6 +395,10 @@ update_refs(PyGC_Head *containers) while (gc != containers) { next = GC_NEXT(gc); PyObject *op = FROM_GC(gc); + /* Move any object that might have become immortal to the + * permanent generation as the reference count is not accurately + * reflecting the actual number of live references to this object + */ if (_Py_IsImmortal(op)) { gc_list_move(gc, &get_gc_state()->permanent_generation.head); gc = next; @@ -587,13 +501,12 @@ visit_reachable(PyObject *op, void *arg) // Manually unlink gc from unreachable list because the list functions // don't work right in the presence of NEXT_MASK_UNREACHABLE flags. PyGC_Head *prev = GC_PREV(gc); - PyGC_Head *next = GC_NEXT(gc); + PyGC_Head *next = (PyGC_Head*)(gc->_gc_next & ~NEXT_MASK_UNREACHABLE); _PyObject_ASSERT(FROM_GC(prev), prev->_gc_next & NEXT_MASK_UNREACHABLE); _PyObject_ASSERT(FROM_GC(next), next->_gc_next & NEXT_MASK_UNREACHABLE); - prev->_gc_next = gc->_gc_next; // copy flag bits - gc->_gc_next &= ~NEXT_MASK_UNREACHABLE; + prev->_gc_next = gc->_gc_next; // copy NEXT_MASK_UNREACHABLE _PyGCHead_SET_PREV(next, prev); gc_list_append(gc, reachable); @@ -645,9 +558,6 @@ move_unreachable(PyGC_Head *young, PyGC_Head *unreachable) * or to the right have been scanned yet. */ - validate_consistent_old_space(young); - /* Record which old space we are in, and set NEXT_MASK_UNREACHABLE bit for convenience */ - uintptr_t flags = NEXT_MASK_UNREACHABLE | (gc->_gc_next & _PyGC_NEXT_MASK_OLD_SPACE_1); while (gc != young) { if (gc_get_refs(gc)) { /* gc is definitely reachable from outside the @@ -693,18 +603,17 @@ move_unreachable(PyGC_Head *young, PyGC_Head *unreachable) // But this may pollute the unreachable list head's 'next' pointer // too. That's semantically senseless but expedient here - the // damage is repaired when this function ends. - last->_gc_next = flags | (uintptr_t)gc; + last->_gc_next = (NEXT_MASK_UNREACHABLE | (uintptr_t)gc); _PyGCHead_SET_PREV(gc, last); - gc->_gc_next = flags | (uintptr_t)unreachable; + gc->_gc_next = (NEXT_MASK_UNREACHABLE | (uintptr_t)unreachable); unreachable->_gc_prev = (uintptr_t)gc; } - gc = _PyGCHead_NEXT(prev); + gc = (PyGC_Head*)prev->_gc_next; } // young->_gc_prev must be last element remained in the list. young->_gc_prev = (uintptr_t)prev; - young->_gc_next &= _PyGC_PREV_MASK; // don't let the pollution of the list head's next pointer leak - unreachable->_gc_next &= _PyGC_PREV_MASK; + unreachable->_gc_next &= ~NEXT_MASK_UNREACHABLE; } static void @@ -763,8 +672,8 @@ move_legacy_finalizers(PyGC_Head *unreachable, PyGC_Head *finalizers) PyObject *op = FROM_GC(gc); _PyObject_ASSERT(op, gc->_gc_next & NEXT_MASK_UNREACHABLE); - next = GC_NEXT(gc); gc->_gc_next &= ~NEXT_MASK_UNREACHABLE; + next = (PyGC_Head*)gc->_gc_next; if (has_legacy_finalizer(op)) { gc_clear_collecting(gc); @@ -787,8 +696,8 @@ clear_unreachable_mask(PyGC_Head *unreachable) PyGC_Head *gc, *next; for (gc = GC_NEXT(unreachable); gc != unreachable; gc = next) { _PyObject_ASSERT((PyObject*)FROM_GC(gc), gc->_gc_next & NEXT_MASK_UNREACHABLE); - next = GC_NEXT(gc); gc->_gc_next &= ~NEXT_MASK_UNREACHABLE; + next = (PyGC_Head*)gc->_gc_next; } validate_list(unreachable, collecting_set_unreachable_clear); } @@ -958,7 +867,6 @@ handle_weakrefs(PyGC_Head *unreachable, PyGC_Head *old) /* Invoke the callbacks we decided to honor. It's safe to invoke them * because they can't reference unreachable objects. */ - int visited_space = get_gc_state()->visited_space; while (! gc_list_is_empty(&wrcb_to_call)) { PyObject *temp; PyObject *callback; @@ -993,7 +901,6 @@ handle_weakrefs(PyGC_Head *unreachable, PyGC_Head *old) Py_DECREF(op); if (wrcb_to_call._gc_next == (uintptr_t)gc) { /* object is still alive -- move it */ - gc_set_old_space(gc, visited_space); gc_list_move(gc, old); } else { @@ -1122,6 +1029,25 @@ delete_garbage(PyThreadState *tstate, GCState *gcstate, } +// Show stats for objects in each generations +static void +show_stats_each_generations(GCState *gcstate) +{ + char buf[100]; + size_t pos = 0; + + for (int i = 0; i < NUM_GENERATIONS && pos < sizeof(buf); i++) { + pos += PyOS_snprintf(buf+pos, sizeof(buf)-pos, + " %zd", + gc_list_size(GEN_HEAD(gcstate, i))); + } + + PySys_FormatStderr( + "gc: objects in each generation:%s\n" + "gc: objects in permanent generation: %zd\n", + buf, gc_list_size(&gcstate->permanent_generation.head)); +} + /* Deduce which objects among "base" are unreachable from outside the list and move them to 'unreachable'. The process consist in the following steps: @@ -1195,6 +1121,7 @@ deduce_unreachable(PyGC_Head *base, PyGC_Head *unreachable) { * the reachable objects instead. But this is a one-time cost, probably not * worth complicating the code to speed just a little. */ + gc_list_init(unreachable); move_unreachable(base, unreachable); // gc_prev is pointer again validate_list(base, collecting_clear_unreachable_clear); validate_list(unreachable, collecting_set_unreachable_set); @@ -1233,295 +1160,220 @@ handle_resurrected_objects(PyGC_Head *unreachable, PyGC_Head* still_unreachable, } -#define UNTRACK_TUPLES 1 -#define UNTRACK_DICTS 2 - -static void -gc_collect_region(PyThreadState *tstate, - PyGC_Head *from, - PyGC_Head *to, - int untrack, - struct gc_collection_stats *stats); - -static inline Py_ssize_t -gc_list_set_space(PyGC_Head *list, int space) -{ - Py_ssize_t size = 0; - PyGC_Head *gc; - for (gc = GC_NEXT(list); gc != list; gc = GC_NEXT(gc)) { - gc_set_old_space(gc, space); - size++; - } - return size; -} - -/* Making progress in the incremental collector - * In order to eventually collect all cycles - * the incremental collector must progress through the old - * space faster than objects are added to the old space. - * - * Each young or incremental collection adds a numebr of - * objects, S (for survivors) to the old space, and - * incremental collectors scan I objects from the old space. - * I > S must be true. We also want I > S * N to be where - * N > 1. Higher values of N mean that the old space is - * scanned more rapidly. - * The default incremental threshold of 10 translates to - * N == 1.4 (1 + 4/threshold) +/* Invoke progress callbacks to notify clients that garbage collection + * is starting or stopping */ - -/* Divide by 10, so that the default incremental threshold of 10 - * scans objects at 1% of the heap size */ -#define SCAN_RATE_DIVISOR 10 - static void -add_stats(GCState *gcstate, int gen, struct gc_collection_stats *stats) +invoke_gc_callback(PyThreadState *tstate, const char *phase, + int generation, Py_ssize_t collected, + Py_ssize_t uncollectable) { - gcstate->generation_stats[gen].collected += stats->collected; - gcstate->generation_stats[gen].uncollectable += stats->uncollectable; - gcstate->generation_stats[gen].collections += 1; -} + assert(!_PyErr_Occurred(tstate)); -static void -gc_collect_young(PyThreadState *tstate, - struct gc_collection_stats *stats) -{ + /* we may get called very early */ GCState *gcstate = &tstate->interp->gc; - PyGC_Head *young = &gcstate->young.head; - PyGC_Head *visited = &gcstate->old[gcstate->visited_space].head; - GC_STAT_ADD(0, collections, 1); -#ifdef Py_STATS - { - Py_ssize_t count = 0; - PyGC_Head *gc; - for (gc = GC_NEXT(young); gc != young; gc = GC_NEXT(gc)) { - count++; - } + if (gcstate->callbacks == NULL) { + return; } -#endif - PyGC_Head survivors; - gc_list_init(&survivors); - gc_collect_region(tstate, young, &survivors, UNTRACK_TUPLES, stats); - Py_ssize_t survivor_count = 0; - if (gcstate->visited_space) { - /* objects in visited space have bit set, so we set it here */ - survivor_count = gc_list_set_space(&survivors, 1); - } - else { - PyGC_Head *gc; - for (gc = GC_NEXT(&survivors); gc != &survivors; gc = GC_NEXT(gc)) { -#ifdef GC_DEBUG - assert(gc_old_space(gc) == 0); -#endif - survivor_count++; + /* The local variable cannot be rebound, check it for sanity */ + assert(PyList_CheckExact(gcstate->callbacks)); + PyObject *info = NULL; + if (PyList_GET_SIZE(gcstate->callbacks) != 0) { + info = Py_BuildValue("{sisnsn}", + "generation", generation, + "collected", collected, + "uncollectable", uncollectable); + if (info == NULL) { + PyErr_FormatUnraisable("Exception ignored on invoking gc callbacks"); + return; } } - (void)survivor_count; // Silence compiler warning - gc_list_merge(&survivors, visited); - validate_old(gcstate); - gcstate->young.count = 0; - gcstate->old[gcstate->visited_space].count++; - Py_ssize_t scale_factor = gcstate->old[0].threshold; - if (scale_factor < 1) { - scale_factor = 1; - } - gcstate->work_to_do += gcstate->heap_size / SCAN_RATE_DIVISOR / scale_factor; - add_stats(gcstate, 0, stats); -} -#ifndef NDEBUG -static inline int -IS_IN_VISITED(PyGC_Head *gc, int visited_space) -{ - assert(visited_space == 0 || flip_old_space(visited_space) == 0); - return gc_old_space(gc) == visited_space; -} -#endif - -struct container_and_flag { - PyGC_Head *container; - int visited_space; - uintptr_t size; -}; - -/* A traversal callback for adding to container) */ -static int -visit_add_to_container(PyObject *op, void *arg) -{ - OBJECT_STAT_INC(object_visits); - struct container_and_flag *cf = (struct container_and_flag *)arg; - int visited = cf->visited_space; - assert(visited == get_gc_state()->visited_space); - if (!_Py_IsImmortal(op) && _PyObject_IS_GC(op)) { - PyGC_Head *gc = AS_GC(op); - if (_PyObject_GC_IS_TRACKED(op) && - gc_old_space(gc) != visited) { - gc_flip_old_space(gc); - gc_list_move(gc, cf->container); - cf->size++; - } + PyObject *phase_obj = PyUnicode_FromString(phase); + if (phase_obj == NULL) { + Py_XDECREF(info); + PyErr_FormatUnraisable("Exception ignored on invoking gc callbacks"); + return; } - return 0; -} -static uintptr_t -expand_region_transitively_reachable(PyGC_Head *container, PyGC_Head *gc, GCState *gcstate) -{ - validate_list(container, collecting_clear_unreachable_clear); - struct container_and_flag arg = { - .container = container, - .visited_space = gcstate->visited_space, - .size = 0 - }; - assert(GC_NEXT(gc) == container); - while (gc != container) { - /* Survivors will be moved to visited space, so they should - * have been marked as visited */ - assert(IS_IN_VISITED(gc, gcstate->visited_space)); - PyObject *op = FROM_GC(gc); - if (_Py_IsImmortal(op)) { - PyGC_Head *next = GC_NEXT(gc); - gc_list_move(gc, &get_gc_state()->permanent_generation.head); - gc = next; - continue; + PyObject *stack[] = {phase_obj, info}; + for (Py_ssize_t i=0; i<PyList_GET_SIZE(gcstate->callbacks); i++) { + PyObject *r, *cb = PyList_GET_ITEM(gcstate->callbacks, i); + Py_INCREF(cb); /* make sure cb doesn't go away */ + r = PyObject_Vectorcall(cb, stack, 2, NULL); + if (r == NULL) { + PyErr_WriteUnraisable(cb); } - traverseproc traverse = Py_TYPE(op)->tp_traverse; - (void) traverse(op, - visit_add_to_container, - &arg); - gc = GC_NEXT(gc); + else { + Py_DECREF(r); + } + Py_DECREF(cb); } - return arg.size; + Py_DECREF(phase_obj); + Py_XDECREF(info); + assert(!_PyErr_Occurred(tstate)); } -/* Do bookkeeping for a completed GC cycle */ -static void -completed_cycle(GCState *gcstate) -{ -#ifdef Py_DEBUG - PyGC_Head *not_visited = &gcstate->old[gcstate->visited_space^1].head; - assert(gc_list_is_empty(not_visited)); -#endif - gcstate->visited_space = flip_old_space(gcstate->visited_space); - /* Make sure all young objects have old space bit set correctly */ - PyGC_Head *young = &gcstate->young.head; - PyGC_Head *gc = GC_NEXT(young); - while (gc != young) { - PyGC_Head *next = GC_NEXT(gc); - gc_set_old_space(gc, gcstate->visited_space); - gc = next; - } - gcstate->work_to_do = 0; -} -static void -gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats) -{ - GC_STAT_ADD(1, collections, 1); - GCState *gcstate = &tstate->interp->gc; - PyGC_Head *not_visited = &gcstate->old[gcstate->visited_space^1].head; - PyGC_Head *visited = &gcstate->old[gcstate->visited_space].head; - PyGC_Head increment; - gc_list_init(&increment); - Py_ssize_t scale_factor = gcstate->old[0].threshold; - if (scale_factor < 1) { - scale_factor = 1; - } - gc_list_merge(&gcstate->young.head, &increment); - gcstate->young.count = 0; - gc_list_validate_space(&increment, gcstate->visited_space); - Py_ssize_t increment_size = 0; - while (increment_size < gcstate->work_to_do) { - if (gc_list_is_empty(not_visited)) { - break; +/* Find the oldest generation (highest numbered) where the count + * exceeds the threshold. Objects in the that generation and + * generations younger than it will be collected. */ +static int +gc_select_generation(GCState *gcstate) +{ + for (int i = NUM_GENERATIONS-1; i >= 0; i--) { + if (gcstate->generations[i].count > gcstate->generations[i].threshold) { + /* Avoid quadratic performance degradation in number + of tracked objects (see also issue #4074): + + To limit the cost of garbage collection, there are two strategies; + - make each collection faster, e.g. by scanning fewer objects + - do less collections + This heuristic is about the latter strategy. + + In addition to the various configurable thresholds, we only trigger a + full collection if the ratio + + long_lived_pending / long_lived_total + + is above a given value (hardwired to 25%). + + The reason is that, while "non-full" collections (i.e., collections of + the young and middle generations) will always examine roughly the same + number of objects -- determined by the aforementioned thresholds --, + the cost of a full collection is proportional to the total number of + long-lived objects, which is virtually unbounded. + + Indeed, it has been remarked that doing a full collection every + <constant number> of object creations entails a dramatic performance + degradation in workloads which consist in creating and storing lots of + long-lived objects (e.g. building a large list of GC-tracked objects would + show quadratic performance, instead of linear as expected: see issue #4074). + + Using the above ratio, instead, yields amortized linear performance in + the total number of objects (the effect of which can be summarized + thusly: "each full garbage collection is more and more costly as the + number of objects grows, but we do fewer and fewer of them"). + + This heuristic was suggested by Martin von Löwis on python-dev in + June 2008. His original analysis and proposal can be found at: + http://mail.python.org/pipermail/python-dev/2008-June/080579.html + */ + if (i == NUM_GENERATIONS - 1 + && gcstate->long_lived_pending < gcstate->long_lived_total / 4) + { + continue; + } + return i; } - PyGC_Head *gc = _PyGCHead_NEXT(not_visited); - gc_list_move(gc, &increment); - increment_size++; - gc_set_old_space(gc, gcstate->visited_space); - increment_size += expand_region_transitively_reachable(&increment, gc, gcstate); - } - gc_list_validate_space(&increment, gcstate->visited_space); - PyGC_Head survivors; - gc_list_init(&survivors); - gc_collect_region(tstate, &increment, &survivors, UNTRACK_TUPLES, stats); - gc_list_validate_space(&survivors, gcstate->visited_space); - gc_list_merge(&survivors, visited); - assert(gc_list_is_empty(&increment)); - gcstate->work_to_do += gcstate->heap_size / SCAN_RATE_DIVISOR / scale_factor; - gcstate->work_to_do -= increment_size; - - validate_old(gcstate); - add_stats(gcstate, 1, stats); - if (gc_list_is_empty(not_visited)) { - completed_cycle(gcstate); } + return -1; } -static void -gc_collect_full(PyThreadState *tstate, - struct gc_collection_stats *stats) -{ - GC_STAT_ADD(2, collections, 1); - GCState *gcstate = &tstate->interp->gc; - validate_old(gcstate); - PyGC_Head *young = &gcstate->young.head; - PyGC_Head *pending = &gcstate->old[gcstate->visited_space^1].head; - PyGC_Head *visited = &gcstate->old[gcstate->visited_space].head; - /* merge all generations into visited */ - gc_list_validate_space(young, gcstate->visited_space); - gc_list_set_space(pending, gcstate->visited_space); - gc_list_merge(young, pending); - gcstate->young.count = 0; - gc_list_merge(pending, visited); - - gc_collect_region(tstate, visited, visited, - UNTRACK_TUPLES | UNTRACK_DICTS, - stats); - gcstate->young.count = 0; - gcstate->old[0].count = 0; - gcstate->old[1].count = 0; - - gcstate->work_to_do = - gcstate->young.threshold * 2; - _PyGC_ClearAllFreeLists(tstate->interp); - validate_old(gcstate); - add_stats(gcstate, 2, stats); -} - -/* This is the main function. Read this to understand how the +/* This is the main function. Read this to understand how the * collection process works. */ -static void -gc_collect_region(PyThreadState *tstate, - PyGC_Head *from, - PyGC_Head *to, - int untrack, - struct gc_collection_stats *stats) +static Py_ssize_t +gc_collect_main(PyThreadState *tstate, int generation, _PyGC_Reason reason) { + int i; + Py_ssize_t m = 0; /* # objects collected */ + Py_ssize_t n = 0; /* # unreachable objects that couldn't be collected */ + PyGC_Head *young; /* the generation we are examining */ + PyGC_Head *old; /* next older generation */ PyGC_Head unreachable; /* non-problematic unreachable trash */ PyGC_Head finalizers; /* objects with, & reachable from, __del__ */ - PyGC_Head *gc; /* initialize to prevent a compiler warning */ + PyGC_Head *gc; + PyTime_t t1 = 0; /* initialize to prevent a compiler warning */ GCState *gcstate = &tstate->interp->gc; + // gc_collect_main() must not be called before _PyGC_Init + // or after _PyGC_Fini() assert(gcstate->garbage != NULL); assert(!_PyErr_Occurred(tstate)); - gc_list_init(&unreachable); - deduce_unreachable(from, &unreachable); - validate_consistent_old_space(from); - if (untrack & UNTRACK_TUPLES) { - untrack_tuples(from); + int expected = 0; + if (!_Py_atomic_compare_exchange_int(&gcstate->collecting, &expected, 1)) { + // Don't start a garbage collection if one is already in progress. + return 0; } - if (untrack & UNTRACK_DICTS) { - untrack_dicts(from); + + if (generation == GENERATION_AUTO) { + // Select the oldest generation that needs collecting. We will collect + // objects from that generation and all generations younger than it. + generation = gc_select_generation(gcstate); + if (generation < 0) { + // No generation needs to be collected. + _Py_atomic_store_int(&gcstate->collecting, 0); + return 0; + } } - validate_consistent_old_space(to); - if (from != to) { - gc_list_merge(from, to); + + assert(generation >= 0 && generation < NUM_GENERATIONS); + +#ifdef Py_STATS + if (_Py_stats) { + _Py_stats->object_stats.object_visits = 0; + } +#endif + GC_STAT_ADD(generation, collections, 1); + + if (reason != _Py_GC_REASON_SHUTDOWN) { + invoke_gc_callback(tstate, "start", generation, 0, 0); + } + + if (gcstate->debug & _PyGC_DEBUG_STATS) { + PySys_WriteStderr("gc: collecting generation %d...\n", generation); + show_stats_each_generations(gcstate); + // ignore error: don't interrupt the GC if reading the clock fails + (void)PyTime_PerfCounterRaw(&t1); + } + + if (PyDTrace_GC_START_ENABLED()) { + PyDTrace_GC_START(generation); + } + + /* update collection and allocation counters */ + if (generation+1 < NUM_GENERATIONS) { + gcstate->generations[generation+1].count += 1; + } + for (i = 0; i <= generation; i++) { + gcstate->generations[i].count = 0; + } + + /* merge younger generations with one we are currently collecting */ + for (i = 0; i < generation; i++) { + gc_list_merge(GEN_HEAD(gcstate, i), GEN_HEAD(gcstate, generation)); + } + + /* handy references */ + young = GEN_HEAD(gcstate, generation); + if (generation < NUM_GENERATIONS-1) { + old = GEN_HEAD(gcstate, generation+1); } - validate_consistent_old_space(to); + else { + old = young; + } + validate_list(old, collecting_clear_unreachable_clear); + + deduce_unreachable(young, &unreachable); + + untrack_tuples(young); /* Move reachable objects to next generation. */ + if (young != old) { + if (generation == NUM_GENERATIONS - 2) { + gcstate->long_lived_pending += gc_list_size(young); + } + gc_list_merge(young, old); + } + else { + /* We only un-track dicts in full collections, to avoid quadratic + dict build-up. See issue #14775. */ + untrack_dicts(young); + gcstate->long_lived_pending = 0; + gcstate->long_lived_total = gc_list_size(young); + } /* All objects in unreachable are trash, but objects reachable from * legacy finalizers (e.g. tp_del) can't safely be deleted. @@ -1535,8 +1387,10 @@ gc_collect_region(PyThreadState *tstate, * and we move those into the finalizers list too. */ move_legacy_finalizer_reachable(&finalizers); + validate_list(&finalizers, collecting_clear_unreachable_clear); validate_list(&unreachable, collecting_set_unreachable_clear); + /* Print debugging information. */ if (gcstate->debug & _PyGC_DEBUG_COLLECTABLE) { for (gc = GC_NEXT(&unreachable); gc != &unreachable; gc = GC_NEXT(gc)) { @@ -1545,101 +1399,91 @@ gc_collect_region(PyThreadState *tstate, } /* Clear weakrefs and invoke callbacks as necessary. */ - stats->collected += handle_weakrefs(&unreachable, to); - gc_list_validate_space(to, gcstate->visited_space); - validate_list(to, collecting_clear_unreachable_clear); + m += handle_weakrefs(&unreachable, old); + + validate_list(old, collecting_clear_unreachable_clear); validate_list(&unreachable, collecting_set_unreachable_clear); /* Call tp_finalize on objects which have one. */ finalize_garbage(tstate, &unreachable); + /* Handle any objects that may have resurrected after the call * to 'finalize_garbage' and continue the collection with the * objects that are still unreachable */ PyGC_Head final_unreachable; - gc_list_init(&final_unreachable); - handle_resurrected_objects(&unreachable, &final_unreachable, to); + handle_resurrected_objects(&unreachable, &final_unreachable, old); /* Call tp_clear on objects in the final_unreachable set. This will cause * the reference cycles to be broken. It may also cause some objects * in finalizers to be freed. */ - stats->collected += gc_list_size(&final_unreachable); - delete_garbage(tstate, gcstate, &final_unreachable, to); + m += gc_list_size(&final_unreachable); + delete_garbage(tstate, gcstate, &final_unreachable, old); /* Collect statistics on uncollectable objects found and print * debugging information. */ - Py_ssize_t n = 0; for (gc = GC_NEXT(&finalizers); gc != &finalizers; gc = GC_NEXT(gc)) { n++; - if (gcstate->debug & _PyGC_DEBUG_COLLECTABLE) + if (gcstate->debug & _PyGC_DEBUG_UNCOLLECTABLE) debug_cycle("uncollectable", FROM_GC(gc)); } - stats->uncollectable = n; + if (gcstate->debug & _PyGC_DEBUG_STATS) { + PyTime_t t2; + (void)PyTime_PerfCounterRaw(&t2); + double d = PyTime_AsSecondsDouble(t2 - t1); + PySys_WriteStderr( + "gc: done, %zd unreachable, %zd uncollectable, %.4fs elapsed\n", + n+m, n, d); + } + /* Append instances in the uncollectable set to a Python * reachable list of garbage. The programmer has to deal with * this if they insist on creating this type of structure. */ - handle_legacy_finalizers(tstate, gcstate, &finalizers, to); - gc_list_validate_space(to, gcstate->visited_space); - validate_list(to, collecting_clear_unreachable_clear); -} + handle_legacy_finalizers(tstate, gcstate, &finalizers, old); + validate_list(old, collecting_clear_unreachable_clear); -/* Invoke progress callbacks to notify clients that garbage collection - * is starting or stopping - */ -static void -do_gc_callback(GCState *gcstate, const char *phase, - int generation, struct gc_collection_stats *stats) -{ - assert(!PyErr_Occurred()); + /* Clear free list only during the collection of the highest + * generation */ + if (generation == NUM_GENERATIONS-1) { + _PyGC_ClearAllFreeLists(tstate->interp); + } - /* The local variable cannot be rebound, check it for sanity */ - assert(PyList_CheckExact(gcstate->callbacks)); - PyObject *info = NULL; - if (PyList_GET_SIZE(gcstate->callbacks) != 0) { - info = Py_BuildValue("{sisnsn}", - "generation", generation, - "collected", stats->collected, - "uncollectable", stats->uncollectable); - if (info == NULL) { - PyErr_FormatUnraisable("Exception ignored on invoking gc callbacks"); - return; + if (_PyErr_Occurred(tstate)) { + if (reason == _Py_GC_REASON_SHUTDOWN) { + _PyErr_Clear(tstate); + } + else { + PyErr_FormatUnraisable("Exception ignored in garbage collection"); } } - PyObject *phase_obj = PyUnicode_FromString(phase); - if (phase_obj == NULL) { - Py_XDECREF(info); - PyErr_FormatUnraisable("Exception ignored on invoking gc callbacks"); - return; + /* Update stats */ + struct gc_generation_stats *stats = &gcstate->generation_stats[generation]; + stats->collections++; + stats->collected += m; + stats->uncollectable += n; + + GC_STAT_ADD(generation, objects_collected, m); +#ifdef Py_STATS + if (_Py_stats) { + GC_STAT_ADD(generation, object_visits, + _Py_stats->object_stats.object_visits); + _Py_stats->object_stats.object_visits = 0; } +#endif - PyObject *stack[] = {phase_obj, info}; - for (Py_ssize_t i=0; i<PyList_GET_SIZE(gcstate->callbacks); i++) { - PyObject *r, *cb = PyList_GET_ITEM(gcstate->callbacks, i); - Py_INCREF(cb); /* make sure cb doesn't go away */ - r = PyObject_Vectorcall(cb, stack, 2, NULL); - if (r == NULL) { - PyErr_WriteUnraisable(cb); - } - else { - Py_DECREF(r); - } - Py_DECREF(cb); + if (PyDTrace_GC_DONE_ENABLED()) { + PyDTrace_GC_DONE(n + m); } - Py_DECREF(phase_obj); - Py_XDECREF(info); - assert(!PyErr_Occurred()); -} -static void -invoke_gc_callback(GCState *gcstate, const char *phase, - int generation, struct gc_collection_stats *stats) -{ - if (gcstate->callbacks == NULL) { - return; + if (reason != _Py_GC_REASON_SHUTDOWN) { + invoke_gc_callback(tstate, "stop", generation, m, n); } - do_gc_callback(gcstate, phase, generation, stats); + + assert(!_PyErr_Occurred(tstate)); + _Py_atomic_store_int(&gcstate->collecting, 0); + return n + m; } static int @@ -1701,25 +1545,20 @@ _PyGC_GetObjects(PyInterpreterState *interp, int generation) GCState *gcstate = &interp->gc; PyObject *result = PyList_New(0); - /* Generation: - * -1: Return all objects - * 0: All young objects - * 1: No objects - * 2: All old objects - */ - if (result == NULL || generation == 1) { - return result; + if (result == NULL) { + return NULL; } - if (generation <= 0) { - if (append_objects(result, &gcstate->young.head)) { - goto error; + + if (generation == -1) { + /* If generation is -1, get all objects from all generations */ + for (int i = 0; i < NUM_GENERATIONS; i++) { + if (append_objects(result, GEN_HEAD(gcstate, i))) { + goto error; + } } } - if (generation != 0) { - if (append_objects(result, &gcstate->old[0].head)) { - goto error; - } - if (append_objects(result, &gcstate->old[1].head)) { + else { + if (append_objects(result, GEN_HEAD(gcstate, generation))) { goto error; } } @@ -1734,20 +1573,10 @@ void _PyGC_Freeze(PyInterpreterState *interp) { GCState *gcstate = &interp->gc; - /* The permanent_generation has its old space bit set to zero */ - if (gcstate->visited_space) { - gc_list_set_space(&gcstate->young.head, 0); - } - gc_list_merge(&gcstate->young.head, &gcstate->permanent_generation.head); - gcstate->young.count = 0; - PyGC_Head*old0 = &gcstate->old[0].head; - PyGC_Head*old1 = &gcstate->old[1].head; - gc_list_merge(old0, &gcstate->permanent_generation.head); - gcstate->old[0].count = 0; - gc_list_set_space(old1, 0); - gc_list_merge(old1, &gcstate->permanent_generation.head); - gcstate->old[1].count = 0; - validate_old(gcstate); + for (int i = 0; i < NUM_GENERATIONS; ++i) { + gc_list_merge(GEN_HEAD(gcstate, i), &gcstate->permanent_generation.head); + gcstate->generations[i].count = 0; + } } void @@ -1755,8 +1584,7 @@ _PyGC_Unfreeze(PyInterpreterState *interp) { GCState *gcstate = &interp->gc; gc_list_merge(&gcstate->permanent_generation.head, - &gcstate->old[0].head); - validate_old(gcstate); + GEN_HEAD(gcstate, NUM_GENERATIONS-1)); } Py_ssize_t @@ -1792,85 +1620,29 @@ PyGC_IsEnabled(void) return gcstate->enabled; } -// Show stats for objects in each generations -static void -show_stats_each_generations(GCState *gcstate) -{ - char buf[100]; - size_t pos = 0; - - for (int i = 0; i < NUM_GENERATIONS && pos < sizeof(buf); i++) { - pos += PyOS_snprintf(buf+pos, sizeof(buf)-pos, - " %zd", - gc_list_size(GEN_HEAD(gcstate, i))); - } - PySys_FormatStderr( - "gc: objects in each generation:%s\n" - "gc: objects in permanent generation: %zd\n", - buf, gc_list_size(&gcstate->permanent_generation.head)); -} - +/* Public API to invoke gc.collect() from C */ Py_ssize_t -_PyGC_Collect(PyThreadState *tstate, int generation, _PyGC_Reason reason) +PyGC_Collect(void) { + PyThreadState *tstate = _PyThreadState_GET(); GCState *gcstate = &tstate->interp->gc; - int expected = 0; - if (!_Py_atomic_compare_exchange_int(&gcstate->collecting, &expected, 1)) { - // Don't start a garbage collection if one is already in progress. + if (!gcstate->enabled) { return 0; } - struct gc_collection_stats stats = { 0 }; - if (reason != _Py_GC_REASON_SHUTDOWN) { - invoke_gc_callback(gcstate, "start", generation, &stats); - } - if (gcstate->debug & _PyGC_DEBUG_STATS) { - PySys_WriteStderr("gc: collecting generation %d...\n", generation); - show_stats_each_generations(gcstate); - } - if (PyDTrace_GC_START_ENABLED()) { - PyDTrace_GC_START(generation); - } + Py_ssize_t n; PyObject *exc = _PyErr_GetRaisedException(tstate); - switch(generation) { - case 0: - gc_collect_young(tstate, &stats); - break; - case 1: - gc_collect_increment(tstate, &stats); - break; - case 2: - gc_collect_full(tstate, &stats); - break; - default: - Py_UNREACHABLE(); - } - if (PyDTrace_GC_DONE_ENABLED()) { - PyDTrace_GC_DONE(stats.uncollectable + stats.collected); - } - if (reason != _Py_GC_REASON_SHUTDOWN) { - invoke_gc_callback(gcstate, "stop", generation, &stats); - } + n = gc_collect_main(tstate, NUM_GENERATIONS - 1, _Py_GC_REASON_MANUAL); _PyErr_SetRaisedException(tstate, exc); - GC_STAT_ADD(generation, objects_collected, stats.collected); -#ifdef Py_STATS - if (_Py_stats) { - GC_STAT_ADD(generation, object_visits, - _Py_stats->object_stats.object_visits); - _Py_stats->object_stats.object_visits = 0; - } -#endif - validate_old(gcstate); - _Py_atomic_store_int(&gcstate->collecting, 0); - return stats.uncollectable + stats.collected; + + return n; } -/* Public API to invoke gc.collect() from C */ Py_ssize_t -PyGC_Collect(void) +_PyGC_Collect(PyThreadState *tstate, int generation, _PyGC_Reason reason) { - return _PyGC_Collect(_PyThreadState_GET(), 2, _Py_GC_REASON_MANUAL); + return gc_collect_main(tstate, generation, reason); } void @@ -1882,7 +1654,7 @@ _PyGC_CollectNoFail(PyThreadState *tstate) during interpreter shutdown (and then never finish it). See http://bugs.python.org/issue8713#msg195178 for an example. */ - _PyGC_Collect(_PyThreadState_GET(), 2, _Py_GC_REASON_SHUTDOWN); + gc_collect_main(tstate, NUM_GENERATIONS - 1, _Py_GC_REASON_SHUTDOWN); } void @@ -1956,9 +1728,9 @@ _PyGC_Fini(PyInterpreterState *interp) * This bug was originally fixed when reported as gh-90228. The bug was * re-introduced in gh-94673. */ - finalize_unlink_gc_head(&gcstate->young.head); - finalize_unlink_gc_head(&gcstate->old[0].head); - finalize_unlink_gc_head(&gcstate->old[1].head); + for (int i = 0; i < NUM_GENERATIONS; i++) { + finalize_unlink_gc_head(&gcstate->generations[i].head); + } finalize_unlink_gc_head(&gcstate->permanent_generation.head); } @@ -2044,11 +1816,10 @@ _PyObject_GC_Link(PyObject *op) GCState *gcstate = &tstate->interp->gc; gc->_gc_next = 0; gc->_gc_prev = 0; - gcstate->young.count++; /* number of allocated GC objects */ - gcstate->heap_size++; - if (gcstate->young.count > gcstate->young.threshold && + gcstate->generations[0].count++; /* number of allocated GC objects */ + if (gcstate->generations[0].count > gcstate->generations[0].threshold && gcstate->enabled && - gcstate->young.threshold && + gcstate->generations[0].threshold && !_Py_atomic_load_int_relaxed(&gcstate->collecting) && !_PyErr_Occurred(tstate)) { @@ -2059,9 +1830,11 @@ _PyObject_GC_Link(PyObject *op) void _Py_RunGC(PyThreadState *tstate) { - if (tstate->interp->gc.enabled) { - _PyGC_Collect(tstate, 1, _Py_GC_REASON_HEAP); + GCState *gcstate = get_gc_state(); + if (!gcstate->enabled) { + return; } + gc_collect_main(tstate, GENERATION_AUTO, _Py_GC_REASON_HEAP); } static PyObject * @@ -2169,10 +1942,9 @@ PyObject_GC_Del(void *op) #endif } GCState *gcstate = get_gc_state(); - if (gcstate->young.count > 0) { - gcstate->young.count--; + if (gcstate->generations[0].count > 0) { + gcstate->generations[0].count--; } - gcstate->heap_size--; PyObject_Free(((char *)op)-presize); } @@ -2194,36 +1966,26 @@ PyObject_GC_IsFinalized(PyObject *obj) return 0; } -static int -visit_generation(gcvisitobjects_t callback, void *arg, struct gc_generation *gen) -{ - PyGC_Head *gc_list, *gc; - gc_list = &gen->head; - for (gc = GC_NEXT(gc_list); gc != gc_list; gc = GC_NEXT(gc)) { - PyObject *op = FROM_GC(gc); - Py_INCREF(op); - int res = callback(op, arg); - Py_DECREF(op); - if (!res) { - return -1; - } - } - return 0; -} - void PyUnstable_GC_VisitObjects(gcvisitobjects_t callback, void *arg) { + size_t i; GCState *gcstate = get_gc_state(); int origenstate = gcstate->enabled; gcstate->enabled = 0; - if (visit_generation(callback, arg, &gcstate->young)) { - goto done; - } - if (visit_generation(callback, arg, &gcstate->old[0])) { - goto done; + for (i = 0; i < NUM_GENERATIONS; i++) { + PyGC_Head *gc_list, *gc; + gc_list = GEN_HEAD(gcstate, i); + for (gc = GC_NEXT(gc_list); gc != gc_list; gc = GC_NEXT(gc)) { + PyObject *op = FROM_GC(gc); + Py_INCREF(op); + int res = callback(op, arg); + Py_DECREF(op); + if (!res) { + goto done; + } + } } - visit_generation(callback, arg, &gcstate->old[1]); done: gcstate->enabled = origenstate; } |