author | Mark Shannon <mark@hotpy.org> | 2024-12-02 10:12:17 (GMT)
---|---|---
committer | GitHub <noreply@github.com> | 2024-12-02 10:12:17 (GMT)
commit | a8dd821d5b25b42c0adeae6642e9b3f9228580f9 (patch) |
tree | c6f62d9d50569a4dd7aae23db945fc21bfa82e4b /Python |
parent | 2a373da7700cf928e0a5ce3998d19351a3565df4 (diff) |
GH-126491: GC: Mark objects reachable from roots before doing cycle collection (GH-127110)
* Mark almost all reachable objects before doing collection phase
* Add stats for objects marked
* Visit new frames before each increment
* Update docs
* Clearer calculation of work to do.
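The bullets above center on a new marking pass that runs before the incremental cycle collection: objects transitively reachable from known roots (the interpreter's dicts, builtins, static types, and thread stacks) get their visited bit set up front, so the expensive unreachable-object scan only has to look at whatever was never marked. Below is a minimal, self-contained sketch of that idea; the `obj` struct, `mark_reachable()`, and the fixed-size worklist are invented for illustration, whereas the real implementation in the `Python/gc.c` diff below operates on `PyGC_Head` lists via `tp_traverse`.

```c
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

typedef struct obj {
    struct obj **refs;   /* objects this object references */
    size_t n_refs;
    bool visited;        /* "already proven reachable" bit */
} obj;

/* Mark everything transitively reachable from the roots with an explicit
 * worklist; whatever is still unmarked afterwards is all the cycle
 * detector has left to examine. */
size_t
mark_reachable(obj **roots, size_t n_roots, obj **worklist, size_t cap)
{
    size_t top = 0, marked = 0;
    for (size_t i = 0; i < n_roots; i++) {
        if (roots[i] != NULL && !roots[i]->visited && top < cap) {
            roots[i]->visited = true;
            worklist[top++] = roots[i];
        }
    }
    while (top > 0) {
        obj *o = worklist[--top];
        marked++;
        for (size_t i = 0; i < o->n_refs; i++) {
            obj *child = o->refs[i];
            if (child != NULL && !child->visited && top < cap) {
                child->visited = true;
                worklist[top++] = child;
            }
        }
    }
    return marked;   /* roughly what the new objects_transitively_reachable stat counts */
}

int
main(void)
{
    obj leaf = { NULL, 0, false };
    obj *children[] = { &leaf };
    obj root = { children, 1, false };
    obj garbage = { NULL, 0, false };            /* unreachable: stays unmarked */
    obj *roots[] = { &root };
    obj *worklist[8];
    printf("marked %zu objects\n", mark_reachable(roots, 1, worklist, 8));  /* 2 */
    printf("garbage still unmarked: %d\n", !garbage.visited);               /* 1 */
    return 0;
}
```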
Diffstat (limited to 'Python')
-rw-r--r-- | Python/ceval.c | 1
-rw-r--r-- | Python/gc.c | 355
-rw-r--r-- | Python/specialize.c | 2
3 files changed, 271 insertions, 87 deletions
diff --git a/Python/ceval.c b/Python/ceval.c index eba0f23..f9514a6 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -818,6 +818,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int entry_frame.instr_ptr = (_Py_CODEUNIT *)_Py_INTERPRETER_TRAMPOLINE_INSTRUCTIONS + 1; entry_frame.stackpointer = entry_frame.localsplus; entry_frame.owner = FRAME_OWNED_BY_CSTACK; + entry_frame.visited = 0; entry_frame.return_offset = 0; /* Push frame */ entry_frame.previous = tstate->current_frame; diff --git a/Python/gc.c b/Python/gc.c index 63adecf..5b9588c 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -106,7 +106,7 @@ gc_old_space(PyGC_Head *g) } static inline int -flip_old_space(int space) +other_space(int space) { assert(space == 0 || space == 1); return space ^ _PyGC_NEXT_MASK_OLD_SPACE_1; @@ -430,24 +430,32 @@ validate_list(PyGC_Head *head, enum flagstates flags) #endif #ifdef GC_EXTRA_DEBUG + + static void -validate_old(GCState *gcstate) +gc_list_validate_space(PyGC_Head *head, int space) { + PyGC_Head *gc = GC_NEXT(head); + while (gc != head) { + assert(gc_old_space(gc) == space); + gc = GC_NEXT(gc); + } +} + +static void +validate_spaces(GCState *gcstate) { + int visited = gcstate->visited_space; + int not_visited = other_space(visited); + gc_list_validate_space(&gcstate->young.head, not_visited); for (int space = 0; space < 2; space++) { - PyGC_Head *head = &gcstate->old[space].head; - PyGC_Head *gc = GC_NEXT(head); - while (gc != head) { - PyGC_Head *next = GC_NEXT(gc); - assert(gc_old_space(gc) == space); - gc = next; - } + gc_list_validate_space(&gcstate->old[space].head, space); } + gc_list_validate_space(&gcstate->permanent_generation.head, visited); } static void validate_consistent_old_space(PyGC_Head *head) { - PyGC_Head *prev = head; PyGC_Head *gc = GC_NEXT(head); if (gc == head) { return; @@ -457,23 +465,13 @@ validate_consistent_old_space(PyGC_Head *head) PyGC_Head *truenext = GC_NEXT(gc); assert(truenext != NULL); assert(gc_old_space(gc) == old_space); - prev = gc; gc = truenext; } - assert(prev == GC_PREV(head)); } -static void -gc_list_validate_space(PyGC_Head *head, int space) { - PyGC_Head *gc = GC_NEXT(head); - while (gc != head) { - assert(gc_old_space(gc) == space); - gc = GC_NEXT(gc); - } -} #else -#define validate_old(g) do{}while(0) +#define validate_spaces(g) do{}while(0) #define validate_consistent_old_space(l) do{}while(0) #define gc_list_validate_space(l, s) do{}while(0) #endif @@ -494,7 +492,7 @@ update_refs(PyGC_Head *containers) next = GC_NEXT(gc); PyObject *op = FROM_GC(gc); if (_Py_IsImmortal(op)) { - gc_list_move(gc, &get_gc_state()->permanent_generation.head); + _PyObject_GC_UNTRACK(op); gc = next; continue; } @@ -733,13 +731,25 @@ move_unreachable(PyGC_Head *young, PyGC_Head *unreachable) unreachable->_gc_next &= _PyGC_PREV_MASK; } +/* In theory, all tuples should be younger than the +* objects they refer to, as tuples are immortal. +* Therefore, untracking tuples in oldest-first order in the +* young generation before promoting them should have tracked +* all the tuples that can be untracked. +* +* Unfortunately, the C API allows tuples to be created +* and then filled in. So this won't untrack all tuples +* that can be untracked. It should untrack most of them +* and is much faster than a more complex approach that +* would untrack all relevant tuples. 
+*/ static void untrack_tuples(PyGC_Head *head) { - PyGC_Head *next, *gc = GC_NEXT(head); + PyGC_Head *gc = GC_NEXT(head); while (gc != head) { PyObject *op = FROM_GC(gc); - next = GC_NEXT(gc); + PyGC_Head *next = GC_NEXT(gc); if (PyTuple_CheckExact(op)) { _PyTuple_MaybeUntrack(op); } @@ -1293,8 +1303,10 @@ gc_collect_young(PyThreadState *tstate, struct gc_collection_stats *stats) { GCState *gcstate = &tstate->interp->gc; + validate_spaces(gcstate); PyGC_Head *young = &gcstate->young.head; PyGC_Head *visited = &gcstate->old[gcstate->visited_space].head; + untrack_tuples(young); GC_STAT_ADD(0, collections, 1); #ifdef Py_STATS { @@ -1308,39 +1320,21 @@ gc_collect_young(PyThreadState *tstate, PyGC_Head survivors; gc_list_init(&survivors); + gc_list_set_space(young, gcstate->visited_space); gc_collect_region(tstate, young, &survivors, stats); - Py_ssize_t survivor_count = 0; - if (gcstate->visited_space) { - /* objects in visited space have bit set, so we set it here */ - survivor_count = gc_list_set_space(&survivors, 1); - } - else { - PyGC_Head *gc; - for (gc = GC_NEXT(&survivors); gc != &survivors; gc = GC_NEXT(gc)) { -#ifdef GC_DEBUG - assert(gc_old_space(gc) == 0); -#endif - survivor_count++; - } - } - (void)survivor_count; // Silence compiler warning gc_list_merge(&survivors, visited); - validate_old(gcstate); + validate_spaces(gcstate); gcstate->young.count = 0; gcstate->old[gcstate->visited_space].count++; - Py_ssize_t scale_factor = gcstate->old[0].threshold; - if (scale_factor < 1) { - scale_factor = 1; - } - gcstate->work_to_do += gcstate->heap_size / SCAN_RATE_DIVISOR / scale_factor; add_stats(gcstate, 0, stats); + validate_spaces(gcstate); } #ifndef NDEBUG static inline int IS_IN_VISITED(PyGC_Head *gc, int visited_space) { - assert(visited_space == 0 || flip_old_space(visited_space) == 0); + assert(visited_space == 0 || other_space(visited_space) == 0); return gc_old_space(gc) == visited_space; } #endif @@ -1348,7 +1342,7 @@ IS_IN_VISITED(PyGC_Head *gc, int visited_space) struct container_and_flag { PyGC_Head *container; int visited_space; - uintptr_t size; + intptr_t size; }; /* A traversal callback for adding to container) */ @@ -1371,7 +1365,7 @@ visit_add_to_container(PyObject *op, void *arg) return 0; } -static uintptr_t +static intptr_t expand_region_transitively_reachable(PyGC_Head *container, PyGC_Head *gc, GCState *gcstate) { struct container_and_flag arg = { @@ -1385,6 +1379,7 @@ expand_region_transitively_reachable(PyGC_Head *container, PyGC_Head *gc, GCStat * have been marked as visited */ assert(IS_IN_VISITED(gc, gcstate->visited_space)); PyObject *op = FROM_GC(gc); + assert(_PyObject_GC_IS_TRACKED(op)); if (_Py_IsImmortal(op)) { PyGC_Head *next = GC_NEXT(gc); gc_list_move(gc, &get_gc_state()->permanent_generation.head); @@ -1402,22 +1397,191 @@ expand_region_transitively_reachable(PyGC_Head *container, PyGC_Head *gc, GCStat /* Do bookkeeping for a completed GC cycle */ static void -completed_cycle(GCState *gcstate) -{ -#ifdef Py_DEBUG - PyGC_Head *not_visited = &gcstate->old[gcstate->visited_space^1].head; - assert(gc_list_is_empty(not_visited)); -#endif - gcstate->visited_space = flip_old_space(gcstate->visited_space); - /* Make sure all young objects have old space bit set correctly */ - PyGC_Head *young = &gcstate->young.head; - PyGC_Head *gc = GC_NEXT(young); - while (gc != young) { - PyGC_Head *next = GC_NEXT(gc); - gc_set_old_space(gc, gcstate->visited_space); - gc = next; +completed_scavenge(GCState *gcstate) +{ + /* We must observe two invariants: + * 1. 
Members of the permanent generation must be marked visited. + * 2. We cannot touch members of the permanent generation. */ + int visited; + if (gc_list_is_empty(&gcstate->permanent_generation.head)) { + /* Permanent generation is empty so we can flip spaces bit */ + int not_visited = gcstate->visited_space; + visited = other_space(not_visited); + gcstate->visited_space = visited; + /* Make sure all objects have visited bit set correctly */ + gc_list_set_space(&gcstate->young.head, not_visited); } + else { + /* We must move the objects from visited to pending space. */ + visited = gcstate->visited_space; + int not_visited = other_space(visited); + assert(gc_list_is_empty(&gcstate->old[not_visited].head)); + gc_list_merge(&gcstate->old[visited].head, &gcstate->old[not_visited].head); + gc_list_set_space(&gcstate->old[not_visited].head, not_visited); + } + assert(gc_list_is_empty(&gcstate->old[visited].head)); gcstate->work_to_do = 0; + gcstate->phase = GC_PHASE_MARK; +} + +static intptr_t +move_to_reachable(PyObject *op, PyGC_Head *reachable, int visited_space) +{ + if (op != NULL && !_Py_IsImmortal(op) && _PyObject_IS_GC(op)) { + PyGC_Head *gc = AS_GC(op); + if (_PyObject_GC_IS_TRACKED(op) && + gc_old_space(gc) != visited_space) { + gc_flip_old_space(gc); + gc_list_move(gc, reachable); + return 1; + } + } + return 0; +} + +static intptr_t +mark_all_reachable(PyGC_Head *reachable, PyGC_Head *visited, int visited_space) +{ + // Transitively traverse all objects from reachable, until empty + struct container_and_flag arg = { + .container = reachable, + .visited_space = visited_space, + .size = 0 + }; + while (!gc_list_is_empty(reachable)) { + PyGC_Head *gc = _PyGCHead_NEXT(reachable); + assert(gc_old_space(gc) == visited_space); + gc_list_move(gc, visited); + PyObject *op = FROM_GC(gc); + traverseproc traverse = Py_TYPE(op)->tp_traverse; + (void) traverse(op, + visit_add_to_container, + &arg); + } + gc_list_validate_space(visited, visited_space); + return arg.size; +} + +static intptr_t +mark_stacks(PyInterpreterState *interp, PyGC_Head *visited, int visited_space, bool start) +{ + PyGC_Head reachable; + gc_list_init(&reachable); + Py_ssize_t objects_marked = 0; + // Move all objects on stacks to reachable + _PyRuntimeState *runtime = &_PyRuntime; + HEAD_LOCK(runtime); + PyThreadState* ts = PyInterpreterState_ThreadHead(interp); + HEAD_UNLOCK(runtime); + while (ts) { + _PyInterpreterFrame *frame = ts->current_frame; + while (frame) { + if (frame->owner == FRAME_OWNED_BY_CSTACK) { + frame = frame->previous; + continue; + } + _PyStackRef *locals = frame->localsplus; + _PyStackRef *sp = frame->stackpointer; + objects_marked += move_to_reachable(frame->f_locals, &reachable, visited_space); + PyObject *func = PyStackRef_AsPyObjectBorrow(frame->f_funcobj); + objects_marked += move_to_reachable(func, &reachable, visited_space); + while (sp > locals) { + sp--; + if (PyStackRef_IsNull(*sp)) { + continue; + } + PyObject *op = PyStackRef_AsPyObjectBorrow(*sp); + if (!_Py_IsImmortal(op) && _PyObject_IS_GC(op)) { + PyGC_Head *gc = AS_GC(op); + if (_PyObject_GC_IS_TRACKED(op) && + gc_old_space(gc) != visited_space) { + gc_flip_old_space(gc); + objects_marked++; + gc_list_move(gc, &reachable); + } + } + } + if (!start && frame->visited) { + // If this frame has already been visited, then the lower frames + // will have already been visited and will not have changed + break; + } + frame->visited = 1; + frame = frame->previous; + } + HEAD_LOCK(runtime); + ts = PyThreadState_Next(ts); + HEAD_UNLOCK(runtime); + 
} + objects_marked += mark_all_reachable(&reachable, visited, visited_space); + assert(gc_list_is_empty(&reachable)); + return objects_marked; +} + +static intptr_t +mark_global_roots(PyInterpreterState *interp, PyGC_Head *visited, int visited_space) +{ + PyGC_Head reachable; + gc_list_init(&reachable); + Py_ssize_t objects_marked = 0; + objects_marked += move_to_reachable(interp->sysdict, &reachable, visited_space); + objects_marked += move_to_reachable(interp->builtins, &reachable, visited_space); + objects_marked += move_to_reachable(interp->dict, &reachable, visited_space); + struct types_state *types = &interp->types; + for (int i = 0; i < _Py_MAX_MANAGED_STATIC_BUILTIN_TYPES; i++) { + objects_marked += move_to_reachable(types->builtins.initialized[i].tp_dict, &reachable, visited_space); + objects_marked += move_to_reachable(types->builtins.initialized[i].tp_subclasses, &reachable, visited_space); + } + for (int i = 0; i < _Py_MAX_MANAGED_STATIC_EXT_TYPES; i++) { + objects_marked += move_to_reachable(types->for_extensions.initialized[i].tp_dict, &reachable, visited_space); + objects_marked += move_to_reachable(types->for_extensions.initialized[i].tp_subclasses, &reachable, visited_space); + } + objects_marked += mark_all_reachable(&reachable, visited, visited_space); + assert(gc_list_is_empty(&reachable)); + return objects_marked; +} + +static intptr_t +mark_at_start(PyThreadState *tstate) +{ + // TO DO -- Make this incremental + GCState *gcstate = &tstate->interp->gc; + PyGC_Head *visited = &gcstate->old[gcstate->visited_space].head; + Py_ssize_t objects_marked = mark_global_roots(tstate->interp, visited, gcstate->visited_space); + objects_marked += mark_stacks(tstate->interp, visited, gcstate->visited_space, true); + gcstate->work_to_do -= objects_marked; + gcstate->phase = GC_PHASE_COLLECT; + validate_spaces(gcstate); + return objects_marked; +} + +static intptr_t +assess_work_to_do(GCState *gcstate) +{ + /* The amount of work we want to do depends on three things. + * 1. The number of new objects created + * 2. The growth in heap size since the last collection + * 3. The heap size (up to the number of new objects, to avoid quadratic effects) + * + * For a steady state heap, the amount of work to do is three times the number + * of new objects added to the heap. This ensures that we stay ahead in the + * worst case of all new objects being garbage. + * + * This could be improved by tracking survival rates, but it is still a + * large improvement on the non-marking approach. 
+ */ + intptr_t scale_factor = gcstate->old[0].threshold; + if (scale_factor < 2) { + scale_factor = 2; + } + intptr_t new_objects = gcstate->young.count; + intptr_t max_heap_fraction = new_objects*3/2; + intptr_t heap_fraction = gcstate->heap_size / SCAN_RATE_DIVISOR / scale_factor; + if (heap_fraction > max_heap_fraction) { + heap_fraction = max_heap_fraction; + } + gcstate->young.count = 0; + return new_objects + heap_fraction; } static void @@ -1425,18 +1589,30 @@ gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats) { GC_STAT_ADD(1, collections, 1); GCState *gcstate = &tstate->interp->gc; + gcstate->work_to_do += assess_work_to_do(gcstate); + untrack_tuples(&gcstate->young.head); + if (gcstate->phase == GC_PHASE_MARK) { + Py_ssize_t objects_marked = mark_at_start(tstate); + GC_STAT_ADD(1, objects_transitively_reachable, objects_marked); + gcstate->work_to_do -= objects_marked; + validate_spaces(gcstate); + return; + } PyGC_Head *not_visited = &gcstate->old[gcstate->visited_space^1].head; PyGC_Head *visited = &gcstate->old[gcstate->visited_space].head; PyGC_Head increment; gc_list_init(&increment); - Py_ssize_t scale_factor = gcstate->old[0].threshold; - if (scale_factor < 1) { - scale_factor = 1; - } + int scale_factor = gcstate->old[0].threshold; + if (scale_factor < 2) { + scale_factor = 2; + } + intptr_t objects_marked = mark_stacks(tstate->interp, visited, gcstate->visited_space, false); + GC_STAT_ADD(1, objects_transitively_reachable, objects_marked); + gcstate->work_to_do -= objects_marked; + gc_list_set_space(&gcstate->young.head, gcstate->visited_space); gc_list_merge(&gcstate->young.head, &increment); - gcstate->young.count = 0; gc_list_validate_space(&increment, gcstate->visited_space); - Py_ssize_t increment_size = 0; + Py_ssize_t increment_size = gc_list_size(&increment); while (increment_size < gcstate->work_to_do) { if (gc_list_is_empty(not_visited)) { break; @@ -1444,54 +1620,56 @@ gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats) PyGC_Head *gc = _PyGCHead_NEXT(not_visited); gc_list_move(gc, &increment); increment_size++; + assert(!_Py_IsImmortal(FROM_GC(gc))); gc_set_old_space(gc, gcstate->visited_space); increment_size += expand_region_transitively_reachable(&increment, gc, gcstate); } + GC_STAT_ADD(1, objects_not_transitively_reachable, increment_size); validate_list(&increment, collecting_clear_unreachable_clear); gc_list_validate_space(&increment, gcstate->visited_space); PyGC_Head survivors; gc_list_init(&survivors); gc_collect_region(tstate, &increment, &survivors, stats); - gc_list_validate_space(&survivors, gcstate->visited_space); gc_list_merge(&survivors, visited); assert(gc_list_is_empty(&increment)); gcstate->work_to_do += gcstate->heap_size / SCAN_RATE_DIVISOR / scale_factor; gcstate->work_to_do -= increment_size; - validate_old(gcstate); add_stats(gcstate, 1, stats); if (gc_list_is_empty(not_visited)) { - completed_cycle(gcstate); + completed_scavenge(gcstate); } + validate_spaces(gcstate); } - static void gc_collect_full(PyThreadState *tstate, struct gc_collection_stats *stats) { GC_STAT_ADD(2, collections, 1); GCState *gcstate = &tstate->interp->gc; - validate_old(gcstate); + validate_spaces(gcstate); PyGC_Head *young = &gcstate->young.head; PyGC_Head *pending = &gcstate->old[gcstate->visited_space^1].head; PyGC_Head *visited = &gcstate->old[gcstate->visited_space].head; + untrack_tuples(young); /* merge all generations into visited */ - gc_list_validate_space(young, gcstate->visited_space); - 
gc_list_set_space(pending, gcstate->visited_space); gc_list_merge(young, pending); + gc_list_validate_space(pending, 1-gcstate->visited_space); + gc_list_set_space(pending, gcstate->visited_space); gcstate->young.count = 0; gc_list_merge(pending, visited); + validate_spaces(gcstate); gc_collect_region(tstate, visited, visited, stats); + validate_spaces(gcstate); gcstate->young.count = 0; gcstate->old[0].count = 0; gcstate->old[1].count = 0; - - gcstate->work_to_do = - gcstate->young.threshold * 2; + completed_scavenge(gcstate); _PyGC_ClearAllFreeLists(tstate->interp); - validate_old(gcstate); + validate_spaces(gcstate); add_stats(gcstate, 2, stats); } @@ -1733,20 +1911,23 @@ void _PyGC_Freeze(PyInterpreterState *interp) { GCState *gcstate = &interp->gc; - /* The permanent_generation has its old space bit set to zero */ - if (gcstate->visited_space) { - gc_list_set_space(&gcstate->young.head, 0); - } + /* The permanent_generation must be visited */ + gc_list_set_space(&gcstate->young.head, gcstate->visited_space); gc_list_merge(&gcstate->young.head, &gcstate->permanent_generation.head); gcstate->young.count = 0; PyGC_Head*old0 = &gcstate->old[0].head; PyGC_Head*old1 = &gcstate->old[1].head; + if (gcstate->visited_space) { + gc_list_set_space(old0, 1); + } + else { + gc_list_set_space(old1, 0); + } gc_list_merge(old0, &gcstate->permanent_generation.head); gcstate->old[0].count = 0; - gc_list_set_space(old1, 0); gc_list_merge(old1, &gcstate->permanent_generation.head); gcstate->old[1].count = 0; - validate_old(gcstate); + validate_spaces(gcstate); } void @@ -1754,8 +1935,8 @@ _PyGC_Unfreeze(PyInterpreterState *interp) { GCState *gcstate = &interp->gc; gc_list_merge(&gcstate->permanent_generation.head, - &gcstate->old[0].head); - validate_old(gcstate); + &gcstate->old[gcstate->visited_space].head); + validate_spaces(gcstate); } Py_ssize_t @@ -1860,7 +2041,7 @@ _PyGC_Collect(PyThreadState *tstate, int generation, _PyGC_Reason reason) _Py_stats->object_stats.object_visits = 0; } #endif - validate_old(gcstate); + validate_spaces(gcstate); _Py_atomic_store_int(&gcstate->collecting, 0); return stats.uncollectable + stats.collected; } diff --git a/Python/specialize.c b/Python/specialize.c index d03310d..504eef4 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -231,6 +231,8 @@ print_gc_stats(FILE *out, GCStats *stats) fprintf(out, "GC[%d] collections: %" PRIu64 "\n", i, stats[i].collections); fprintf(out, "GC[%d] object visits: %" PRIu64 "\n", i, stats[i].object_visits); fprintf(out, "GC[%d] objects collected: %" PRIu64 "\n", i, stats[i].objects_collected); + fprintf(out, "GC[%d] objects reachable from roots: %" PRIu64 "\n", i, stats[i].objects_transitively_reachable); + fprintf(out, "GC[%d] objects not reachable from roots: %" PRIu64 "\n", i, stats[i].objects_not_transitively_reachable); } } |
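Two parts of the gc.c changes above are easier to follow with the logic pulled out on their own. First, the "Visit new frames before each increment" bullet: `mark_stacks()` uses the per-frame `visited` flag added in ceval.c, scanning frames from newest to oldest and stopping at the first frame a previous increment already covered. The sketch below is hypothetical (`frame` and `scan_stack()` are invented names; the real code walks `_PyInterpreterFrame` chains and marks each frame's locals and value stack):

```c
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

typedef struct frame {
    struct frame *previous;   /* next older frame on this thread's stack */
    bool visited;             /* set once a marking increment has scanned it */
} frame;

/* Walk the frame stack from the newest frame down.  On the initial full
 * mark (start == true) every frame is scanned; on later increments the
 * walk stops once it reaches a frame an earlier increment already saw. */
size_t
scan_stack(frame *top, bool start)
{
    size_t scanned = 0;
    for (frame *f = top; f != NULL; f = f->previous) {
        /* ... here the real code marks f's locals and value stack ... */
        scanned++;
        if (!start && f->visited) {
            break;   /* older frames were covered by an earlier increment */
        }
        f->visited = true;
    }
    return scanned;
}

int
main(void)
{
    frame oldest = { NULL, false };
    frame middle = { &oldest, false };
    frame newest = { &middle, false };
    printf("full mark scans %zu frames\n", scan_stack(&newest, true));    /* 3 */
    frame newer = { &newest, false };   /* a frame pushed after the full mark */
    printf("increment scans %zu frames\n", scan_stack(&newer, false));    /* 2 */
    return 0;
}
```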
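Second, the new `assess_work_to_do()` helper sizes each increment as the number of new objects plus a heap-size fraction, with that fraction capped at 1.5× the new-object count to avoid quadratic behaviour on large heaps. The arithmetic below restates the expression visible in the diff as a standalone function; `work_budget()` is a hypothetical name, and the `SCAN_RATE_DIVISOR` value and example numbers are assumptions made purely for illustration.

```c
#include <stdint.h>
#include <stdio.h>

#define SCAN_RATE_DIVISOR 5   /* assumed value, for illustration only */

intptr_t
work_budget(intptr_t new_objects, intptr_t heap_size, intptr_t threshold)
{
    intptr_t scale_factor = threshold < 2 ? 2 : threshold;     /* floor of 2, as in the diff */
    intptr_t max_heap_fraction = new_objects * 3 / 2;          /* cap: 1.5x new objects */
    intptr_t heap_fraction = heap_size / SCAN_RATE_DIVISOR / scale_factor;
    if (heap_fraction > max_heap_fraction) {
        heap_fraction = max_heap_fraction;
    }
    return new_objects + heap_fraction;
}

int
main(void)
{
    /* e.g. 700 new objects on a heap of 1,000,000 with threshold 10:
     * heap_fraction = 1000000/5/10 = 20000, capped to 700*3/2 = 1050,
     * so the budget is 700 + 1050 = 1750. */
    printf("%ld\n", (long)work_budget(700, 1000000, 10));
    return 0;
}
```

For a steady-state heap this keeps the per-increment budget proportional to allocation, which is what the commit message means by staying ahead of the worst case where every new object is garbage.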