From 023b7d2141467017abc27de864f3f44677768cb3 Mon Sep 17 00:00:00 2001
From: Mark Shannon
Date: Fri, 6 Dec 2024 10:46:59 +0000
Subject: GH-126491: Lower heap size limit with faster marking (GH-127519)

* Faster marking of reachable objects

* Changes calculation of work to do and work done.

* Merges transitive closure calculations
---
 InternalDocs/garbage_collector.md |  50 ++++++-
 Lib/test/test_gc.py               |  14 +-
 Objects/dictobject.c              |   4 +-
 Objects/genobject.c               |  69 +--------
 Objects/typeobject.c              |  13 ++
 Python/gc.c                       | 301 ++++++++++++++++++--------------------
 6 files changed, 208 insertions(+), 243 deletions(-)

diff --git a/InternalDocs/garbage_collector.md b/InternalDocs/garbage_collector.md
index 08db080..4761f78 100644
--- a/InternalDocs/garbage_collector.md
+++ b/InternalDocs/garbage_collector.md
@@ -199,22 +199,22 @@ unreachable:
 ```pycon
 >>> import gc
->>> 
+>>>
 >>> class Link:
 ...    def __init__(self, next_link=None):
 ...        self.next_link = next_link
-... 
+...
 >>> link_3 = Link()
 >>> link_2 = Link(link_3)
 >>> link_1 = Link(link_2)
 >>> link_3.next_link = link_1
 >>> A = link_1
 >>> del link_1, link_2, link_3
->>> 
+>>>
 >>> link_4 = Link()
 >>> link_4.next_link = link_4
 >>> del link_4
->>> 
+>>>
 >>> # Collect the unreachable Link object (and its .__dict__ dict).
 >>> gc.collect()
 2
@@ -459,11 +459,11 @@ specifically in a generation by calling `gc.collect(generation=NUM)`.
 >>> # Create a reference cycle.
 >>> x = MyObj()
 >>> x.self = x
->>> 
+>>>
 >>> # Initially the object is in the young generation.
 >>> gc.get_objects(generation=0)
 [..., <__main__.MyObj object at 0x7fbcc12a3400>, ...]
->>> 
+>>>
 >>> # After a collection of the youngest generation the object
 >>> # moves to the old generation.
 >>> gc.collect(generation=0)
@@ -515,6 +515,44 @@ increment. All objects directly referred to from those stack frames are
 added to the working set.
 Then the above algorithm is repeated, starting from step 2.
 
+Determining how much work to do
+-------------------------------
+
+We need to do a certain amount of work to ensure that garbage is collected,
+but doing too much work slows down execution.
+
+To work out how much work we need to do, consider a heap with `L` live objects
+and `G0` garbage objects at the start of a full scavenge and `G1` garbage objects
+at the end of the scavenge. We don't want the amount of garbage to grow, `G1 ≤ G0`,
+and we don't want too much garbage, say at most 1/3 of the heap, which gives
+`G0 ≤ L/2`.
+For each full scavenge we must visit all objects, `T == L + G0 + G1`, during which
+`G1` garbage objects are created.
+
+The number of new objects created, `N`, must be at least the new garbage created,
+`N ≥ G1`, assuming that the number of live objects remains roughly constant.
+If we set `T == 4*N` we get `T ≥ 4*G1`, and since `T == L + G0 + G1` it follows
+that `L + G0 ≥ 3*G1`.
+For a steady state heap (`G0 == G1`) we get `L ≥ 2*G0`, the desired garbage ratio.
+
+In other words, to keep the garbage fraction at 1/3 or less we need to visit
+4 times as many objects as are newly created.
+
+We can do better than this though. Not all new objects will be garbage.
+Consider the heap at the end of the scavenge with `L1` live objects and `G1`
+garbage. Also, note that `T == M + I` where `M` is the number of objects marked
+as reachable and `I` is the number of objects visited in increments.
+Everything in `M` is live, so `I ≥ G0`, and in practice `I` is closer to `G0 + G1`.
+
+If we choose the amount of work done such that `2*M + I == 6N` then we can do
+less work in most cases, but are still guaranteed to keep up.
+Since `I ≳ G0 + G1` (not strictly true, but close enough),
+`T == M + I == (6N + I)/2`, and `(6N + I)/2 ≳ 4*G0` in the steady state
+(`G0 == G1`), so we can keep up.
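+
+As a back-of-the-envelope check of this budget (an illustrative sketch, not
+collector code; `total_visits` is an invented name for the example):
+
+```python
+def total_visits(new_objects, garbage):
+    # Steady state: G0 == G1 == garbage, and increments visit
+    # roughly I == G0 + G1 objects.
+    i = 2 * garbage
+    # Solve the work budget 2*M + I == 6N for M:
+    m = (6 * new_objects - i) // 2
+    return m + i   # T == M + I
+
+# Worst case: every new object becomes garbage (N == G0 == G1).
+assert total_visits(new_objects=100, garbage=100) == 400   # T == 4*G0
+```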
+
+The reason that this improves performance is that `M` is usually much larger
+than `I`. If `M == 10*I`, then `T ≈ 3*N`.
+
+Finally, instead of using a fixed multiple of the number of new objects, we
+gradually increase the multiple as the heap grows. This avoids wasting work
+for small heaps and during startup.
+
 Optimization: reusing fields to save memory
 ===========================================
 
diff --git a/Lib/test/test_gc.py b/Lib/test/test_gc.py
index b514005..baf8e95 100644
--- a/Lib/test/test_gc.py
+++ b/Lib/test/test_gc.py
@@ -1161,27 +1161,19 @@ class IncrementalGCTests(unittest.TestCase):
             return head
 
         head = make_ll(1000)
-        count = 1000
-
-        # There will be some objects we aren't counting,
-        # e.g. the gc stats dicts. This test checks
-        # that the counts don't grow, so we try to
-        # correct for the uncounted objects
-        # This is just an estimate.
-        CORRECTION = 20
 
         enabled = gc.isenabled()
         gc.enable()
         olds = []
         initial_heap_size = _testinternalcapi.get_tracked_heap_size()
-        for i in range(20_000):
+        iterations = max(20_000, initial_heap_size)
+        for i in range(iterations):
             newhead = make_ll(20)
-            count += 20
             newhead.surprise = head
             olds.append(newhead)
             if len(olds) == 20:
                 new_objects = _testinternalcapi.get_tracked_heap_size() - initial_heap_size
-                self.assertLess(new_objects, 27_000, f"Heap growing. Reached limit after {i} iterations")
+                self.assertLess(new_objects, initial_heap_size/2, f"Heap growing. Reached limit after {i} iterations")
                 del olds[:]
         if not enabled:
             gc.disable()
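
The `initial_heap_size/2` bound mirrors the `G0 ≤ L/2` invariant derived in the
documentation above: tracked-object growth must stay below half the live heap.
A minimal standalone sketch of the same kind of check (`tracked_heap_size` is an
invented stand-in for `_testinternalcapi.get_tracked_heap_size()`):

```python
import gc

def tracked_heap_size():
    # Rough stand-in: count the objects the collector currently tracks.
    return len(gc.get_objects())

baseline = tracked_heap_size()
junk = [[] for _ in range(1000)]   # allocate some tracked objects
gc.collect()
growth = tracked_heap_size() - baseline
# Growth should stay well below half of the baseline heap.
assert growth < max(baseline // 2, 10_000)
```
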
diff --git a/Objects/dictobject.c b/Objects/dictobject.c
index 1c9f864..de518b8 100644
--- a/Objects/dictobject.c
+++ b/Objects/dictobject.c
@@ -7064,9 +7064,7 @@ int
 PyObject_VisitManagedDict(PyObject *obj, visitproc visit, void *arg)
 {
     PyTypeObject *tp = Py_TYPE(obj);
-    if((tp->tp_flags & Py_TPFLAGS_MANAGED_DICT) == 0) {
-        return 0;
-    }
+    assert(tp->tp_flags & Py_TPFLAGS_MANAGED_DICT);
     if (tp->tp_flags & Py_TPFLAGS_INLINE_VALUES) {
         PyDictValues *values = _PyObject_InlineValues(obj);
         if (values->valid) {
diff --git a/Objects/genobject.c b/Objects/genobject.c
index e87f199..33679af 100644
--- a/Objects/genobject.c
+++ b/Objects/genobject.c
@@ -882,25 +882,7 @@ PyTypeObject PyGen_Type = {
     gen_methods,                                /* tp_methods */
     gen_memberlist,                             /* tp_members */
     gen_getsetlist,                             /* tp_getset */
-    0,                                          /* tp_base */
-    0,                                          /* tp_dict */
-
-    0,                                          /* tp_descr_get */
-    0,                                          /* tp_descr_set */
-    0,                                          /* tp_dictoffset */
-    0,                                          /* tp_init */
-    0,                                          /* tp_alloc */
-    0,                                          /* tp_new */
-    0,                                          /* tp_free */
-    0,                                          /* tp_is_gc */
-    0,                                          /* tp_bases */
-    0,                                          /* tp_mro */
-    0,                                          /* tp_cache */
-    0,                                          /* tp_subclasses */
-    0,                                          /* tp_weaklist */
-    0,                                          /* tp_del */
-    0,                                          /* tp_version_tag */
-    _PyGen_Finalize,                            /* tp_finalize */
+    .tp_finalize = _PyGen_Finalize,
 };
 
 static PyObject *
@@ -1242,24 +1224,7 @@ PyTypeObject PyCoro_Type = {
     coro_methods,                               /* tp_methods */
     coro_memberlist,                            /* tp_members */
     coro_getsetlist,                            /* tp_getset */
-    0,                                          /* tp_base */
-    0,                                          /* tp_dict */
-    0,                                          /* tp_descr_get */
-    0,                                          /* tp_descr_set */
-    0,                                          /* tp_dictoffset */
-    0,                                          /* tp_init */
-    0,                                          /* tp_alloc */
-    0,                                          /* tp_new */
-    0,                                          /* tp_free */
-    0,                                          /* tp_is_gc */
-    0,                                          /* tp_bases */
-    0,                                          /* tp_mro */
-    0,                                          /* tp_cache */
-    0,                                          /* tp_subclasses */
-    0,                                          /* tp_weaklist */
-    0,                                          /* tp_del */
-    0,                                          /* tp_version_tag */
-    _PyGen_Finalize,                            /* tp_finalize */
+    .tp_finalize = _PyGen_Finalize,
 };
 
 static void
@@ -1464,7 +1429,6 @@ typedef struct _PyAsyncGenWrappedValue {
     (assert(_PyAsyncGenWrappedValue_CheckExact(op)), \
      _Py_CAST(_PyAsyncGenWrappedValue*, (op)))
 
-
 static int
 async_gen_traverse(PyObject *self, visitproc visit, void *arg)
 {
@@ -1673,24 +1637,7 @@ PyTypeObject PyAsyncGen_Type = {
     async_gen_methods,                          /* tp_methods */
     async_gen_memberlist,                       /* tp_members */
     async_gen_getsetlist,                       /* tp_getset */
-    0,                                          /* tp_base */
-    0,                                          /* tp_dict */
-    0,                                          /* tp_descr_get */
-    0,                                          /* tp_descr_set */
-    0,                                          /* tp_dictoffset */
-    0,                                          /* tp_init */
-    0,                                          /* tp_alloc */
-    0,                                          /* tp_new */
-    0,                                          /* tp_free */
-    0,                                          /* tp_is_gc */
-    0,                                          /* tp_bases */
-    0,                                          /* tp_mro */
-    0,                                          /* tp_cache */
-    0,                                          /* tp_subclasses */
-    0,                                          /* tp_weaklist */
-    0,                                          /* tp_del */
-    0,                                          /* tp_version_tag */
-    _PyGen_Finalize,                            /* tp_finalize */
+    .tp_finalize = _PyGen_Finalize,
 };
 
 
@@ -1935,16 +1882,6 @@ PyTypeObject _PyAsyncGenASend_Type = {
     PyObject_SelfIter,                          /* tp_iter */
     async_gen_asend_iternext,                   /* tp_iternext */
     async_gen_asend_methods,                    /* tp_methods */
-    0,                                          /* tp_members */
-    0,                                          /* tp_getset */
-    0,                                          /* tp_base */
-    0,                                          /* tp_dict */
-    0,                                          /* tp_descr_get */
-    0,                                          /* tp_descr_set */
-    0,                                          /* tp_dictoffset */
-    0,                                          /* tp_init */
-    0,                                          /* tp_alloc */
-    0,                                          /* tp_new */
     .tp_finalize = async_gen_asend_finalize,
 };
 
diff --git a/Objects/typeobject.c b/Objects/typeobject.c
index 2068d6a..cc95b98 100644
--- a/Objects/typeobject.c
+++ b/Objects/typeobject.c
@@ -2355,6 +2355,16 @@ subtype_traverse(PyObject *self, visitproc visit, void *arg)
     return 0;
 }
 
+
+static int
+plain_object_traverse(PyObject *self, visitproc visit, void *arg)
+{
+    PyTypeObject *type = Py_TYPE(self);
+    assert(type->tp_flags & Py_TPFLAGS_MANAGED_DICT);
+    Py_VISIT(type);
+    return PyObject_VisitManagedDict(self, visit, arg);
+}
+
 static void
 clear_slots(PyTypeObject *type, PyObject *self)
 {
@@ -4147,6 +4157,9 @@ type_new_descriptors(const type_new_ctx *ctx, PyTypeObject *type)
         assert((type->tp_flags & Py_TPFLAGS_MANAGED_DICT) == 0);
         type->tp_flags |= Py_TPFLAGS_MANAGED_DICT;
         type->tp_dictoffset = -1;
+        if (type->tp_basicsize == sizeof(PyObject)) {
+            type->tp_traverse = plain_object_traverse;
+        }
     }
 
     type->tp_basicsize = slotoffset;
diff --git a/Python/gc.c b/Python/gc.c
index 5b9588c..fd29a48 100644
--- a/Python/gc.c
+++ b/Python/gc.c
@@ -1277,18 +1277,13 @@ gc_list_set_space(PyGC_Head *list, int space)
  * space faster than objects are added to the old space.
  *
  * Each young or incremental collection adds a number of
- * objects, S (for survivors) to the old space, and
- * incremental collectors scan I objects from the old space.
- * I > S must be true. We also want I > S * N to be true, where
- * N > 1. Higher values of N mean that the old space is
+ * new objects (N) to the heap, and incremental collectors
+ * scan I objects from the old space.
+ * I > N must be true. We also want I > N * K to be true, where
+ * K > 2. Higher values of K mean that the old space is
  * scanned more rapidly.
- * The default incremental threshold of 10 translates to - * N == 1.4 (1 + 4/threshold) */ - -/* Divide by 10, so that the default incremental threshold of 10 - * scans objects at 1% of the heap size */ -#define SCAN_RATE_DIVISOR 10 +#define SCAN_RATE_DIVISOR 5 static void add_stats(GCState *gcstate, int gen, struct gc_collection_stats *stats) @@ -1330,69 +1325,76 @@ gc_collect_young(PyThreadState *tstate, validate_spaces(gcstate); } -#ifndef NDEBUG -static inline int -IS_IN_VISITED(PyGC_Head *gc, int visited_space) +typedef struct work_stack { + PyGC_Head *top; + int visited_space; +} WorkStack; + +/* Remove gc from the list it is currently in and push it to the stack */ +static inline void +push_to_stack(PyGC_Head *gc, WorkStack *stack) { - assert(visited_space == 0 || other_space(visited_space) == 0); - return gc_old_space(gc) == visited_space; + PyGC_Head *prev = GC_PREV(gc); + PyGC_Head *next = GC_NEXT(gc); + _PyGCHead_SET_NEXT(prev, next); + _PyGCHead_SET_PREV(next, prev); + _PyGCHead_SET_PREV(gc, stack->top); + stack->top = gc; } -#endif -struct container_and_flag { - PyGC_Head *container; - int visited_space; - intptr_t size; -}; +static inline PyGC_Head * +pop_from_stack(WorkStack *stack) +{ + PyGC_Head *gc = stack->top; + stack->top = _PyGCHead_PREV(gc); + return gc; +} -/* A traversal callback for adding to container) */ -static int -visit_add_to_container(PyObject *op, void *arg) +/* append list `from` to `stack`; `from` becomes an empty list */ +static void +move_list_to_stack(PyGC_Head *from, WorkStack *stack) { - OBJECT_STAT_INC(object_visits); - struct container_and_flag *cf = (struct container_and_flag *)arg; - int visited = cf->visited_space; - assert(visited == get_gc_state()->visited_space); - if (!_Py_IsImmortal(op) && _PyObject_IS_GC(op)) { + if (!gc_list_is_empty(from)) { + PyGC_Head *from_head = GC_NEXT(from); + PyGC_Head *from_tail = GC_PREV(from); + _PyGCHead_SET_PREV(from_head, stack->top); + stack->top = from_tail; + gc_list_init(from); + } +} + +static inline void +move_to_stack(PyObject *op, WorkStack *stack, int visited_space) +{ + assert(op != NULL); + if (_PyObject_IS_GC(op)) { PyGC_Head *gc = AS_GC(op); if (_PyObject_GC_IS_TRACKED(op) && - gc_old_space(gc) != visited) { + gc_old_space(gc) != visited_space) { + assert(!_Py_IsImmortal(op)); gc_flip_old_space(gc); - gc_list_move(gc, cf->container); - cf->size++; + push_to_stack(gc, stack); } } - return 0; } -static intptr_t -expand_region_transitively_reachable(PyGC_Head *container, PyGC_Head *gc, GCState *gcstate) -{ - struct container_and_flag arg = { - .container = container, - .visited_space = gcstate->visited_space, - .size = 0 - }; - assert(GC_NEXT(gc) == container); - while (gc != container) { - /* Survivors will be moved to visited space, so they should - * have been marked as visited */ - assert(IS_IN_VISITED(gc, gcstate->visited_space)); - PyObject *op = FROM_GC(gc); - assert(_PyObject_GC_IS_TRACKED(op)); - if (_Py_IsImmortal(op)) { - PyGC_Head *next = GC_NEXT(gc); - gc_list_move(gc, &get_gc_state()->permanent_generation.head); - gc = next; - continue; - } - traverseproc traverse = Py_TYPE(op)->tp_traverse; - (void) traverse(op, - visit_add_to_container, - &arg); - gc = GC_NEXT(gc); - } - return arg.size; +static void +move_unvisited(PyObject *op, WorkStack *stack, int visited_space) +{ + move_to_stack(op, stack, visited_space); +} + +#define MOVE_UNVISITED(O, T, V) if ((O) != NULL) move_unvisited((O), (T), (V)) + +/* A traversal callback for adding to container */ +static int 
+visit_add_to_container(PyObject *op, void *arg) +{ + OBJECT_STAT_INC(object_visits); + WorkStack *stack = (WorkStack *)arg; + assert(stack->visited_space == get_gc_state()->visited_space); + move_to_stack(op, stack, stack->visited_space); + return 0; } /* Do bookkeeping for a completed GC cycle */ @@ -1420,54 +1422,62 @@ completed_scavenge(GCState *gcstate) gc_list_set_space(&gcstate->old[not_visited].head, not_visited); } assert(gc_list_is_empty(&gcstate->old[visited].head)); - gcstate->work_to_do = 0; gcstate->phase = GC_PHASE_MARK; } -static intptr_t -move_to_reachable(PyObject *op, PyGC_Head *reachable, int visited_space) -{ - if (op != NULL && !_Py_IsImmortal(op) && _PyObject_IS_GC(op)) { - PyGC_Head *gc = AS_GC(op); - if (_PyObject_GC_IS_TRACKED(op) && - gc_old_space(gc) != visited_space) { - gc_flip_old_space(gc); - gc_list_move(gc, reachable); - return 1; +static void +frame_move_unvisited(_PyInterpreterFrame *frame, WorkStack *stack, int visited_space) +{ + _PyStackRef *locals = frame->localsplus; + _PyStackRef *sp = frame->stackpointer; + if (frame->f_locals != NULL) { + move_unvisited(frame->f_locals, stack, visited_space); + } + PyObject *func = PyStackRef_AsPyObjectBorrow(frame->f_funcobj); + move_unvisited(func, stack, visited_space); + while (sp > locals) { + sp--; + _PyStackRef ref = *sp; + if (!PyStackRef_IsNull(ref)) { + PyObject *op = PyStackRef_AsPyObjectBorrow(ref); + if (!_Py_IsImmortal(op)) { + move_unvisited(op, stack, visited_space); + } } } - return 0; } -static intptr_t -mark_all_reachable(PyGC_Head *reachable, PyGC_Head *visited, int visited_space) +static Py_ssize_t +move_all_transitively_reachable(WorkStack *stack, PyGC_Head *visited, int visited_space) { // Transitively traverse all objects from reachable, until empty - struct container_and_flag arg = { - .container = reachable, - .visited_space = visited_space, - .size = 0 - }; - while (!gc_list_is_empty(reachable)) { - PyGC_Head *gc = _PyGCHead_NEXT(reachable); + Py_ssize_t objects_marked = 0; + while (stack->top != NULL) { + PyGC_Head *gc = pop_from_stack(stack); assert(gc_old_space(gc) == visited_space); - gc_list_move(gc, visited); + gc_list_append(gc, visited); + objects_marked++; PyObject *op = FROM_GC(gc); - traverseproc traverse = Py_TYPE(op)->tp_traverse; - (void) traverse(op, - visit_add_to_container, - &arg); + assert(PyObject_IS_GC(op)); + assert(_PyObject_GC_IS_TRACKED(op)); + if (_Py_IsImmortal(op)) { + _PyObject_GC_UNTRACK(op); + } + else { + traverseproc traverse = Py_TYPE(op)->tp_traverse; + (void) traverse(op, visit_add_to_container, stack); + } } gc_list_validate_space(visited, visited_space); - return arg.size; + return objects_marked; } static intptr_t mark_stacks(PyInterpreterState *interp, PyGC_Head *visited, int visited_space, bool start) { - PyGC_Head reachable; - gc_list_init(&reachable); - Py_ssize_t objects_marked = 0; + WorkStack stack; + stack.top = NULL; + stack.visited_space = visited_space; // Move all objects on stacks to reachable _PyRuntimeState *runtime = &_PyRuntime; HEAD_LOCK(runtime); @@ -1480,27 +1490,7 @@ mark_stacks(PyInterpreterState *interp, PyGC_Head *visited, int visited_space, b frame = frame->previous; continue; } - _PyStackRef *locals = frame->localsplus; - _PyStackRef *sp = frame->stackpointer; - objects_marked += move_to_reachable(frame->f_locals, &reachable, visited_space); - PyObject *func = PyStackRef_AsPyObjectBorrow(frame->f_funcobj); - objects_marked += move_to_reachable(func, &reachable, visited_space); - while (sp > locals) { - sp--; - if 
(PyStackRef_IsNull(*sp)) { - continue; - } - PyObject *op = PyStackRef_AsPyObjectBorrow(*sp); - if (!_Py_IsImmortal(op) && _PyObject_IS_GC(op)) { - PyGC_Head *gc = AS_GC(op); - if (_PyObject_GC_IS_TRACKED(op) && - gc_old_space(gc) != visited_space) { - gc_flip_old_space(gc); - objects_marked++; - gc_list_move(gc, &reachable); - } - } - } + frame_move_unvisited(frame, &stack, visited_space); if (!start && frame->visited) { // If this frame has already been visited, then the lower frames // will have already been visited and will not have changed @@ -1513,31 +1503,31 @@ mark_stacks(PyInterpreterState *interp, PyGC_Head *visited, int visited_space, b ts = PyThreadState_Next(ts); HEAD_UNLOCK(runtime); } - objects_marked += mark_all_reachable(&reachable, visited, visited_space); - assert(gc_list_is_empty(&reachable)); + Py_ssize_t objects_marked = move_all_transitively_reachable(&stack, visited, visited_space); + assert(stack.top == NULL); return objects_marked; } static intptr_t mark_global_roots(PyInterpreterState *interp, PyGC_Head *visited, int visited_space) { - PyGC_Head reachable; - gc_list_init(&reachable); - Py_ssize_t objects_marked = 0; - objects_marked += move_to_reachable(interp->sysdict, &reachable, visited_space); - objects_marked += move_to_reachable(interp->builtins, &reachable, visited_space); - objects_marked += move_to_reachable(interp->dict, &reachable, visited_space); + WorkStack stack; + stack.top = NULL; + stack.visited_space = visited_space; + MOVE_UNVISITED(interp->sysdict, &stack, visited_space); + MOVE_UNVISITED(interp->builtins, &stack, visited_space); + MOVE_UNVISITED(interp->dict, &stack, visited_space); struct types_state *types = &interp->types; for (int i = 0; i < _Py_MAX_MANAGED_STATIC_BUILTIN_TYPES; i++) { - objects_marked += move_to_reachable(types->builtins.initialized[i].tp_dict, &reachable, visited_space); - objects_marked += move_to_reachable(types->builtins.initialized[i].tp_subclasses, &reachable, visited_space); + MOVE_UNVISITED(types->builtins.initialized[i].tp_dict, &stack, visited_space); + MOVE_UNVISITED(types->builtins.initialized[i].tp_subclasses, &stack, visited_space); } for (int i = 0; i < _Py_MAX_MANAGED_STATIC_EXT_TYPES; i++) { - objects_marked += move_to_reachable(types->for_extensions.initialized[i].tp_dict, &reachable, visited_space); - objects_marked += move_to_reachable(types->for_extensions.initialized[i].tp_subclasses, &reachable, visited_space); + MOVE_UNVISITED(types->for_extensions.initialized[i].tp_dict, &stack, visited_space); + MOVE_UNVISITED(types->for_extensions.initialized[i].tp_subclasses, &stack, visited_space); } - objects_marked += mark_all_reachable(&reachable, visited, visited_space); - assert(gc_list_is_empty(&reachable)); + Py_ssize_t objects_marked = move_all_transitively_reachable(&stack, visited, visited_space); + assert(stack.top == NULL); return objects_marked; } @@ -1549,39 +1539,35 @@ mark_at_start(PyThreadState *tstate) PyGC_Head *visited = &gcstate->old[gcstate->visited_space].head; Py_ssize_t objects_marked = mark_global_roots(tstate->interp, visited, gcstate->visited_space); objects_marked += mark_stacks(tstate->interp, visited, gcstate->visited_space, true); - gcstate->work_to_do -= objects_marked; gcstate->phase = GC_PHASE_COLLECT; validate_spaces(gcstate); return objects_marked; } + +/* See InternalDocs/garbage_collector.md for more details. 
*/ +#define MAX_HEAP_PORTION_MULTIPLIER 5 +#define MARKING_PROGRESS_MULTIPLIER 2 + static intptr_t assess_work_to_do(GCState *gcstate) { - /* The amount of work we want to do depends on three things. + /* The amount of work we want to do depends on two things. * 1. The number of new objects created - * 2. The growth in heap size since the last collection - * 3. The heap size (up to the number of new objects, to avoid quadratic effects) - * - * For a steady state heap, the amount of work to do is three times the number - * of new objects added to the heap. This ensures that we stay ahead in the - * worst case of all new objects being garbage. - * - * This could be improved by tracking survival rates, but it is still a - * large improvement on the non-marking approach. + * 2. The heap size (up to a multiple of the number of new objects, to avoid quadratic effects) */ intptr_t scale_factor = gcstate->old[0].threshold; if (scale_factor < 2) { scale_factor = 2; } intptr_t new_objects = gcstate->young.count; - intptr_t max_heap_fraction = new_objects*3/2; - intptr_t heap_fraction = gcstate->heap_size / SCAN_RATE_DIVISOR / scale_factor; - if (heap_fraction > max_heap_fraction) { - heap_fraction = max_heap_fraction; + intptr_t max_heap_portion = new_objects * MAX_HEAP_PORTION_MULTIPLIER; + intptr_t heap_portion = gcstate->heap_size / SCAN_RATE_DIVISOR / scale_factor; + if (heap_portion > max_heap_portion) { + heap_portion = max_heap_portion; } gcstate->young.count = 0; - return new_objects + heap_fraction; + return new_objects + heap_portion; } static void @@ -1594,36 +1580,37 @@ gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats) if (gcstate->phase == GC_PHASE_MARK) { Py_ssize_t objects_marked = mark_at_start(tstate); GC_STAT_ADD(1, objects_transitively_reachable, objects_marked); - gcstate->work_to_do -= objects_marked; + gcstate->work_to_do -= objects_marked * MARKING_PROGRESS_MULTIPLIER; validate_spaces(gcstate); return; } PyGC_Head *not_visited = &gcstate->old[gcstate->visited_space^1].head; PyGC_Head *visited = &gcstate->old[gcstate->visited_space].head; - PyGC_Head increment; - gc_list_init(&increment); - int scale_factor = gcstate->old[0].threshold; - if (scale_factor < 2) { - scale_factor = 2; - } intptr_t objects_marked = mark_stacks(tstate->interp, visited, gcstate->visited_space, false); GC_STAT_ADD(1, objects_transitively_reachable, objects_marked); - gcstate->work_to_do -= objects_marked; + gcstate->work_to_do -= objects_marked * MARKING_PROGRESS_MULTIPLIER; gc_list_set_space(&gcstate->young.head, gcstate->visited_space); - gc_list_merge(&gcstate->young.head, &increment); + PyGC_Head increment; + gc_list_init(&increment); + WorkStack working; + working.top = 0; + working.visited_space = gcstate->visited_space; + move_list_to_stack(&gcstate->young.head, &working); + Py_ssize_t increment_size = move_all_transitively_reachable(&working, &increment, gcstate->visited_space); gc_list_validate_space(&increment, gcstate->visited_space); - Py_ssize_t increment_size = gc_list_size(&increment); + assert(working.top == NULL); while (increment_size < gcstate->work_to_do) { if (gc_list_is_empty(not_visited)) { break; } PyGC_Head *gc = _PyGCHead_NEXT(not_visited); - gc_list_move(gc, &increment); - increment_size++; - assert(!_Py_IsImmortal(FROM_GC(gc))); gc_set_old_space(gc, gcstate->visited_space); - increment_size += expand_region_transitively_reachable(&increment, gc, gcstate); + push_to_stack(gc, &working); + assert(!_Py_IsImmortal(FROM_GC(gc))); + increment_size += 
move_all_transitively_reachable(&working, &increment, gcstate->visited_space); + assert(working.top == NULL); } + assert(increment_size == gc_list_size(&increment)); GC_STAT_ADD(1, objects_not_transitively_reachable, increment_size); validate_list(&increment, collecting_clear_unreachable_clear); gc_list_validate_space(&increment, gcstate->visited_space); @@ -1632,7 +1619,6 @@ gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats) gc_collect_region(tstate, &increment, &survivors, stats); gc_list_merge(&survivors, visited); assert(gc_list_is_empty(&increment)); - gcstate->work_to_do += gcstate->heap_size / SCAN_RATE_DIVISOR / scale_factor; gcstate->work_to_do -= increment_size; add_stats(gcstate, 1, stats); @@ -1668,6 +1654,7 @@ gc_collect_full(PyThreadState *tstate, gcstate->old[0].count = 0; gcstate->old[1].count = 0; completed_scavenge(gcstate); + gcstate->work_to_do = -gcstate->young.threshold; _PyGC_ClearAllFreeLists(tstate->interp); validate_spaces(gcstate); add_stats(gcstate, 2, stats); -- cgit v0.12
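
Taken together, the constants above (`SCAN_RATE_DIVISOR`, `MAX_HEAP_PORTION_MULTIPLIER`,
`MARKING_PROGRESS_MULTIPLIER`) implement the budget derived in the documentation:
marking progress counts double, and the per-increment budget grows with the heap
up to six times the number of new objects. A simplified Python model of that
scheduling (a sketch with invented function names, not the C implementation):

```python
# Mirrors the C constants added in Python/gc.c above.
MAX_HEAP_PORTION_MULTIPLIER = 5
MARKING_PROGRESS_MULTIPLIER = 2
SCAN_RATE_DIVISOR = 5

def assess_work_to_do(new_objects, heap_size, threshold):
    # Budget = N + a heap-proportional portion, capped at 5*N,
    # so the total approaches 6*N as the heap grows.
    scale_factor = max(threshold, 2)
    heap_portion = heap_size // SCAN_RATE_DIVISOR // scale_factor
    heap_portion = min(heap_portion, new_objects * MAX_HEAP_PORTION_MULTIPLIER)
    return new_objects + heap_portion

def work_done(marked, scanned_in_increment):
    # Marking counts double, so spending the 6*N budget enforces 2*M + I == 6*N.
    return MARKING_PROGRESS_MULTIPLIER * marked + scanned_in_increment

# A large heap gets the full budget of 6*N ...
assert assess_work_to_do(new_objects=1000, heap_size=10**9, threshold=10) == 6000
# ... while a small heap gets a proportionally smaller one.
assert assess_work_to_do(new_objects=1000, heap_size=50_000, threshold=10) == 2000
# Marking 2000 objects and scanning 2000 in increments consumes the 6000 budget.
assert work_done(marked=2000, scanned_in_increment=2000) == 6000
```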