summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--InternalDocs/garbage_collector.md51
-rw-r--r--Lib/test/test_gc.py14
-rw-r--r--Objects/dictobject.c4
-rw-r--r--Objects/genobject.c69
-rw-r--r--Objects/typeobject.c13
-rw-r--r--Python/gc.c301
6 files changed, 243 insertions, 209 deletions
diff --git a/InternalDocs/garbage_collector.md b/InternalDocs/garbage_collector.md
index 394e4ef..e4cb9e4 100644
--- a/InternalDocs/garbage_collector.md
+++ b/InternalDocs/garbage_collector.md
@@ -199,22 +199,22 @@ unreachable:
```pycon
>>> import gc
->>>
+>>>
>>> class Link:
... def __init__(self, next_link=None):
... self.next_link = next_link
-...
+...
>>> link_3 = Link()
>>> link_2 = Link(link_3)
>>> link_1 = Link(link_2)
>>> link_3.next_link = link_1
>>> A = link_1
>>> del link_1, link_2, link_3
->>>
+>>>
>>> link_4 = Link()
>>> link_4.next_link = link_4
>>> del link_4
->>>
+>>>
>>> # Collect the unreachable Link object (and its .__dict__ dict).
>>> gc.collect()
2
@@ -459,11 +459,11 @@ specifically in a generation by calling `gc.collect(generation=NUM)`.
>>> # Create a reference cycle.
>>> x = MyObj()
>>> x.self = x
->>>
+>>>
>>> # Initially the object is in the young generation.
>>> gc.get_objects(generation=0)
[..., <__main__.MyObj object at 0x7fbcc12a3400>, ...]
->>>
+>>>
>>> # After a collection of the youngest generation the object
>>> # moves to the old generation.
>>> gc.collect(generation=0)
@@ -515,45 +515,6 @@ increment. All objects directly referred to from those stack frames are
added to the working set.
Then the above algorithm is repeated, starting from step 2.
-Determining how much work to do
--------------------------------
-
-We need to do a certain amount of work to ensure that garbage is collected,
-but doing too much work slows down execution.
-
-To work out how much work we need to do, consider a heap with `L` live objects
-and `G0` garbage objects at the start of a full scavenge and `G1` garbage objects
-at the end of the scavenge. We don't want the amount of garbage to grow, `G1 ≤ G0`, and
-we don't want too much garbage (say 1/3 of the heap maximum), `G0 ≤ L/2`.
-For each full scavenge we must visit all objects, `T == L + G0 + G1`, during which
-`G1` garbage objects are created.
-
-The number of new objects created `N` must be at least the new garbage created, `N ≥ G1`,
-assuming that the number of live objects remains roughly constant.
-If we set `T == 4*N` we get `T > 4*G1` and `T = L + G0 + G1` => `L + G0 > 3G1`
-For a steady state heap (`G0 == G1`) we get `L > 2G0` and the desired garbage ratio.
-
-In other words, to keep the garbage fraction to 1/3 or less we need to visit
-4 times as many objects as are newly created.
-
-We can do better than this though. Not all new objects will be garbage.
-Consider the heap at the end of the scavenge with `L1` live objects and `G1`
-garbage. Also, note that `T == M + I` where `M` is the number of objects marked
-as reachable and `I` is the number of objects visited in increments.
-Everything in `M` is live, so `I ≥ G0` and in practice `I` is closer to `G0 + G1`.
-
-If we choose the amount of work done such that `2*M + I == 6N` then we can do
-less work in most cases, but are still guaranteed to keep up.
-Since `I ≳ G0 + G1` (not strictly true, but close enough)
-`T == M + I == (6N + I)/2` and `(6N + I)/2 ≳ 4G`, so we can keep up.
-
-The reason that this improves performance is that `M` is usually much larger
-than `I`. If `M == 10I`, then `T ≅ 3N`.
-
-Finally, instead of using a fixed multiple of 8, we gradually increase it as the
-heap grows. This avoids wasting work for small heaps and during startup.
-
-
Optimization: reusing fields to save memory
===========================================
diff --git a/Lib/test/test_gc.py b/Lib/test/test_gc.py
index baf8e95..b514005 100644
--- a/Lib/test/test_gc.py
+++ b/Lib/test/test_gc.py
@@ -1161,19 +1161,27 @@ class IncrementalGCTests(unittest.TestCase):
return head
head = make_ll(1000)
+ count = 1000
+
+ # There will be some objects we aren't counting,
+ # e.g. the gc stats dicts. This test checks
+ # that the counts don't grow, so we try to
+ # correct for the uncounted objects.
+ # This is just an estimate.
+ CORRECTION = 20
enabled = gc.isenabled()
gc.enable()
olds = []
initial_heap_size = _testinternalcapi.get_tracked_heap_size()
- iterations = max(20_000, initial_heap_size)
- for i in range(iterations):
+ for i in range(20_000):
newhead = make_ll(20)
+ count += 20
newhead.surprise = head
olds.append(newhead)
if len(olds) == 20:
new_objects = _testinternalcapi.get_tracked_heap_size() - initial_heap_size
- self.assertLess(new_objects, initial_heap_size/2, f"Heap growing. Reached limit after {i} iterations")
+ self.assertLess(new_objects, 27_000, f"Heap growing. Reached limit after {i} iterations")
del olds[:]
if not enabled:
gc.disable()
diff --git a/Objects/dictobject.c b/Objects/dictobject.c
index de518b8..1c9f864 100644
--- a/Objects/dictobject.c
+++ b/Objects/dictobject.c
@@ -7064,7 +7064,9 @@ int
PyObject_VisitManagedDict(PyObject *obj, visitproc visit, void *arg)
{
PyTypeObject *tp = Py_TYPE(obj);
- assert(tp->tp_flags & Py_TPFLAGS_MANAGED_DICT);
+ if((tp->tp_flags & Py_TPFLAGS_MANAGED_DICT) == 0) {
+ return 0;
+ }
if (tp->tp_flags & Py_TPFLAGS_INLINE_VALUES) {
PyDictValues *values = _PyObject_InlineValues(obj);
if (values->valid) {
diff --git a/Objects/genobject.c b/Objects/genobject.c
index 33679af..e87f199 100644
--- a/Objects/genobject.c
+++ b/Objects/genobject.c
@@ -882,7 +882,25 @@ PyTypeObject PyGen_Type = {
gen_methods, /* tp_methods */
gen_memberlist, /* tp_members */
gen_getsetlist, /* tp_getset */
- .tp_finalize = _PyGen_Finalize,
+ 0, /* tp_base */
+ 0, /* tp_dict */
+
+ 0, /* tp_descr_get */
+ 0, /* tp_descr_set */
+ 0, /* tp_dictoffset */
+ 0, /* tp_init */
+ 0, /* tp_alloc */
+ 0, /* tp_new */
+ 0, /* tp_free */
+ 0, /* tp_is_gc */
+ 0, /* tp_bases */
+ 0, /* tp_mro */
+ 0, /* tp_cache */
+ 0, /* tp_subclasses */
+ 0, /* tp_weaklist */
+ 0, /* tp_del */
+ 0, /* tp_version_tag */
+ _PyGen_Finalize, /* tp_finalize */
};
static PyObject *
@@ -1224,7 +1242,24 @@ PyTypeObject PyCoro_Type = {
coro_methods, /* tp_methods */
coro_memberlist, /* tp_members */
coro_getsetlist, /* tp_getset */
- .tp_finalize = _PyGen_Finalize,
+ 0, /* tp_base */
+ 0, /* tp_dict */
+ 0, /* tp_descr_get */
+ 0, /* tp_descr_set */
+ 0, /* tp_dictoffset */
+ 0, /* tp_init */
+ 0, /* tp_alloc */
+ 0, /* tp_new */
+ 0, /* tp_free */
+ 0, /* tp_is_gc */
+ 0, /* tp_bases */
+ 0, /* tp_mro */
+ 0, /* tp_cache */
+ 0, /* tp_subclasses */
+ 0, /* tp_weaklist */
+ 0, /* tp_del */
+ 0, /* tp_version_tag */
+ _PyGen_Finalize, /* tp_finalize */
};
static void
@@ -1429,6 +1464,7 @@ typedef struct _PyAsyncGenWrappedValue {
(assert(_PyAsyncGenWrappedValue_CheckExact(op)), \
_Py_CAST(_PyAsyncGenWrappedValue*, (op)))
+
static int
async_gen_traverse(PyObject *self, visitproc visit, void *arg)
{
@@ -1637,7 +1673,24 @@ PyTypeObject PyAsyncGen_Type = {
async_gen_methods, /* tp_methods */
async_gen_memberlist, /* tp_members */
async_gen_getsetlist, /* tp_getset */
- .tp_finalize = _PyGen_Finalize,
+ 0, /* tp_base */
+ 0, /* tp_dict */
+ 0, /* tp_descr_get */
+ 0, /* tp_descr_set */
+ 0, /* tp_dictoffset */
+ 0, /* tp_init */
+ 0, /* tp_alloc */
+ 0, /* tp_new */
+ 0, /* tp_free */
+ 0, /* tp_is_gc */
+ 0, /* tp_bases */
+ 0, /* tp_mro */
+ 0, /* tp_cache */
+ 0, /* tp_subclasses */
+ 0, /* tp_weaklist */
+ 0, /* tp_del */
+ 0, /* tp_version_tag */
+ _PyGen_Finalize, /* tp_finalize */
};
@@ -1882,6 +1935,16 @@ PyTypeObject _PyAsyncGenASend_Type = {
PyObject_SelfIter, /* tp_iter */
async_gen_asend_iternext, /* tp_iternext */
async_gen_asend_methods, /* tp_methods */
+ 0, /* tp_members */
+ 0, /* tp_getset */
+ 0, /* tp_base */
+ 0, /* tp_dict */
+ 0, /* tp_descr_get */
+ 0, /* tp_descr_set */
+ 0, /* tp_dictoffset */
+ 0, /* tp_init */
+ 0, /* tp_alloc */
+ 0, /* tp_new */
.tp_finalize = async_gen_asend_finalize,
};
diff --git a/Objects/typeobject.c b/Objects/typeobject.c
index cc95b98..2068d6a 100644
--- a/Objects/typeobject.c
+++ b/Objects/typeobject.c
@@ -2355,16 +2355,6 @@ subtype_traverse(PyObject *self, visitproc visit, void *arg)
return 0;
}
-
-static int
-plain_object_traverse(PyObject *self, visitproc visit, void *arg)
-{
- PyTypeObject *type = Py_TYPE(self);
- assert(type->tp_flags & Py_TPFLAGS_MANAGED_DICT);
- Py_VISIT(type);
- return PyObject_VisitManagedDict(self, visit, arg);
-}
-
static void
clear_slots(PyTypeObject *type, PyObject *self)
{
@@ -4157,9 +4147,6 @@ type_new_descriptors(const type_new_ctx *ctx, PyTypeObject *type)
assert((type->tp_flags & Py_TPFLAGS_MANAGED_DICT) == 0);
type->tp_flags |= Py_TPFLAGS_MANAGED_DICT;
type->tp_dictoffset = -1;
- if (type->tp_basicsize == sizeof(PyObject)) {
- type->tp_traverse = plain_object_traverse;
- }
}
type->tp_basicsize = slotoffset;
diff --git a/Python/gc.c b/Python/gc.c
index fd29a48..5b9588c 100644
--- a/Python/gc.c
+++ b/Python/gc.c
@@ -1277,13 +1277,18 @@ gc_list_set_space(PyGC_Head *list, int space)
* space faster than objects are added to the old space.
*
* Each young or incremental collection adds a number of
- * new objects (N) to the heap, and incremental collectors
- * scan I objects from the old space.
- * I > N must be true. We also want I > N * K to be where
- * K > 2. Higher values of K mean that the old space is
+ * objects, S (for survivors) to the old space, and
+ * incremental collectors scan I objects from the old space.
* I > S must be true. We also want I > S * N to be true, where
+ * N > 1. Higher values of N mean that the old space is
* scanned more rapidly.
+ * The default incremental threshold of 10 translates to
+ * N == 1.4 (1 + 4/threshold)
*/
-#define SCAN_RATE_DIVISOR 5
+
+/* Divide by 10, so that the default incremental threshold of 10
+ * scans objects at 1% of the heap size */
+#define SCAN_RATE_DIVISOR 10
static void
add_stats(GCState *gcstate, int gen, struct gc_collection_stats *stats)
@@ -1325,76 +1330,69 @@ gc_collect_young(PyThreadState *tstate,
validate_spaces(gcstate);
}
-typedef struct work_stack {
- PyGC_Head *top;
- int visited_space;
-} WorkStack;
-
-/* Remove gc from the list it is currently in and push it to the stack */
-static inline void
-push_to_stack(PyGC_Head *gc, WorkStack *stack)
-{
- PyGC_Head *prev = GC_PREV(gc);
- PyGC_Head *next = GC_NEXT(gc);
- _PyGCHead_SET_NEXT(prev, next);
- _PyGCHead_SET_PREV(next, prev);
- _PyGCHead_SET_PREV(gc, stack->top);
- stack->top = gc;
-}
-
-static inline PyGC_Head *
-pop_from_stack(WorkStack *stack)
+#ifndef NDEBUG
+static inline int
+IS_IN_VISITED(PyGC_Head *gc, int visited_space)
{
- PyGC_Head *gc = stack->top;
- stack->top = _PyGCHead_PREV(gc);
- return gc;
+ assert(visited_space == 0 || other_space(visited_space) == 0);
+ return gc_old_space(gc) == visited_space;
}
+#endif
-/* append list `from` to `stack`; `from` becomes an empty list */
-static void
-move_list_to_stack(PyGC_Head *from, WorkStack *stack)
-{
- if (!gc_list_is_empty(from)) {
- PyGC_Head *from_head = GC_NEXT(from);
- PyGC_Head *from_tail = GC_PREV(from);
- _PyGCHead_SET_PREV(from_head, stack->top);
- stack->top = from_tail;
- gc_list_init(from);
- }
-}
+struct container_and_flag {
+ PyGC_Head *container;
+ int visited_space;
+ intptr_t size;
+};
-static inline void
-move_to_stack(PyObject *op, WorkStack *stack, int visited_space)
+/* A traversal callback for adding to container */
+static int
+visit_add_to_container(PyObject *op, void *arg)
{
- assert(op != NULL);
- if (_PyObject_IS_GC(op)) {
+ OBJECT_STAT_INC(object_visits);
+ struct container_and_flag *cf = (struct container_and_flag *)arg;
+ int visited = cf->visited_space;
+ assert(visited == get_gc_state()->visited_space);
+ if (!_Py_IsImmortal(op) && _PyObject_IS_GC(op)) {
PyGC_Head *gc = AS_GC(op);
if (_PyObject_GC_IS_TRACKED(op) &&
- gc_old_space(gc) != visited_space) {
- assert(!_Py_IsImmortal(op));
+ gc_old_space(gc) != visited) {
gc_flip_old_space(gc);
- push_to_stack(gc, stack);
+ gc_list_move(gc, cf->container);
+ cf->size++;
}
}
+ return 0;
}
-static void
-move_unvisited(PyObject *op, WorkStack *stack, int visited_space)
-{
- move_to_stack(op, stack, visited_space);
-}
-
-#define MOVE_UNVISITED(O, T, V) if ((O) != NULL) move_unvisited((O), (T), (V))
-
-/* A traversal callback for adding to container */
-static int
-visit_add_to_container(PyObject *op, void *arg)
-{
- OBJECT_STAT_INC(object_visits);
- WorkStack *stack = (WorkStack *)arg;
- assert(stack->visited_space == get_gc_state()->visited_space);
- move_to_stack(op, stack, stack->visited_space);
- return 0;
+static intptr_t
+expand_region_transitively_reachable(PyGC_Head *container, PyGC_Head *gc, GCState *gcstate)
+{
+ struct container_and_flag arg = {
+ .container = container,
+ .visited_space = gcstate->visited_space,
+ .size = 0
+ };
+ assert(GC_NEXT(gc) == container);
+ while (gc != container) {
+ /* Survivors will be moved to visited space, so they should
+ * have been marked as visited */
+ assert(IS_IN_VISITED(gc, gcstate->visited_space));
+ PyObject *op = FROM_GC(gc);
+ assert(_PyObject_GC_IS_TRACKED(op));
+ if (_Py_IsImmortal(op)) {
+ PyGC_Head *next = GC_NEXT(gc);
+ gc_list_move(gc, &get_gc_state()->permanent_generation.head);
+ gc = next;
+ continue;
+ }
+ traverseproc traverse = Py_TYPE(op)->tp_traverse;
+ (void) traverse(op,
+ visit_add_to_container,
+ &arg);
+ gc = GC_NEXT(gc);
+ }
+ return arg.size;
}
/* Do bookkeeping for a completed GC cycle */
@@ -1422,62 +1420,54 @@ completed_scavenge(GCState *gcstate)
gc_list_set_space(&gcstate->old[not_visited].head, not_visited);
}
assert(gc_list_is_empty(&gcstate->old[visited].head));
+ gcstate->work_to_do = 0;
gcstate->phase = GC_PHASE_MARK;
}
-static void
-frame_move_unvisited(_PyInterpreterFrame *frame, WorkStack *stack, int visited_space)
-{
- _PyStackRef *locals = frame->localsplus;
- _PyStackRef *sp = frame->stackpointer;
- if (frame->f_locals != NULL) {
- move_unvisited(frame->f_locals, stack, visited_space);
- }
- PyObject *func = PyStackRef_AsPyObjectBorrow(frame->f_funcobj);
- move_unvisited(func, stack, visited_space);
- while (sp > locals) {
- sp--;
- _PyStackRef ref = *sp;
- if (!PyStackRef_IsNull(ref)) {
- PyObject *op = PyStackRef_AsPyObjectBorrow(ref);
- if (!_Py_IsImmortal(op)) {
- move_unvisited(op, stack, visited_space);
- }
+static intptr_t
+move_to_reachable(PyObject *op, PyGC_Head *reachable, int visited_space)
+{
+ if (op != NULL && !_Py_IsImmortal(op) && _PyObject_IS_GC(op)) {
+ PyGC_Head *gc = AS_GC(op);
+ if (_PyObject_GC_IS_TRACKED(op) &&
+ gc_old_space(gc) != visited_space) {
+ gc_flip_old_space(gc);
+ gc_list_move(gc, reachable);
+ return 1;
}
}
+ return 0;
}
-static Py_ssize_t
-move_all_transitively_reachable(WorkStack *stack, PyGC_Head *visited, int visited_space)
+static intptr_t
+mark_all_reachable(PyGC_Head *reachable, PyGC_Head *visited, int visited_space)
{
// Transitively traverse all objects from reachable, until empty
- Py_ssize_t objects_marked = 0;
- while (stack->top != NULL) {
- PyGC_Head *gc = pop_from_stack(stack);
+ struct container_and_flag arg = {
+ .container = reachable,
+ .visited_space = visited_space,
+ .size = 0
+ };
+ while (!gc_list_is_empty(reachable)) {
+ PyGC_Head *gc = _PyGCHead_NEXT(reachable);
assert(gc_old_space(gc) == visited_space);
- gc_list_append(gc, visited);
- objects_marked++;
+ gc_list_move(gc, visited);
PyObject *op = FROM_GC(gc);
- assert(PyObject_IS_GC(op));
- assert(_PyObject_GC_IS_TRACKED(op));
- if (_Py_IsImmortal(op)) {
- _PyObject_GC_UNTRACK(op);
- }
- else {
- traverseproc traverse = Py_TYPE(op)->tp_traverse;
- (void) traverse(op, visit_add_to_container, stack);
- }
+ traverseproc traverse = Py_TYPE(op)->tp_traverse;
+ (void) traverse(op,
+ visit_add_to_container,
+ &arg);
}
gc_list_validate_space(visited, visited_space);
- return objects_marked;
+ return arg.size;
}
static intptr_t
mark_stacks(PyInterpreterState *interp, PyGC_Head *visited, int visited_space, bool start)
{
- WorkStack stack;
- stack.top = NULL;
- stack.visited_space = visited_space;
+ PyGC_Head reachable;
+ gc_list_init(&reachable);
+ Py_ssize_t objects_marked = 0;
// Move all objects on stacks to reachable
_PyRuntimeState *runtime = &_PyRuntime;
HEAD_LOCK(runtime);
@@ -1490,7 +1480,27 @@ mark_stacks(PyInterpreterState *interp, PyGC_Head *visited, int visited_space, b
frame = frame->previous;
continue;
}
- frame_move_unvisited(frame, &stack, visited_space);
+ _PyStackRef *locals = frame->localsplus;
+ _PyStackRef *sp = frame->stackpointer;
+ objects_marked += move_to_reachable(frame->f_locals, &reachable, visited_space);
+ PyObject *func = PyStackRef_AsPyObjectBorrow(frame->f_funcobj);
+ objects_marked += move_to_reachable(func, &reachable, visited_space);
+ while (sp > locals) {
+ sp--;
+ if (PyStackRef_IsNull(*sp)) {
+ continue;
+ }
+ PyObject *op = PyStackRef_AsPyObjectBorrow(*sp);
+ if (!_Py_IsImmortal(op) && _PyObject_IS_GC(op)) {
+ PyGC_Head *gc = AS_GC(op);
+ if (_PyObject_GC_IS_TRACKED(op) &&
+ gc_old_space(gc) != visited_space) {
+ gc_flip_old_space(gc);
+ objects_marked++;
+ gc_list_move(gc, &reachable);
+ }
+ }
+ }
if (!start && frame->visited) {
// If this frame has already been visited, then the lower frames
// will have already been visited and will not have changed
@@ -1503,31 +1513,31 @@ mark_stacks(PyInterpreterState *interp, PyGC_Head *visited, int visited_space, b
ts = PyThreadState_Next(ts);
HEAD_UNLOCK(runtime);
}
- Py_ssize_t objects_marked = move_all_transitively_reachable(&stack, visited, visited_space);
- assert(stack.top == NULL);
+ objects_marked += mark_all_reachable(&reachable, visited, visited_space);
+ assert(gc_list_is_empty(&reachable));
return objects_marked;
}
static intptr_t
mark_global_roots(PyInterpreterState *interp, PyGC_Head *visited, int visited_space)
{
- WorkStack stack;
- stack.top = NULL;
- stack.visited_space = visited_space;
- MOVE_UNVISITED(interp->sysdict, &stack, visited_space);
- MOVE_UNVISITED(interp->builtins, &stack, visited_space);
- MOVE_UNVISITED(interp->dict, &stack, visited_space);
+ PyGC_Head reachable;
+ gc_list_init(&reachable);
+ Py_ssize_t objects_marked = 0;
+ objects_marked += move_to_reachable(interp->sysdict, &reachable, visited_space);
+ objects_marked += move_to_reachable(interp->builtins, &reachable, visited_space);
+ objects_marked += move_to_reachable(interp->dict, &reachable, visited_space);
struct types_state *types = &interp->types;
for (int i = 0; i < _Py_MAX_MANAGED_STATIC_BUILTIN_TYPES; i++) {
- MOVE_UNVISITED(types->builtins.initialized[i].tp_dict, &stack, visited_space);
- MOVE_UNVISITED(types->builtins.initialized[i].tp_subclasses, &stack, visited_space);
+ objects_marked += move_to_reachable(types->builtins.initialized[i].tp_dict, &reachable, visited_space);
+ objects_marked += move_to_reachable(types->builtins.initialized[i].tp_subclasses, &reachable, visited_space);
}
for (int i = 0; i < _Py_MAX_MANAGED_STATIC_EXT_TYPES; i++) {
- MOVE_UNVISITED(types->for_extensions.initialized[i].tp_dict, &stack, visited_space);
- MOVE_UNVISITED(types->for_extensions.initialized[i].tp_subclasses, &stack, visited_space);
+ objects_marked += move_to_reachable(types->for_extensions.initialized[i].tp_dict, &reachable, visited_space);
+ objects_marked += move_to_reachable(types->for_extensions.initialized[i].tp_subclasses, &reachable, visited_space);
}
- Py_ssize_t objects_marked = move_all_transitively_reachable(&stack, visited, visited_space);
- assert(stack.top == NULL);
+ objects_marked += mark_all_reachable(&reachable, visited, visited_space);
+ assert(gc_list_is_empty(&reachable));
return objects_marked;
}
@@ -1539,35 +1549,39 @@ mark_at_start(PyThreadState *tstate)
PyGC_Head *visited = &gcstate->old[gcstate->visited_space].head;
Py_ssize_t objects_marked = mark_global_roots(tstate->interp, visited, gcstate->visited_space);
objects_marked += mark_stacks(tstate->interp, visited, gcstate->visited_space, true);
+ gcstate->work_to_do -= objects_marked;
gcstate->phase = GC_PHASE_COLLECT;
validate_spaces(gcstate);
return objects_marked;
}
-
-/* See InternalDocs/garbage_collector.md for more details. */
-#define MAX_HEAP_PORTION_MULTIPLIER 5
-#define MARKING_PROGRESS_MULTIPLIER 2
-
static intptr_t
assess_work_to_do(GCState *gcstate)
{
- /* The amount of work we want to do depends on two things.
+ /* The amount of work we want to do depends on three things.
* 1. The number of new objects created
- * 2. The heap size (up to a multiple of the number of new objects, to avoid quadratic effects)
+ * 2. The growth in heap size since the last collection
+ * 3. The heap size (up to the number of new objects, to avoid quadratic effects)
+ *
+ * For a steady state heap, the amount of work to do is three times the number
+ * of new objects added to the heap. This ensures that we stay ahead in the
+ * worst case of all new objects being garbage.
+ *
+ * This could be improved by tracking survival rates, but it is still a
+ * large improvement on the non-marking approach.
*/
intptr_t scale_factor = gcstate->old[0].threshold;
if (scale_factor < 2) {
scale_factor = 2;
}
intptr_t new_objects = gcstate->young.count;
- intptr_t max_heap_portion = new_objects * MAX_HEAP_PORTION_MULTIPLIER;
- intptr_t heap_portion = gcstate->heap_size / SCAN_RATE_DIVISOR / scale_factor;
- if (heap_portion > max_heap_portion) {
- heap_portion = max_heap_portion;
+ intptr_t max_heap_fraction = new_objects*3/2;
+ intptr_t heap_fraction = gcstate->heap_size / SCAN_RATE_DIVISOR / scale_factor;
+ if (heap_fraction > max_heap_fraction) {
+ heap_fraction = max_heap_fraction;
}
gcstate->young.count = 0;
- return new_objects + heap_portion;
+ return new_objects + heap_fraction;
}
static void
@@ -1580,37 +1594,36 @@ gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats)
if (gcstate->phase == GC_PHASE_MARK) {
Py_ssize_t objects_marked = mark_at_start(tstate);
GC_STAT_ADD(1, objects_transitively_reachable, objects_marked);
- gcstate->work_to_do -= objects_marked * MARKING_PROGRESS_MULTIPLIER;
+ gcstate->work_to_do -= objects_marked;
validate_spaces(gcstate);
return;
}
PyGC_Head *not_visited = &gcstate->old[gcstate->visited_space^1].head;
PyGC_Head *visited = &gcstate->old[gcstate->visited_space].head;
+ PyGC_Head increment;
+ gc_list_init(&increment);
+ int scale_factor = gcstate->old[0].threshold;
+ if (scale_factor < 2) {
+ scale_factor = 2;
+ }
intptr_t objects_marked = mark_stacks(tstate->interp, visited, gcstate->visited_space, false);
GC_STAT_ADD(1, objects_transitively_reachable, objects_marked);
- gcstate->work_to_do -= objects_marked * MARKING_PROGRESS_MULTIPLIER;
+ gcstate->work_to_do -= objects_marked;
gc_list_set_space(&gcstate->young.head, gcstate->visited_space);
- PyGC_Head increment;
- gc_list_init(&increment);
- WorkStack working;
- working.top = 0;
- working.visited_space = gcstate->visited_space;
- move_list_to_stack(&gcstate->young.head, &working);
- Py_ssize_t increment_size = move_all_transitively_reachable(&working, &increment, gcstate->visited_space);
+ gc_list_merge(&gcstate->young.head, &increment);
gc_list_validate_space(&increment, gcstate->visited_space);
- assert(working.top == NULL);
+ Py_ssize_t increment_size = gc_list_size(&increment);
while (increment_size < gcstate->work_to_do) {
if (gc_list_is_empty(not_visited)) {
break;
}
PyGC_Head *gc = _PyGCHead_NEXT(not_visited);
- gc_set_old_space(gc, gcstate->visited_space);
- push_to_stack(gc, &working);
+ gc_list_move(gc, &increment);
+ increment_size++;
assert(!_Py_IsImmortal(FROM_GC(gc)));
- increment_size += move_all_transitively_reachable(&working, &increment, gcstate->visited_space);
- assert(working.top == NULL);
+ gc_set_old_space(gc, gcstate->visited_space);
+ increment_size += expand_region_transitively_reachable(&increment, gc, gcstate);
}
- assert(increment_size == gc_list_size(&increment));
GC_STAT_ADD(1, objects_not_transitively_reachable, increment_size);
validate_list(&increment, collecting_clear_unreachable_clear);
gc_list_validate_space(&increment, gcstate->visited_space);
@@ -1619,6 +1632,7 @@ gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats)
gc_collect_region(tstate, &increment, &survivors, stats);
gc_list_merge(&survivors, visited);
assert(gc_list_is_empty(&increment));
+ gcstate->work_to_do += gcstate->heap_size / SCAN_RATE_DIVISOR / scale_factor;
gcstate->work_to_do -= increment_size;
add_stats(gcstate, 1, stats);
@@ -1654,7 +1668,6 @@ gc_collect_full(PyThreadState *tstate,
gcstate->old[0].count = 0;
gcstate->old[1].count = 0;
completed_scavenge(gcstate);
- gcstate->work_to_do = -gcstate->young.threshold;
_PyGC_ClearAllFreeLists(tstate->interp);
validate_spaces(gcstate);
add_stats(gcstate, 2, stats);