summaryrefslogtreecommitdiffstats
path: root/Modules
diff options
context:
space:
mode:
authorTim Peters <tim.peters@gmail.com>2002-07-01 03:52:19 (GMT)
committerTim Peters <tim.peters@gmail.com>2002-07-01 03:52:19 (GMT)
commit19b74c786842ba11b9c99b2fd2f9fca2ebd785b5 (patch)
tree19cc4e1f5e568584624159d4f6f4439b49ca6798 /Modules
parent93cd83e4aed7b926cb04d1b656b094973ae3552b (diff)
downloadcpython-19b74c786842ba11b9c99b2fd2f9fca2ebd785b5.zip
cpython-19b74c786842ba11b9c99b2fd2f9fca2ebd785b5.tar.gz
cpython-19b74c786842ba11b9c99b2fd2f9fca2ebd785b5.tar.bz2
OK, I couldn't stand it <0.5 wink>: removed all uncertainty about what's
in gc_refs, even at the cost of putting back a test+branch in visit_decref. The good news: since gc_refs became utterly tame then, it became clear that another special value could be useful. The move_roots() and move_root_reachable() passes have now been replaced by a single move_unreachable() pass. Besides saving a pass over the generation, this has a better effect: most of the time everything turns out to be reachable, so we were breaking the generation list apart and moving it into into the reachable list, one element at a time. Now the reachable stuff stays in the generation list, and the unreachable stuff is moved instead. This isn't quite as good as it sounds, since sometimes we guess wrongly that a thing is unreachable, and have to move it back again. Still, overall, it yields a significant (but not dramatic) boost in collection speed.
Diffstat (limited to 'Modules')
-rw-r--r--Modules/gcmodule.c259
1 files changed, 163 insertions, 96 deletions
diff --git a/Modules/gcmodule.c b/Modules/gcmodule.c
index 29d62bf..cb56253 100644
--- a/Modules/gcmodule.c
+++ b/Modules/gcmodule.c
@@ -74,17 +74,20 @@ static int debug;
/* When a collection begins, gc_refs is set to ob_refcnt for, and only for,
* the objects in the generation being collected, called the "young"
- * generation at that point. As collection proceeds, when it's determined
- * that one of these can't be collected (e.g., because it's reachable from
- * outside, or has a __del__ method), the object is moved out of young, and
- * gc_refs is set to a negative value. The latter is so we can distinguish
- * collection candidates from non-candidates just by looking at the object.
+ * generation at that point. As collection proceeds, the gc_refs members
+ * of young objects are set to GC_REACHABLE when it becomes known that they're
+ * uncollectable, and to GC_TENTATIVELY_UNREACHABLE when the evidence
+ * suggests they are collectable (this can't be known for certain until all
+ * of the young generation is scanned).
*/
-/* Special gc_refs value, although any negative value means "moved". */
-#define GC_MOVED -123
-/* True iff an object is still a candidate for collection. */
-#define STILL_A_CANDIDATE(o) ((AS_GC(o))->gc.gc_refs >= 0)
+/* Special gc_refs values. */
+#define GC_REACHABLE -123
+#define GC_TENTATIVELY_UNREACHABLE -42
+
+#define IS_REACHABLE(o) ((AS_GC(o))->gc.gc_refs == GC_REACHABLE)
+#define IS_TENTATIVELY_UNREACHABLE(o) ( \
+ (AS_GC(o))->gc.gc_refs == GC_TENTATIVELY_UNREACHABLE)
/* list of uncollectable objects */
static PyObject *garbage;
@@ -168,41 +171,40 @@ gc_list_size(PyGC_Head *list)
/*** end of list stuff ***/
-
-/* Set all gc_refs = ob_refcnt. After this, STILL_A_CANDIDATE(o) is true
- * for all objects in containers, and false for all tracked gc objects not
- * in containers (although see the comment in visit_decref).
+/* Set all gc_refs = ob_refcnt. After this, gc_refs is > 0 for all objects
+ * in containers, and is GC_REACHABLE for all tracked gc objects not in
+ * containers.
*/
static void
update_refs(PyGC_Head *containers)
{
PyGC_Head *gc = containers->gc.gc_next;
- for (; gc != containers; gc=gc->gc.gc_next) {
+ for (; gc != containers; gc = gc->gc.gc_next)
gc->gc.gc_refs = FROM_GC(gc)->ob_refcnt;
- }
}
+/* A traversal callback for subtract_refs. */
static int
visit_decref(PyObject *op, void *data)
{
- /* There's no point to decrementing gc_refs unless
- * STILL_A_CANDIDATE(op) is true. It would take extra cycles to
- * check that, though. If STILL_A_CANDIDATE(op) is false,
- * decrementing gc_refs almost always makes it "even more negative",
- * so doesn't change that STILL_A_CANDIDATE is false, and no harm is
- * done. However, it's possible that, after many collections, this
- * could underflow gc_refs in a long-lived old object. In that case,
- * visit_move() may move the old object back to the generation
- * getting collected. That would be a waste of time, but wouldn't
- * cause an error.
- */
assert(op != NULL);
- if (PyObject_IS_GC(op))
- AS_GC(op)->gc.gc_refs--;
+ if (PyObject_IS_GC(op)) {
+ PyGC_Head *gc = AS_GC(op);
+ /* We're only interested in gc_refs for objects in the
+ * generation being collected, which can be recognized
+ * because only they have positive gc_refs.
+ */
+ if (gc->gc.gc_refs > 0)
+ gc->gc.gc_refs--;
+ }
return 0;
}
-/* Subtract internal references from gc_refs */
+/* Subtract internal references from gc_refs. After this, gc_refs is >= 0
+ * for all objects in containers, and is GC_REACHABLE for all tracked gc
+ * objects not in containers. The ones with gc_refs > 0 are directly
+ * reachable from outside containers, and so can't be collected.
+ */
static void
subtract_refs(PyGC_Head *containers)
{
@@ -216,52 +218,100 @@ subtract_refs(PyGC_Head *containers)
}
}
-/* Move objects with gc_refs > 0 to roots list. They can't be collected. */
-static void
-move_roots(PyGC_Head *containers, PyGC_Head *roots)
-{
- PyGC_Head *next;
- PyGC_Head *gc = containers->gc.gc_next;
- while (gc != containers) {
- next = gc->gc.gc_next;
- if (gc->gc.gc_refs > 0) {
- gc_list_remove(gc);
- gc_list_append(gc, roots);
- gc->gc.gc_refs = GC_MOVED;
- }
- gc = next;
- }
-}
-
+/* A traversal callback for move_unreachable. */
static int
-visit_move(PyObject *op, PyGC_Head *tolist)
+visit_reachable(PyObject *op, PyGC_Head *reachable)
{
- if (PyObject_IS_GC(op)) {
- if (IS_TRACKED(op) && STILL_A_CANDIDATE(op)) {
- PyGC_Head *gc = AS_GC(op);
+ if (PyObject_IS_GC(op) && IS_TRACKED(op)) {
+ PyGC_Head *gc = AS_GC(op);
+ const int gc_refs = gc->gc.gc_refs;
+
+ if (gc_refs == 0) {
+ /* This is in move_unreachable's 'young' list, but
+ * the traversal hasn't yet gotten to it. All
+ * we need to do is tell move_unreachable that it's
+ * reachable.
+ */
+ gc->gc.gc_refs = 1;
+ }
+ else if (gc_refs == GC_TENTATIVELY_UNREACHABLE) {
+ /* This had gc_refs = 0 when move_unreachable got
+ * to it, but turns out it's reachable after all.
+ * Move it back to move_unreachable's 'young' list,
+ * and move_unreachable will eventually get to it
+ * again.
+ */
gc_list_remove(gc);
- gc_list_append(gc, tolist);
- gc->gc.gc_refs = GC_MOVED;
+ gc_list_append(gc, reachable);
+ gc->gc.gc_refs = 1;
}
+ /* Else there's nothing to do.
+ * If gc_refs > 0, it must be in move_unreachable's 'young'
+ * list, and move_unreachable will eventually get to it.
+ * If gc_refs == GC_REACHABLE, it's either in some other
+ * generation so we don't care about it, or move_unreachable
+ * already dealt with it.
+ */
}
return 0;
}
-/* Move candidates referenced from reachable to reachable set (they're no
- * longer candidates).
+/* Move the unreachable objects from young to unreachable. After this,
+ * all objects in young have gc_refs = GC_REACHABLE, and all objects in
+ * unreachable have gc_refs = GC_TENTATIVELY_UNREACHABLE. All tracked
+ * gc objects not in young or unreachable still have gc_refs = GC_REACHABLE.
+ * All objects in young after this are directly or indirectly reachable
+ * from outside the original young; and all objects in unreachable are
+ * not.
*/
static void
-move_root_reachable(PyGC_Head *reachable)
+move_unreachable(PyGC_Head *young, PyGC_Head *unreachable)
{
- traverseproc traverse;
- PyGC_Head *gc = reachable->gc.gc_next;
- for (; gc != reachable; gc=gc->gc.gc_next) {
- /* careful, reachable list is growing here */
- PyObject *op = FROM_GC(gc);
- traverse = op->ob_type->tp_traverse;
- (void) traverse(op,
- (visitproc)visit_move,
- (void *)reachable);
+ PyGC_Head *gc = young->gc.gc_next;
+
+ /* Invariants: all objects "to the left" of us in young have gc_refs
+ * = GC_REACHABLE, and are indeed reachable (directly or indirectly)
+ * from outside the young list as it was at entry. All other objects
+ * from the original young "to the left" of us are in unreachable now,
+ * and have gc_refs = GC_TENTATIVELY_UNREACHABLE. All objects to the
+ * left of us in 'young' now have been scanned, and no objects here
+ * or to the right have been scanned yet.
+ */
+
+ while (gc != young) {
+ PyGC_Head *next;
+
+ if (gc->gc.gc_refs == 0) {
+ /* This *may* be unreachable. To make progress,
+ * assume it is. gc isn't directly reachable from
+ * any object we've already traversed, but may be
+ * reachable from an object we haven't gotten to yet.
+ * visit_reachable will eventually move gc back into
+ * young if that's so, and we'll see it again.
+ */
+ next = gc->gc.gc_next;
+ gc_list_remove(gc);
+ gc_list_append(gc, unreachable);
+ gc->gc.gc_refs = GC_TENTATIVELY_UNREACHABLE;
+ }
+ else {
+ /* gc is definitely reachable from outside the
+ * original 'young'. Mark it as such, and traverse
+ * its pointers to find any other objects that may
+ * be directly reachable from it. Note that the
+ * call to tp_traverse may append objects to young,
+ * so we have to wait until it returns to determine
+ * the next object to visit.
+ */
+ PyObject *op = FROM_GC(gc);
+ traverseproc traverse = op->ob_type->tp_traverse;
+ gc->gc.gc_refs = GC_REACHABLE;
+ (void) traverse(op,
+ (visitproc)visit_reachable,
+ (void *)young);
+ next = gc->gc.gc_next;
+ }
+ gc = next;
}
}
@@ -292,12 +342,29 @@ move_finalizers(PyGC_Head *unreachable, PyGC_Head *finalizers)
if (has_finalizer(op)) {
gc_list_remove(gc);
gc_list_append(gc, finalizers);
- gc->gc.gc_refs = GC_MOVED;
+ gc->gc.gc_refs = GC_REACHABLE;
+ }
+ }
+}
+
+/* A traversal callback for move_finalizer_reachable. */
+static int
+visit_move(PyObject *op, PyGC_Head *tolist)
+{
+ if (PyObject_IS_GC(op)) {
+ if (IS_TRACKED(op) && IS_TENTATIVELY_UNREACHABLE(op)) {
+ PyGC_Head *gc = AS_GC(op);
+ gc_list_remove(gc);
+ gc_list_append(gc, tolist);
+ gc->gc.gc_refs = GC_REACHABLE;
}
}
+ return 0;
}
-/* Move objects referenced from roots to roots */
+/* Move objects that are reachable from finalizers, from the unreachable set
+ * into the finalizers set.
+ */
static void
move_finalizer_reachable(PyGC_Head *finalizers)
{
@@ -353,11 +420,12 @@ handle_finalizers(PyGC_Head *finalizers, PyGC_Head *old)
/* If SAVEALL is not set then just append objects with
* finalizers to the list of garbage. All objects in
* the finalizers list are reachable from those
- * objects. */
+ * objects.
+ */
PyList_Append(garbage, op);
}
/* object is now reachable again */
- assert(!STILL_A_CANDIDATE(op));
+ assert(IS_REACHABLE(op));
gc_list_remove(gc);
gc_list_append(gc, old);
}
@@ -365,7 +433,8 @@ handle_finalizers(PyGC_Head *finalizers, PyGC_Head *old)
/* Break reference cycles by clearing the containers involved. This is
* tricky business as the lists can be changing and we don't know which
- * objects may be freed. It is possible I screwed something up here. */
+ * objects may be freed. It is possible I screwed something up here.
+ */
static void
delete_garbage(PyGC_Head *unreachable, PyGC_Head *old)
{
@@ -375,7 +444,7 @@ delete_garbage(PyGC_Head *unreachable, PyGC_Head *old)
PyGC_Head *gc = unreachable->gc.gc_next;
PyObject *op = FROM_GC(gc);
- assert(STILL_A_CANDIDATE(op));
+ assert(IS_TENTATIVELY_UNREACHABLE(op));
if (debug & DEBUG_SAVEALL) {
PyList_Append(garbage, op);
}
@@ -390,7 +459,7 @@ delete_garbage(PyGC_Head *unreachable, PyGC_Head *old)
/* object is still alive, move it, it may die later */
gc_list_remove(gc);
gc_list_append(gc, old);
- gc->gc.gc_refs = GC_MOVED;
+ gc->gc.gc_refs = GC_REACHABLE;
}
}
}
@@ -401,11 +470,10 @@ static long
collect(int generation)
{
int i;
- long n = 0;
- long m = 0;
+ long m = 0; /* # objects collected */
+ long n = 0; /* # unreachable objects that couldn't be collected */
PyGC_Head *young; /* the generation we are examining */
PyGC_Head *old; /* next older generation */
- PyGC_Head reachable;
PyGC_Head unreachable;
PyGC_Head finalizers;
PyGC_Head *gc;
@@ -433,38 +501,37 @@ collect(int generation)
/* handy references */
young = GEN_HEAD(generation);
- if (generation < NUM_GENERATIONS-1) {
+ if (generation < NUM_GENERATIONS-1)
old = GEN_HEAD(generation+1);
- } else {
- old = GEN_HEAD(NUM_GENERATIONS-1);
- }
+ else
+ old = young;
/* Using ob_refcnt and gc_refs, calculate which objects in the
* container set are reachable from outside the set (ie. have a
* refcount greater than 0 when all the references within the
- * set are taken into account */
+ * set are taken into account
+ */
update_refs(young);
subtract_refs(young);
- /* Move everything reachable from outside the set into the
- * reachable set (ie. gc_refs > 0). Next, move everything
- * reachable from objects in the reachable set. */
- gc_list_init(&reachable);
- move_roots(young, &reachable);
- move_root_reachable(&reachable);
-
- /* move unreachable objects to a temporary list, new objects can be
- * allocated after this point */
+ /* Leave everything reachable from outside young in young, and move
+ * everything else (in young) to unreachable.
+ * NOTE: This used to move the reachable objects into a reachable
+ * set instead. But most things usually turn out to be reachable,
+ * so it's more efficient to move the unreachable things.
+ */
gc_list_init(&unreachable);
- gc_list_move(young, &unreachable);
+ move_unreachable(young, &unreachable);
- /* move reachable objects to next generation */
- gc_list_merge(&reachable, old);
+ /* Move reachable objects to next generation. */
+ if (young != old)
+ gc_list_merge(young, old);
- /* Move objects reachable from finalizers, we can't safely delete
- * them. Python programmers should take care not to create such
- * things. For Python finalizers means instance objects with
- * __del__ methods. */
+ /* All objects in unreachable are trash, but objects reachable from
+ * finalizers can't safely be deleted. Python programmers should take
+ * care not to create such things. For Python, finalizers means
+ * instance objects with __del__ methods.
+ */
gc_list_init(&finalizers);
move_finalizers(&unreachable, &finalizers);
move_finalizer_reachable(&finalizers);
@@ -478,7 +545,7 @@ collect(int generation)
debug_cycle("collectable", FROM_GC(gc));
}
}
- /* call tp_clear on objects in the collectable set. This will cause
+ /* Call tp_clear on objects in the collectable set. This will cause
* the reference cycles to be broken. It may also cause some objects in
* finalizers to be freed */
delete_garbage(&unreachable, old);