summaryrefslogtreecommitdiffstats
path: root/Modules/gcmodule.c
diff options
context:
space:
mode:
authorTim Peters <tim.peters@gmail.com>2002-06-30 17:56:40 (GMT)
committerTim Peters <tim.peters@gmail.com>2002-06-30 17:56:40 (GMT)
commit8839617cc9f026f349f7f371025131f224271b9f (patch)
tree5c366183298fbb858118b5a83cf57e21028a5f3b /Modules/gcmodule.c
parent6238d2b024f061159b2613387ff700695c10deef (diff)
downloadcpython-8839617cc9f026f349f7f371025131f224271b9f.zip
cpython-8839617cc9f026f349f7f371025131f224271b9f.tar.gz
cpython-8839617cc9f026f349f7f371025131f224271b9f.tar.bz2
SF bug #574132: Major GC related performance regression
"The regression" is actually due to that 2.2.1 had a bug that prevented the regression (which isn't a regression at all) from showing up. "The regression" is actually a glitch in cyclic gc that's been there forever. As the generation being collected is analyzed, objects that can't be collected (because, e.g., we find they're externally referenced, or are in an unreachable cycle but have a __del__ method) are moved out of the list of candidates. A tricksy scheme uses negative values of gc_refs to mark such objects as being moved. However, the exact negative value set at the start may become "more negative" over time for objects not in the generation being collected, and the scheme was checking for an exact match on the negative value originally assigned. As a result, objects in generations older than the one being collected could get scanned too, and yanked back into a younger generation. Doing so doesn't lead to an error, but doesn't do any good, and can burn an unbounded amount of time doing useless work. A test case is simple (thanks to Kevin Jacobs for finding it!): x = [] for i in xrange(200000): x.append((1,)) Without the patch, this ends up scanning all of x on every gen0 collection, scans all of x twice on every gen1 collection, and x gets yanked back into gen1 on every gen0 collection. With the patch, once x gets to gen2, it's never scanned again until another gen2 collection, and stays in gen2. Bugfix candidate, although the code has changed enough that I think I'll need to port it by hand. 2.2.1 also has a different bug that causes bound method objects not to get tracked at all (so the test case doesn't burn absurd amounts of time in 2.2.1, but *should* <wink>).
Diffstat (limited to 'Modules/gcmodule.c')
-rw-r--r--Modules/gcmodule.c56
1 files changed, 43 insertions, 13 deletions
diff --git a/Modules/gcmodule.c b/Modules/gcmodule.c
index a26adae..b3a688c 100644
--- a/Modules/gcmodule.c
+++ b/Modules/gcmodule.c
@@ -1,5 +1,5 @@
/*
-
+
Reference Cycle Garbage Collection
==================================
@@ -72,11 +72,19 @@ static int collecting;
DEBUG_SAVEALL
static int debug;
-/* Special gc_refs value */
+/* When a collection begins, gc_refs is set to ob_refcnt for, and only for,
+ * the objects in the generation being collected, called the "young"
+ * generation at that point. As collection proceeds, when it's determined
+ * that one of these can't be collected (e.g., because it's reachable from
+ * outside, or has a __del__ method), the object is moved out of young, and
+ * gc_refs is set to a negative value. The latter is so we can distinguish
+ * collection candidates from non-candidates just by looking at the object.
+ */
+/* Special gc_refs value, although any negative value means "moved". */
#define GC_MOVED -123
-/* True if an object has been moved to the older generation */
-#define IS_MOVED(o) ((AS_GC(o))->gc.gc_refs == GC_MOVED)
+/* True iff an object is still a candidate for collection. */
+#define STILL_A_CANDIDATE(o) ((AS_GC(o))->gc.gc_refs >= 0)
/* list of uncollectable objects */
static PyObject *garbage;
@@ -116,7 +124,7 @@ gc_list_remove(PyGC_Head *node)
node->gc.gc_next = NULL; /* object is not currently tracked */
}
-static void
+static void
gc_list_move(PyGC_Head *from, PyGC_Head *to)
{
if (gc_list_is_empty(from)) {
@@ -161,7 +169,10 @@ gc_list_size(PyGC_Head *list)
-/* Set all gc_refs = ob_refcnt */
+/* Set all gc_refs = ob_refcnt. After this, STILL_A_CANDIDATE(o) is true
+ * for all objects in containers, and false for all tracked gc objects not
+ * in containers (although see the comment in visit_decref).
+ */
static void
update_refs(PyGC_Head *containers)
{
@@ -174,9 +185,21 @@ update_refs(PyGC_Head *containers)
static int
visit_decref(PyObject *op, void *data)
{
+ /* There's no point to decrementing gc_refs unless
+ * STILL_A_CANDIDATE(op) is true. It would take extra cycles to
+ * check that, though. If STILL_A_CANDIDATE(op) is false,
+ * decrementing gc_refs almost always makes it "even more negative",
+ * so doesn't change that STILL_A_CANDIDATE is false, and no harm is
+ * done. However, it's possible that, after many collections, this
+ * could underflow gc_refs in a long-lived old object. In that case,
+ * visit_move() may move the old object back to the generation
+ * getting collected. That would be a waste of time, but wouldn't
+ * cause an error.
+ */
if (op && PyObject_IS_GC(op)) {
- if (IS_TRACKED(op))
+ if (IS_TRACKED(op)) {
AS_GC(op)->gc.gc_refs--;
+ }
}
return 0;
}
@@ -195,7 +218,7 @@ subtract_refs(PyGC_Head *containers)
}
}
-/* Append objects with gc_refs > 0 to roots list */
+/* Move objects with gc_refs > 0 to roots list. They can't be collected. */
static void
move_roots(PyGC_Head *containers, PyGC_Head *roots)
{
@@ -216,7 +239,7 @@ static int
visit_move(PyObject *op, PyGC_Head *tolist)
{
if (PyObject_IS_GC(op)) {
- if (IS_TRACKED(op) && !IS_MOVED(op)) {
+ if (IS_TRACKED(op) && STILL_A_CANDIDATE(op)) {
PyGC_Head *gc = AS_GC(op);
gc_list_remove(gc);
gc_list_append(gc, tolist);
@@ -226,7 +249,9 @@ visit_move(PyObject *op, PyGC_Head *tolist)
return 0;
}
-/* Move objects referenced from reachable to reachable set. */
+/* Move candidates referenced from reachable to reachable set (they're no
+ * longer candidates).
+ */
static void
move_root_reachable(PyGC_Head *reachable)
{
@@ -242,7 +267,7 @@ move_root_reachable(PyGC_Head *reachable)
}
}
-/* return true of object has a finalization method */
+/* return true if object has a finalization method */
static int
has_finalizer(PyObject *op)
{
@@ -269,6 +294,7 @@ move_finalizers(PyGC_Head *unreachable, PyGC_Head *finalizers)
if (has_finalizer(op)) {
gc_list_remove(gc);
gc_list_append(gc, finalizers);
+ gc->gc.gc_refs = GC_MOVED;
}
}
}
@@ -282,7 +308,7 @@ move_finalizer_reachable(PyGC_Head *finalizers)
for (; gc != finalizers; gc=gc->gc.gc_next) {
/* careful, finalizers list is growing here */
traverse = FROM_GC(gc)->ob_type->tp_traverse;
- (void) traverse(FROM_GC(gc),
+ (void) traverse(FROM_GC(gc),
(visitproc)visit_move,
(void *)finalizers);
}
@@ -332,7 +358,8 @@ handle_finalizers(PyGC_Head *finalizers, PyGC_Head *old)
* objects. */
PyList_Append(garbage, op);
}
- /* object is now reachable again */
+ /* object is now reachable again */
+ assert(!STILL_A_CANDIDATE(op));
gc_list_remove(gc);
gc_list_append(gc, old);
}
@@ -349,6 +376,8 @@ delete_garbage(PyGC_Head *unreachable, PyGC_Head *old)
while (!gc_list_is_empty(unreachable)) {
PyGC_Head *gc = unreachable->gc.gc_next;
PyObject *op = FROM_GC(gc);
+
+ assert(STILL_A_CANDIDATE(op));
if (debug & DEBUG_SAVEALL) {
PyList_Append(garbage, op);
}
@@ -363,6 +392,7 @@ delete_garbage(PyGC_Head *unreachable, PyGC_Head *old)
/* object is still alive, move it, it may die later */
gc_list_remove(gc);
gc_list_append(gc, old);
+ gc->gc.gc_refs = GC_MOVED;
}
}
}