From 90913985b62845a58f6b9e815121bcf614bd107f Mon Sep 17 00:00:00 2001 From: Pablo Galindo Date: Fri, 27 Dec 2019 21:55:56 +0000 Subject: Move comment about permanent generation to gcmodule.c (GH-17718) The comment about the collection rules for the permanent generation was incorrectly referenced by a comment in gcmodule.c (the comment has been moved long ago into a header file). Moving the comment into the relevant code helps with readability and avoids broken references. --- Include/internal/pycore_pymem.h | 36 ------------------------------------ Modules/gcmodule.c | 36 ++++++++++++++++++++++++++++++++++-- 2 files changed, 34 insertions(+), 38 deletions(-) diff --git a/Include/internal/pycore_pymem.h b/Include/internal/pycore_pymem.h index a4e9720..06d0d06 100644 --- a/Include/internal/pycore_pymem.h +++ b/Include/internal/pycore_pymem.h @@ -16,42 +16,6 @@ extern "C" { /* If we change this, we need to change the default value in the signature of gc.collect. */ #define NUM_GENERATIONS 3 - -/* - NOTE: about the counting of long-lived objects. - - To limit the cost of garbage collection, there are two strategies; - - make each collection faster, e.g. by scanning fewer objects - - do less collections - This heuristic is about the latter strategy. - - In addition to the various configurable thresholds, we only trigger a - full collection if the ratio - long_lived_pending / long_lived_total - is above a given value (hardwired to 25%). - - The reason is that, while "non-full" collections (i.e., collections of - the young and middle generations) will always examine roughly the same - number of objects -- determined by the aforementioned thresholds --, - the cost of a full collection is proportional to the total number of - long-lived objects, which is virtually unbounded. - - Indeed, it has been remarked that doing a full collection every - of object creations entails a dramatic performance - degradation in workloads which consist in creating and storing lots of - long-lived objects (e.g. building a large list of GC-tracked objects would - show quadratic performance, instead of linear as expected: see issue #4074). - - Using the above ratio, instead, yields amortized linear performance in - the total number of objects (the effect of which can be summarized - thusly: "each full garbage collection is more and more costly as the - number of objects grows, but we do fewer and fewer of them"). - - This heuristic was suggested by Martin von Löwis on python-dev in - June 2008. His original analysis and proposal can be found at: - http://mail.python.org/pipermail/python-dev/2008-June/080579.html -*/ - /* NOTE: about untracking of mutable objects. diff --git a/Modules/gcmodule.c b/Modules/gcmodule.c index 64afe83..b11ae84 100644 --- a/Modules/gcmodule.c +++ b/Modules/gcmodule.c @@ -1381,8 +1381,40 @@ collect_generations(PyThreadState *tstate) for (int i = NUM_GENERATIONS-1; i >= 0; i--) { if (gcstate->generations[i].count > gcstate->generations[i].threshold) { /* Avoid quadratic performance degradation in number - of tracked objects. See comments at the beginning - of this file, and issue #4074. + of tracked objects (see also issue #4074): + + To limit the cost of garbage collection, there are two strategies; + - make each collection faster, e.g. by scanning fewer objects + - do less collections + This heuristic is about the latter strategy. + + In addition to the various configurable thresholds, we only trigger a + full collection if the ratio + + long_lived_pending / long_lived_total + + is above a given value (hardwired to 25%). + + The reason is that, while "non-full" collections (i.e., collections of + the young and middle generations) will always examine roughly the same + number of objects -- determined by the aforementioned thresholds --, + the cost of a full collection is proportional to the total number of + long-lived objects, which is virtually unbounded. + + Indeed, it has been remarked that doing a full collection every + of object creations entails a dramatic performance + degradation in workloads which consist in creating and storing lots of + long-lived objects (e.g. building a large list of GC-tracked objects would + show quadratic performance, instead of linear as expected: see issue #4074). + + Using the above ratio, instead, yields amortized linear performance in + the total number of objects (the effect of which can be summarized + thusly: "each full garbage collection is more and more costly as the + number of objects grows, but we do fewer and fewer of them"). + + This heuristic was suggested by Martin von Löwis on python-dev in + June 2008. His original analysis and proposal can be found at: + http://mail.python.org/pipermail/python-dev/2008-June/080579.html */ if (i == NUM_GENERATIONS - 1 && gcstate->long_lived_pending < gcstate->long_lived_total / 4) -- cgit v0.12