diff options
author | Eric Snow <ericsnowcurrently@gmail.com> | 2017-09-06 04:43:08 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2017-09-06 04:43:08 (GMT) |
commit | 05351c1bd8b70d1878527762174cdaaba3572395 (patch) | |
tree | e97ef4ba0ae7ffe5bd2c8969199616bffbbc4d6f /Modules/gcmodule.c | |
parent | 833860615bedfd2484ac0623d6f01ff0578ba09f (diff) | |
download | cpython-05351c1bd8b70d1878527762174cdaaba3572395.zip cpython-05351c1bd8b70d1878527762174cdaaba3572395.tar.gz cpython-05351c1bd8b70d1878527762174cdaaba3572395.tar.bz2 |
Revert "bpo-30860: Consolidate stateful runtime globals." (#3379)
Windows buildbots started failing due to include-related errors.
Diffstat (limited to 'Modules/gcmodule.c')
-rw-r--r-- | Modules/gcmodule.c | 309 |
1 files changed, 213 insertions, 96 deletions
diff --git a/Modules/gcmodule.c b/Modules/gcmodule.c index fa67f7f..4e5acf3 100644 --- a/Modules/gcmodule.c +++ b/Modules/gcmodule.c @@ -39,9 +39,133 @@ module gc /* Get the object given the GC head */ #define FROM_GC(g) ((PyObject *)(((PyGC_Head *)g)+1)) +/*** Global GC state ***/ + +struct gc_generation { + PyGC_Head head; + int threshold; /* collection threshold */ + int count; /* count of allocations or collections of younger + generations */ +}; + +/* If we change this, we need to change the default value in the signature of + gc.collect. */ +#define NUM_GENERATIONS 3 +#define GEN_HEAD(n) (&generations[n].head) + +/* linked lists of container objects */ +static struct gc_generation generations[NUM_GENERATIONS] = { + /* PyGC_Head, threshold, count */ + {{{GEN_HEAD(0), GEN_HEAD(0), 0}}, 700, 0}, + {{{GEN_HEAD(1), GEN_HEAD(1), 0}}, 10, 0}, + {{{GEN_HEAD(2), GEN_HEAD(2), 0}}, 10, 0}, +}; + +PyGC_Head *_PyGC_generation0 = GEN_HEAD(0); + +static int enabled = 1; /* automatic collection enabled? */ + +/* true if we are currently running the collector */ +static int collecting = 0; + +/* list of uncollectable objects */ +static PyObject *garbage = NULL; + /* Python string to use if unhandled exception occurs */ static PyObject *gc_str = NULL; +/* a list of callbacks to be invoked when collection is performed */ +static PyObject *callbacks = NULL; + +/* This is the number of objects that survived the last full collection. It + approximates the number of long lived objects tracked by the GC. + + (by "full collection", we mean a collection of the oldest generation). +*/ +static Py_ssize_t long_lived_total = 0; + +/* This is the number of objects that survived all "non-full" collections, + and are awaiting to undergo a full collection for the first time. + +*/ +static Py_ssize_t long_lived_pending = 0; + +/* + NOTE: about the counting of long-lived objects. + + To limit the cost of garbage collection, there are two strategies; + - make each collection faster, e.g. by scanning fewer objects + - do less collections + This heuristic is about the latter strategy. + + In addition to the various configurable thresholds, we only trigger a + full collection if the ratio + long_lived_pending / long_lived_total + is above a given value (hardwired to 25%). + + The reason is that, while "non-full" collections (i.e., collections of + the young and middle generations) will always examine roughly the same + number of objects -- determined by the aforementioned thresholds --, + the cost of a full collection is proportional to the total number of + long-lived objects, which is virtually unbounded. + + Indeed, it has been remarked that doing a full collection every + <constant number> of object creations entails a dramatic performance + degradation in workloads which consist in creating and storing lots of + long-lived objects (e.g. building a large list of GC-tracked objects would + show quadratic performance, instead of linear as expected: see issue #4074). + + Using the above ratio, instead, yields amortized linear performance in + the total number of objects (the effect of which can be summarized + thusly: "each full garbage collection is more and more costly as the + number of objects grows, but we do fewer and fewer of them"). + + This heuristic was suggested by Martin von Löwis on python-dev in + June 2008. His original analysis and proposal can be found at: + http://mail.python.org/pipermail/python-dev/2008-June/080579.html +*/ + +/* + NOTE: about untracking of mutable objects. + + Certain types of container cannot participate in a reference cycle, and + so do not need to be tracked by the garbage collector. Untracking these + objects reduces the cost of garbage collections. However, determining + which objects may be untracked is not free, and the costs must be + weighed against the benefits for garbage collection. + + There are two possible strategies for when to untrack a container: + + i) When the container is created. + ii) When the container is examined by the garbage collector. + + Tuples containing only immutable objects (integers, strings etc, and + recursively, tuples of immutable objects) do not need to be tracked. + The interpreter creates a large number of tuples, many of which will + not survive until garbage collection. It is therefore not worthwhile + to untrack eligible tuples at creation time. + + Instead, all tuples except the empty tuple are tracked when created. + During garbage collection it is determined whether any surviving tuples + can be untracked. A tuple can be untracked if all of its contents are + already not tracked. Tuples are examined for untracking in all garbage + collection cycles. It may take more than one cycle to untrack a tuple. + + Dictionaries containing only immutable objects also do not need to be + tracked. Dictionaries are untracked when created. If a tracked item is + inserted into a dictionary (either as a key or value), the dictionary + becomes tracked. During a full garbage collection (all generations), + the collector will untrack any dictionaries whose contents are not + tracked. + + The module provides the python function is_tracked(obj), which returns + the CURRENT tracking status of the object. Subsequent garbage + collections may change the tracking status of the object. + + Untracking of certain containers was introduced in issue #4688, and + the algorithm was refined in response to issue #14775. +*/ + /* set for debugging information */ #define DEBUG_STATS (1<<0) /* print collection statistics */ #define DEBUG_COLLECTABLE (1<<1) /* print collectable objects */ @@ -50,26 +174,19 @@ static PyObject *gc_str = NULL; #define DEBUG_LEAK DEBUG_COLLECTABLE | \ DEBUG_UNCOLLECTABLE | \ DEBUG_SAVEALL +static int debug; + +/* Running stats per generation */ +struct gc_generation_stats { + /* total number of collections */ + Py_ssize_t collections; + /* total number of collected objects */ + Py_ssize_t collected; + /* total number of uncollectable objects (put into gc.garbage) */ + Py_ssize_t uncollectable; +}; -#define GEN_HEAD(n) (&_PyRuntime.gc.generations[n].head) - -void -_PyGC_Initialize(struct _gc_runtime_state *state) -{ - state->enabled = 1; /* automatic collection enabled? */ - -#define _GEN_HEAD(n) (&state->generations[n].head) - struct gc_generation generations[NUM_GENERATIONS] = { - /* PyGC_Head, threshold, count */ - {{{_GEN_HEAD(0), _GEN_HEAD(0), 0}}, 700, 0}, - {{{_GEN_HEAD(1), _GEN_HEAD(1), 0}}, 10, 0}, - {{{_GEN_HEAD(2), _GEN_HEAD(2), 0}}, 10, 0}, - }; - for (int i = 0; i < NUM_GENERATIONS; i++) { - state->generations[i] = generations[i]; - }; - state->generation0 = GEN_HEAD(0); -} +static struct gc_generation_stats generation_stats[NUM_GENERATIONS]; /*-------------------------------------------------------------------------- gc_refs values. @@ -649,16 +766,16 @@ handle_legacy_finalizers(PyGC_Head *finalizers, PyGC_Head *old) { PyGC_Head *gc = finalizers->gc.gc_next; - if (_PyRuntime.gc.garbage == NULL) { - _PyRuntime.gc.garbage = PyList_New(0); - if (_PyRuntime.gc.garbage == NULL) + if (garbage == NULL) { + garbage = PyList_New(0); + if (garbage == NULL) Py_FatalError("gc couldn't create gc.garbage list"); } for (; gc != finalizers; gc = gc->gc.gc_next) { PyObject *op = FROM_GC(gc); - if ((_PyRuntime.gc.debug & DEBUG_SAVEALL) || has_legacy_finalizer(op)) { - if (PyList_Append(_PyRuntime.gc.garbage, op) < 0) + if ((debug & DEBUG_SAVEALL) || has_legacy_finalizer(op)) { + if (PyList_Append(garbage, op) < 0) return -1; } } @@ -748,8 +865,8 @@ delete_garbage(PyGC_Head *collectable, PyGC_Head *old) PyGC_Head *gc = collectable->gc.gc_next; PyObject *op = FROM_GC(gc); - if (_PyRuntime.gc.debug & DEBUG_SAVEALL) { - PyList_Append(_PyRuntime.gc.garbage, op); + if (debug & DEBUG_SAVEALL) { + PyList_Append(garbage, op); } else { if ((clear = Py_TYPE(op)->tp_clear) != NULL) { @@ -802,9 +919,9 @@ collect(int generation, Py_ssize_t *n_collected, Py_ssize_t *n_uncollectable, PyGC_Head *gc; _PyTime_t t1 = 0; /* initialize to prevent a compiler warning */ - struct gc_generation_stats *stats = &_PyRuntime.gc.generation_stats[generation]; + struct gc_generation_stats *stats = &generation_stats[generation]; - if (_PyRuntime.gc.debug & DEBUG_STATS) { + if (debug & DEBUG_STATS) { PySys_WriteStderr("gc: collecting generation %d...\n", generation); PySys_WriteStderr("gc: objects in each generation:"); @@ -821,9 +938,9 @@ collect(int generation, Py_ssize_t *n_collected, Py_ssize_t *n_uncollectable, /* update collection and allocation counters */ if (generation+1 < NUM_GENERATIONS) - _PyRuntime.gc.generations[generation+1].count += 1; + generations[generation+1].count += 1; for (i = 0; i <= generation; i++) - _PyRuntime.gc.generations[i].count = 0; + generations[i].count = 0; /* merge younger generations with one we are currently collecting */ for (i = 0; i < generation; i++) { @@ -857,7 +974,7 @@ collect(int generation, Py_ssize_t *n_collected, Py_ssize_t *n_uncollectable, /* Move reachable objects to next generation. */ if (young != old) { if (generation == NUM_GENERATIONS - 2) { - _PyRuntime.gc.long_lived_pending += gc_list_size(young); + long_lived_pending += gc_list_size(young); } gc_list_merge(young, old); } @@ -865,8 +982,8 @@ collect(int generation, Py_ssize_t *n_collected, Py_ssize_t *n_uncollectable, /* We only untrack dicts in full collections, to avoid quadratic dict build-up. See issue #14775. */ untrack_dicts(young); - _PyRuntime.gc.long_lived_pending = 0; - _PyRuntime.gc.long_lived_total = gc_list_size(young); + long_lived_pending = 0; + long_lived_total = gc_list_size(young); } /* All objects in unreachable are trash, but objects reachable from @@ -886,7 +1003,7 @@ collect(int generation, Py_ssize_t *n_collected, Py_ssize_t *n_uncollectable, for (gc = unreachable.gc.gc_next; gc != &unreachable; gc = gc->gc.gc_next) { m++; - if (_PyRuntime.gc.debug & DEBUG_COLLECTABLE) { + if (debug & DEBUG_COLLECTABLE) { debug_cycle("collectable", FROM_GC(gc)); } } @@ -915,10 +1032,10 @@ collect(int generation, Py_ssize_t *n_collected, Py_ssize_t *n_uncollectable, gc != &finalizers; gc = gc->gc.gc_next) { n++; - if (_PyRuntime.gc.debug & DEBUG_UNCOLLECTABLE) + if (debug & DEBUG_UNCOLLECTABLE) debug_cycle("uncollectable", FROM_GC(gc)); } - if (_PyRuntime.gc.debug & DEBUG_STATS) { + if (debug & DEBUG_STATS) { _PyTime_t t2 = _PyTime_GetMonotonicClock(); if (m == 0 && n == 0) @@ -981,11 +1098,11 @@ invoke_gc_callback(const char *phase, int generation, PyObject *info = NULL; /* we may get called very early */ - if (_PyRuntime.gc.callbacks == NULL) + if (callbacks == NULL) return; /* The local variable cannot be rebound, check it for sanity */ - assert(_PyRuntime.gc.callbacks != NULL && PyList_CheckExact(_PyRuntime.gc.callbacks)); - if (PyList_GET_SIZE(_PyRuntime.gc.callbacks) != 0) { + assert(callbacks != NULL && PyList_CheckExact(callbacks)); + if (PyList_GET_SIZE(callbacks) != 0) { info = Py_BuildValue("{sisnsn}", "generation", generation, "collected", collected, @@ -995,8 +1112,8 @@ invoke_gc_callback(const char *phase, int generation, return; } } - for (i=0; i<PyList_GET_SIZE(_PyRuntime.gc.callbacks); i++) { - PyObject *r, *cb = PyList_GET_ITEM(_PyRuntime.gc.callbacks, i); + for (i=0; i<PyList_GET_SIZE(callbacks); i++) { + PyObject *r, *cb = PyList_GET_ITEM(callbacks, i); Py_INCREF(cb); /* make sure cb doesn't go away */ r = PyObject_CallFunction(cb, "sO", phase, info); Py_XDECREF(r); @@ -1030,13 +1147,13 @@ collect_generations(void) * exceeds the threshold. Objects in the that generation and * generations younger than it will be collected. */ for (i = NUM_GENERATIONS-1; i >= 0; i--) { - if (_PyRuntime.gc.generations[i].count > _PyRuntime.gc.generations[i].threshold) { + if (generations[i].count > generations[i].threshold) { /* Avoid quadratic performance degradation in number of tracked objects. See comments at the beginning of this file, and issue #4074. */ if (i == NUM_GENERATIONS - 1 - && _PyRuntime.gc.long_lived_pending < _PyRuntime.gc.long_lived_total / 4) + && long_lived_pending < long_lived_total / 4) continue; n = collect_with_callback(i); break; @@ -1057,7 +1174,7 @@ static PyObject * gc_enable_impl(PyObject *module) /*[clinic end generated code: output=45a427e9dce9155c input=81ac4940ca579707]*/ { - _PyRuntime.gc.enabled = 1; + enabled = 1; Py_RETURN_NONE; } @@ -1071,7 +1188,7 @@ static PyObject * gc_disable_impl(PyObject *module) /*[clinic end generated code: output=97d1030f7aa9d279 input=8c2e5a14e800d83b]*/ { - _PyRuntime.gc.enabled = 0; + enabled = 0; Py_RETURN_NONE; } @@ -1085,7 +1202,7 @@ static int gc_isenabled_impl(PyObject *module) /*[clinic end generated code: output=1874298331c49130 input=30005e0422373b31]*/ { - return _PyRuntime.gc.enabled; + return enabled; } /*[clinic input] @@ -1113,12 +1230,12 @@ gc_collect_impl(PyObject *module, int generation) return -1; } - if (_PyRuntime.gc.collecting) + if (collecting) n = 0; /* already collecting, don't do anything */ else { - _PyRuntime.gc.collecting = 1; + collecting = 1; n = collect_with_callback(generation); - _PyRuntime.gc.collecting = 0; + collecting = 0; } return n; @@ -1146,7 +1263,7 @@ static PyObject * gc_set_debug_impl(PyObject *module, int flags) /*[clinic end generated code: output=7c8366575486b228 input=5e5ce15e84fbed15]*/ { - _PyRuntime.gc.debug = flags; + debug = flags; Py_RETURN_NONE; } @@ -1161,7 +1278,7 @@ static int gc_get_debug_impl(PyObject *module) /*[clinic end generated code: output=91242f3506cd1e50 input=91a101e1c3b98366]*/ { - return _PyRuntime.gc.debug; + return debug; } PyDoc_STRVAR(gc_set_thresh__doc__, @@ -1175,13 +1292,13 @@ gc_set_thresh(PyObject *self, PyObject *args) { int i; if (!PyArg_ParseTuple(args, "i|ii:set_threshold", - &_PyRuntime.gc.generations[0].threshold, - &_PyRuntime.gc.generations[1].threshold, - &_PyRuntime.gc.generations[2].threshold)) + &generations[0].threshold, + &generations[1].threshold, + &generations[2].threshold)) return NULL; for (i = 2; i < NUM_GENERATIONS; i++) { /* generations higher than 2 get the same threshold */ - _PyRuntime.gc.generations[i].threshold = _PyRuntime.gc.generations[2].threshold; + generations[i].threshold = generations[2].threshold; } Py_RETURN_NONE; @@ -1198,9 +1315,9 @@ gc_get_threshold_impl(PyObject *module) /*[clinic end generated code: output=7902bc9f41ecbbd8 input=286d79918034d6e6]*/ { return Py_BuildValue("(iii)", - _PyRuntime.gc.generations[0].threshold, - _PyRuntime.gc.generations[1].threshold, - _PyRuntime.gc.generations[2].threshold); + generations[0].threshold, + generations[1].threshold, + generations[2].threshold); } /*[clinic input] @@ -1214,9 +1331,9 @@ gc_get_count_impl(PyObject *module) /*[clinic end generated code: output=354012e67b16398f input=a392794a08251751]*/ { return Py_BuildValue("(iii)", - _PyRuntime.gc.generations[0].count, - _PyRuntime.gc.generations[1].count, - _PyRuntime.gc.generations[2].count); + generations[0].count, + generations[1].count, + generations[2].count); } static int @@ -1347,7 +1464,7 @@ gc_get_stats_impl(PyObject *module) /* To get consistent values despite allocations while constructing the result list, we use a snapshot of the running stats. */ for (i = 0; i < NUM_GENERATIONS; i++) { - stats[i] = _PyRuntime.gc.generation_stats[i]; + stats[i] = generation_stats[i]; } result = PyList_New(0); @@ -1464,22 +1581,22 @@ PyInit_gc(void) if (m == NULL) return NULL; - if (_PyRuntime.gc.garbage == NULL) { - _PyRuntime.gc.garbage = PyList_New(0); - if (_PyRuntime.gc.garbage == NULL) + if (garbage == NULL) { + garbage = PyList_New(0); + if (garbage == NULL) return NULL; } - Py_INCREF(_PyRuntime.gc.garbage); - if (PyModule_AddObject(m, "garbage", _PyRuntime.gc.garbage) < 0) + Py_INCREF(garbage); + if (PyModule_AddObject(m, "garbage", garbage) < 0) return NULL; - if (_PyRuntime.gc.callbacks == NULL) { - _PyRuntime.gc.callbacks = PyList_New(0); - if (_PyRuntime.gc.callbacks == NULL) + if (callbacks == NULL) { + callbacks = PyList_New(0); + if (callbacks == NULL) return NULL; } - Py_INCREF(_PyRuntime.gc.callbacks); - if (PyModule_AddObject(m, "callbacks", _PyRuntime.gc.callbacks) < 0) + Py_INCREF(callbacks); + if (PyModule_AddObject(m, "callbacks", callbacks) < 0) return NULL; #define ADD_INT(NAME) if (PyModule_AddIntConstant(m, #NAME, NAME) < 0) return NULL @@ -1498,12 +1615,12 @@ PyGC_Collect(void) { Py_ssize_t n; - if (_PyRuntime.gc.collecting) + if (collecting) n = 0; /* already collecting, don't do anything */ else { - _PyRuntime.gc.collecting = 1; + collecting = 1; n = collect_with_callback(NUM_GENERATIONS - 1); - _PyRuntime.gc.collecting = 0; + collecting = 0; } return n; @@ -1512,7 +1629,7 @@ PyGC_Collect(void) Py_ssize_t _PyGC_CollectIfEnabled(void) { - if (!_PyRuntime.gc.enabled) + if (!enabled) return 0; return PyGC_Collect(); @@ -1529,12 +1646,12 @@ _PyGC_CollectNoFail(void) during interpreter shutdown (and then never finish it). See http://bugs.python.org/issue8713#msg195178 for an example. */ - if (_PyRuntime.gc.collecting) + if (collecting) n = 0; else { - _PyRuntime.gc.collecting = 1; + collecting = 1; n = collect(NUM_GENERATIONS - 1, NULL, NULL, 1); - _PyRuntime.gc.collecting = 0; + collecting = 0; } return n; } @@ -1542,10 +1659,10 @@ _PyGC_CollectNoFail(void) void _PyGC_DumpShutdownStats(void) { - if (!(_PyRuntime.gc.debug & DEBUG_SAVEALL) - && _PyRuntime.gc.garbage != NULL && PyList_GET_SIZE(_PyRuntime.gc.garbage) > 0) { + if (!(debug & DEBUG_SAVEALL) + && garbage != NULL && PyList_GET_SIZE(garbage) > 0) { char *message; - if (_PyRuntime.gc.debug & DEBUG_UNCOLLECTABLE) + if (debug & DEBUG_UNCOLLECTABLE) message = "gc: %zd uncollectable objects at " \ "shutdown"; else @@ -1556,13 +1673,13 @@ _PyGC_DumpShutdownStats(void) already. */ if (PyErr_WarnExplicitFormat(PyExc_ResourceWarning, "gc", 0, "gc", NULL, message, - PyList_GET_SIZE(_PyRuntime.gc.garbage))) + PyList_GET_SIZE(garbage))) PyErr_WriteUnraisable(NULL); - if (_PyRuntime.gc.debug & DEBUG_UNCOLLECTABLE) { + if (debug & DEBUG_UNCOLLECTABLE) { PyObject *repr = NULL, *bytes = NULL; - repr = PyObject_Repr(_PyRuntime.gc.garbage); + repr = PyObject_Repr(garbage); if (!repr || !(bytes = PyUnicode_EncodeFSDefault(repr))) - PyErr_WriteUnraisable(_PyRuntime.gc.garbage); + PyErr_WriteUnraisable(garbage); else { PySys_WriteStderr( " %s\n", @@ -1578,7 +1695,7 @@ _PyGC_DumpShutdownStats(void) void _PyGC_Fini(void) { - Py_CLEAR(_PyRuntime.gc.callbacks); + Py_CLEAR(callbacks); } /* for debugging */ @@ -1629,15 +1746,15 @@ _PyObject_GC_Alloc(int use_calloc, size_t basicsize) return PyErr_NoMemory(); g->gc.gc_refs = 0; _PyGCHead_SET_REFS(g, GC_UNTRACKED); - _PyRuntime.gc.generations[0].count++; /* number of allocated GC objects */ - if (_PyRuntime.gc.generations[0].count > _PyRuntime.gc.generations[0].threshold && - _PyRuntime.gc.enabled && - _PyRuntime.gc.generations[0].threshold && - !_PyRuntime.gc.collecting && + generations[0].count++; /* number of allocated GC objects */ + if (generations[0].count > generations[0].threshold && + enabled && + generations[0].threshold && + !collecting && !PyErr_Occurred()) { - _PyRuntime.gc.collecting = 1; + collecting = 1; collect_generations(); - _PyRuntime.gc.collecting = 0; + collecting = 0; } op = FROM_GC(g); return op; @@ -1702,8 +1819,8 @@ PyObject_GC_Del(void *op) PyGC_Head *g = AS_GC(op); if (IS_TRACKED(op)) gc_list_remove(g); - if (_PyRuntime.gc.generations[0].count > 0) { - _PyRuntime.gc.generations[0].count--; + if (generations[0].count > 0) { + generations[0].count--; } PyObject_FREE(g); } |