From 69c635266ec20945142d6fb3beb2555769fed1ad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kristj=C3=A1n=20Valur=20J=C3=B3nsson?= Date: Sun, 15 Apr 2012 11:41:32 +0000 Subject: Issue #10576: Add a progress callback to gcmodule --- Doc/library/gc.rst | 39 ++++++++++++++- Lib/test/test_gc.py | 136 +++++++++++++++++++++++++++++++++++++++++++++++++++- Misc/NEWS | 3 ++ Modules/gcmodule.c | 80 ++++++++++++++++++++++++++++--- 4 files changed, 249 insertions(+), 9 deletions(-) diff --git a/Doc/library/gc.rst b/Doc/library/gc.rst index 0281bb7..da78aa4 100644 --- a/Doc/library/gc.rst +++ b/Doc/library/gc.rst @@ -153,8 +153,8 @@ The :mod:`gc` module provides the following functions: .. versionadded:: 3.1 -The following variable is provided for read-only access (you can mutate its -value but should not rebind it): +The following variables are provided for read-only access (you can mutate the +values but should not rebind them): .. data:: garbage @@ -183,6 +183,41 @@ value but should not rebind it): :const:`DEBUG_UNCOLLECTABLE` is set, in addition all uncollectable objects are printed. +.. data:: callbacks + + A list of callbacks that will be invoked by the garbage collector before and + after collection. The callbacks will be called with two arguments, + :arg:`phase` and :arg:`info`. + + :arg:`phase` can be one of two values: + + "start": The garbage collection is about to start. + + "stop": The garbage collection has finished. + + :arg:`info` provides more information for the callback. The following + keys are currently defined: + + "generation": The oldest generation being collected. + + "collected": When :arg:`phase` is "stop", the number of objects + successfully collected. + + "uncollectable": When :arg:`phase` is "stop", the number of objects + that could not be collected and were put in :data:`garbage`. + + Applications can add their own callbacks to this list. 
The primary + use cases are: + + Gathering statistics about garbage collection, such as how often + various generations are collected, and how long the collection + takes. + + Allowing applications to identify and clear their own uncollectable + types when they appear in :data:`garbage`. + + .. versionadded:: 3.3 + The following constants are provided for use with :func:`set_debug`: diff --git a/Lib/test/test_gc.py b/Lib/test/test_gc.py index 19313db..caf5a3d 100644 --- a/Lib/test/test_gc.py +++ b/Lib/test/test_gc.py @@ -32,6 +32,20 @@ class GC_Detector(object): # gc collects it. self.wr = weakref.ref(C1055820(666), it_happened) +class Uncollectable(object): + """Create a reference cycle with multiple __del__ methods. + + An object in a reference cycle will never have zero references, + and so must be garbage collected. If one or more objects in the + cycle have __del__ methods, the gc refuses to guess an order, + and leaves the cycle uncollected.""" + def __init__(self, partner=None): + if partner is None: + self.partner = Uncollectable(partner=self) + else: + self.partner = partner + def __del__(self): + pass ### Tests ############################################################################### @@ -528,6 +542,126 @@ class GCTests(unittest.TestCase): self.assertNotIn(b"uncollectable objects at shutdown", stderr) +class GCCallbackTests(unittest.TestCase): + def setUp(self): + # Save gc state and disable it. 
+ self.enabled = gc.isenabled() + gc.disable() + self.debug = gc.get_debug() + gc.set_debug(0) + gc.callbacks.append(self.cb1) + gc.callbacks.append(self.cb2) + + def tearDown(self): + # Restore gc state + del self.visit + gc.callbacks.remove(self.cb1) + gc.callbacks.remove(self.cb2) + gc.set_debug(self.debug) + if self.enabled: + gc.enable() + # destroy any uncollectables + gc.collect() + for obj in gc.garbage: + if isinstance(obj, Uncollectable): + obj.partner = None + del gc.garbage[:] + gc.collect() + + othergarbage = [] + def preclean(self): + # Remove all fluff from the system. Invoke this function + # manually rather than through self.setUp() for maximum + # safety. + self.visit = [] + gc.collect() + garbage, gc.garbage[:] = gc.garbage[:], [] + self.othergarbage.append(garbage) + self.visit = [] + + def cb1(self, phase, info): + self.visit.append((1, phase, dict(info))) + + def cb2(self, phase, info): + self.visit.append((2, phase, dict(info))) + if phase == "stop" and hasattr(self, "cleanup"): + # Clean Uncollectable from garbage + uc = [e for e in gc.garbage if isinstance(e, Uncollectable)] + gc.garbage[:] = [e for e in gc.garbage + if not isinstance(e, Uncollectable)] + for e in uc: + e.partner = None + + def testCollect(self): + self.preclean() + gc.collect() + # Algorithmically verify the contents of self.visit + # because it is long and tortuous. + + # Count the number of visits to each callback + n = [v[0] for v in self.visit] + n1 = [i for i in n if i == 1] + n2 = [i for i in n if i == 2] + self.assertEqual(n1, [1]*2) + self.assertEqual(n2, [2]*2) + + # Count that we got the right number of start and stop callbacks. 
+ n = [v[1] for v in self.visit] + n1 = [i for i in n if i == "start"] + n2 = [i for i in n if i == "stop"] + self.assertEqual(n1, ["start"]*2) + self.assertEqual(n2, ["stop"]*2) + + # Check that we got the right info dict for all callbacks + for v in self.visit: + info = v[2] + self.assertTrue("generation" in info) + self.assertTrue("collected" in info) + self.assertTrue("uncollectable" in info) + + def testCollectGen(self): + self.preclean() + gc.collect(2) + for v in self.visit: + info = v[2] + self.assertEqual(info["generation"], 2) + + def testCollectGarbage(self): + self.preclean() + # Each of these causes four objects to be garbage: Two + # Uncollectables and their instance dicts. + Uncollectable() + Uncollectable() + C1055820(666) + gc.collect() + for v in self.visit: + if v[1] != "stop": + continue + info = v[2] + self.assertEqual(info["collected"], 2) + self.assertEqual(info["uncollectable"], 8) + + # We should now have the Uncollectables in gc.garbage + self.assertEqual(len(gc.garbage), 4) + for e in gc.garbage: + self.assertIsInstance(e, Uncollectable) + + # Now, let our callback handle the Uncollectable instances + self.cleanup=True + self.visit = [] + gc.garbage[:] = [] + gc.collect() + for v in self.visit: + if v[1] != "stop": + continue + info = v[2] + self.assertEqual(info["collected"], 0) + self.assertEqual(info["uncollectable"], 4) + + # Uncollectables should be gone + self.assertEqual(len(gc.garbage), 0) + + class GCTogglingTests(unittest.TestCase): def setUp(self): gc.enable() @@ -681,7 +815,7 @@ def test_main(): try: gc.collect() # Delete 2nd generation garbage - run_unittest(GCTests, GCTogglingTests) + run_unittest(GCTests, GCTogglingTests, GCCallbackTests) finally: gc.set_debug(debug) # test gc.enable() even if GC is disabled by default diff --git a/Misc/NEWS b/Misc/NEWS index 467762e..b8202c4 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -275,6 +275,9 @@ Library - Issue #14310: Sockets can now be with other processes on Windows using the api 
socket.socket.share() and socket.fromshare(). +- Issue #10576: The gc module now has a 'callbacks' member that will get + called when garbage collection takes place. + Build ----- diff --git a/Modules/gcmodule.c b/Modules/gcmodule.c index d8893d1..77c5c6e 100644 --- a/Modules/gcmodule.c +++ b/Modules/gcmodule.c @@ -65,14 +65,17 @@ static PyObject *garbage = NULL; /* Python string to use if unhandled exception occurs */ static PyObject *gc_str = NULL; -/* This is the number of objects who survived the last full collection. It +/* a list of callbacks to be invoked when collection is performed */ +static PyObject *callbacks = NULL; + +/* This is the number of objects that survived the last full collection. It approximates the number of long lived objects tracked by the GC. (by "full collection", we mean a collection of the oldest generation). */ static Py_ssize_t long_lived_total = 0; -/* This is the number of objects who survived all "non-full" collections, +/* This is the number of objects that survived all "non-full" collections, and are awaiting to undergo a full collection for the first time. */ @@ -787,7 +790,7 @@ get_time(void) /* This is the main function. Read this to understand how the * collection process works. 
*/ static Py_ssize_t -collect(int generation) +collect(int generation, Py_ssize_t *n_collected, Py_ssize_t *n_uncollectable) { int i; Py_ssize_t m = 0; /* # objects collected */ @@ -935,9 +938,64 @@ collect(int generation) PyErr_WriteUnraisable(gc_str); Py_FatalError("unexpected exception during garbage collection"); } + + if (n_collected) + *n_collected = m; + if (n_uncollectable) + *n_uncollectable = n; return n+m; } +/* Invoke progress callbacks to notify clients that garbage collection + * is starting or stopping + */ +static void +invoke_gc_callback(const char *phase, int generation, + Py_ssize_t collected, Py_ssize_t uncollectable) +{ + Py_ssize_t i; + PyObject *info = NULL; + + /* we may get called very early */ + if (callbacks == NULL) + return; + /* The local variable cannot be rebound, check it for sanity */ + assert(callbacks != NULL && PyList_CheckExact(callbacks)); + if (PyList_GET_SIZE(callbacks) != 0) { + info = Py_BuildValue("{sisnsn}", + "generation", generation, + "collected", collected, + "uncollectable", uncollectable); + if (info == NULL) { + PyErr_WriteUnraisable(NULL); + return; + } + } + for (i=0; i is @@ -1352,7 +1419,7 @@ PyGC_Collect(void) n = 0; /* already collecting, don't do anything */ else { collecting = 1; - n = collect(NUM_GENERATIONS - 1); + n = collect_with_callback(NUM_GENERATIONS - 1); collecting = 0; } @@ -1389,6 +1456,7 @@ _PyGC_Fini(void) Py_XDECREF(bytes); } } + Py_CLEAR(callbacks); } /* for debugging */ -- cgit v0.12