From dcedaf6e53fcba48aa8185d0dc27d832da2615aa Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Wed, 31 Jul 2013 23:14:08 +0200 Subject: Issue #18214: Improve finalization of Python modules to avoid setting their globals to None, in most cases. --- Lib/rlcompleter.py | 6 ++ Lib/site.py | 39 +++++++++--- Lib/test/final_a.py | 19 ++++++ Lib/test/final_b.py | 19 ++++++ Lib/test/test_module.py | 15 ++++- Lib/test/test_sys.py | 2 +- Misc/NEWS | 3 + Objects/moduleobject.c | 29 ++++++--- Python/import.c | 159 +++++++++++++++++++++--------------------------- 9 files changed, 180 insertions(+), 111 deletions(-) create mode 100644 Lib/test/final_a.py create mode 100644 Lib/test/final_b.py diff --git a/Lib/rlcompleter.py b/Lib/rlcompleter.py index d3a4437..8775730 100644 --- a/Lib/rlcompleter.py +++ b/Lib/rlcompleter.py @@ -29,6 +29,7 @@ Notes: """ +import atexit import builtins import __main__ @@ -158,3 +159,8 @@ except ImportError: pass else: readline.set_completer(Completer().complete) + # Release references early at shutdown (the readline module's + # contents are quasi-immortal, and the completer function holds a + # reference to globals). + atexit.register(lambda: readline.set_completer(None)) + diff --git a/Lib/site.py b/Lib/site.py index 96a4bef..08b98eb 100644 --- a/Lib/site.py +++ b/Lib/site.py @@ -68,6 +68,7 @@ site-specific customizations. If this import fails with an ImportError exception, it is silently ignored. """ +import atexit import sys import os import re @@ -86,6 +87,25 @@ USER_SITE = None USER_BASE = None +_no_builtin = object() + +def _patch_builtins(**items): + # When patching builtins, we make some objects almost immortal + # (builtins are only reclaimed at the very end of the interpreter + # shutdown sequence). To avoid keeping to many references alive, + # we register callbacks to undo our builtins additions. + old_items = {k: getattr(builtins, k, _no_builtin) for k in items} + def unpatch(old_items=old_items): + for k, v in old_items.items(): + if v is _no_builtin: + delattr(builtins, k) + else: + setattr(builtins, k, v) + for k, v in items.items(): + setattr(builtins, k, v) + atexit.register(unpatch) + + def makepath(*paths): dir = os.path.join(*paths) try: @@ -357,8 +377,7 @@ def setquit(): except: pass raise SystemExit(code) - builtins.quit = Quitter('quit') - builtins.exit = Quitter('exit') + _patch_builtins(quit=Quitter('quit'), exit=Quitter('exit')) class _Printer(object): @@ -423,20 +442,20 @@ class _Printer(object): def setcopyright(): """Set 'copyright' and 'credits' in builtins""" - builtins.copyright = _Printer("copyright", sys.copyright) + _patch_builtins(copyright=_Printer("copyright", sys.copyright)) if sys.platform[:4] == 'java': - builtins.credits = _Printer( + _patch_builtins(credits=_Printer( "credits", - "Jython is maintained by the Jython developers (www.jython.org).") + "Jython is maintained by the Jython developers (www.jython.org).")) else: - builtins.credits = _Printer("credits", """\ + _patch_builtins(credits=_Printer("credits", """\ Thanks to CWI, CNRI, BeOpen.com, Zope Corporation and a cast of thousands - for supporting Python development. See www.python.org for more information.""") + for supporting Python development. See www.python.org for more information.""")) here = os.path.dirname(os.__file__) - builtins.license = _Printer( + _patch_builtins(license=_Printer( "license", "See http://www.python.org/%.3s/license.html" % sys.version, ["LICENSE.txt", "LICENSE"], - [os.path.join(here, os.pardir), here, os.curdir]) + [os.path.join(here, os.pardir), here, os.curdir])) class _Helper(object): @@ -453,7 +472,7 @@ class _Helper(object): return pydoc.help(*args, **kwds) def sethelper(): - builtins.help = _Helper() + _patch_builtins(help=_Helper()) def enablerlcompleter(): """Enable default readline configuration on interactive prompts, by diff --git a/Lib/test/final_a.py b/Lib/test/final_a.py new file mode 100644 index 0000000..390ee88 --- /dev/null +++ b/Lib/test/final_a.py @@ -0,0 +1,19 @@ +""" +Fodder for module finalization tests in test_module. +""" + +import shutil +import test.final_b + +x = 'a' + +class C: + def __del__(self): + # Inspect module globals and builtins + print("x =", x) + print("final_b.x =", test.final_b.x) + print("shutil.rmtree =", getattr(shutil.rmtree, '__name__', None)) + print("len =", getattr(len, '__name__', None)) + +c = C() +_underscored = C() diff --git a/Lib/test/final_b.py b/Lib/test/final_b.py new file mode 100644 index 0000000..7228d82 --- /dev/null +++ b/Lib/test/final_b.py @@ -0,0 +1,19 @@ +""" +Fodder for module finalization tests in test_module. +""" + +import shutil +import test.final_a + +x = 'b' + +class C: + def __del__(self): + # Inspect module globals and builtins + print("x =", x) + print("final_a.x =", test.final_a.x) + print("shutil.rmtree =", getattr(shutil.rmtree, '__name__', None)) + print("len =", getattr(len, '__name__', None)) + +c = C() +_underscored = C() diff --git a/Lib/test/test_module.py b/Lib/test/test_module.py index b34b30f..3000cec 100644 --- a/Lib/test/test_module.py +++ b/Lib/test/test_module.py @@ -1,6 +1,7 @@ # Test the module type import unittest from test.support import run_unittest, gc_collect +from test.script_helper import assert_python_ok import sys ModuleType = type(sys) @@ -70,7 +71,6 @@ class ModuleTests(unittest.TestCase): "__loader__": None, "__package__": None}) self.assertTrue(foo.__dict__ is d) - @unittest.expectedFailure def test_dont_clear_dict(self): # See issue 7140. def f(): @@ -181,6 +181,19 @@ a = A(destroyed)""" self.assertEqual(r[:25], "") + def test_module_finalization_at_shutdown(self): + # Module globals and builtins should still be available during shutdown + rc, out, err = assert_python_ok("-c", "from test import final_a") + self.assertFalse(err) + lines = out.splitlines() + self.assertEqual(set(lines), { + b"x = a", + b"x = b", + b"final_a.x = a", + b"final_b.x = b", + b"len = len", + b"shutil.rmtree = rmtree"}) + # frozen and namespace module reprs are tested in importlib. diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py index 3f093e6..26c7ae7 100644 --- a/Lib/test/test_sys.py +++ b/Lib/test/test_sys.py @@ -810,7 +810,7 @@ class SizeofTest(unittest.TestCase): # memoryview check(memoryview(b''), size('Pnin 2P2n2i5P 3cPn')) # module - check(unittest, size('PnP')) + check(unittest, size('PnPPP')) # None check(None, size('')) # NotImplementedType diff --git a/Misc/NEWS b/Misc/NEWS index e5fd939..df075b0 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -10,6 +10,9 @@ What's New in Python 3.4.0 Alpha 1? Core and Builtins ----------------- +- Issue #18214: Improve finalization of Python modules to avoid setting + their globals to None, in most cases. + - Issue #18112: PEP 442 implementation (safe object finalization). - Issue #18552: Check return value of PyArena_AddPyObject() in diff --git a/Objects/moduleobject.c b/Objects/moduleobject.c index 5970901..3ea3be8 100644 --- a/Objects/moduleobject.c +++ b/Objects/moduleobject.c @@ -11,6 +11,8 @@ typedef struct { PyObject *md_dict; struct PyModuleDef *md_def; void *md_state; + PyObject *md_weaklist; + PyObject *md_name; /* for logging purposes after md_dict is cleared */ } PyModuleObject; static PyMemberDef module_members[] = { @@ -27,7 +29,8 @@ static PyTypeObject moduledef_type = { static int -module_init_dict(PyObject *md_dict, PyObject *name, PyObject *doc) +module_init_dict(PyModuleObject *mod, PyObject *md_dict, + PyObject *name, PyObject *doc) { if (md_dict == NULL) return -1; @@ -42,6 +45,11 @@ module_init_dict(PyObject *md_dict, PyObject *name, PyObject *doc) return -1; if (PyDict_SetItemString(md_dict, "__loader__", Py_None) != 0) return -1; + if (PyUnicode_CheckExact(name)) { + Py_INCREF(name); + Py_XDECREF(mod->md_name); + mod->md_name = name; + } return 0; } @@ -56,8 +64,10 @@ PyModule_NewObject(PyObject *name) return NULL; m->md_def = NULL; m->md_state = NULL; + m->md_weaklist = NULL; + m->md_name = NULL; m->md_dict = PyDict_New(); - if (module_init_dict(m->md_dict, name, NULL) != 0) + if (module_init_dict(m, m->md_dict, name, NULL) != 0) goto fail; PyObject_GC_Track(m); return (PyObject *)m; @@ -362,7 +372,7 @@ module_init(PyModuleObject *m, PyObject *args, PyObject *kwds) return -1; m->md_dict = dict; } - if (module_init_dict(dict, name, doc) < 0) + if (module_init_dict(m, dict, name, doc) < 0) return -1; return 0; } @@ -371,12 +381,15 @@ static void module_dealloc(PyModuleObject *m) { PyObject_GC_UnTrack(m); + if (Py_VerboseFlag && m->md_name) { + PySys_FormatStderr("# destroy %S\n", m->md_name); + } + if (m->md_weaklist != NULL) + PyObject_ClearWeakRefs((PyObject *) m); if (m->md_def && m->md_def->m_free) m->md_def->m_free(m); - if (m->md_dict != NULL) { - _PyModule_Clear((PyObject *)m); - Py_DECREF(m->md_dict); - } + Py_XDECREF(m->md_dict); + Py_XDECREF(m->md_name); if (m->md_state != NULL) PyMem_FREE(m->md_state); Py_TYPE(m)->tp_free((PyObject *)m); @@ -522,7 +535,7 @@ PyTypeObject PyModule_Type = { (traverseproc)module_traverse, /* tp_traverse */ (inquiry)module_clear, /* tp_clear */ 0, /* tp_richcompare */ - 0, /* tp_weaklistoffset */ + offsetof(PyModuleObject, md_weaklist), /* tp_weaklistoffset */ 0, /* tp_iter */ 0, /* tp_iternext */ module_methods, /* tp_methods */ diff --git a/Python/import.c b/Python/import.c index c6222ec..0910655 100644 --- a/Python/import.c +++ b/Python/import.c @@ -277,6 +277,7 @@ static char* sys_deletes[] = { "path", "argv", "ps1", "ps2", "last_type", "last_value", "last_traceback", "path_hooks", "path_importer_cache", "meta_path", + "__interactivehook__", /* misc stuff */ "flags", "float_info", NULL @@ -289,40 +290,17 @@ static char* sys_files[] = { NULL }; -static int -is_essential_module(PyObject *name) -{ - Py_ssize_t name_len; - char *name_str = PyUnicode_AsUTF8AndSize(name, &name_len); - - if (name_str == NULL) { - PyErr_Clear(); - return 0; - } - if (strcmp(name_str, "builtins") == 0) - return 1; - if (strcmp(name_str, "sys") == 0) - return 1; - /* These are all needed for stderr to still function */ - if (strcmp(name_str, "codecs") == 0) - return 1; - if (strcmp(name_str, "_codecs") == 0) - return 1; - if (strncmp(name_str, "encodings.", 10) == 0) - return 1; - return 0; -} - - /* Un-initialize things, as good as we can */ void PyImport_Cleanup(void) { - Py_ssize_t pos, ndone; + Py_ssize_t pos; PyObject *key, *value, *dict; PyInterpreterState *interp = PyThreadState_GET()->interp; PyObject *modules = interp->modules; + PyObject *builtins = interp->builtins; + PyObject *weaklist = NULL; if (modules == NULL) return; /* Already done */ @@ -333,6 +311,8 @@ PyImport_Cleanup(void) deleted *last* of all, they would come too late in the normal destruction order. Sigh. */ + /* XXX Perhaps these precautions are obsolete. Who knows? */ + value = PyDict_GetItemString(modules, "builtins"); if (value != NULL && PyModule_Check(value)) { dict = PyModule_GetDict(value); @@ -360,87 +340,84 @@ PyImport_Cleanup(void) } } - /* First, delete __main__ */ - value = PyDict_GetItemString(modules, "__main__"); - if (value != NULL && PyModule_Check(value)) { - if (Py_VerboseFlag) - PySys_WriteStderr("# cleanup __main__\n"); - _PyModule_Clear(value); - PyDict_SetItemString(modules, "__main__", Py_None); - } - - /* The special treatment of "builtins" here is because even - when it's not referenced as a module, its dictionary is - referenced by almost every module's __builtins__. Since - deleting a module clears its dictionary (even if there are - references left to it), we need to delete the "builtins" - module last. Likewise, we don't delete sys until the very - end because it is implicitly referenced (e.g. by print). - - Also note that we 'delete' modules by replacing their entry - in the modules dict with None, rather than really deleting - them; this avoids a rehash of the modules dictionary and - also marks them as "non existent" so they won't be - re-imported. */ - - /* Next, repeatedly delete modules with a reference count of - one (skipping builtins and sys) and delete them */ - do { - ndone = 0; - pos = 0; - while (PyDict_Next(modules, &pos, &key, &value)) { - if (value->ob_refcnt != 1) - continue; - if (PyUnicode_Check(key) && PyModule_Check(value)) { - if (is_essential_module(key)) - continue; - if (Py_VerboseFlag) - PySys_FormatStderr( - "# cleanup[1] %U\n", key); - _PyModule_Clear(value); - PyDict_SetItem(modules, key, Py_None); - ndone++; - } - } - } while (ndone > 0); - - /* Next, delete all modules (still skipping builtins and sys) */ + /* We prepare a list which will receive (name, weakref) tuples of + modules when they are removed from sys.modules. The name is used + for diagnosis messages (in verbose mode), while the weakref helps + detect those modules which have been held alive. */ + weaklist = PyList_New(0); + +#define STORE_MODULE_WEAKREF(mod) \ + if (weaklist != NULL) { \ + PyObject *name = PyModule_GetNameObject(mod); \ + PyObject *wr = PyWeakref_NewRef(mod, NULL); \ + if (name && wr) { \ + PyObject *tup = PyTuple_Pack(2, name, wr); \ + PyList_Append(weaklist, tup); \ + Py_XDECREF(tup); \ + } \ + Py_XDECREF(name); \ + Py_XDECREF(wr); \ + if (PyErr_Occurred()) \ + PyErr_Clear(); \ + } + + /* Remove all modules from sys.modules, hoping that garbage collection + can reclaim most of them. */ pos = 0; while (PyDict_Next(modules, &pos, &key, &value)) { - if (PyUnicode_Check(key) && PyModule_Check(value)) { - if (is_essential_module(key)) - continue; - if (Py_VerboseFlag) - PySys_FormatStderr("# cleanup[2] %U\n", key); - _PyModule_Clear(value); + if (PyModule_Check(value)) { + if (Py_VerboseFlag && PyUnicode_Check(key)) + PySys_FormatStderr("# cleanup[2] removing %U\n", key, value); + STORE_MODULE_WEAKREF(value); PyDict_SetItem(modules, key, Py_None); } } - /* Collect garbage remaining after deleting the modules. Mostly - reference cycles created by classes. */ - PyGC_Collect(); - + /* Clear the modules dict. */ + PyDict_Clear(modules); + /* Replace the interpreter's reference to builtins with an empty dict + (module globals still have a reference to the original builtins). */ + builtins = interp->builtins; + interp->builtins = PyDict_New(); + Py_DECREF(builtins); + /* Collect references */ + _PyGC_CollectNoFail(); /* Dump GC stats before it's too late, since it uses the warnings machinery. */ _PyGC_DumpShutdownStats(); - /* Next, delete all remaining modules */ - pos = 0; - while (PyDict_Next(modules, &pos, &key, &value)) { - if (PyUnicode_Check(key) && PyModule_Check(value)) { + /* Now, if there are any modules left alive, clear their globals to + minimize potential leaks. All C extension modules actually end + up here, since they are kept alive in the interpreter state. */ + if (weaklist != NULL) { + Py_ssize_t i, n; + n = PyList_GET_SIZE(weaklist); + for (i = 0; i < n; i++) { + PyObject *tup = PyList_GET_ITEM(weaklist, i); + PyObject *mod = PyWeakref_GET_OBJECT(PyTuple_GET_ITEM(tup, 1)); + if (mod == Py_None) + continue; + Py_INCREF(mod); + assert(PyModule_Check(mod)); if (Py_VerboseFlag) - PySys_FormatStderr("# cleanup[3] %U\n", key); - _PyModule_Clear(value); - PyDict_SetItem(modules, key, Py_None); + PySys_FormatStderr("# cleanup[3] wiping %U\n", + PyTuple_GET_ITEM(tup, 0), mod); + _PyModule_Clear(mod); + Py_DECREF(mod); } + Py_DECREF(weaklist); } - /* Finally, clear and delete the modules directory */ - PyDict_Clear(modules); - _PyGC_CollectNoFail(); + /* Clear and delete the modules directory. Actual modules will + still be there only if imported during the execution of some + destructor. */ interp->modules = NULL; Py_DECREF(modules); + + /* Once more */ + _PyGC_CollectNoFail(); + +#undef STORE_MODULE_WEAKREF } -- cgit v0.12