summaryrefslogtreecommitdiffstats
path: root/Lib/importlib
diff options
context:
space:
mode:
authorBrett Cannon <brett@python.org>2023-08-29 07:17:25 (GMT)
committerGitHub <noreply@github.com>2023-08-29 07:17:25 (GMT)
commit5f85b443f7119e1c68a15fc9a342655e544d2852 (patch)
treeaaca2e20df1cf5707efa34f5c6e1881016b3f152 /Lib/importlib
parentc780698e9b8416633402657ee2846c0179705e9b (diff)
downloadcpython-5f85b443f7119e1c68a15fc9a342655e544d2852.zip
cpython-5f85b443f7119e1c68a15fc9a342655e544d2852.tar.gz
cpython-5f85b443f7119e1c68a15fc9a342655e544d2852.tar.bz2
GH-106176, GH-104702: Fix reference leak when importing across multiple threads (#108497)
Diffstat (limited to 'Lib/importlib')
-rw-r--r--Lib/importlib/_bootstrap.py115
1 files changed, 103 insertions, 12 deletions
diff --git a/Lib/importlib/_bootstrap.py b/Lib/importlib/_bootstrap.py
index c48fd50..ec2e56f 100644
--- a/Lib/importlib/_bootstrap.py
+++ b/Lib/importlib/_bootstrap.py
@@ -51,18 +51,106 @@ def _new_module(name):
# Module-level locking ########################################################
-# A dict mapping module names to weakrefs of _ModuleLock instances
-# Dictionary protected by the global import lock
+# For a list that can have a weakref to it.
+class _List(list):
+ pass
+
+
+# Copied from weakref.py with some simplifications and modifications unique to
+# bootstrapping importlib. Many methods were simply deleting for simplicity, so if they
+# are needed in the future they may work if simply copied back in.
+class _WeakValueDictionary:
+
+ def __init__(self):
+ self_weakref = _weakref.ref(self)
+
+ # Inlined to avoid issues with inheriting from _weakref.ref before _weakref is
+ # set by _setup(). Since there's only one instance of this class, this is
+ # not expensive.
+ class KeyedRef(_weakref.ref):
+
+ __slots__ = "key",
+
+ def __new__(type, ob, key):
+ self = super().__new__(type, ob, type.remove)
+ self.key = key
+ return self
+
+ def __init__(self, ob, key):
+ super().__init__(ob, self.remove)
+
+ @staticmethod
+ def remove(wr):
+ nonlocal self_weakref
+
+ self = self_weakref()
+ if self is not None:
+ if self._iterating:
+ self._pending_removals.append(wr.key)
+ else:
+ _weakref._remove_dead_weakref(self.data, wr.key)
+
+ self._KeyedRef = KeyedRef
+ self.clear()
+
+ def clear(self):
+ self._pending_removals = []
+ self._iterating = set()
+ self.data = {}
+
+ def _commit_removals(self):
+ pop = self._pending_removals.pop
+ d = self.data
+ while True:
+ try:
+ key = pop()
+ except IndexError:
+ return
+ _weakref._remove_dead_weakref(d, key)
+
+ def get(self, key, default=None):
+ if self._pending_removals:
+ self._commit_removals()
+ try:
+ wr = self.data[key]
+ except KeyError:
+ return default
+ else:
+ if (o := wr()) is None:
+ return default
+ else:
+ return o
+
+ def setdefault(self, key, default=None):
+ try:
+ o = self.data[key]()
+ except KeyError:
+ o = None
+ if o is None:
+ if self._pending_removals:
+ self._commit_removals()
+ self.data[key] = self._KeyedRef(default, key)
+ return default
+ else:
+ return o
+
+
+# A dict mapping module names to weakrefs of _ModuleLock instances.
+# Dictionary protected by the global import lock.
_module_locks = {}
-# A dict mapping thread IDs to lists of _ModuleLock instances. This maps a
-# thread to the module locks it is blocking on acquiring. The values are
-# lists because a single thread could perform a re-entrant import and be "in
-# the process" of blocking on locks for more than one module. A thread can
-# be "in the process" because a thread cannot actually block on acquiring
-# more than one lock but it can have set up bookkeeping that reflects that
-# it intends to block on acquiring more than one lock.
-_blocking_on = {}
+# A dict mapping thread IDs to weakref'ed lists of _ModuleLock instances.
+# This maps a thread to the module locks it is blocking on acquiring. The
+# values are lists because a single thread could perform a re-entrant import
+# and be "in the process" of blocking on locks for more than one module. A
+# thread can be "in the process" because a thread cannot actually block on
+# acquiring more than one lock but it can have set up bookkeeping that reflects
+# that it intends to block on acquiring more than one lock.
+#
+# The dictionary uses a WeakValueDictionary to avoid keeping unnecessary
+# lists around, regardless of GC runs. This way there's no memory leak if
+# the list is no longer needed (GH-106176).
+_blocking_on = None
class _BlockingOnManager:
@@ -79,7 +167,7 @@ class _BlockingOnManager:
# re-entrant (i.e., a single thread may take it more than once) so it
# wouldn't help us be correct in the face of re-entrancy either.
- self.blocked_on = _blocking_on.setdefault(self.thread_id, [])
+ self.blocked_on = _blocking_on.setdefault(self.thread_id, _List())
self.blocked_on.append(self.lock)
def __exit__(self, *args, **kwargs):
@@ -1409,7 +1497,7 @@ def _setup(sys_module, _imp_module):
modules, those two modules must be explicitly passed in.
"""
- global _imp, sys
+ global _imp, sys, _blocking_on
_imp = _imp_module
sys = sys_module
@@ -1437,6 +1525,9 @@ def _setup(sys_module, _imp_module):
builtin_module = sys.modules[builtin_name]
setattr(self_module, builtin_name, builtin_module)
+ # Instantiation requires _weakref to have been set.
+ _blocking_on = _WeakValueDictionary()
+
def _install(sys_module, _imp_module):
"""Install importers for builtin and frozen modules"""