diff options
author | Brett Cannon <brett@python.org> | 2023-08-29 07:17:25 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-08-29 07:17:25 (GMT) |
commit | 5f85b443f7119e1c68a15fc9a342655e544d2852 (patch) | |
tree | aaca2e20df1cf5707efa34f5c6e1881016b3f152 /Lib/importlib | |
parent | c780698e9b8416633402657ee2846c0179705e9b (diff) | |
download | cpython-5f85b443f7119e1c68a15fc9a342655e544d2852.zip cpython-5f85b443f7119e1c68a15fc9a342655e544d2852.tar.gz cpython-5f85b443f7119e1c68a15fc9a342655e544d2852.tar.bz2 |
GH-106176, GH-104702: Fix reference leak when importing across multiple threads (#108497)
Diffstat (limited to 'Lib/importlib')
-rw-r--r-- | Lib/importlib/_bootstrap.py | 115 |
1 files changed, 103 insertions, 12 deletions
diff --git a/Lib/importlib/_bootstrap.py b/Lib/importlib/_bootstrap.py index c48fd50..ec2e56f 100644 --- a/Lib/importlib/_bootstrap.py +++ b/Lib/importlib/_bootstrap.py @@ -51,18 +51,106 @@ def _new_module(name): # Module-level locking ######################################################## -# A dict mapping module names to weakrefs of _ModuleLock instances -# Dictionary protected by the global import lock +# For a list that can have a weakref to it. +class _List(list): + pass + + +# Copied from weakref.py with some simplifications and modifications unique to +# bootstrapping importlib. Many methods were simply deleting for simplicity, so if they +# are needed in the future they may work if simply copied back in. +class _WeakValueDictionary: + + def __init__(self): + self_weakref = _weakref.ref(self) + + # Inlined to avoid issues with inheriting from _weakref.ref before _weakref is + # set by _setup(). Since there's only one instance of this class, this is + # not expensive. + class KeyedRef(_weakref.ref): + + __slots__ = "key", + + def __new__(type, ob, key): + self = super().__new__(type, ob, type.remove) + self.key = key + return self + + def __init__(self, ob, key): + super().__init__(ob, self.remove) + + @staticmethod + def remove(wr): + nonlocal self_weakref + + self = self_weakref() + if self is not None: + if self._iterating: + self._pending_removals.append(wr.key) + else: + _weakref._remove_dead_weakref(self.data, wr.key) + + self._KeyedRef = KeyedRef + self.clear() + + def clear(self): + self._pending_removals = [] + self._iterating = set() + self.data = {} + + def _commit_removals(self): + pop = self._pending_removals.pop + d = self.data + while True: + try: + key = pop() + except IndexError: + return + _weakref._remove_dead_weakref(d, key) + + def get(self, key, default=None): + if self._pending_removals: + self._commit_removals() + try: + wr = self.data[key] + except KeyError: + return default + else: + if (o := wr()) is None: + return default + else: + return o + + def setdefault(self, key, default=None): + try: + o = self.data[key]() + except KeyError: + o = None + if o is None: + if self._pending_removals: + self._commit_removals() + self.data[key] = self._KeyedRef(default, key) + return default + else: + return o + + +# A dict mapping module names to weakrefs of _ModuleLock instances. +# Dictionary protected by the global import lock. _module_locks = {} -# A dict mapping thread IDs to lists of _ModuleLock instances. This maps a -# thread to the module locks it is blocking on acquiring. The values are -# lists because a single thread could perform a re-entrant import and be "in -# the process" of blocking on locks for more than one module. A thread can -# be "in the process" because a thread cannot actually block on acquiring -# more than one lock but it can have set up bookkeeping that reflects that -# it intends to block on acquiring more than one lock. -_blocking_on = {} +# A dict mapping thread IDs to weakref'ed lists of _ModuleLock instances. +# This maps a thread to the module locks it is blocking on acquiring. The +# values are lists because a single thread could perform a re-entrant import +# and be "in the process" of blocking on locks for more than one module. A +# thread can be "in the process" because a thread cannot actually block on +# acquiring more than one lock but it can have set up bookkeeping that reflects +# that it intends to block on acquiring more than one lock. +# +# The dictionary uses a WeakValueDictionary to avoid keeping unnecessary +# lists around, regardless of GC runs. This way there's no memory leak if +# the list is no longer needed (GH-106176). +_blocking_on = None class _BlockingOnManager: @@ -79,7 +167,7 @@ class _BlockingOnManager: # re-entrant (i.e., a single thread may take it more than once) so it # wouldn't help us be correct in the face of re-entrancy either. - self.blocked_on = _blocking_on.setdefault(self.thread_id, []) + self.blocked_on = _blocking_on.setdefault(self.thread_id, _List()) self.blocked_on.append(self.lock) def __exit__(self, *args, **kwargs): @@ -1409,7 +1497,7 @@ def _setup(sys_module, _imp_module): modules, those two modules must be explicitly passed in. """ - global _imp, sys + global _imp, sys, _blocking_on _imp = _imp_module sys = sys_module @@ -1437,6 +1525,9 @@ def _setup(sys_module, _imp_module): builtin_module = sys.modules[builtin_name] setattr(self_module, builtin_name, builtin_module) + # Instantiation requires _weakref to have been set. + _blocking_on = _WeakValueDictionary() + def _install(sys_module, _imp_module): """Install importers for builtin and frozen modules""" |