diff options
author | Chris Markiewicz <effigies@gmail.com> | 2024-02-24 00:02:16 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-02-24 00:02:16 (GMT) |
commit | 200271c61db44d90759f8a8934949aefd72d5724 (patch) | |
tree | 9fddf73bc8d17f6d71c8ac26591911c98a445a9c /Lib/importlib | |
parent | ef6074b352a95706f44a592ffe31baace690cc1c (diff) | |
download | cpython-200271c61db44d90759f8a8934949aefd72d5724.zip cpython-200271c61db44d90759f8a8934949aefd72d5724.tar.gz cpython-200271c61db44d90759f8a8934949aefd72d5724.tar.bz2 |
gh-114763: Protect lazy loading modules from attribute access races (GH-114781)
Setting the __class__ attribute of a lazy-loading module to ModuleType enables other threads to attempt to access attributes before the loading is complete. Now that is protected by a lock.
Diffstat (limited to 'Lib/importlib')
-rw-r--r-- | Lib/importlib/util.py | 81 |
1 files changed, 51 insertions, 30 deletions
diff --git a/Lib/importlib/util.py b/Lib/importlib/util.py index 3ad71d3..ff4f12f 100644 --- a/Lib/importlib/util.py +++ b/Lib/importlib/util.py @@ -13,6 +13,7 @@ from ._bootstrap_external import spec_from_file_location import _imp import sys +import threading import types @@ -171,36 +172,54 @@ class _LazyModule(types.ModuleType): def __getattribute__(self, attr): """Trigger the load of the module and return the attribute.""" - # All module metadata must be garnered from __spec__ in order to avoid - # using mutated values. - # Stop triggering this method. - self.__class__ = types.ModuleType - # Get the original name to make sure no object substitution occurred - # in sys.modules. - original_name = self.__spec__.name - # Figure out exactly what attributes were mutated between the creation - # of the module and now. - attrs_then = self.__spec__.loader_state['__dict__'] - attrs_now = self.__dict__ - attrs_updated = {} - for key, value in attrs_now.items(): - # Code that set the attribute may have kept a reference to the - # assigned object, making identity more important than equality. - if key not in attrs_then: - attrs_updated[key] = value - elif id(attrs_now[key]) != id(attrs_then[key]): - attrs_updated[key] = value - self.__spec__.loader.exec_module(self) - # If exec_module() was used directly there is no guarantee the module - # object was put into sys.modules. - if original_name in sys.modules: - if id(self) != id(sys.modules[original_name]): - raise ValueError(f"module object for {original_name!r} " - "substituted in sys.modules during a lazy " - "load") - # Update after loading since that's what would happen in an eager - # loading situation. - self.__dict__.update(attrs_updated) + __spec__ = object.__getattribute__(self, '__spec__') + loader_state = __spec__.loader_state + with loader_state['lock']: + # Only the first thread to get the lock should trigger the load + # and reset the module's class. The rest can now getattr(). + if object.__getattribute__(self, '__class__') is _LazyModule: + # The first thread comes here multiple times as it descends the + # call stack. The first time, it sets is_loading and triggers + # exec_module(), which will access module.__dict__, module.__name__, + # and/or module.__spec__, reentering this method. These accesses + # need to be allowed to proceed without triggering the load again. + if loader_state['is_loading'] and attr.startswith('__') and attr.endswith('__'): + return object.__getattribute__(self, attr) + loader_state['is_loading'] = True + + __dict__ = object.__getattribute__(self, '__dict__') + + # All module metadata must be gathered from __spec__ in order to avoid + # using mutated values. + # Get the original name to make sure no object substitution occurred + # in sys.modules. + original_name = __spec__.name + # Figure out exactly what attributes were mutated between the creation + # of the module and now. + attrs_then = loader_state['__dict__'] + attrs_now = __dict__ + attrs_updated = {} + for key, value in attrs_now.items(): + # Code that set an attribute may have kept a reference to the + # assigned object, making identity more important than equality. + if key not in attrs_then: + attrs_updated[key] = value + elif id(attrs_now[key]) != id(attrs_then[key]): + attrs_updated[key] = value + __spec__.loader.exec_module(self) + # If exec_module() was used directly there is no guarantee the module + # object was put into sys.modules. + if original_name in sys.modules: + if id(self) != id(sys.modules[original_name]): + raise ValueError(f"module object for {original_name!r} " + "substituted in sys.modules during a lazy " + "load") + # Update after loading since that's what would happen in an eager + # loading situation. + __dict__.update(attrs_updated) + # Finally, stop triggering this method. + self.__class__ = types.ModuleType + return getattr(self, attr) def __delattr__(self, attr): @@ -244,5 +263,7 @@ class LazyLoader(Loader): loader_state = {} loader_state['__dict__'] = module.__dict__.copy() loader_state['__class__'] = module.__class__ + loader_state['lock'] = threading.RLock() + loader_state['is_loading'] = False module.__spec__.loader_state = loader_state module.__class__ = _LazyModule |