summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMiss Islington (bot) <31488909+miss-islington@users.noreply.github.com>2024-02-26 20:43:44 (GMT)
committerGitHub <noreply@github.com>2024-02-26 20:43:44 (GMT)
commit7b91b9001a444f5b5acdfd786b07bfde3405e93a (patch)
treeccb8d89d0e70bcea76c8eb9bf902245bee2ce4c2
parent53b84e772cac6e4a55cebf908d6bb9c48fe254dc (diff)
downloadcpython-7b91b9001a444f5b5acdfd786b07bfde3405e93a.zip
cpython-7b91b9001a444f5b5acdfd786b07bfde3405e93a.tar.gz
cpython-7b91b9001a444f5b5acdfd786b07bfde3405e93a.tar.bz2
[3.12] gh-114763: Protect lazy loading modules from attribute access races (GH-114781) (GH-115870)
gh-114763: Protect lazy loading modules from attribute access races (GH-114781) Setting the __class__ attribute of a lazy-loading module to ModuleType enables other threads to attempt to access attributes before the loading is complete. Now that is protected by a lock. (cherry picked from commit 200271c61db44d90759f8a8934949aefd72d5724) Co-authored-by: Chris Markiewicz <effigies@gmail.com>
-rw-r--r--Lib/importlib/util.py81
-rw-r--r--Lib/test/test_importlib/test_lazy.py42
-rw-r--r--Misc/NEWS.d/next/Library/2024-01-30-23-28-29.gh-issue-114763.BRjKkg.rst3
3 files changed, 94 insertions, 32 deletions
diff --git a/Lib/importlib/util.py b/Lib/importlib/util.py
index f4d6e82..7a2311f 100644
--- a/Lib/importlib/util.py
+++ b/Lib/importlib/util.py
@@ -13,6 +13,7 @@ from ._bootstrap_external import spec_from_file_location
import _imp
import sys
+import threading
import types
@@ -171,36 +172,54 @@ class _LazyModule(types.ModuleType):
def __getattribute__(self, attr):
"""Trigger the load of the module and return the attribute."""
- # All module metadata must be garnered from __spec__ in order to avoid
- # using mutated values.
- # Stop triggering this method.
- self.__class__ = types.ModuleType
- # Get the original name to make sure no object substitution occurred
- # in sys.modules.
- original_name = self.__spec__.name
- # Figure out exactly what attributes were mutated between the creation
- # of the module and now.
- attrs_then = self.__spec__.loader_state['__dict__']
- attrs_now = self.__dict__
- attrs_updated = {}
- for key, value in attrs_now.items():
- # Code that set the attribute may have kept a reference to the
- # assigned object, making identity more important than equality.
- if key not in attrs_then:
- attrs_updated[key] = value
- elif id(attrs_now[key]) != id(attrs_then[key]):
- attrs_updated[key] = value
- self.__spec__.loader.exec_module(self)
- # If exec_module() was used directly there is no guarantee the module
- # object was put into sys.modules.
- if original_name in sys.modules:
- if id(self) != id(sys.modules[original_name]):
- raise ValueError(f"module object for {original_name!r} "
- "substituted in sys.modules during a lazy "
- "load")
- # Update after loading since that's what would happen in an eager
- # loading situation.
- self.__dict__.update(attrs_updated)
+ __spec__ = object.__getattribute__(self, '__spec__')
+ loader_state = __spec__.loader_state
+ with loader_state['lock']:
+ # Only the first thread to get the lock should trigger the load
+ # and reset the module's class. The rest can now getattr().
+ if object.__getattribute__(self, '__class__') is _LazyModule:
+ # The first thread comes here multiple times as it descends the
+ # call stack. The first time, it sets is_loading and triggers
+ # exec_module(), which will access module.__dict__, module.__name__,
+ # and/or module.__spec__, reentering this method. These accesses
+ # need to be allowed to proceed without triggering the load again.
+ if loader_state['is_loading'] and attr.startswith('__') and attr.endswith('__'):
+ return object.__getattribute__(self, attr)
+ loader_state['is_loading'] = True
+
+ __dict__ = object.__getattribute__(self, '__dict__')
+
+ # All module metadata must be gathered from __spec__ in order to avoid
+ # using mutated values.
+ # Get the original name to make sure no object substitution occurred
+ # in sys.modules.
+ original_name = __spec__.name
+ # Figure out exactly what attributes were mutated between the creation
+ # of the module and now.
+ attrs_then = loader_state['__dict__']
+ attrs_now = __dict__
+ attrs_updated = {}
+ for key, value in attrs_now.items():
+ # Code that set an attribute may have kept a reference to the
+ # assigned object, making identity more important than equality.
+ if key not in attrs_then:
+ attrs_updated[key] = value
+ elif id(attrs_now[key]) != id(attrs_then[key]):
+ attrs_updated[key] = value
+ __spec__.loader.exec_module(self)
+ # If exec_module() was used directly there is no guarantee the module
+ # object was put into sys.modules.
+ if original_name in sys.modules:
+ if id(self) != id(sys.modules[original_name]):
+ raise ValueError(f"module object for {original_name!r} "
+ "substituted in sys.modules during a lazy "
+ "load")
+ # Update after loading since that's what would happen in an eager
+ # loading situation.
+ __dict__.update(attrs_updated)
+ # Finally, stop triggering this method.
+ self.__class__ = types.ModuleType
+
return getattr(self, attr)
def __delattr__(self, attr):
@@ -244,5 +263,7 @@ class LazyLoader(Loader):
loader_state = {}
loader_state['__dict__'] = module.__dict__.copy()
loader_state['__class__'] = module.__class__
+ loader_state['lock'] = threading.RLock()
+ loader_state['is_loading'] = False
module.__spec__.loader_state = loader_state
module.__class__ = _LazyModule
diff --git a/Lib/test/test_importlib/test_lazy.py b/Lib/test/test_importlib/test_lazy.py
index cc993f3..38ab219 100644
--- a/Lib/test/test_importlib/test_lazy.py
+++ b/Lib/test/test_importlib/test_lazy.py
@@ -2,9 +2,12 @@ import importlib
from importlib import abc
from importlib import util
import sys
+import time
+import threading
import types
import unittest
+from test.support import threading_helper
from test.test_importlib import util as test_util
@@ -40,6 +43,7 @@ class TestingImporter(abc.MetaPathFinder, abc.Loader):
module_name = 'lazy_loader_test'
mutated_name = 'changed'
loaded = None
+ load_count = 0
source_code = 'attr = 42; __name__ = {!r}'.format(mutated_name)
def find_spec(self, name, path, target=None):
@@ -48,8 +52,10 @@ class TestingImporter(abc.MetaPathFinder, abc.Loader):
return util.spec_from_loader(name, util.LazyLoader(self))
def exec_module(self, module):
+ time.sleep(0.01) # Simulate a slow load.
exec(self.source_code, module.__dict__)
self.loaded = module
+ self.load_count += 1
class LazyLoaderTests(unittest.TestCase):
@@ -59,8 +65,9 @@ class LazyLoaderTests(unittest.TestCase):
# Classes that don't define exec_module() trigger TypeError.
util.LazyLoader(object)
- def new_module(self, source_code=None):
- loader = TestingImporter()
+ def new_module(self, source_code=None, loader=None):
+ if loader is None:
+ loader = TestingImporter()
if source_code is not None:
loader.source_code = source_code
spec = util.spec_from_loader(TestingImporter.module_name,
@@ -140,6 +147,37 @@ class LazyLoaderTests(unittest.TestCase):
# Force the load; just care that no exception is raised.
module.__name__
+ @threading_helper.requires_working_threading()
+ def test_module_load_race(self):
+ with test_util.uncache(TestingImporter.module_name):
+ loader = TestingImporter()
+ module = self.new_module(loader=loader)
+ self.assertEqual(loader.load_count, 0)
+
+ class RaisingThread(threading.Thread):
+ exc = None
+ def run(self):
+ try:
+ super().run()
+ except Exception as exc:
+ self.exc = exc
+
+ def access_module():
+ return module.attr
+
+ threads = []
+ for _ in range(2):
+ threads.append(thread := RaisingThread(target=access_module))
+ thread.start()
+
+ # Races could cause errors
+ for thread in threads:
+ thread.join()
+ self.assertIsNone(thread.exc)
+
+ # Or multiple load attempts
+ self.assertEqual(loader.load_count, 1)
+
if __name__ == '__main__':
unittest.main()
diff --git a/Misc/NEWS.d/next/Library/2024-01-30-23-28-29.gh-issue-114763.BRjKkg.rst b/Misc/NEWS.d/next/Library/2024-01-30-23-28-29.gh-issue-114763.BRjKkg.rst
new file mode 100644
index 0000000..e8bdb83
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2024-01-30-23-28-29.gh-issue-114763.BRjKkg.rst
@@ -0,0 +1,3 @@
+Protect modules loaded with :class:`importlib.util.LazyLoader` from race
+conditions when multiple threads try to access attributes before the loading
+is complete.