diff options
-rw-r--r-- | Lib/importlib/_bootstrap.py | 177 | ||||
-rw-r--r-- | Lib/test/test_frozen.py | 3 | ||||
-rw-r--r-- | Lib/test/test_importlib/frozen/test_finder.py | 29 | ||||
-rw-r--r-- | Lib/test/test_importlib/frozen/test_loader.py | 32 | ||||
-rw-r--r-- | Misc/NEWS.d/next/Core and Builtins/2021-10-01-09-21-02.bpo-21736.RI47BU.rst | 9 | ||||
-rw-r--r-- | Python/clinic/import.c.h | 37 | ||||
-rw-r--r-- | Python/import.c | 47 |
7 files changed, 240 insertions, 94 deletions
diff --git a/Lib/importlib/_bootstrap.py b/Lib/importlib/_bootstrap.py index 49f0881..889f08f 100644 --- a/Lib/importlib/_bootstrap.py +++ b/Lib/importlib/_bootstrap.py @@ -421,7 +421,10 @@ class ModuleSpec: def spec_from_loader(name, loader, *, origin=None, is_package=None): """Return a module spec based on various loader methods.""" - if hasattr(loader, 'get_filename'): + if origin is None: + origin = getattr(loader, '_ORIGIN', None) + + if not origin and hasattr(loader, 'get_filename'): if _bootstrap_external is None: raise NotImplementedError spec_from_file_location = _bootstrap_external.spec_from_file_location @@ -467,12 +470,9 @@ def _spec_from_module(module, loader=None, origin=None): except AttributeError: location = None if origin is None: - if location is None: - try: - origin = loader._ORIGIN - except AttributeError: - origin = None - else: + if loader is not None: + origin = getattr(loader, '_ORIGIN', None) + if not origin and location is not None: origin = location try: cached = module.__cached__ @@ -484,7 +484,7 @@ def _spec_from_module(module, loader=None, origin=None): submodule_search_locations = None spec = ModuleSpec(name, loader, origin=origin) - spec._set_fileattr = False if location is None else True + spec._set_fileattr = False if location is None else (origin == location) spec.cached = cached spec.submodule_search_locations = submodule_search_locations return spec @@ -541,6 +541,7 @@ def _init_module_attrs(spec, module, *, override=False): # __path__ if override or getattr(module, '__path__', None) is None: if spec.submodule_search_locations is not None: + # XXX We should extend __path__ if it's already a list. 
try: module.__path__ = spec.submodule_search_locations except AttributeError: @@ -825,38 +826,127 @@ class FrozenImporter: return '<module {!r} ({})>'.format(m.__name__, FrozenImporter._ORIGIN) @classmethod - def _setup_module(cls, module): - assert not hasattr(module, '__file__'), module.__file__ - ispkg = hasattr(module, '__path__') - assert not ispkg or not module.__path__, module.__path__ + def _fix_up_module(cls, module): spec = module.__spec__ - assert not ispkg or not spec.submodule_search_locations + state = spec.loader_state + if state is None: + # The module is missing FrozenImporter-specific values. - if spec.loader_state is None: - spec.loader_state = type(sys.implementation)( - data=None, - origname=None, - ) - elif not hasattr(spec.loader_state, 'data'): - spec.loader_state.data = None - if not getattr(spec.loader_state, 'origname', None): + # Fix up the spec attrs. origname = vars(module).pop('__origname__', None) assert origname, 'see PyImport_ImportFrozenModuleObject()' - spec.loader_state.origname = origname + ispkg = hasattr(module, '__path__') + assert _imp.is_frozen_package(module.__name__) == ispkg, ispkg + filename, pkgdir = cls._resolve_filename(origname, spec.name, ispkg) + spec.loader_state = type(sys.implementation)( + filename=filename, + origname=origname, + ) + __path__ = spec.submodule_search_locations + if ispkg: + assert __path__ == [], __path__ + if pkgdir: + spec.submodule_search_locations.insert(0, pkgdir) + else: + assert __path__ is None, __path__ + + # Fix up the module attrs (the bare minimum). + assert not hasattr(module, '__file__'), module.__file__ + if filename: + try: + module.__file__ = filename + except AttributeError: + pass + if ispkg: + if module.__path__ != __path__: + assert module.__path__ == [], module.__path__ + module.__path__.extend(__path__) + else: + # These checks ensure that _fix_up_module() is only called + # in the right places. 
+ __path__ = spec.submodule_search_locations + ispkg = __path__ is not None + # Check the loader state. + assert sorted(vars(state)) == ['filename', 'origname'], state + if state.origname: + # The only frozen modules with "origname" set are stdlib modules. + (__file__, pkgdir, + ) = cls._resolve_filename(state.origname, spec.name, ispkg) + assert state.filename == __file__, (state.filename, __file__) + if pkgdir: + assert __path__ == [pkgdir], (__path__, pkgdir) + else: + assert __path__ == ([] if ispkg else None), __path__ + else: + __file__ = None + assert state.filename is None, state.filename + assert __path__ == ([] if ispkg else None), __path__ + # Check the file attrs. + if __file__: + assert hasattr(module, '__file__') + assert module.__file__ == __file__, (module.__file__, __file__) + else: + assert not hasattr(module, '__file__'), module.__file__ + if ispkg: + assert hasattr(module, '__path__') + assert module.__path__ == __path__, (module.__path__, __path__) + else: + assert not hasattr(module, '__path__'), module.__path__ + assert not spec.has_location + + @classmethod + def _resolve_filename(cls, fullname, alias=None, ispkg=False): + if not fullname or not getattr(sys, '_stdlib_dir', None): + return None, None + try: + sep = cls._SEP + except AttributeError: + sep = cls._SEP = '\\' if sys.platform == 'win32' else '/' + + if fullname != alias: + if fullname.startswith('<'): + fullname = fullname[1:] + if not ispkg: + fullname = f'{fullname}.__init__' + else: + ispkg = False + relfile = fullname.replace('.', sep) + if ispkg: + pkgdir = f'{sys._stdlib_dir}{sep}{relfile}' + filename = f'{pkgdir}{sep}__init__.py' + else: + pkgdir = None + filename = f'{sys._stdlib_dir}{sep}{relfile}.py' + return filename, pkgdir @classmethod def find_spec(cls, fullname, path=None, target=None): info = _call_with_frames_removed(_imp.find_frozen, fullname) if info is None: return None - data, ispkg, origname = info + # We get the marshaled data in exec_module() (the loader + 
# part of the importer), instead of here (the finder part). + # The loader is the usual place to get the data that will + # be loaded into the module. (For example, see _LoaderBasics + # in _bootstrap_external.py.) Most importantly, this importer + # is simpler if we wait to get the data. + # However, getting as much data in the finder as possible + # to later load the module is okay, and sometimes important. + # (That's why ModuleSpec.loader_state exists.) This is + # especially true if it avoids throwing away expensive data + # the loader would otherwise duplicate later and can be done + # efficiently. In this case it isn't worth it. + _, ispkg, origname = info spec = spec_from_loader(fullname, cls, origin=cls._ORIGIN, is_package=ispkg) + filename, pkgdir = cls._resolve_filename(origname, fullname, ispkg) spec.loader_state = type(sys.implementation)( - data=data, + filename=filename, origname=origname, ) + if pkgdir: + spec.submodule_search_locations.insert(0, pkgdir) return spec @classmethod @@ -873,26 +963,22 @@ class FrozenImporter: @staticmethod def create_module(spec): - """Use default semantics for module creation.""" + """Set __file__, if able.""" + module = _new_module(spec.name) + try: + filename = spec.loader_state.filename + except AttributeError: + pass + else: + if filename: + module.__file__ = filename + return module @staticmethod def exec_module(module): spec = module.__spec__ name = spec.name - try: - data = spec.loader_state.data - except AttributeError: - if not _imp.is_frozen(name): - raise ImportError('{!r} is not a frozen module'.format(name), - name=name) - data = None - else: - # We clear the extra data we got from the finder, to save memory. - # Note that if this method is called again (e.g. by - # importlib.reload()) then _imp.get_frozen_object() will notice - # no data was provided and will look it up. 
- spec.loader_state.data = None - code = _call_with_frames_removed(_imp.get_frozen_object, name, data) + code = _call_with_frames_removed(_imp.get_frozen_object, name) exec(code, module.__dict__) @classmethod @@ -903,7 +989,16 @@ class FrozenImporter: """ # Warning about deprecation implemented in _load_module_shim(). - return _load_module_shim(cls, fullname) + module = _load_module_shim(cls, fullname) + info = _imp.find_frozen(fullname) + assert info is not None + _, ispkg, origname = info + module.__origname__ = origname + vars(module).pop('__file__', None) + if ispkg: + module.__path__ = [] + cls._fix_up_module(module) + return module @classmethod @_requires_frozen @@ -1244,7 +1339,7 @@ def _setup(sys_module, _imp_module): spec = _spec_from_module(module, loader) _init_module_attrs(spec, module) if loader is FrozenImporter: - loader._setup_module(module) + loader._fix_up_module(module) # Directly load built-in modules needed during bootstrap. self_module = sys.modules[__name__] diff --git a/Lib/test/test_frozen.py b/Lib/test/test_frozen.py index 029fd06..0b4a12b 100644 --- a/Lib/test/test_frozen.py +++ b/Lib/test/test_frozen.py @@ -39,9 +39,6 @@ class TestFrozen(unittest.TestCase): self.assertIs(spam.__spec__.loader, importlib.machinery.FrozenImporter) - # This is not possible until frozen packages have __path__ set properly. - # See https://bugs.python.org/issue21736. 
- @unittest.expectedFailure def test_unfrozen_submodule_in_frozen_package(self): with import_helper.CleanImport('__phello__', '__phello__.spam'): with import_helper.frozen_modules(enabled=True): diff --git a/Lib/test/test_importlib/frozen/test_finder.py b/Lib/test/test_importlib/frozen/test_finder.py index cd5586d..66080b2 100644 --- a/Lib/test/test_importlib/frozen/test_finder.py +++ b/Lib/test/test_importlib/frozen/test_finder.py @@ -44,30 +44,31 @@ class FindSpecTests(abc.FinderTests): if not filename: if not origname: origname = spec.name + filename = resolve_stdlib_file(origname) actual = dict(vars(spec.loader_state)) - # Check the code object used to import the frozen module. - # We can't compare the marshaled data directly because - # marshal.dumps() would mark "expected" (below) as a ref, - # which slightly changes the output. - # (See https://bugs.python.org/issue34093.) - data = actual.pop('data') - with import_helper.frozen_modules(): - expected = _imp.get_frozen_object(spec.name) - code = marshal.loads(data) - self.assertEqual(code, expected) - # Check the rest of spec.loader_state. expected = dict( origname=origname, + filename=filename if origname else None, ) self.assertDictEqual(actual, expected) def check_search_locations(self, spec): - # Frozen packages do not have any path entries. - # (See https://bugs.python.org/issue21736.) - expected = [] + """This is only called when testing packages.""" + missing = object() + filename = getattr(spec.loader_state, 'filename', missing) + origname = getattr(spec.loader_state, 'origname', None) + if not origname or filename is missing: + # We deal with this in check_loader_state(). 
+ return + if not filename: + expected = [] + elif origname != spec.name and not origname.startswith('<'): + expected = [] + else: + expected = [os.path.dirname(filename)] self.assertListEqual(spec.submodule_search_locations, expected) def test_module(self): diff --git a/Lib/test/test_importlib/frozen/test_loader.py b/Lib/test/test_importlib/frozen/test_loader.py index d6f39fa..f1ccb8a 100644 --- a/Lib/test/test_importlib/frozen/test_loader.py +++ b/Lib/test/test_importlib/frozen/test_loader.py @@ -3,10 +3,11 @@ from .. import util machinery = util.import_importlib('importlib.machinery') -from test.support import captured_stdout, import_helper +from test.support import captured_stdout, import_helper, STDLIB_DIR import _imp import contextlib import marshal +import os.path import types import unittest import warnings @@ -30,20 +31,27 @@ def fresh(name, *, oldapi=False): yield +def resolve_stdlib_file(name, ispkg=False): + assert name + if ispkg: + return os.path.join(STDLIB_DIR, *name.split('.'), '__init__.py') + else: + return os.path.join(STDLIB_DIR, *name.split('.')) + '.py' + + class ExecModuleTests(abc.LoaderTests): def exec_module(self, name, origname=None): with import_helper.frozen_modules(): is_package = self.machinery.FrozenImporter.is_package(name) - code = _imp.get_frozen_object(name) spec = self.machinery.ModuleSpec( name, self.machinery.FrozenImporter, origin='frozen', is_package=is_package, loader_state=types.SimpleNamespace( - data=marshal.dumps(code), origname=origname or name, + filename=resolve_stdlib_file(origname or name, is_package), ), ) module = types.ModuleType(name) @@ -68,7 +76,6 @@ class ExecModuleTests(abc.LoaderTests): self.assertEqual(getattr(module, attr), value) self.assertEqual(output, 'Hello world!\n') self.assertTrue(hasattr(module, '__spec__')) - self.assertIsNone(module.__spec__.loader_state.data) self.assertEqual(module.__spec__.loader_state.origname, name) def test_package(self): @@ -82,7 +89,6 @@ class 
ExecModuleTests(abc.LoaderTests): name=name, attr=attr, given=attr_value, expected=value)) self.assertEqual(output, 'Hello world!\n') - self.assertIsNone(module.__spec__.loader_state.data) self.assertEqual(module.__spec__.loader_state.origname, name) def test_lacking_parent(self): @@ -139,36 +145,41 @@ class LoaderTests(abc.LoaderTests): def test_module(self): module, stdout = self.load_module('__hello__') + filename = resolve_stdlib_file('__hello__') check = {'__name__': '__hello__', '__package__': '', '__loader__': self.machinery.FrozenImporter, + '__file__': filename, } for attr, value in check.items(): - self.assertEqual(getattr(module, attr), value) + self.assertEqual(getattr(module, attr, None), value) self.assertEqual(stdout.getvalue(), 'Hello world!\n') - self.assertFalse(hasattr(module, '__file__')) def test_package(self): module, stdout = self.load_module('__phello__') + filename = resolve_stdlib_file('__phello__', ispkg=True) + pkgdir = os.path.dirname(filename) check = {'__name__': '__phello__', '__package__': '__phello__', - '__path__': [], + '__path__': [pkgdir], '__loader__': self.machinery.FrozenImporter, + '__file__': filename, } for attr, value in check.items(): - attr_value = getattr(module, attr) + attr_value = getattr(module, attr, None) self.assertEqual(attr_value, value, "for __phello__.%s, %r != %r" % (attr, attr_value, value)) self.assertEqual(stdout.getvalue(), 'Hello world!\n') - self.assertFalse(hasattr(module, '__file__')) def test_lacking_parent(self): with util.uncache('__phello__'): module, stdout = self.load_module('__phello__.spam') + filename = resolve_stdlib_file('__phello__.spam') check = {'__name__': '__phello__.spam', '__package__': '__phello__', '__loader__': self.machinery.FrozenImporter, + '__file__': filename, } for attr, value in check.items(): attr_value = getattr(module, attr) @@ -176,7 +187,6 @@ class LoaderTests(abc.LoaderTests): "for __phello__.spam.%s, %r != %r" % (attr, attr_value, value)) 
self.assertEqual(stdout.getvalue(), 'Hello world!\n') - self.assertFalse(hasattr(module, '__file__')) def test_module_reuse(self): with fresh('__hello__', oldapi=True): diff --git a/Misc/NEWS.d/next/Core and Builtins/2021-10-01-09-21-02.bpo-21736.RI47BU.rst b/Misc/NEWS.d/next/Core and Builtins/2021-10-01-09-21-02.bpo-21736.RI47BU.rst new file mode 100644 index 0000000..8396a49 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2021-10-01-09-21-02.bpo-21736.RI47BU.rst @@ -0,0 +1,9 @@ +Frozen stdlib modules now have ``__file__`` set to the .py file they would +otherwise be loaded from, if possible. For packages, ``__path__`` now has +the correct entry instead of being an empty list, which allows unfrozen +submodules to be imported. These are set only if the stdlib directory is +known when the runtime is initialized. Note that the file at ``__file__`` +is not guaranteed to exist. None of this affects non-stdlib frozen modules +nor, for now, frozen modules imported using +``PyImport_ImportFrozenModule()``. Also, at the moment ``co_filename`` is +not updated for the module. 
diff --git a/Python/clinic/import.c.h b/Python/clinic/import.c.h index dfb59de..6052316 100644 --- a/Python/clinic/import.c.h +++ b/Python/clinic/import.c.h @@ -170,7 +170,7 @@ exit: } PyDoc_STRVAR(_imp_find_frozen__doc__, -"find_frozen($module, name, /)\n" +"find_frozen($module, name, /, *, withdata=False)\n" "--\n" "\n" "Return info about the corresponding frozen module (if there is one) or None.\n" @@ -184,26 +184,43 @@ PyDoc_STRVAR(_imp_find_frozen__doc__, " the module\'s current name)"); #define _IMP_FIND_FROZEN_METHODDEF \ - {"find_frozen", (PyCFunction)_imp_find_frozen, METH_O, _imp_find_frozen__doc__}, + {"find_frozen", (PyCFunction)(void(*)(void))_imp_find_frozen, METH_FASTCALL|METH_KEYWORDS, _imp_find_frozen__doc__}, static PyObject * -_imp_find_frozen_impl(PyObject *module, PyObject *name); +_imp_find_frozen_impl(PyObject *module, PyObject *name, int withdata); static PyObject * -_imp_find_frozen(PyObject *module, PyObject *arg) +_imp_find_frozen(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) { PyObject *return_value = NULL; + static const char * const _keywords[] = {"", "withdata", NULL}; + static _PyArg_Parser _parser = {NULL, _keywords, "find_frozen", 0}; + PyObject *argsbuf[2]; + Py_ssize_t noptargs = nargs + (kwnames ? 
PyTuple_GET_SIZE(kwnames) : 0) - 1; PyObject *name; + int withdata = 0; - if (!PyUnicode_Check(arg)) { - _PyArg_BadArgument("find_frozen", "argument", "str", arg); + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 1, 1, 0, argsbuf); + if (!args) { goto exit; } - if (PyUnicode_READY(arg) == -1) { + if (!PyUnicode_Check(args[0])) { + _PyArg_BadArgument("find_frozen", "argument 1", "str", args[0]); goto exit; } - name = arg; - return_value = _imp_find_frozen_impl(module, name); + if (PyUnicode_READY(args[0]) == -1) { + goto exit; + } + name = args[0]; + if (!noptargs) { + goto skip_optional_kwonly; + } + withdata = PyObject_IsTrue(args[1]); + if (withdata < 0) { + goto exit; + } +skip_optional_kwonly: + return_value = _imp_find_frozen_impl(module, name, withdata); exit: return return_value; @@ -548,4 +565,4 @@ exit: #ifndef _IMP_EXEC_DYNAMIC_METHODDEF #define _IMP_EXEC_DYNAMIC_METHODDEF #endif /* !defined(_IMP_EXEC_DYNAMIC_METHODDEF) */ -/*[clinic end generated code: output=8c8dd08158f9ac7c input=a9049054013a1b77]*/ +/*[clinic end generated code: output=adcf787969a11353 input=a9049054013a1b77]*/ diff --git a/Python/import.c b/Python/import.c index 4bc1e51..f216092 100644 --- a/Python/import.c +++ b/Python/import.c @@ -2049,6 +2049,8 @@ _imp.find_frozen name: unicode / + * + withdata: bool = False Return info about the corresponding frozen module (if there is one) or None. 
@@ -2062,8 +2064,8 @@ The returned info (a 2-tuple): [clinic start generated code]*/ static PyObject * -_imp_find_frozen_impl(PyObject *module, PyObject *name) -/*[clinic end generated code: output=3fd17da90d417e4e input=6aa7b9078a89280a]*/ +_imp_find_frozen_impl(PyObject *module, PyObject *name, int withdata) +/*[clinic end generated code: output=8c1c3c7f925397a5 input=22a8847c201542fd]*/ { struct frozen_info info; frozen_status status = find_frozen(name, &info); @@ -2078,9 +2080,12 @@ _imp_find_frozen_impl(PyObject *module, PyObject *name) return NULL; } - PyObject *data = PyBytes_FromStringAndSize(info.data, info.size); - if (data == NULL) { - return NULL; + PyObject *data = NULL; + if (withdata) { + data = PyMemoryView_FromMemory((char *)info.data, info.size, PyBUF_READ); + if (data == NULL) { + return NULL; + } } PyObject *origname = NULL; @@ -2092,11 +2097,11 @@ _imp_find_frozen_impl(PyObject *module, PyObject *name) } } - PyObject *result = PyTuple_Pack(3, data, + PyObject *result = PyTuple_Pack(3, data ? data : Py_None, info.is_package ? Py_True : Py_False, origname ? 
origname : Py_None); Py_XDECREF(origname); - Py_DECREF(data); + Py_XDECREF(data); return result; } @@ -2115,15 +2120,14 @@ _imp_get_frozen_object_impl(PyObject *module, PyObject *name, PyObject *dataobj) /*[clinic end generated code: output=54368a673a35e745 input=034bdb88f6460b7b]*/ { - struct frozen_info info; - if (PyBytes_Check(dataobj)) { - info.nameobj = name; - info.data = PyBytes_AS_STRING(dataobj); - info.size = PyBytes_Size(dataobj); - if (info.size == 0) { - set_frozen_error(FROZEN_INVALID, name); + struct frozen_info info = {0}; + Py_buffer buf = {0}; + if (PyObject_CheckBuffer(dataobj)) { + if (PyObject_GetBuffer(dataobj, &buf, PyBUF_READ) != 0) { return NULL; } + info.data = (const char *)buf.buf; + info.size = buf.len; } else if (dataobj != Py_None) { _PyArg_BadArgument("get_frozen_object", "argument 2", "bytes", dataobj); @@ -2136,7 +2140,20 @@ _imp_get_frozen_object_impl(PyObject *module, PyObject *name, return NULL; } } - return unmarshal_frozen_code(&info); + + if (info.nameobj == NULL) { + info.nameobj = name; + } + if (info.size == 0) { + set_frozen_error(FROZEN_INVALID, name); + return NULL; + } + + PyObject *codeobj = unmarshal_frozen_code(&info); + if (dataobj != Py_None) { + PyBuffer_Release(&buf); + } + return codeobj; } /*[clinic input] |