From 074fa5750640a067d9894c69378a00ceecc3b948 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Thu, 28 Oct 2021 15:04:33 -0600 Subject: bpo-45395: Make custom frozen modules additions instead of replacements. (gh-28778) Currently custom modules (the array set on PyImport_FrozenModules) replace all the frozen stdlib modules. That can be problematic and is unlikely to be what the user wants. This change treats the custom frozen modules as additions instead. They take precedence over all other frozen modules except for those needed to bootstrap the import system. If the "code" field of an entry in the custom array is NULL then that frozen module is treated as disabled, which allows a custom entry to disable a frozen stdlib module. This change allows us to get rid of is_essential_frozen_module() and simplifies the logic for which frozen modules should be ignored. https://bugs.python.org/issue45395 --- Doc/library/ctypes.rst | 6 +- Include/internal/pycore_import.h | 3 + Lib/ctypes/test/test_values.py | 50 +++---- .../C API/2021-10-06-15-54-40.bpo-45395.yVhdAl.rst | 4 + Programs/_freeze_module.c | 10 +- Programs/_testembed.c | 28 +--- Python/frozen.c | 19 ++- Python/import.c | 159 +++++++++++++++------ Tools/freeze/freeze.py | 6 - Tools/scripts/freeze_modules.py | 63 +++++--- 10 files changed, 219 insertions(+), 129 deletions(-) create mode 100644 Misc/NEWS.d/next/C API/2021-10-06-15-54-40.bpo-45395.yVhdAl.rst diff --git a/Doc/library/ctypes.rst b/Doc/library/ctypes.rst index 87c9c66..7461114 100644 --- a/Doc/library/ctypes.rst +++ b/Doc/library/ctypes.rst @@ -1095,7 +1095,7 @@ We have defined the :c:type:`struct _frozen` data type, so we can get the pointe to the table:: >>> FrozenTable = POINTER(struct_frozen) - >>> table = FrozenTable.in_dll(pythonapi, "PyImport_FrozenModules") + >>> table = FrozenTable.in_dll(pythonapi, "_PyImport_FrozenBootstrap") >>> Since ``table`` is a ``pointer`` to the array of ``struct_frozen`` records, we @@ -1111,9 +1111,7 @@ hit the ``NULL`` entry:: ... _frozen_importlib 31764 _frozen_importlib_external 41499 - __hello__ 161 - __phello__ -161 - __phello__.spam 161 + zipimport 12345 >>> The fact that standard Python has a frozen module and a frozen package diff --git a/Include/internal/pycore_import.h b/Include/internal/pycore_import.h index 6439b73..aee1f66 100644 --- a/Include/internal/pycore_import.h +++ b/Include/internal/pycore_import.h @@ -15,6 +15,9 @@ struct _module_alias { const char *orig; /* ASCII encoded string */ }; +PyAPI_DATA(const struct _frozen *) _PyImport_FrozenBootstrap; +PyAPI_DATA(const struct _frozen *) _PyImport_FrozenStdlib; +PyAPI_DATA(const struct _frozen *) _PyImport_FrozenTest; extern const struct _module_alias * _PyImport_FrozenAliases; #ifdef __cplusplus diff --git a/Lib/ctypes/test/test_values.py b/Lib/ctypes/test/test_values.py index 96521fd..5f9fa06 100644 --- a/Lib/ctypes/test/test_values.py +++ b/Lib/ctypes/test/test_values.py @@ -56,35 +56,37 @@ class PythonValuesTestCase(unittest.TestCase): ("size", c_int)] FrozenTable = POINTER(struct_frozen) - ft = FrozenTable.in_dll(pythonapi, "PyImport_FrozenModules") - # ft is a pointer to the struct_frozen entries: modules = [] - for entry in ft: - # This is dangerous. We *can* iterate over a pointer, but - # the loop will not terminate (maybe with an access - # violation;-) because the pointer instance has no size. - if entry.name is None: - break - modname = entry.name.decode("ascii") - modules.append(modname) - with self.subTest(modname): - # Do a sanity check on entry.size and entry.code. - self.assertGreater(abs(entry.size), 10) - self.assertTrue([entry.code[i] for i in range(abs(entry.size))]) - # Check the module's package-ness. - with import_helper.frozen_modules(): - spec = importlib.util.find_spec(modname) - if entry.size < 0: - # It's a package. - self.assertIsNotNone(spec.submodule_search_locations) - else: - self.assertIsNone(spec.submodule_search_locations) + for group in ["Bootstrap", "Stdlib", "Test"]: + ft = FrozenTable.in_dll(pythonapi, f"_PyImport_Frozen{group}") + # ft is a pointer to the struct_frozen entries: + for entry in ft: + # This is dangerous. We *can* iterate over a pointer, but + # the loop will not terminate (maybe with an access + # violation;-) because the pointer instance has no size. + if entry.name is None: + break + modname = entry.name.decode("ascii") + modules.append(modname) + with self.subTest(modname): + # Do a sanity check on entry.size and entry.code. + self.assertGreater(abs(entry.size), 10) + self.assertTrue([entry.code[i] for i in range(abs(entry.size))]) + # Check the module's package-ness. + with import_helper.frozen_modules(): + spec = importlib.util.find_spec(modname) + if entry.size < 0: + # It's a package. + self.assertIsNotNone(spec.submodule_search_locations) + else: + self.assertIsNone(spec.submodule_search_locations) with import_helper.frozen_modules(): expected = _imp._frozen_module_names() self.maxDiff = None - self.assertEqual(modules, expected, "PyImport_FrozenModules example " - "in Doc/library/ctypes.rst may be out of date") + self.assertEqual(modules, expected, + "_PyImport_FrozenBootstrap example " + "in Doc/library/ctypes.rst may be out of date") from ctypes import _pointer_type_cache del _pointer_type_cache[struct_frozen] diff --git a/Misc/NEWS.d/next/C API/2021-10-06-15-54-40.bpo-45395.yVhdAl.rst b/Misc/NEWS.d/next/C API/2021-10-06-15-54-40.bpo-45395.yVhdAl.rst new file mode 100644 index 0000000..8996513 --- /dev/null +++ b/Misc/NEWS.d/next/C API/2021-10-06-15-54-40.bpo-45395.yVhdAl.rst @@ -0,0 +1,4 @@ +Custom frozen modules (the array set to ``PyImport_FrozenModules``) are now +treated as additions, rather than replacing all the default frozen modules. +Frozen stdlib modules can still be disabled by setting the "code" field of +the custom array entry to NULL. diff --git a/Programs/_freeze_module.c b/Programs/_freeze_module.c index 316c70d..e3f6c11 100644 --- a/Programs/_freeze_module.c +++ b/Programs/_freeze_module.c @@ -23,13 +23,16 @@ of frozen modules instead, left deliberately blank so as to avoid unintentional import of a stale version of _frozen_importlib. */ -static const struct _frozen _PyImport_FrozenModules[] = { +static const struct _frozen no_modules[] = { {0, 0, 0} /* sentinel */ }; static const struct _module_alias aliases[] = { {0, 0} /* sentinel */ }; +const struct _frozen *_PyImport_FrozenBootstrap; +const struct _frozen *_PyImport_FrozenStdlib; +const struct _frozen *_PyImport_FrozenTest; const struct _frozen *PyImport_FrozenModules; const struct _module_alias *_PyImport_FrozenAliases; @@ -188,7 +191,10 @@ main(int argc, char *argv[]) { const char *name, *inpath, *outpath; - PyImport_FrozenModules = _PyImport_FrozenModules; + _PyImport_FrozenBootstrap = no_modules; + _PyImport_FrozenStdlib = no_modules; + _PyImport_FrozenTest = no_modules; + PyImport_FrozenModules = NULL; _PyImport_FrozenAliases = aliases; if (argc != 4) { diff --git a/Programs/_testembed.c b/Programs/_testembed.c index 773c6c3..6fe18d9 100644 --- a/Programs/_testembed.c +++ b/Programs/_testembed.c @@ -8,6 +8,7 @@ #include #include "pycore_initconfig.h" // _PyConfig_InitCompatConfig() #include "pycore_runtime.h" // _PyRuntime +#include "pycore_import.h" // _PyImport_FrozenBootstrap #include #include #include @@ -1804,30 +1805,10 @@ static int test_unicode_id_init(void) static int test_frozenmain(void) { - // Get "_frozen_importlib" and "_frozen_importlib_external" - // from PyImport_FrozenModules - const struct _frozen *importlib = NULL, *importlib_external = NULL; - for (const struct _frozen *mod = PyImport_FrozenModules; mod->name != NULL; mod++) { - if (strcmp(mod->name, "_frozen_importlib") == 0) { - importlib = mod; - } - else if (strcmp(mod->name, "_frozen_importlib_external") == 0) { - importlib_external = mod; - } - } - if (importlib == NULL || importlib_external == NULL) { - error("cannot find frozen importlib and importlib_external"); - return 1; - } - static struct _frozen frozen_modules[4] = { - {0, 0, 0}, // importlib - {0, 0, 0}, // importlib_external {"__main__", M_test_frozenmain, sizeof(M_test_frozenmain)}, {0, 0, 0} // sentinel }; - frozen_modules[0] = *importlib; - frozen_modules[1] = *importlib_external; char* argv[] = { "./argv0", @@ -1846,7 +1827,12 @@ static int test_frozenmain(void) static int list_frozen(void) { const struct _frozen *p; - for (p = PyImport_FrozenModules; ; p++) { + for (p = _PyImport_FrozenBootstrap; ; p++) { + if (p->name == NULL) + break; + printf("%s\n", p->name); + } + for (p = _PyImport_FrozenStdlib; ; p++) { if (p->name == NULL) break; printf("%s\n", p->name); diff --git a/Python/frozen.c b/Python/frozen.c index 499b3b9..15baa97 100644 --- a/Python/frozen.c +++ b/Python/frozen.c @@ -63,14 +63,15 @@ /* Note that a negative size indicates a package. */ -static const struct _frozen _PyImport_FrozenModules[] = { - /* import system */ +static const struct _frozen bootstrap_modules[] = { {"_frozen_importlib", _Py_M__importlib__bootstrap, (int)sizeof(_Py_M__importlib__bootstrap)}, {"_frozen_importlib_external", _Py_M__importlib__bootstrap_external, (int)sizeof(_Py_M__importlib__bootstrap_external)}, {"zipimport", _Py_M__zipimport, (int)sizeof(_Py_M__zipimport)}, - + {0, 0, 0} /* bootstrap sentinel */ +}; +static const struct _frozen stdlib_modules[] = { /* stdlib - startup, without site (python -S) */ {"abc", _Py_M__abc, (int)sizeof(_Py_M__abc)}, {"codecs", _Py_M__codecs, (int)sizeof(_Py_M__codecs)}, @@ -87,8 +88,9 @@ static const struct _frozen _PyImport_FrozenModules[] = { {"os", _Py_M__os, (int)sizeof(_Py_M__os)}, {"site", _Py_M__site, (int)sizeof(_Py_M__site)}, {"stat", _Py_M__stat, (int)sizeof(_Py_M__stat)}, - - /* Test module */ + {0, 0, 0} /* stdlib sentinel */ +}; +static const struct _frozen test_modules[] = { {"__hello__", _Py_M____hello__, (int)sizeof(_Py_M____hello__)}, {"__hello_alias__", _Py_M____hello__, (int)sizeof(_Py_M____hello__)}, {"__phello_alias__", _Py_M____hello__, -(int)sizeof(_Py_M____hello__)}, @@ -103,8 +105,11 @@ static const struct _frozen _PyImport_FrozenModules[] = { {"__phello__.spam", _Py_M____phello___spam, (int)sizeof(_Py_M____phello___spam)}, {"__hello_only__", _Py_M__frozen_only, (int)sizeof(_Py_M__frozen_only)}, - {0, 0, 0} /* modules sentinel */ + {0, 0, 0} /* test sentinel */ }; +const struct _frozen *_PyImport_FrozenBootstrap = bootstrap_modules; +const struct _frozen *_PyImport_FrozenStdlib = stdlib_modules; +const struct _frozen *_PyImport_FrozenTest = test_modules; static const struct _module_alias aliases[] = { {"_frozen_importlib", "importlib._bootstrap"}, @@ -124,4 +129,4 @@ const struct _module_alias *_PyImport_FrozenAliases = aliases; /* Embedding apps may change this pointer to point to their favorite collection of frozen modules: */ -const struct _frozen *PyImport_FrozenModules = _PyImport_FrozenModules; +const struct _frozen *PyImport_FrozenModules = NULL; diff --git a/Python/import.c b/Python/import.c index 15b1956..48ea912 100644 --- a/Python/import.c +++ b/Python/import.c @@ -1071,27 +1071,6 @@ resolve_module_alias(const char *name, const struct _module_alias *aliases, /* Frozen modules */ static bool -is_essential_frozen_module(const char *name) -{ - /* These modules are necessary to bootstrap the import system. */ - if (strcmp(name, "_frozen_importlib") == 0) { - return true; - } - if (strcmp(name, "_frozen_importlib_external") == 0) { - return true; - } - if (strcmp(name, "zipimport") == 0) { - return true; - } - /* This doesn't otherwise have anywhere to find the module. - See frozenmain.c. */ - if (strcmp(name, "__main__") == 0) { - return true; - } - return false; -} - -static bool use_frozen(void) { PyInterpreterState *interp = _PyInterpreterState_GET(); @@ -1115,26 +1094,76 @@ list_frozen_module_names() return NULL; } bool enabled = use_frozen(); - for (const struct _frozen *p = PyImport_FrozenModules; ; p++) { + const struct _frozen *p; +#define ADD_MODULE(name) \ + do { \ + PyObject *nameobj = PyUnicode_FromString(name); \ + if (nameobj == NULL) { \ + goto error; \ + } \ + int res = PyList_Append(names, nameobj); \ + Py_DECREF(nameobj); \ + if (res != 0) { \ + goto error; \ + } \ + } while(0) + // We always use the bootstrap modules. + for (p = _PyImport_FrozenBootstrap; ; p++) { if (p->name == NULL) { break; } - if (!enabled && !is_essential_frozen_module(p->name)) { - continue; + ADD_MODULE(p->name); + } + // Frozen stdlib modules may be disabled. + for (p = _PyImport_FrozenStdlib; ; p++) { + if (p->name == NULL) { + break; } - PyObject *name = PyUnicode_FromString(p->name); - if (name == NULL) { - Py_DECREF(names); - return NULL; + if (enabled) { + ADD_MODULE(p->name); } - int res = PyList_Append(names, name); - Py_DECREF(name); - if (res != 0) { - Py_DECREF(names); - return NULL; + } + for (p = _PyImport_FrozenTest; ; p++) { + if (p->name == NULL) { + break; + } + if (enabled) { + ADD_MODULE(p->name); + } + } +#undef ADD_MODULE + // Add any custom modules. + if (PyImport_FrozenModules != NULL) { + for (p = PyImport_FrozenModules; ; p++) { + if (p->name == NULL) { + break; + } + PyObject *nameobj = PyUnicode_FromString(p->name); + if (nameobj == NULL) { + goto error; + } + int found = PySequence_Contains(names, nameobj); + if (found < 0) { + Py_DECREF(nameobj); + goto error; + } + else if (found) { + Py_DECREF(nameobj); + } + else { + int res = PyList_Append(names, nameobj); + Py_DECREF(nameobj); + if (res != 0) { + goto error; + } + } } } return names; + +error: + Py_DECREF(names); + return NULL; } typedef enum { @@ -1180,6 +1209,54 @@ set_frozen_error(frozen_status status, PyObject *modname) } } +static const struct _frozen * +look_up_frozen(const char *name) +{ + const struct _frozen *p; + // We always use the bootstrap modules. + for (p = _PyImport_FrozenBootstrap; ; p++) { + if (p->name == NULL) { + // We hit the end-of-list sentinel value. + break; + } + if (strcmp(name, p->name) == 0) { + return p; + } + } + // Prefer custom modules, if any. Frozen stdlib modules can be + // disabled here by setting "code" to NULL in the array entry. + if (PyImport_FrozenModules != NULL) { + for (p = PyImport_FrozenModules; ; p++) { + if (p->name == NULL) { + break; + } + if (strcmp(name, p->name) == 0) { + return p; + } + } + } + // Frozen stdlib modules may be disabled. + if (use_frozen()) { + for (p = _PyImport_FrozenStdlib; ; p++) { + if (p->name == NULL) { + break; + } + if (strcmp(name, p->name) == 0) { + return p; + } + } + for (p = _PyImport_FrozenTest; ; p++) { + if (p->name == NULL) { + break; + } + if (strcmp(name, p->name) == 0) { + return p; + } + } + } + return NULL; +} + struct frozen_info { PyObject *nameobj; const char *data; @@ -1209,19 +1286,9 @@ find_frozen(PyObject *nameobj, struct frozen_info *info) return FROZEN_BAD_NAME; } - if (!use_frozen() && !is_essential_frozen_module(name)) { - return FROZEN_DISABLED; - } - - const struct _frozen *p; - for (p = PyImport_FrozenModules; ; p++) { - if (p->name == NULL) { - // We hit the end-of-list sentinel value. - return FROZEN_NOT_FOUND; - } - if (strcmp(name, p->name) == 0) { - break; - } + const struct _frozen *p = look_up_frozen(name); + if (p == NULL) { + return FROZEN_NOT_FOUND; } if (info != NULL) { info->nameobj = nameobj; // borrowed diff --git a/Tools/freeze/freeze.py b/Tools/freeze/freeze.py index d66e1e2..bc5e43f 100755 --- a/Tools/freeze/freeze.py +++ b/Tools/freeze/freeze.py @@ -367,12 +367,6 @@ def main(): else: mf.load_file(mod) - # Alias "importlib._bootstrap" to "_frozen_importlib" so that the - # import machinery can bootstrap. Do the same for - # importlib._bootstrap_external. - mf.modules["_frozen_importlib"] = mf.modules["importlib._bootstrap"] - mf.modules["_frozen_importlib_external"] = mf.modules["importlib._bootstrap_external"] - # Add the main script as either __main__, or the actual module name. if python_entry_is_main: mf.run_script(scriptfile) diff --git a/Tools/scripts/freeze_modules.py b/Tools/scripts/freeze_modules.py index 5c7eee4..3614262 100644 --- a/Tools/scripts/freeze_modules.py +++ b/Tools/scripts/freeze_modules.py @@ -60,6 +60,7 @@ PCBUILD_FILTERS = os.path.join(ROOT_DIR, 'PCbuild', '_freeze_module.vcxproj.filt OS_PATH = 'ntpath' if os.name == 'nt' else 'posixpath' # These are modules that get frozen. +TESTS_SECTION = 'Test module' FROZEN = [ # See parse_frozen_spec() for the format. # In cases where the frozenid is duplicated, the first one is re-used. @@ -94,7 +95,7 @@ FROZEN = [ 'site', 'stat', ]), - ('Test module', [ + (TESTS_SECTION, [ '__hello__', '__hello__ : __hello_alias__', '__hello__ : <__phello_alias__>', @@ -103,7 +104,7 @@ FROZEN = [ f'frozen_only : __hello_only__ = {FROZEN_ONLY}', ]), ] -ESSENTIAL = { +BOOTSTRAP = { 'importlib._bootstrap', 'importlib._bootstrap_external', 'zipimport', @@ -527,16 +528,24 @@ def regen_frozen(modules): header = relpath_for_posix_display(src.frozenfile, parentdir) headerlines.append(f'#include "{header}"') - deflines = [] + bootstraplines = [] + stdliblines = [] + testlines = [] aliaslines = [] indent = ' ' lastsection = None for mod in modules: - if mod.section != lastsection: - if lastsection is not None: - deflines.append('') - deflines.append(f'/* {mod.section} */') - lastsection = mod.section + if mod.frozenid in BOOTSTRAP: + lines = bootstraplines + elif mod.section == TESTS_SECTION: + lines = testlines + else: + lines = stdliblines + if mod.section != lastsection: + if lastsection is not None: + lines.append('') + lines.append(f'/* {mod.section} */') + lastsection = mod.section symbol = mod.symbol pkg = '-' if mod.ispkg else '' @@ -544,11 +553,11 @@ def regen_frozen(modules): ) % (mod.name, symbol, pkg, symbol) # TODO: Consider not folding lines if len(line) < 80: - deflines.append(line) + lines.append(line) else: line1, _, line2 = line.rpartition(' ') - deflines.append(line1) - deflines.append(indent + line2) + lines.append(line1) + lines.append(indent + line2) if mod.isalias: if not mod.orig: @@ -559,11 +568,13 @@ def regen_frozen(modules): entry = '{"%s", "%s"},' % (mod.name, mod.orig) aliaslines.append(indent + entry) - if not deflines[0]: - del deflines[0] - for i, line in enumerate(deflines): - if line: - deflines[i] = indent + line + for lines in (bootstraplines, stdliblines, testlines): + # TODO: Is this necessary any more? + if not lines[0]: + del lines[0] + for i, line in enumerate(lines): + if line: + lines[i] = indent + line print(f'# Updating {os.path.relpath(FROZEN_FILE)}') with updating_file_with_tmpfile(FROZEN_FILE) as (infile, outfile): @@ -579,9 +590,23 @@ def regen_frozen(modules): ) lines = replace_block( lines, - "static const struct _frozen _PyImport_FrozenModules[] =", - "/* modules sentinel */", - deflines, + "static const struct _frozen bootstrap_modules[] =", + "/* bootstrap sentinel */", + bootstraplines, + FROZEN_FILE, + ) + lines = replace_block( + lines, + "static const struct _frozen stdlib_modules[] =", + "/* stdlib sentinel */", + stdliblines, + FROZEN_FILE, + ) + lines = replace_block( + lines, + "static const struct _frozen test_modules[] =", + "/* test sentinel */", + testlines, FROZEN_FILE, ) lines = replace_block( -- cgit v0.12