diff options
author | Eric Snow <ericsnowcurrently@gmail.com> | 2021-09-13 22:18:37 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-09-13 22:18:37 (GMT) |
commit | a2d8c4b81b8e68e2ffe10945f7ca69174c14e52a (patch) | |
tree | c46c7aead37c0a393f13ef0fb8bb97ea9a9836c6 /Tools | |
parent | 1fc41ae8709e20d741bd86c2345173688a5e84b0 (diff) | |
download | cpython-a2d8c4b81b8e68e2ffe10945f7ca69174c14e52a.zip cpython-a2d8c4b81b8e68e2ffe10945f7ca69174c14e52a.tar.gz cpython-a2d8c4b81b8e68e2ffe10945f7ca69174c14e52a.tar.bz2 |
bpo-45019: Do some cleanup related to frozen modules. (gh-28319)
There are a few things I missed in gh-27980. This is a follow-up that will make subsequent PRs cleaner. It includes fixes to tests and tools that reference the frozen modules.
https://bugs.python.org/issue45019
Diffstat (limited to 'Tools')
-rw-r--r-- | Tools/scripts/freeze_modules.py | 316 | ||||
-rw-r--r-- | Tools/scripts/generate_stdlib_module_names.py | 12 |
2 files changed, 225 insertions, 103 deletions
diff --git a/Tools/scripts/freeze_modules.py b/Tools/scripts/freeze_modules.py index 4f60e1b..b7e5320 100644 --- a/Tools/scripts/freeze_modules.py +++ b/Tools/scripts/freeze_modules.py @@ -3,6 +3,8 @@ See the notes at the top of Python/frozen.c for more info. """ +from collections import namedtuple +import hashlib import os import os.path import subprocess @@ -21,18 +23,24 @@ STDLIB_DIR = os.path.join(ROOT_DIR, 'Lib') MODULES_DIR = os.path.join(ROOT_DIR, 'Python/frozen_modules') TOOL = os.path.join(ROOT_DIR, 'Programs', '_freeze_module') +MANIFEST = os.path.join(MODULES_DIR, 'MANIFEST') FROZEN_FILE = os.path.join(ROOT_DIR, 'Python', 'frozen.c') MAKEFILE = os.path.join(ROOT_DIR, 'Makefile.pre.in') PCBUILD_PROJECT = os.path.join(ROOT_DIR, 'PCbuild', '_freeze_module.vcxproj') PCBUILD_FILTERS = os.path.join(ROOT_DIR, 'PCbuild', '_freeze_module.vcxproj.filters') +TEST_CTYPES = os.path.join(STDLIB_DIR, 'ctypes', 'test', 'test_values.py') # These are modules that get frozen. FROZEN = [ # See parse_frozen_spec() for the format. # In cases where the frozenid is duplicated, the first one is re-used. - ('importlib', [ + ('import system', [ + # These frozen modules are necessary for bootstrapping + # the import system. 'importlib._bootstrap : _frozen_importlib', 'importlib._bootstrap_external : _frozen_importlib_external', + # This module is important because some Python builds rely + # on a builtin zip file instead of a filesystem. 'zipimport', ]), ('Test module', [ @@ -41,13 +49,43 @@ FROZEN = [ 'hello : __phello__.spam', ]), ] +ESSENTIAL = { + 'importlib._bootstrap', + 'importlib._bootstrap_external', + 'zipimport', +} ####################################### # specs -def parse_frozen_spec(rawspec, knownids=None, section=None): - """Yield (frozenid, pyfile, modname, ispkg) for the corresponding modules. +def parse_frozen_specs(sectionalspecs=FROZEN, destdir=None): + seen = {} + for section, specs in sectionalspecs: + parsed = _parse_specs(specs, section, seen) + for frozenid, pyfile, modname, ispkg, section in parsed: + try: + source = seen[frozenid] + except KeyError: + source = FrozenSource.from_id(frozenid, pyfile, destdir) + seen[frozenid] = source + else: + assert not pyfile + yield FrozenModule(modname, ispkg, section, source) + + +def _parse_specs(specs, section, seen): + for spec in specs: + info, subs = _parse_spec(spec, seen, section) + yield info + for info in subs or (): + yield info + + +def _parse_spec(spec, knownids=None, section=None): + """Yield an info tuple for each module corresponding to the given spec. + + The info consists of: (frozenid, pyfile, modname, ispkg, section). Supported formats: @@ -74,7 +112,7 @@ def parse_frozen_spec(rawspec, knownids=None, section=None): Also, if "modname" has brackets then "frozenid" should not, and "pyfile" should have been provided.. """ - frozenid, _, remainder = rawspec.partition(':') + frozenid, _, remainder = spec.partition(':') modname, _, pyfile = remainder.partition('=') frozenid = frozenid.strip() modname = modname.strip() @@ -82,28 +120,28 @@ def parse_frozen_spec(rawspec, knownids=None, section=None): submodules = None if modname.startswith('<') and modname.endswith('>'): - assert check_modname(frozenid), rawspec + assert check_modname(frozenid), spec modname = modname[1:-1] - assert check_modname(modname), rawspec + assert check_modname(modname), spec if frozenid in knownids: pass elif pyfile: - assert not os.path.isdir(pyfile), rawspec + assert not os.path.isdir(pyfile), spec else: pyfile = _resolve_module(frozenid, ispkg=False) ispkg = True elif pyfile: - assert check_modname(frozenid), rawspec - assert not knownids or frozenid not in knownids, rawspec - assert check_modname(modname), rawspec - assert not os.path.isdir(pyfile), rawspec + assert check_modname(frozenid), spec + assert not knownids or frozenid not in knownids, spec + assert check_modname(modname), spec + assert not os.path.isdir(pyfile), spec ispkg = False elif knownids and frozenid in knownids: - assert check_modname(frozenid), rawspec - assert check_modname(modname), rawspec + assert check_modname(frozenid), spec + assert check_modname(modname), spec ispkg = False else: - assert not modname or check_modname(modname), rawspec + assert not modname or check_modname(modname), spec resolved = iter(resolve_modules(frozenid)) frozenid, pyfile, ispkg = next(resolved) if not modname: @@ -113,7 +151,7 @@ def parse_frozen_spec(rawspec, knownids=None, section=None): pkgname = modname def iter_subs(): for frozenid, pyfile, ispkg in resolved: - assert not knownids or frozenid not in knownids, (frozenid, rawspec) + assert not knownids or frozenid not in knownids, (frozenid, spec) if pkgname: modname = frozenid.replace(pkgid, pkgname, 1) else: @@ -121,59 +159,104 @@ def parse_frozen_spec(rawspec, knownids=None, section=None): yield frozenid, pyfile, modname, ispkg, section submodules = iter_subs() - spec = (frozenid, pyfile or None, modname, ispkg, section) - return spec, submodules + info = (frozenid, pyfile or None, modname, ispkg, section) + return info, submodules -def parse_frozen_specs(rawspecs=FROZEN): - seen = set() - for section, _specs in rawspecs: - for spec in _parse_frozen_specs(_specs, section, seen): - frozenid = spec[0] - yield spec - seen.add(frozenid) +####################################### +# frozen source files +class FrozenSource(namedtuple('FrozenSource', 'id pyfile frozenfile')): -def _parse_frozen_specs(rawspecs, section, seen): - for rawspec in rawspecs: - spec, subs = parse_frozen_spec(rawspec, seen, section) - yield spec - for spec in subs or (): - yield spec + @classmethod + def from_id(cls, frozenid, pyfile=None, destdir=MODULES_DIR): + if not pyfile: + pyfile = os.path.join(STDLIB_DIR, *frozenid.split('.')) + '.py' + #assert os.path.exists(pyfile), (frozenid, pyfile) + frozenfile = resolve_frozen_file(frozenid, destdir) + return cls(frozenid, pyfile, frozenfile) + @property + def frozenid(self): + return self.id -def resolve_frozen_file(spec, destdir=MODULES_DIR): - if isinstance(spec, str): - modname = spec - else: - _, frozenid, _, _, _= spec - modname = frozenid + @property + def modname(self): + if self.pyfile.startswith(STDLIB_DIR): + return self.id + return None + + @property + def symbol(self): + # This matches what we do in Programs/_freeze_module.c: + name = self.frozenid.replace('.', '_') + return '_Py_M__' + name + + +def resolve_frozen_file(frozenid, destdir=MODULES_DIR): + """Return the filename corresponding to the given frozen ID. + + For stdlib modules the ID will always be the full name + of the source module. + """ + if not isinstance(frozenid, str): + try: + frozenid = frozenid.frozenid + except AttributeError: + raise ValueError(f'unsupported frozenid {frozenid!r}') # We use a consistent naming convention for all frozen modules. - return os.path.join(destdir, modname.replace('.', '_')) + '.h' + frozenfile = frozenid.replace('.', '_') + '.h' + if not destdir: + return frozenfile + return os.path.join(destdir, frozenfile) -def resolve_frozen_files(specs, destdir=MODULES_DIR): - frozen = {} - frozenids = [] - lastsection = None - for spec in specs: - frozenid, pyfile, *_, section = spec - if frozenid in frozen: - if section is None: - lastsection = None - else: - assert section == lastsection - continue - lastsection = section - frozenfile = resolve_frozen_file(frozenid, destdir) - frozen[frozenid] = (pyfile, frozenfile) - frozenids.append(frozenid) - return frozen, frozenids +####################################### +# frozen modules + +class FrozenModule(namedtuple('FrozenModule', 'name ispkg section source')): + + def __getattr__(self, name): + return getattr(self.source, name) + + @property + def modname(self): + return self.name + + def summarize(self): + source = self.source.modname + if source: + source = f'<{source}>' + else: + source = os.path.relpath(self.pyfile, ROOT_DIR) + return { + 'module': self.name, + 'ispkg': self.ispkg, + 'source': source, + 'frozen': os.path.basename(self.frozenfile), + 'checksum': _get_checksum(self.frozenfile), + } + + +def _iter_sources(modules): + seen = set() + for mod in modules: + if mod.source not in seen: + yield mod.source + seen.add(mod.source) ####################################### # generic helpers +def _get_checksum(filename): + with open(filename) as infile: + text = infile.read() + m = hashlib.sha256() + m.update(text.encode('utf8')) + return m.hexdigest() + + def resolve_modules(modname, pyfile=None): if modname.startswith('<') and modname.endswith('>'): if pyfile: @@ -293,38 +376,68 @@ def replace_block(lines, start_marker, end_marker, replacements, file): return lines[:start_pos + 1] + replacements + lines[end_pos:] -def regen_frozen(specs, dest=MODULES_DIR): - if isinstance(dest, str): - frozen, frozenids = resolve_frozen_files(specs, destdir) - else: - frozenids, frozen = dest +def regen_manifest(modules): + header = 'module ispkg source frozen checksum'.split() + widths = [5] * len(header) + rows = [] + for mod in modules: + info = mod.summarize() + row = [] + for i, col in enumerate(header): + value = info[col] + if col == 'checksum': + value = value[:12] + elif col == 'ispkg': + value = 'YES' if value else 'no' + widths[i] = max(widths[i], len(value)) + row.append(value or '-') + rows.append(row) + + modlines = [ + '# The list of frozen modules with key information.', + '# Note that the "check_generated_files" CI job will identify', + '# when source files were changed but regen-frozen wasn\'t run.', + '# This file is auto-generated by Tools/scripts/freeze_modules.py.', + ' '.join(c.center(w) for c, w in zip(header, widths)).rstrip(), + ' '.join('-' * w for w in widths), + ] + for row in rows: + for i, w in enumerate(widths): + if header[i] == 'ispkg': + row[i] = row[i].center(w) + else: + row[i] = row[i].ljust(w) + modlines.append(' '.join(row).rstrip()) + print(f'# Updating {os.path.relpath(MANIFEST)}') + with open(MANIFEST, 'w') as outfile: + lines = (l + '\n' for l in modlines) + outfile.writelines(lines) + + +def regen_frozen(modules): headerlines = [] parentdir = os.path.dirname(FROZEN_FILE) - for frozenid in frozenids: + for src in _iter_sources(modules): # Adding a comment to separate sections here doesn't add much, # so we don't. - _, frozenfile = frozen[frozenid] - header = os.path.relpath(frozenfile, parentdir) + header = os.path.relpath(src.frozenfile, parentdir) headerlines.append(f'#include "{header}"') deflines = [] indent = ' ' lastsection = None - for spec in specs: - frozenid, _, modname, ispkg, section = spec - if section != lastsection: + for mod in modules: + if mod.section != lastsection: if lastsection is not None: deflines.append('') - deflines.append(f'/* {section} */') - lastsection = section + deflines.append(f'/* {mod.section} */') + lastsection = mod.section - # This matches what we do in Programs/_freeze_module.c: - name = frozenid.replace('.', '_') - symbol = '_Py_M__' + name - pkg = '-' if ispkg else '' + symbol = mod.symbol + pkg = '-' if mod.ispkg else '' line = ('{"%s", %s, %s(int)sizeof(%s)},' - % (modname, symbol, pkg, symbol)) + ) % (mod.name, symbol, pkg, symbol) # TODO: Consider not folding lines if len(line) < 80: deflines.append(line) @@ -361,22 +474,20 @@ def regen_frozen(specs, dest=MODULES_DIR): outfile.writelines(lines) -def regen_makefile(frozenids, frozen): +def regen_makefile(modules): frozenfiles = [] rules = [''] - for frozenid in frozenids: - pyfile, frozenfile = frozen[frozenid] - header = os.path.relpath(frozenfile, ROOT_DIR) + for src in _iter_sources(modules): + header = os.path.relpath(src.frozenfile, ROOT_DIR) relfile = header.replace('\\', '/') frozenfiles.append(f'\t\t$(srcdir)/{relfile} \\') - _pyfile = os.path.relpath(pyfile, ROOT_DIR) - tmpfile = f'{header}.new' + pyfile = os.path.relpath(src.pyfile, ROOT_DIR) # Note that we freeze the module to the target .h file # instead of going through an intermediate file like we used to. - rules.append(f'{header}: $(srcdir)/Programs/_freeze_module $(srcdir)/{_pyfile}') - rules.append(f'\t$(srcdir)/Programs/_freeze_module {frozenid} \\') - rules.append(f'\t\t$(srcdir)/{_pyfile} \\') + rules.append(f'{header}: Programs/_freeze_module {pyfile}') + rules.append(f'\t$(srcdir)/Programs/_freeze_module {src.frozenid} \\') + rules.append(f'\t\t$(srcdir)/{pyfile} \\') rules.append(f'\t\t$(srcdir)/{header}') rules.append('') @@ -402,22 +513,24 @@ def regen_makefile(frozenids, frozen): outfile.writelines(lines) -def regen_pcbuild(frozenids, frozen): +def regen_pcbuild(modules): projlines = [] filterlines = [] - for frozenid in frozenids: - pyfile, frozenfile = frozen[frozenid] - - _pyfile = os.path.relpath(pyfile, ROOT_DIR).replace('/', '\\') - header = os.path.relpath(frozenfile, ROOT_DIR).replace('/', '\\') + for src in _iter_sources(modules): + # For now we only require the essential frozen modules on Windows. + # See bpo-45186 and bpo-45188. + if src.id not in ESSENTIAL and src.id != 'hello': + continue + pyfile = os.path.relpath(src.pyfile, ROOT_DIR).replace('/', '\\') + header = os.path.relpath(src.frozenfile, ROOT_DIR).replace('/', '\\') intfile = header.split('\\')[-1].strip('.h') + '.g.h' - projlines.append(f' <None Include="..\\{_pyfile}">') - projlines.append(f' <ModName>{frozenid}</ModName>') + projlines.append(f' <None Include="..\\{pyfile}">') + projlines.append(f' <ModName>{src.frozenid}</ModName>') projlines.append(f' <IntFile>$(IntDir){intfile}</IntFile>') projlines.append(f' <OutFile>$(PySourcePath){header}</OutFile>') projlines.append(f' </None>') - filterlines.append(f' <None Include="..\\{_pyfile}">') + filterlines.append(f' <None Include="..\\{pyfile}">') filterlines.append(' <Filter>Python Files</Filter>') filterlines.append(' </None>') @@ -451,7 +564,7 @@ def regen_pcbuild(frozenids, frozen): def freeze_module(modname, pyfile=None, destdir=MODULES_DIR): """Generate the frozen module .h file for the given module.""" for modname, pyfile, ispkg in resolve_modules(modname, pyfile): - frozenfile = _resolve_frozen(modname, destdir) + frozenfile = resolve_frozen_file(modname, destdir) _freeze_module(modname, pyfile, frozenfile) @@ -459,7 +572,7 @@ def _freeze_module(frozenid, pyfile, frozenfile): tmpfile = frozenfile + '.new' argv = [TOOL, frozenid, pyfile, tmpfile] - print('#', ' '.join(os.path.relpath(a) for a in argv)) + print('#', ' '.join(os.path.relpath(a) for a in argv), flush=True) try: subprocess.run(argv, check=True) except subprocess.CalledProcessError: @@ -475,18 +588,17 @@ def _freeze_module(frozenid, pyfile, frozenfile): def main(): # Expand the raw specs, preserving order. - specs = list(parse_frozen_specs()) - frozen, frozenids = resolve_frozen_files(specs, MODULES_DIR) - - # Regen build-related files. - regen_frozen(specs, (frozenids, frozen)) - regen_makefile(frozenids, frozen) - regen_pcbuild(frozenids, frozen) + modules = list(parse_frozen_specs(destdir=MODULES_DIR)) # Freeze the target modules. - for frozenid in frozenids: - pyfile, frozenfile = frozen[frozenid] - _freeze_module(frozenid, pyfile, frozenfile) + for src in _iter_sources(modules): + _freeze_module(src.frozenid, src.pyfile, src.frozenfile) + + # Regen build-related files. + regen_manifest(modules) + regen_frozen(modules) + regen_makefile(modules) + regen_pcbuild(modules) if __name__ == '__main__': diff --git a/Tools/scripts/generate_stdlib_module_names.py b/Tools/scripts/generate_stdlib_module_names.py index 716a6d4..325ae20 100644 --- a/Tools/scripts/generate_stdlib_module_names.py +++ b/Tools/scripts/generate_stdlib_module_names.py @@ -117,9 +117,19 @@ def list_frozen(names): cmd = ' '.join(args) print(f"{cmd} failed with exitcode {exitcode}") sys.exit(exitcode) + submodules = set() for line in proc.stdout.splitlines(): name = line.strip() - names.add(name) + if '.' in name: + submodules.add(name) + else: + names.add(name) + # Make sure all frozen submodules have a known parent. + for name in list(submodules): + if name.partition('.')[0] in names: + submodules.remove(name) + if submodules: + raise Exception(f'unexpected frozen submodules: {sorted(submodules)}') def list_modules(): |