diff options
Diffstat (limited to 'Tools/scripts/freeze_modules.py')
-rw-r--r-- | Tools/scripts/freeze_modules.py | 496 |
1 files changed, 496 insertions, 0 deletions
diff --git a/Tools/scripts/freeze_modules.py b/Tools/scripts/freeze_modules.py new file mode 100644 index 0000000..4f60e1b --- /dev/null +++ b/Tools/scripts/freeze_modules.py @@ -0,0 +1,496 @@ +"""Freeze modules and regen related files (e.g. Python/frozen.c). + +See the notes at the top of Python/frozen.c for more info. +""" + +import os +import os.path +import subprocess +import sys +import textwrap + +from update_file import updating_file_with_tmpfile + + +SCRIPTS_DIR = os.path.abspath(os.path.dirname(__file__)) +TOOLS_DIR = os.path.dirname(SCRIPTS_DIR) +ROOT_DIR = os.path.dirname(TOOLS_DIR) + +STDLIB_DIR = os.path.join(ROOT_DIR, 'Lib') +# If MODULES_DIR is changed then the .gitattributes file needs to be updated. +MODULES_DIR = os.path.join(ROOT_DIR, 'Python/frozen_modules') +TOOL = os.path.join(ROOT_DIR, 'Programs', '_freeze_module') + +FROZEN_FILE = os.path.join(ROOT_DIR, 'Python', 'frozen.c') +MAKEFILE = os.path.join(ROOT_DIR, 'Makefile.pre.in') +PCBUILD_PROJECT = os.path.join(ROOT_DIR, 'PCbuild', '_freeze_module.vcxproj') +PCBUILD_FILTERS = os.path.join(ROOT_DIR, 'PCbuild', '_freeze_module.vcxproj.filters') + +# These are modules that get frozen. +FROZEN = [ + # See parse_frozen_spec() for the format. + # In cases where the frozenid is duplicated, the first one is re-used. + ('importlib', [ + 'importlib._bootstrap : _frozen_importlib', + 'importlib._bootstrap_external : _frozen_importlib_external', + 'zipimport', + ]), + ('Test module', [ + 'hello : __hello__ = ' + os.path.join(TOOLS_DIR, 'freeze', 'flag.py'), + 'hello : <__phello__>', + 'hello : __phello__.spam', + ]), +] + + +####################################### +# specs + +def parse_frozen_spec(rawspec, knownids=None, section=None): + """Yield (frozenid, pyfile, modname, ispkg) for the corresponding modules. + + Supported formats: + + frozenid + frozenid : modname + frozenid : modname = pyfile + + "frozenid" and "modname" must be valid module names (dot-separated + identifiers). If "modname" is not provided then "frozenid" is used. + If "pyfile" is not provided then the filename of the module + corresponding to "frozenid" is used. + + Angle brackets around a frozenid (e.g. '<encodings>") indicate + it is a package. This also means it must be an actual module + (i.e. "pyfile" cannot have been provided). Such values can have + patterns to expand submodules: + + <encodings.*> - also freeze all direct submodules + <encodings.**.*> - also freeze the full submodule tree + + As with "frozenid", angle brackets around "modname" indicate + it is a package. However, in this case "pyfile" should not + have been provided and patterns in "modname" are not supported. + Also, if "modname" has brackets then "frozenid" should not, + and "pyfile" should have been provided.. + """ + frozenid, _, remainder = rawspec.partition(':') + modname, _, pyfile = remainder.partition('=') + frozenid = frozenid.strip() + modname = modname.strip() + pyfile = pyfile.strip() + + submodules = None + if modname.startswith('<') and modname.endswith('>'): + assert check_modname(frozenid), rawspec + modname = modname[1:-1] + assert check_modname(modname), rawspec + if frozenid in knownids: + pass + elif pyfile: + assert not os.path.isdir(pyfile), rawspec + else: + pyfile = _resolve_module(frozenid, ispkg=False) + ispkg = True + elif pyfile: + assert check_modname(frozenid), rawspec + assert not knownids or frozenid not in knownids, rawspec + assert check_modname(modname), rawspec + assert not os.path.isdir(pyfile), rawspec + ispkg = False + elif knownids and frozenid in knownids: + assert check_modname(frozenid), rawspec + assert check_modname(modname), rawspec + ispkg = False + else: + assert not modname or check_modname(modname), rawspec + resolved = iter(resolve_modules(frozenid)) + frozenid, pyfile, ispkg = next(resolved) + if not modname: + modname = frozenid + if ispkg: + pkgid = frozenid + pkgname = modname + def iter_subs(): + for frozenid, pyfile, ispkg in resolved: + assert not knownids or frozenid not in knownids, (frozenid, rawspec) + if pkgname: + modname = frozenid.replace(pkgid, pkgname, 1) + else: + modname = frozenid + yield frozenid, pyfile, modname, ispkg, section + submodules = iter_subs() + + spec = (frozenid, pyfile or None, modname, ispkg, section) + return spec, submodules + + +def parse_frozen_specs(rawspecs=FROZEN): + seen = set() + for section, _specs in rawspecs: + for spec in _parse_frozen_specs(_specs, section, seen): + frozenid = spec[0] + yield spec + seen.add(frozenid) + + +def _parse_frozen_specs(rawspecs, section, seen): + for rawspec in rawspecs: + spec, subs = parse_frozen_spec(rawspec, seen, section) + yield spec + for spec in subs or (): + yield spec + + +def resolve_frozen_file(spec, destdir=MODULES_DIR): + if isinstance(spec, str): + modname = spec + else: + _, frozenid, _, _, _= spec + modname = frozenid + # We use a consistent naming convention for all frozen modules. + return os.path.join(destdir, modname.replace('.', '_')) + '.h' + + +def resolve_frozen_files(specs, destdir=MODULES_DIR): + frozen = {} + frozenids = [] + lastsection = None + for spec in specs: + frozenid, pyfile, *_, section = spec + if frozenid in frozen: + if section is None: + lastsection = None + else: + assert section == lastsection + continue + lastsection = section + frozenfile = resolve_frozen_file(frozenid, destdir) + frozen[frozenid] = (pyfile, frozenfile) + frozenids.append(frozenid) + return frozen, frozenids + + +####################################### +# generic helpers + +def resolve_modules(modname, pyfile=None): + if modname.startswith('<') and modname.endswith('>'): + if pyfile: + assert os.path.isdir(pyfile) or os.path.basename(pyfile) == '__init__.py', pyfile + ispkg = True + modname = modname[1:-1] + rawname = modname + # For now, we only expect match patterns at the end of the name. + _modname, sep, match = modname.rpartition('.') + if sep: + if _modname.endswith('.**'): + modname = _modname[:-3] + match = f'**.{match}' + elif match and not match.isidentifier(): + modname = _modname + # Otherwise it's a plain name so we leave it alone. + else: + match = None + else: + ispkg = False + rawname = modname + match = None + + if not check_modname(modname): + raise ValueError(f'not a valid module name ({rawname})') + + if not pyfile: + pyfile = _resolve_module(modname, ispkg=ispkg) + elif os.path.isdir(pyfile): + pyfile = _resolve_module(modname, pyfile, ispkg) + yield modname, pyfile, ispkg + + if match: + pkgdir = os.path.dirname(pyfile) + yield from iter_submodules(modname, pkgdir, match) + + +def check_modname(modname): + return all(n.isidentifier() for n in modname.split('.')) + + +def iter_submodules(pkgname, pkgdir=None, match='*'): + if not pkgdir: + pkgdir = os.path.join(STDLIB_DIR, *pkgname.split('.')) + if not match: + match = '**.*' + match_modname = _resolve_modname_matcher(match, pkgdir) + + def _iter_submodules(pkgname, pkgdir): + for entry in sorted(os.scandir(pkgdir), key=lambda e: e.name): + matched, recursive = match_modname(entry.name) + if not matched: + continue + modname = f'{pkgname}.{entry.name}' + if modname.endswith('.py'): + yield modname[:-3], entry.path, False + elif entry.is_dir(): + pyfile = os.path.join(entry.path, '__init__.py') + # We ignore namespace packages. + if os.path.exists(pyfile): + yield modname, pyfile, True + if recursive: + yield from _iter_submodules(modname, entry.path) + + return _iter_submodules(pkgname, pkgdir) + + +def _resolve_modname_matcher(match, rootdir=None): + if isinstance(match, str): + if match.startswith('**.'): + recursive = True + pat = match[3:] + assert match + else: + recursive = False + pat = match + + if pat == '*': + def match_modname(modname): + return True, recursive + else: + raise NotImplementedError(match) + elif callable(match): + match_modname = match(rootdir) + else: + raise ValueError(f'unsupported matcher {match!r}') + return match_modname + + +def _resolve_module(modname, pathentry=STDLIB_DIR, ispkg=False): + assert pathentry, pathentry + pathentry = os.path.normpath(pathentry) + assert os.path.isabs(pathentry) + if ispkg: + return os.path.join(pathentry, *modname.split('.'), '__init__.py') + return os.path.join(pathentry, *modname.split('.')) + '.py' + + +####################################### +# regenerating dependent files + +def find_marker(lines, marker, file): + for pos, line in enumerate(lines): + if marker in line: + return pos + raise Exception(f"Can't find {marker!r} in file {file}") + + +def replace_block(lines, start_marker, end_marker, replacements, file): + start_pos = find_marker(lines, start_marker, file) + end_pos = find_marker(lines, end_marker, file) + if end_pos <= start_pos: + raise Exception(f"End marker {end_marker!r} " + f"occurs before start marker {start_marker!r} " + f"in file {file}") + replacements = [line.rstrip() + os.linesep for line in replacements] + return lines[:start_pos + 1] + replacements + lines[end_pos:] + + +def regen_frozen(specs, dest=MODULES_DIR): + if isinstance(dest, str): + frozen, frozenids = resolve_frozen_files(specs, destdir) + else: + frozenids, frozen = dest + + headerlines = [] + parentdir = os.path.dirname(FROZEN_FILE) + for frozenid in frozenids: + # Adding a comment to separate sections here doesn't add much, + # so we don't. + _, frozenfile = frozen[frozenid] + header = os.path.relpath(frozenfile, parentdir) + headerlines.append(f'#include "{header}"') + + deflines = [] + indent = ' ' + lastsection = None + for spec in specs: + frozenid, _, modname, ispkg, section = spec + if section != lastsection: + if lastsection is not None: + deflines.append('') + deflines.append(f'/* {section} */') + lastsection = section + + # This matches what we do in Programs/_freeze_module.c: + name = frozenid.replace('.', '_') + symbol = '_Py_M__' + name + pkg = '-' if ispkg else '' + line = ('{"%s", %s, %s(int)sizeof(%s)},' + % (modname, symbol, pkg, symbol)) + # TODO: Consider not folding lines + if len(line) < 80: + deflines.append(line) + else: + line1, _, line2 = line.rpartition(' ') + deflines.append(line1) + deflines.append(indent + line2) + + if not deflines[0]: + del deflines[0] + for i, line in enumerate(deflines): + if line: + deflines[i] = indent + line + + print(f'# Updating {os.path.relpath(FROZEN_FILE)}') + with updating_file_with_tmpfile(FROZEN_FILE) as (infile, outfile): + lines = infile.readlines() + # TODO: Use more obvious markers, e.g. + # $START GENERATED FOOBAR$ / $END GENERATED FOOBAR$ + lines = replace_block( + lines, + "/* Includes for frozen modules: */", + "/* End includes */", + headerlines, + FROZEN_FILE, + ) + lines = replace_block( + lines, + "static const struct _frozen _PyImport_FrozenModules[] =", + "/* sentinel */", + deflines, + FROZEN_FILE, + ) + outfile.writelines(lines) + + +def regen_makefile(frozenids, frozen): + frozenfiles = [] + rules = [''] + for frozenid in frozenids: + pyfile, frozenfile = frozen[frozenid] + header = os.path.relpath(frozenfile, ROOT_DIR) + relfile = header.replace('\\', '/') + frozenfiles.append(f'\t\t$(srcdir)/{relfile} \\') + + _pyfile = os.path.relpath(pyfile, ROOT_DIR) + tmpfile = f'{header}.new' + # Note that we freeze the module to the target .h file + # instead of going through an intermediate file like we used to. + rules.append(f'{header}: $(srcdir)/Programs/_freeze_module $(srcdir)/{_pyfile}') + rules.append(f'\t$(srcdir)/Programs/_freeze_module {frozenid} \\') + rules.append(f'\t\t$(srcdir)/{_pyfile} \\') + rules.append(f'\t\t$(srcdir)/{header}') + rules.append('') + + frozenfiles[-1] = frozenfiles[-1].rstrip(" \\") + + print(f'# Updating {os.path.relpath(MAKEFILE)}') + with updating_file_with_tmpfile(MAKEFILE) as (infile, outfile): + lines = infile.readlines() + lines = replace_block( + lines, + "FROZEN_FILES =", + "# End FROZEN_FILES", + frozenfiles, + MAKEFILE, + ) + lines = replace_block( + lines, + "# BEGIN: freezing modules", + "# END: freezing modules", + rules, + MAKEFILE, + ) + outfile.writelines(lines) + + +def regen_pcbuild(frozenids, frozen): + projlines = [] + filterlines = [] + for frozenid in frozenids: + pyfile, frozenfile = frozen[frozenid] + + _pyfile = os.path.relpath(pyfile, ROOT_DIR).replace('/', '\\') + header = os.path.relpath(frozenfile, ROOT_DIR).replace('/', '\\') + intfile = header.split('\\')[-1].strip('.h') + '.g.h' + projlines.append(f' <None Include="..\\{_pyfile}">') + projlines.append(f' <ModName>{frozenid}</ModName>') + projlines.append(f' <IntFile>$(IntDir){intfile}</IntFile>') + projlines.append(f' <OutFile>$(PySourcePath){header}</OutFile>') + projlines.append(f' </None>') + + filterlines.append(f' <None Include="..\\{_pyfile}">') + filterlines.append(' <Filter>Python Files</Filter>') + filterlines.append(' </None>') + + print(f'# Updating {os.path.relpath(PCBUILD_PROJECT)}') + with updating_file_with_tmpfile(PCBUILD_PROJECT) as (infile, outfile): + lines = infile.readlines() + lines = replace_block( + lines, + '<!-- BEGIN frozen modules -->', + '<!-- END frozen modules -->', + projlines, + PCBUILD_PROJECT, + ) + outfile.writelines(lines) + print(f'# Updating {os.path.relpath(PCBUILD_FILTERS)}') + with updating_file_with_tmpfile(PCBUILD_FILTERS) as (infile, outfile): + lines = infile.readlines() + lines = replace_block( + lines, + '<!-- BEGIN frozen modules -->', + '<!-- END frozen modules -->', + filterlines, + PCBUILD_FILTERS, + ) + outfile.writelines(lines) + + +####################################### +# freezing modules + +def freeze_module(modname, pyfile=None, destdir=MODULES_DIR): + """Generate the frozen module .h file for the given module.""" + for modname, pyfile, ispkg in resolve_modules(modname, pyfile): + frozenfile = _resolve_frozen(modname, destdir) + _freeze_module(modname, pyfile, frozenfile) + + +def _freeze_module(frozenid, pyfile, frozenfile): + tmpfile = frozenfile + '.new' + + argv = [TOOL, frozenid, pyfile, tmpfile] + print('#', ' '.join(os.path.relpath(a) for a in argv)) + try: + subprocess.run(argv, check=True) + except subprocess.CalledProcessError: + if not os.path.exists(TOOL): + sys.exit(f'ERROR: missing {TOOL}; you need to run "make regen-frozen"') + raise # re-raise + + os.replace(tmpfile, frozenfile) + + +####################################### +# the script + +def main(): + # Expand the raw specs, preserving order. + specs = list(parse_frozen_specs()) + frozen, frozenids = resolve_frozen_files(specs, MODULES_DIR) + + # Regen build-related files. + regen_frozen(specs, (frozenids, frozen)) + regen_makefile(frozenids, frozen) + regen_pcbuild(frozenids, frozen) + + # Freeze the target modules. + for frozenid in frozenids: + pyfile, frozenfile = frozen[frozenid] + _freeze_module(frozenid, pyfile, frozenfile) + + +if __name__ == '__main__': + argv = sys.argv[1:] + if argv: + sys.exit('ERROR: got unexpected args {argv}') + main() |