diff options
author | Guido van Rossum <guido@python.org> | 2021-11-11 02:01:53 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-11-11 02:01:53 (GMT) |
commit | 1cbaa505d007e11c4a1f0d2073d72b6c02c7147c (patch) | |
tree | 671391d64df20ebcf2960fae83030e61f5527aa3 /Tools | |
parent | fc9b62281931da8d20f85d5ed44cfc24f068d3f4 (diff) | |
download | cpython-1cbaa505d007e11c4a1f0d2073d72b6c02c7147c.zip cpython-1cbaa505d007e11c4a1f0d2073d72b6c02c7147c.tar.gz cpython-1cbaa505d007e11c4a1f0d2073d72b6c02c7147c.tar.bz2 |
bpo-45696: Deep-freeze selected modules (GH-29118)
This gains 10% or more in startup time for `python -c pass` on UNIX-ish systems.
The Makefile.pre.in generating code builds on Eric's work for bpo-45020, but the .c file generator is new.
Windows version TBD.
Diffstat (limited to 'Tools')
-rw-r--r-- | Tools/freeze/test/freeze.py | 24 | ||||
-rw-r--r-- | Tools/scripts/deepfreeze.py | 418 | ||||
-rw-r--r-- | Tools/scripts/freeze_modules.py | 66 |
3 files changed, 491 insertions, 17 deletions
diff --git a/Tools/freeze/test/freeze.py b/Tools/freeze/test/freeze.py index 18a5d27..387f1ff 100644 --- a/Tools/freeze/test/freeze.py +++ b/Tools/freeze/test/freeze.py @@ -22,13 +22,23 @@ class UnsupportedError(Exception): def _run_quiet(cmd, cwd=None): #print(f'# {" ".join(shlex.quote(a) for a in cmd)}') - return subprocess.run( - cmd, - cwd=cwd, - capture_output=True, - text=True, - check=True, - ) + try: + return subprocess.run( + cmd, + cwd=cwd, + capture_output=True, + text=True, + check=True, + ) + except subprocess.CalledProcessError as err: + # Don't be quiet if things fail + print(f"{err.__class__.__name__}: {err}") + print("--- STDOUT ---") + print(err.stdout) + print("--- STDERR ---") + print(err.stderr) + print("---- END ----") + raise def _run_stdout(cmd, cwd=None): diff --git a/Tools/scripts/deepfreeze.py b/Tools/scripts/deepfreeze.py new file mode 100644 index 0000000..074127f --- /dev/null +++ b/Tools/scripts/deepfreeze.py @@ -0,0 +1,418 @@ +import argparse +import builtins +import collections +import contextlib +import os +import sys +import time +import types +import typing + +verbose = False + + +def make_string_literal(b: bytes) -> str: + res = ['"'] + if b.isascii() and b.decode("ascii").isprintable(): + res.append(b.decode("ascii").replace("\\", "\\\\").replace("\"", "\\\"")) + else: + for i in b: + res.append(f"\\x{i:02x}") + res.append('"') + return "".join(res) + + +CO_FAST_LOCAL = 0x20 +CO_FAST_CELL = 0x40 +CO_FAST_FREE = 0x80 + + +def get_localsplus(code: types.CodeType): + a = collections.defaultdict(int) + for name in code.co_varnames: + a[name] |= CO_FAST_LOCAL + for name in code.co_cellvars: + a[name] |= CO_FAST_CELL + for name in code.co_freevars: + a[name] |= CO_FAST_FREE + return tuple(a.keys()), bytes(a.values()) + + +def get_localsplus_counts(code: types.CodeType, + names: tuple[str, ...], + kinds: bytes) -> tuple[int, int, int, int]: + nlocals = 0 + nplaincellvars = 0 + ncellvars = 0 + nfreevars = 0 + for name, kind in zip(names, kinds, strict=True): + if kind & CO_FAST_LOCAL: + nlocals += 1 + if kind & CO_FAST_CELL: + ncellvars += 1 + elif kind & CO_FAST_CELL: + ncellvars += 1 + nplaincellvars += 1 + elif kind & CO_FAST_FREE: + nfreevars += 1 + assert nlocals == len(code.co_varnames) == code.co_nlocals + assert ncellvars == len(code.co_cellvars) + assert nfreevars == len(code.co_freevars) + assert len(names) == nlocals + nplaincellvars + nfreevars + return nlocals, nplaincellvars, ncellvars, nfreevars + + +PyUnicode_1BYTE_KIND = 1 +PyUnicode_2BYTE_KIND = 2 +PyUnicode_4BYTE_KIND = 4 + + +def analyze_character_width(s: str) -> tuple[int, bool]: + maxchar = ' ' + for c in s: + maxchar = max(maxchar, c) + ascii = False + if maxchar <= '\xFF': + kind = PyUnicode_1BYTE_KIND + ascii = maxchar <= '\x7F' + elif maxchar <= '\uFFFF': + kind = PyUnicode_2BYTE_KIND + else: + kind = PyUnicode_4BYTE_KIND + return kind, ascii + + +class Printer: + + def __init__(self, file: typing.TextIO): + self.level = 0 + self.file = file + self.cache: dict[tuple[type, object], str] = {} + self.hits, self.misses = 0, 0 + self.patchups: list[str] = [] + self.write('#include "Python.h"') + self.write('#include "internal/pycore_gc.h"') + self.write('#include "internal/pycore_code.h"') + self.write("") + + @contextlib.contextmanager + def indent(self) -> None: + save_level = self.level + try: + self.level += 1 + yield + finally: + self.level = save_level + + def write(self, arg: str) -> None: + self.file.writelines((" "*self.level, arg, "\n")) + + @contextlib.contextmanager + def block(self, prefix: str, suffix: str = "") -> None: + self.write(prefix + " {") + with self.indent(): + yield + self.write("}" + suffix) + + def object_head(self, typename: str) -> None: + with self.block(".ob_base =", ","): + self.write(f".ob_refcnt = 999999999,") + self.write(f".ob_type = &{typename},") + + def object_var_head(self, typename: str, size: int) -> None: + with self.block(".ob_base =", ","): + self.object_head(typename) + self.write(f".ob_size = {size},") + + def field(self, obj: object, name: str) -> None: + self.write(f".{name} = {getattr(obj, name)},") + + def generate_bytes(self, name: str, b: bytes) -> str: + self.write("static") + with self.indent(): + with self.block("struct"): + self.write("PyObject_VAR_HEAD") + self.write("Py_hash_t ob_shash;") + self.write(f"char ob_sval[{len(b) + 1}];") + with self.block(f"{name} =", ";"): + self.object_var_head("PyBytes_Type", len(b)) + self.write(".ob_shash = -1,") + self.write(f".ob_sval = {make_string_literal(b)},") + return f"& {name}.ob_base.ob_base" + + def generate_unicode(self, name: str, s: str) -> str: + kind, ascii = analyze_character_width(s) + if kind == PyUnicode_1BYTE_KIND: + datatype = "uint8_t" + elif kind == PyUnicode_2BYTE_KIND: + datatype = "uint16_t" + else: + datatype = "uint32_t" + self.write("static") + with self.indent(): + with self.block("struct"): + if ascii: + self.write("PyASCIIObject _ascii;") + else: + self.write("PyCompactUnicodeObject _compact;") + self.write(f"{datatype} _data[{len(s)+1}];") + with self.block(f"{name} =", ";"): + if ascii: + with self.block("._ascii =", ","): + self.object_head("PyUnicode_Type") + self.write(f".length = {len(s)},") + self.write(".hash = -1,") + with self.block(".state =", ","): + self.write(".kind = 1,") + self.write(".compact = 1,") + self.write(".ascii = 1,") + self.write(".ready = 1,") + self.write(f"._data = {make_string_literal(s.encode('ascii'))},") + return f"& {name}._ascii.ob_base" + else: + with self.block("._compact =", ","): + with self.block("._base =", ","): + self.object_head("PyUnicode_Type") + self.write(f".length = {len(s)},") + self.write(".hash = -1,") + with self.block(".state =", ","): + self.write(f".kind = {kind},") + self.write(".compact = 1,") + self.write(".ascii = 0,") + self.write(".ready = 1,") + with self.block(f"._data =", ","): + for i in range(0, len(s), 16): + data = s[i:i+16] + self.write(", ".join(map(str, map(ord, data))) + ",") + if kind == PyUnicode_2BYTE_KIND: + self.patchups.append("if (sizeof(wchar_t) == 2) {") + self.patchups.append(f" {name}._compact._base.wstr = (wchar_t *) {name}._data;") + self.patchups.append(f" {name}._compact.wstr_length = {len(s)};") + self.patchups.append("}") + if kind == PyUnicode_4BYTE_KIND: + self.patchups.append("if (sizeof(wchar_t) == 4) {") + self.patchups.append(f" {name}._compact._base.wstr = (wchar_t *) {name}._data;") + self.patchups.append(f" {name}._compact.wstr_length = {len(s)};") + self.patchups.append("}") + return f"& {name}._compact._base.ob_base" + + + def generate_code(self, name: str, code: types.CodeType) -> str: + # The ordering here matches PyCode_NewWithPosOnlyArgs() + # (but see below). + co_code = self.generate(name + "_code", code.co_code) + co_consts = self.generate(name + "_consts", code.co_consts) + co_names = self.generate(name + "_names", code.co_names) + co_varnames = self.generate(name + "_varnames", code.co_varnames) + co_freevars = self.generate(name + "_freevars", code.co_freevars) + co_cellvars = self.generate(name + "_cellvars", code.co_cellvars) + co_filename = self.generate(name + "_filename", code.co_filename) + co_name = self.generate(name + "_name", code.co_name) + co_qualname = self.generate(name + "_qualname", code.co_qualname) + co_linetable = self.generate(name + "_linetable", code.co_linetable) + co_endlinetable = self.generate(name + "_endlinetable", code.co_endlinetable) + co_columntable = self.generate(name + "_columntable", code.co_columntable) + co_exceptiontable = self.generate(name + "_exceptiontable", code.co_exceptiontable) + # These fields are not directly accessible + localsplusnames, localspluskinds = get_localsplus(code) + co_localsplusnames = self.generate(name + "_localsplusnames", localsplusnames) + co_localspluskinds = self.generate(name + "_localspluskinds", localspluskinds) + # Derived values + nlocals, nplaincellvars, ncellvars, nfreevars = \ + get_localsplus_counts(code, localsplusnames, localspluskinds) + with self.block(f"static struct PyCodeObject {name} =", ";"): + self.object_head("PyCode_Type") + # But the ordering here must match that in cpython/code.h + # (which is a pain because we tend to reorder those for perf) + # otherwise MSVC doesn't like it. + self.write(f".co_consts = {co_consts},") + self.write(f".co_names = {co_names},") + self.write(f".co_firstinstr = (_Py_CODEUNIT *) {co_code.removesuffix('.ob_base.ob_base')}.ob_sval,") + self.write(f".co_exceptiontable = {co_exceptiontable},") + self.field(code, "co_flags") + self.write(".co_warmup = QUICKENING_INITIAL_WARMUP_VALUE,") + self.field(code, "co_argcount") + self.field(code, "co_posonlyargcount") + self.field(code, "co_kwonlyargcount") + self.field(code, "co_stacksize") + self.field(code, "co_firstlineno") + self.write(f".co_code = {co_code},") + self.write(f".co_localsplusnames = {co_localsplusnames},") + self.write(f".co_localspluskinds = {co_localspluskinds},") + self.write(f".co_filename = {co_filename},") + self.write(f".co_name = {co_name},") + self.write(f".co_qualname = {co_qualname},") + self.write(f".co_linetable = {co_linetable},") + self.write(f".co_endlinetable = {co_endlinetable},") + self.write(f".co_columntable = {co_columntable},") + self.write(f".co_nlocalsplus = {len(localsplusnames)},") + self.field(code, "co_nlocals") + self.write(f".co_nplaincellvars = {nplaincellvars},") + self.write(f".co_ncellvars = {ncellvars},") + self.write(f".co_nfreevars = {nfreevars},") + self.write(f".co_varnames = {co_varnames},") + self.write(f".co_cellvars = {co_cellvars},") + self.write(f".co_freevars = {co_freevars},") + return f"& {name}.ob_base" + + def generate_tuple(self, name: str, t: tuple[object, ...]) -> str: + items = [self.generate(f"{name}_{i}", it) for i, it in enumerate(t)] + self.write("static") + with self.indent(): + with self.block("struct"): + self.write("PyGC_Head _gc_head;") + with self.block("struct", "_object;"): + self.write("PyObject_VAR_HEAD") + if t: + self.write(f"PyObject *ob_item[{len(t)}];") + with self.block(f"{name} =", ";"): + with self.block("._object =", ","): + self.object_var_head("PyTuple_Type", len(t)) + if items: + with self.block(f".ob_item =", ","): + for item in items: + self.write(item + ",") + return f"& {name}._object.ob_base.ob_base" + + def generate_int(self, name: str, i: int) -> str: + maxint = sys.maxsize + if maxint == 2**31 - 1: + digit = 2**15 + elif maxint == 2**63 - 1: + digit = 2**30 + else: + assert False, f"What int size is this system?!? {maxint=}" + sign = -1 if i < 0 else 0 if i == 0 else +1 + i = abs(i) + digits: list[int] = [] + while i: + i, rem = divmod(i, digit) + digits.append(rem) + self.write("static") + with self.indent(): + with self.block("struct"): + self.write("PyObject_VAR_HEAD") + self.write(f"digit ob_digit[{max(1, len(digits))}];") + with self.block(f"{name} =", ";"): + self.object_var_head("PyLong_Type", sign*len(digits)) + if digits: + ds = ", ".join(map(str, digits)) + self.write(f".ob_digit = {{ {ds} }},") + return f"& {name}.ob_base.ob_base" + + def generate_float(self, name: str, x: float) -> str: + with self.block(f"static PyFloatObject {name} =", ";"): + self.object_head("PyFloat_Type") + self.write(f".ob_fval = {x},") + return f"&{name}.ob_base" + + def generate_complex(self, name: str, z: complex) -> str: + with self.block(f"static PyComplexObject {name} =", ";"): + self.object_head("PyComplex_Type") + self.write(f".cval = {{ {z.real}, {z.imag} }},") + return f"&{name}.ob_base" + + def generate_frozenset(self, name: str, fs: frozenset[object]) -> str: + ret = self.generate_tuple(name, tuple(sorted(fs))) + self.write("// TODO: The above tuple should be a frozenset") + return ret + + def generate(self, name: str, obj: object) -> str: + # Use repr() in the key to distinguish -0.0 from +0.0 + key = (type(obj), obj, repr(obj)) + if key in self.cache: + self.hits += 1 + # print(f"Cache hit {key!r:.40}: {self.cache[key]!r:.40}") + return self.cache[key] + self.misses += 1 + match obj: + case types.CodeType() as code: + val = self.generate_code(name, code) + case tuple(t): + val = self.generate_tuple(name, t) + case str(s): + val = self.generate_unicode(name, s) + case bytes(b): + val = self.generate_bytes(name, b) + case True: + return "Py_True" + case False: + return "Py_False" + case int(i): + val = self.generate_int(name, i) + case float(x): + val = self.generate_float(name, x) + case complex() as z: + val = self.generate_complex(name, z) + case frozenset(fs): + val = self.generate_frozenset(name, fs) + case builtins.Ellipsis: + return "Py_Ellipsis" + case None: + return "Py_None" + case _: + raise TypeError( + f"Cannot generate code for {type(obj).__name__} object") + # print(f"Cache store {key!r:.40}: {val!r:.40}") + self.cache[key] = val + return val + + +EPILOGUE = """ +PyObject * +_Py_get_%%NAME%%_toplevel(void) +{ + do_patchups(); + return (PyObject *) &toplevel; +} +""" + +def generate(source: str, filename: str, modname: str, file: typing.TextIO) -> None: + code = compile(source, filename, "exec") + printer = Printer(file) + printer.generate("toplevel", code) + printer.write("") + with printer.block("static void do_patchups()"): + for p in printer.patchups: + printer.write(p) + here = os.path.dirname(__file__) + printer.write(EPILOGUE.replace("%%NAME%%", modname.replace(".", "_"))) + if verbose: + print(f"Cache hits: {printer.hits}, misses: {printer.misses}") + + +parser = argparse.ArgumentParser() +parser.add_argument("-m", "--module", help="Defaults to basename(file)") +parser.add_argument("-o", "--output", help="Defaults to MODULE.c") +parser.add_argument("-v", "--verbose", action="store_true", help="Print diagnostics") +parser.add_argument("file", help="Input file (required)") + + +@contextlib.contextmanager +def report_time(label: str): + t0 = time.time() + try: + yield + finally: + t1 = time.time() + if verbose: + print(f"{label}: {t1-t0:.3f} sec") + + +def main() -> None: + global verbose + args = parser.parse_args() + verbose = args.verbose + with open(args.file, encoding="utf-8") as f: + source = f.read() + modname = args.module or os.path.basename(args.file).removesuffix(".py") + output = args.output or modname + ".c" + with open(output, "w", encoding="utf-8") as file: + with report_time("generate"): + generate(source, f"<frozen {modname}>", modname, file) + if verbose: + print(f"Wrote {os.path.getsize(output)} bytes to {output}") + + +if __name__ == "__main__": + main() diff --git a/Tools/scripts/freeze_modules.py b/Tools/scripts/freeze_modules.py index 3614262..ccea4e1 100644 --- a/Tools/scripts/freeze_modules.py +++ b/Tools/scripts/freeze_modules.py @@ -528,6 +528,7 @@ def regen_frozen(modules): header = relpath_for_posix_display(src.frozenfile, parentdir) headerlines.append(f'#include "{header}"') + externlines = [] bootstraplines = [] stdliblines = [] testlines = [] @@ -547,17 +548,18 @@ def regen_frozen(modules): lines.append(f'/* {mod.section} */') lastsection = mod.section + # Also add a extern declaration for the corresponding + # deepfreeze-generated function. + orig_name = mod.source.id + code_name = orig_name.replace(".", "_") + get_code_name = "_Py_get_%s_toplevel" % code_name + externlines.append("extern PyObject *%s(void);" % get_code_name) + symbol = mod.symbol pkg = '-' if mod.ispkg else '' - line = ('{"%s", %s, %s(int)sizeof(%s)},' - ) % (mod.name, symbol, pkg, symbol) - # TODO: Consider not folding lines - if len(line) < 80: - lines.append(line) - else: - line1, _, line2 = line.rpartition(' ') - lines.append(line1) - lines.append(indent + line2) + line = ('{"%s", %s, %s(int)sizeof(%s), GET_CODE(%s)},' + ) % (mod.name, symbol, pkg, symbol, code_name) + lines.append(line) if mod.isalias: if not mod.orig: @@ -590,6 +592,13 @@ def regen_frozen(modules): ) lines = replace_block( lines, + "/* Start extern declarations */", + "/* End extern declarations */", + externlines, + FROZEN_FILE, + ) + lines = replace_block( + lines, "static const struct _frozen bootstrap_modules[] =", "/* bootstrap sentinel */", bootstraplines, @@ -622,7 +631,30 @@ def regen_frozen(modules): def regen_makefile(modules): pyfiles = [] frozenfiles = [] + deepfreezefiles = [] rules = [''] + deepfreezerules = [''] + + # TODO: Merge the two loops + for src in _iter_sources(modules): + header = relpath_for_posix_display(src.frozenfile, ROOT_DIR) + relfile = header.replace('\\', '/') + _pyfile = relpath_for_posix_display(src.pyfile, ROOT_DIR) + + # TODO: This is a bit hackish + xfile = relfile.replace("/frozen_modules/", "/deepfreeze/") + cfile = xfile[:-2] + ".c" + ofile = xfile[:-2] + ".o" + deepfreezefiles.append(f"\t\t{ofile} \\") + + # Also add a deepfreeze rule. + deepfreezerules.append(f'{cfile}: $(srcdir)/{_pyfile} $(DEEPFREEZE_DEPS)') + deepfreezerules.append(f'\t@echo "Deepfreezing {cfile} from {_pyfile}"') + deepfreezerules.append(f"\t@./$(BOOTSTRAP) \\") + deepfreezerules.append(f"\t\t$(srcdir)/Tools/scripts/deepfreeze.py \\") + deepfreezerules.append(f"\t\t$(srcdir)/{_pyfile} -m {src.frozenid} -o {cfile}") + deepfreezerules.append('') + for src in _iter_sources(modules): header = relpath_for_posix_display(src.frozenfile, ROOT_DIR) frozenfiles.append(f'\t\t{header} \\') @@ -639,6 +671,7 @@ def regen_makefile(modules): ]) pyfiles[-1] = pyfiles[-1].rstrip(" \\") frozenfiles[-1] = frozenfiles[-1].rstrip(" \\") + deepfreezefiles[-1] = deepfreezefiles[-1].rstrip(" \\") print(f'# Updating {os.path.relpath(MAKEFILE)}') with updating_file_with_tmpfile(MAKEFILE) as (infile, outfile): @@ -659,11 +692,25 @@ def regen_makefile(modules): ) lines = replace_block( lines, + "DEEPFREEZE_OBJS =", + "# End DEEPFREEZE_OBJS", + deepfreezefiles, + MAKEFILE, + ) + lines = replace_block( + lines, "# BEGIN: freezing modules", "# END: freezing modules", rules, MAKEFILE, ) + lines = replace_block( + lines, + "# BEGIN: deepfreeze modules", + "# END: deepfreeze modules", + deepfreezerules, + MAKEFILE, + ) outfile.writelines(lines) @@ -721,7 +768,6 @@ def freeze_module(modname, pyfile=None, destdir=MODULES_DIR): def _freeze_module(frozenid, pyfile, frozenfile, tmpsuffix): tmpfile = f'{frozenfile}.{int(time.time())}' - print(tmpfile) argv = [TOOL, frozenid, pyfile, tmpfile] print('#', ' '.join(os.path.relpath(a) for a in argv), flush=True) |