summaryrefslogtreecommitdiffstats
path: root/Tools
diff options
context:
space:
mode:
author: Guido van Rossum <guido@python.org> 2021-11-11 02:01:53 (GMT)
committer: GitHub <noreply@github.com> 2021-11-11 02:01:53 (GMT)
commit: 1cbaa505d007e11c4a1f0d2073d72b6c02c7147c (patch)
tree: 671391d64df20ebcf2960fae83030e61f5527aa3 /Tools
parent: fc9b62281931da8d20f85d5ed44cfc24f068d3f4 (diff)
downloadcpython-1cbaa505d007e11c4a1f0d2073d72b6c02c7147c.zip
cpython-1cbaa505d007e11c4a1f0d2073d72b6c02c7147c.tar.gz
cpython-1cbaa505d007e11c4a1f0d2073d72b6c02c7147c.tar.bz2
bpo-45696: Deep-freeze selected modules (GH-29118)
This gains 10% or more in startup time for `python -c pass` on UNIX-ish systems. The Makefile.pre.in generating code builds on Eric's work for bpo-45020, but the .c file generator is new. Windows version TBD.
Diffstat (limited to 'Tools')
-rw-r--r--Tools/freeze/test/freeze.py24
-rw-r--r--Tools/scripts/deepfreeze.py418
-rw-r--r--Tools/scripts/freeze_modules.py66
3 files changed, 491 insertions, 17 deletions
diff --git a/Tools/freeze/test/freeze.py b/Tools/freeze/test/freeze.py
index 18a5d27..387f1ff 100644
--- a/Tools/freeze/test/freeze.py
+++ b/Tools/freeze/test/freeze.py
@@ -22,13 +22,23 @@ class UnsupportedError(Exception):
def _run_quiet(cmd, cwd=None):
+ # Run cmd with its output captured; if the command exits with a non-zero
+ # status, print the captured stdout/stderr before re-raising the error.
#print(f'# {" ".join(shlex.quote(a) for a in cmd)}')
- return subprocess.run(
- cmd,
- cwd=cwd,
- capture_output=True,
- text=True,
- check=True,
- )
+ try:
+ return subprocess.run(
+ cmd,
+ cwd=cwd,
+ capture_output=True,
+ text=True,
+ check=True,
+ )
+ except subprocess.CalledProcessError as err:
+ # Don't be quiet if things fail
+ print(f"{err.__class__.__name__}: {err}")
+ print("--- STDOUT ---")
+ print(err.stdout)
+ print("--- STDERR ---")
+ print(err.stderr)
+ print("---- END ----")
+ raise
def _run_stdout(cmd, cwd=None):
diff --git a/Tools/scripts/deepfreeze.py b/Tools/scripts/deepfreeze.py
new file mode 100644
index 0000000..074127f
--- /dev/null
+++ b/Tools/scripts/deepfreeze.py
@@ -0,0 +1,418 @@
+import argparse
+import builtins
+import collections
+import contextlib
+import os
+import sys
+import time
+import types
+import typing
+
+# Diagnostics switch: main() sets this from the -v/--verbose option, and
+# generate() and report_time() only print their statistics when it is true.
+verbose = False
+
+
+def make_string_literal(b: bytes) -> str:
+    """Return a C string literal (including the quotes) that encodes *b*.
+
+    Printable ASCII data is emitted as readable text with the characters
+    that are special inside a C literal escaped.  Any other data is
+    emitted as a run of hexadecimal escapes, one per byte.
+    """
+    if b.isascii() and (text := b.decode("ascii")).isprintable():
+        # Backslashes are doubled first so that the backslashes added by
+        # the later replacements are left alone.  '?' is escaped too: a C
+        # compiler in strict ISO mode replaces trigraphs such as "??/"
+        # even inside string literals, which would silently change the
+        # data.  "\?" is the standard escape that prevents this.
+        body = (text.replace("\\", "\\\\")
+                    .replace("\"", "\\\"")
+                    .replace("?", "\\?"))
+    else:
+        # Every byte is escaped, so a hex escape is never followed by a
+        # literal character that could be read as one more hex digit.
+        body = "".join(f"\\x{i:02x}" for i in b)
+    return f'"{body}"'
+
+
+# Bit flags describing how a "localsplus" name is used by a code object.
+# NOTE(review): presumably these mirror the CO_FAST_* values in CPython's
+# internal code-object header -- confirm they stay in sync.
+CO_FAST_LOCAL = 0x20
+CO_FAST_CELL = 0x40
+CO_FAST_FREE = 0x80
+
+
+def get_localsplus(code: types.CodeType):
+    """Return the combined (names, kinds) tables for *code*.
+
+    Names are listed once each, in order of first appearance in
+    co_varnames, then co_cellvars, then co_freevars.  The matching byte
+    in ``kinds`` is the OR of the CO_FAST_* flags of every table the
+    name appears in.
+    """
+    kind_of: dict[str, int] = collections.defaultdict(int)
+    tables = (
+        (code.co_varnames, CO_FAST_LOCAL),
+        (code.co_cellvars, CO_FAST_CELL),
+        (code.co_freevars, CO_FAST_FREE),
+    )
+    for table, flag in tables:
+        for name in table:
+            kind_of[name] |= flag
+    return tuple(kind_of), bytes(kind_of.values())
+
+
+def get_localsplus_counts(code: types.CodeType,
+                          names: tuple[str, ...],
+                          kinds: bytes) -> tuple[int, int, int, int]:
+    """Return (nlocals, nplaincellvars, ncellvars, nfreevars) for *code*.
+
+    *names* and *kinds* are the tables produced by get_localsplus().
+    The counts are cross-checked against the attributes of *code*.
+
+    Raises ValueError if *names* and *kinds* differ in length.
+    """
+    flags = [kind for _name, kind in zip(names, kinds, strict=True)]
+    nlocals = sum(1 for kind in flags if kind & CO_FAST_LOCAL)
+    # Every cell counts here, whether or not it is also a local.
+    ncellvars = sum(1 for kind in flags if kind & CO_FAST_CELL)
+    # "Plain" cells are the cells that are not also locals.
+    nplaincellvars = sum(
+        1 for kind in flags
+        if kind & CO_FAST_CELL and not kind & CO_FAST_LOCAL)
+    # Precedence is local, then cell, then free: a name is only counted
+    # as free when neither of the other two bits is set.
+    nfreevars = sum(
+        1 for kind in flags
+        if kind & CO_FAST_FREE
+        and not kind & (CO_FAST_LOCAL | CO_FAST_CELL))
+    assert nlocals == len(code.co_varnames) == code.co_nlocals
+    assert ncellvars == len(code.co_cellvars)
+    assert nfreevars == len(code.co_freevars)
+    assert len(names) == nlocals + nplaincellvars + nfreevars
+    return nlocals, nplaincellvars, ncellvars, nfreevars
+
+
+# Storage width, in bytes per character, used for a string's data.
+PyUnicode_1BYTE_KIND = 1
+PyUnicode_2BYTE_KIND = 2
+PyUnicode_4BYTE_KIND = 4
+
+
+def analyze_character_width(s: str) -> tuple[int, bool]:
+    """Return (kind, ascii) for *s*.
+
+    ``kind`` is the PyUnicode_*_KIND constant wide enough for the largest
+    code point in *s*; ``ascii`` is true only when every character is in
+    the 7-bit range.  The empty string is classified as 1-byte ASCII.
+    """
+    widest = ord(max(s, default=" "))
+    if widest <= 0x7F:
+        return PyUnicode_1BYTE_KIND, True
+    if widest <= 0xFF:
+        return PyUnicode_1BYTE_KIND, False
+    if widest <= 0xFFFF:
+        return PyUnicode_2BYTE_KIND, False
+    return PyUnicode_4BYTE_KIND, False
+
+
+class Printer:
+    """Emit C source that defines Python constants as static C structures.
+
+    Each ``generate_*`` method writes one or more static definitions to
+    ``file`` and returns a string holding a C expression for the address
+    of the object it defined.  generate() is the entry point: it
+    dispatches on the type of the constant and reuses the expression of
+    an equal constant that was emitted earlier.
+    """
+
+    def __init__(self, file: typing.TextIO):
+        """Bind the printer to *file* and write the required #includes."""
+        self.level = 0  # current nesting depth of the emitted C code
+        self.file = file
+        # Keyed by (type, value, repr(value)); the value is the C
+        # expression returned for the object when it was first emitted.
+        self.cache: dict[tuple[type, object, str], str] = {}
+        self.hits, self.misses = 0, 0
+        # Lines of C code collected while generating; the module-level
+        # generate() writes them into the body of do_patchups().
+        self.patchups: list[str] = []
+        self.write('#include "Python.h"')
+        self.write('#include "internal/pycore_gc.h"')
+        self.write('#include "internal/pycore_code.h"')
+        self.write("")
+
+    @contextlib.contextmanager
+    def indent(self) -> typing.Iterator[None]:
+        """Context manager: indent everything written inside one level more."""
+        save_level = self.level
+        try:
+            self.level += 1
+            yield
+        finally:
+            self.level = save_level
+
+    def write(self, arg: str) -> None:
+        """Write *arg* as one line, prefixed by the current indentation."""
+        self.file.writelines((" "*self.level, arg, "\n"))
+
+    @contextlib.contextmanager
+    def block(self, prefix: str, suffix: str = "") -> typing.Iterator[None]:
+        """Context manager: write ``prefix {``, an indented body, ``}suffix``."""
+        self.write(prefix + " {")
+        with self.indent():
+            yield
+        self.write("}" + suffix)
+
+    def object_head(self, typename: str) -> None:
+        """Write the ``.ob_base`` initializer for an object of *typename*."""
+        with self.block(".ob_base =", ","):
+            self.write(".ob_refcnt = 999999999,")
+            self.write(f".ob_type = &{typename},")
+
+    def object_var_head(self, typename: str, size: int) -> None:
+        """Write the ``.ob_base`` initializer for a variable-size object."""
+        with self.block(".ob_base =", ","):
+            self.object_head(typename)
+            self.write(f".ob_size = {size},")
+
+    def field(self, obj: object, name: str) -> None:
+        """Write an initializer for field *name* using ``obj.<name>``."""
+        self.write(f".{name} = {getattr(obj, name)},")
+
+    def generate_bytes(self, name: str, b: bytes) -> str:
+        """Define bytes object *b* as static struct *name*."""
+        self.write("static")
+        with self.indent():
+            with self.block("struct"):
+                self.write("PyObject_VAR_HEAD")
+                self.write("Py_hash_t ob_shash;")
+                # One extra char leaves room for the literal's trailing NUL.
+                self.write(f"char ob_sval[{len(b) + 1}];")
+            with self.block(f"{name} =", ";"):
+                self.object_var_head("PyBytes_Type", len(b))
+                self.write(".ob_shash = -1,")
+                self.write(f".ob_sval = {make_string_literal(b)},")
+        return f"& {name}.ob_base.ob_base"
+
+    def generate_unicode(self, name: str, s: str) -> str:
+        """Define str object *s* as static struct *name*.
+
+        ASCII strings use a PyASCIIObject header followed by the data as
+        a C string literal; other strings use a PyCompactUnicodeObject
+        header followed by the code points as an array of integers.
+        """
+        kind, ascii = analyze_character_width(s)
+        if kind == PyUnicode_1BYTE_KIND:
+            datatype = "uint8_t"
+        elif kind == PyUnicode_2BYTE_KIND:
+            datatype = "uint16_t"
+        else:
+            datatype = "uint32_t"
+        self.write("static")
+        with self.indent():
+            with self.block("struct"):
+                if ascii:
+                    self.write("PyASCIIObject _ascii;")
+                else:
+                    self.write("PyCompactUnicodeObject _compact;")
+                self.write(f"{datatype} _data[{len(s)+1}];")
+            with self.block(f"{name} =", ";"):
+                if ascii:
+                    with self.block("._ascii =", ","):
+                        self.object_head("PyUnicode_Type")
+                        self.write(f".length = {len(s)},")
+                        self.write(".hash = -1,")
+                        with self.block(".state =", ","):
+                            self.write(".kind = 1,")
+                            self.write(".compact = 1,")
+                            self.write(".ascii = 1,")
+                            self.write(".ready = 1,")
+                    self.write(f"._data = {make_string_literal(s.encode('ascii'))},")
+                    return f"& {name}._ascii.ob_base"
+                else:
+                    with self.block("._compact =", ","):
+                        with self.block("._base =", ","):
+                            self.object_head("PyUnicode_Type")
+                            self.write(f".length = {len(s)},")
+                            self.write(".hash = -1,")
+                            with self.block(".state =", ","):
+                                self.write(f".kind = {kind},")
+                                self.write(".compact = 1,")
+                                self.write(".ascii = 0,")
+                                self.write(".ready = 1,")
+                    with self.block("._data =", ","):
+                        # Sixteen code points per output line.
+                        for i in range(0, len(s), 16):
+                            data = s[i:i+16]
+                            self.write(", ".join(map(str, map(ord, data))) + ",")
+                    if kind in (PyUnicode_2BYTE_KIND, PyUnicode_4BYTE_KIND):
+                        # Queue a patchup that points wstr at the character
+                        # data when wchar_t is exactly as wide as the
+                        # characters; the kind value equals that width.
+                        self.patchups.extend([
+                            f"if (sizeof(wchar_t) == {kind}) {{",
+                            f" {name}._compact._base.wstr = (wchar_t *) {name}._data;",
+                            f" {name}._compact.wstr_length = {len(s)};",
+                            "}",
+                        ])
+                    return f"& {name}._compact._base.ob_base"
+
+    def generate_code(self, name: str, code: types.CodeType) -> str:
+        """Define code object *code*, and everything it refers to, as *name*."""
+        # The ordering here matches PyCode_NewWithPosOnlyArgs()
+        # (but see below).
+        co_code = self.generate(name + "_code", code.co_code)
+        co_consts = self.generate(name + "_consts", code.co_consts)
+        co_names = self.generate(name + "_names", code.co_names)
+        co_varnames = self.generate(name + "_varnames", code.co_varnames)
+        co_freevars = self.generate(name + "_freevars", code.co_freevars)
+        co_cellvars = self.generate(name + "_cellvars", code.co_cellvars)
+        co_filename = self.generate(name + "_filename", code.co_filename)
+        co_name = self.generate(name + "_name", code.co_name)
+        co_qualname = self.generate(name + "_qualname", code.co_qualname)
+        co_linetable = self.generate(name + "_linetable", code.co_linetable)
+        co_endlinetable = self.generate(name + "_endlinetable", code.co_endlinetable)
+        co_columntable = self.generate(name + "_columntable", code.co_columntable)
+        co_exceptiontable = self.generate(name + "_exceptiontable", code.co_exceptiontable)
+        # These fields are not directly accessible
+        localsplusnames, localspluskinds = get_localsplus(code)
+        co_localsplusnames = self.generate(name + "_localsplusnames", localsplusnames)
+        co_localspluskinds = self.generate(name + "_localspluskinds", localspluskinds)
+        # Derived values
+        nlocals, nplaincellvars, ncellvars, nfreevars = \
+            get_localsplus_counts(code, localsplusnames, localspluskinds)
+        with self.block(f"static struct PyCodeObject {name} =", ";"):
+            self.object_head("PyCode_Type")
+            # But the ordering here must match that in cpython/code.h
+            # (which is a pain because we tend to reorder those for perf)
+            # otherwise MSVC doesn't like it.
+            self.write(f".co_consts = {co_consts},")
+            self.write(f".co_names = {co_names},")
+            # co_code is the address of the bytes object's header; strip
+            # the header suffix to get at the struct and its ob_sval data.
+            self.write(f".co_firstinstr = (_Py_CODEUNIT *) {co_code.removesuffix('.ob_base.ob_base')}.ob_sval,")
+            self.write(f".co_exceptiontable = {co_exceptiontable},")
+            self.field(code, "co_flags")
+            self.write(".co_warmup = QUICKENING_INITIAL_WARMUP_VALUE,")
+            self.field(code, "co_argcount")
+            self.field(code, "co_posonlyargcount")
+            self.field(code, "co_kwonlyargcount")
+            self.field(code, "co_stacksize")
+            self.field(code, "co_firstlineno")
+            self.write(f".co_code = {co_code},")
+            self.write(f".co_localsplusnames = {co_localsplusnames},")
+            self.write(f".co_localspluskinds = {co_localspluskinds},")
+            self.write(f".co_filename = {co_filename},")
+            self.write(f".co_name = {co_name},")
+            self.write(f".co_qualname = {co_qualname},")
+            self.write(f".co_linetable = {co_linetable},")
+            self.write(f".co_endlinetable = {co_endlinetable},")
+            self.write(f".co_columntable = {co_columntable},")
+            self.write(f".co_nlocalsplus = {len(localsplusnames)},")
+            self.field(code, "co_nlocals")
+            self.write(f".co_nplaincellvars = {nplaincellvars},")
+            self.write(f".co_ncellvars = {ncellvars},")
+            self.write(f".co_nfreevars = {nfreevars},")
+            self.write(f".co_varnames = {co_varnames},")
+            self.write(f".co_cellvars = {co_cellvars},")
+            self.write(f".co_freevars = {co_freevars},")
+        return f"& {name}.ob_base"
+
+    def generate_tuple(self, name: str, t: tuple[object, ...]) -> str:
+        """Define tuple *t* as static struct *name*, emitting its items first."""
+        items = [self.generate(f"{name}_{i}", it) for i, it in enumerate(t)]
+        self.write("static")
+        with self.indent():
+            with self.block("struct"):
+                self.write("PyGC_Head _gc_head;")
+                with self.block("struct", "_object;"):
+                    self.write("PyObject_VAR_HEAD")
+                    # The empty tuple gets no ob_item member at all.
+                    if t:
+                        self.write(f"PyObject *ob_item[{len(t)}];")
+            with self.block(f"{name} =", ";"):
+                with self.block("._object =", ","):
+                    self.object_var_head("PyTuple_Type", len(t))
+                    if items:
+                        with self.block(".ob_item =", ","):
+                            for item in items:
+                                self.write(item + ",")
+        return f"& {name}._object.ob_base.ob_base"
+
+    def generate_int(self, name: str, i: int) -> str:
+        """Define int *i* as static struct *name*.
+
+        The value is split into base-2**15 or base-2**30 digits, chosen
+        from sys.maxsize of the interpreter running this script.
+
+        Raises AssertionError if sys.maxsize is neither 2**31-1 nor 2**63-1.
+        """
+        maxint = sys.maxsize
+        if maxint == 2**31 - 1:
+            digit = 2**15
+        elif maxint == 2**63 - 1:
+            digit = 2**30
+        else:
+            # Raised explicitly rather than with ``assert False`` so that
+            # the check still fires when Python runs with -O.
+            raise AssertionError(f"What int size is this system?!? {maxint=}")
+        sign = -1 if i < 0 else 0 if i == 0 else +1
+        i = abs(i)
+        digits: list[int] = []
+        while i:
+            i, rem = divmod(i, digit)
+            digits.append(rem)
+        self.write("static")
+        with self.indent():
+            with self.block("struct"):
+                self.write("PyObject_VAR_HEAD")
+                # Zero has no digits but the array still needs one element.
+                self.write(f"digit ob_digit[{max(1, len(digits))}];")
+            with self.block(f"{name} =", ";"):
+                # The sign of ob_size carries the sign of the number.
+                self.object_var_head("PyLong_Type", sign*len(digits))
+                if digits:
+                    ds = ", ".join(map(str, digits))
+                    self.write(f".ob_digit = {{ {ds} }},")
+        return f"& {name}.ob_base.ob_base"
+
+    def generate_float(self, name: str, x: float) -> str:
+        """Define float *x* as static PyFloatObject *name*."""
+        with self.block(f"static PyFloatObject {name} =", ";"):
+            self.object_head("PyFloat_Type")
+            # NOTE(review): an infinite or NaN constant would be written
+            # as "inf" or "nan", which is not a valid C constant --
+            # confirm such values cannot reach here.
+            self.write(f".ob_fval = {x},")
+        return f"&{name}.ob_base"
+
+    def generate_complex(self, name: str, z: complex) -> str:
+        """Define complex *z* as static PyComplexObject *name*."""
+        with self.block(f"static PyComplexObject {name} =", ";"):
+            self.object_head("PyComplex_Type")
+            self.write(f".cval = {{ {z.real}, {z.imag} }},")
+        return f"&{name}.ob_base"
+
+    def generate_frozenset(self, name: str, fs: frozenset[object]) -> str:
+        """Define frozenset *fs* as *name*; currently emitted as a tuple."""
+        try:
+            members = sorted(fs)
+        except TypeError:
+            # Members of different types (say an int and None) cannot be
+            # compared with each other; order them by repr() instead so
+            # the set can still be emitted.
+            members = sorted(fs, key=repr)
+        ret = self.generate_tuple(name, tuple(members))
+        self.write("// TODO: The above tuple should be a frozenset")
+        return ret
+
+    def generate(self, name: str, obj: object) -> str:
+        """Return a C expression for constant *obj*, emitting it if needed.
+
+        *name* is used for the C definition when the object has to be
+        emitted.  True, False, None and Ellipsis map to the existing
+        Py_* singletons and are never emitted or cached.
+
+        Raises TypeError if *obj* is of a type that is not supported.
+        """
+        # Use repr() in the key to distinguish -0.0 from +0.0
+        key = (type(obj), obj, repr(obj))
+        if key in self.cache:
+            self.hits += 1
+            # print(f"Cache hit {key!r:.40}: {self.cache[key]!r:.40}")
+            return self.cache[key]
+        self.misses += 1
+        match obj:
+            case types.CodeType() as code:
+                val = self.generate_code(name, code)
+            case tuple(t):
+                val = self.generate_tuple(name, t)
+            case str(s):
+                val = self.generate_unicode(name, s)
+            case bytes(b):
+                val = self.generate_bytes(name, b)
+            # The bool singletons must be tested before int: bool is a
+            # subclass of int and would otherwise match int(i).
+            case True:
+                return "Py_True"
+            case False:
+                return "Py_False"
+            case int(i):
+                val = self.generate_int(name, i)
+            case float(x):
+                val = self.generate_float(name, x)
+            case complex() as z:
+                val = self.generate_complex(name, z)
+            case frozenset(fs):
+                val = self.generate_frozenset(name, fs)
+            case builtins.Ellipsis:
+                return "Py_Ellipsis"
+            case None:
+                return "Py_None"
+            case _:
+                raise TypeError(
+                    f"Cannot generate code for {type(obj).__name__} object")
+        # print(f"Cache store {key!r:.40}: {val!r:.40}")
+        self.cache[key] = val
+        return val
+
+
+# C source written at the end of every generated file.  generate() replaces
+# %%NAME%% with the module name, with dots turned into underscores.
+EPILOGUE = """
+PyObject *
+_Py_get_%%NAME%%_toplevel(void)
+{
+ do_patchups();
+ return (PyObject *) &toplevel;
+}
+"""
+
+def generate(source: str, filename: str, modname: str, file: typing.TextIO) -> None:
+    """Compile *source* and write its deep-frozen C form to *file*.
+
+    *filename* is passed to compile() as the file name of the code.
+    *modname*, with dots turned into underscores, names the generated
+    ``_Py_get_<modname>_toplevel()`` accessor function.
+    """
+    code = compile(source, filename, "exec")
+    printer = Printer(file)
+    printer.generate("toplevel", code)
+    printer.write("")
+    # The patchups were collected while the constants were generated.
+    with printer.block("static void do_patchups()"):
+        for p in printer.patchups:
+            printer.write(p)
+    printer.write(EPILOGUE.replace("%%NAME%%", modname.replace(".", "_")))
+    if verbose:
+        print(f"Cache hits: {printer.hits}, misses: {printer.misses}")
+
+# Command-line interface; main() parses sys.argv with this parser.
+parser = argparse.ArgumentParser()
+parser.add_argument("-m", "--module", help="Defaults to basename(file)")
+parser.add_argument("-o", "--output", help="Defaults to MODULE.c")
+parser.add_argument("-v", "--verbose", action="store_true", help="Print diagnostics")
+parser.add_argument("file", help="Input file (required)")
+
+
+@contextlib.contextmanager
+def report_time(label: str) -> typing.Iterator[None]:
+    """Context manager: time the enclosed block and report it if verbose.
+
+    The elapsed time is printed, prefixed by *label*, only when the
+    module-level ``verbose`` flag is set and the block did not raise.
+    """
+    # perf_counter() is the clock meant for measuring intervals; unlike
+    # time.time() it is not affected by adjustments of the system clock.
+    t0 = time.perf_counter()
+    try:
+        yield
+    finally:
+        t1 = time.perf_counter()
+    if verbose:
+        print(f"{label}: {t1-t0:.3f} sec")
+
+
+def main() -> None:
+    """Command-line entry point: deep-freeze one Python source file.
+
+    The module name defaults to the input's base name without ".py",
+    and the output path defaults to the module name plus ".c".
+    """
+    global verbose
+    options = parser.parse_args()
+    verbose = options.verbose
+    with open(options.file, encoding="utf-8") as src:
+        text = src.read()
+    if options.module:
+        module = options.module
+    else:
+        module = os.path.basename(options.file).removesuffix(".py")
+    if options.output:
+        target = options.output
+    else:
+        target = module + ".c"
+    # The timing is reported while the output file is still open.
+    with open(target, "w", encoding="utf-8") as out, report_time("generate"):
+        generate(text, f"<frozen {module}>", module, out)
+    if verbose:
+        print(f"Wrote {os.path.getsize(target)} bytes to {target}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/Tools/scripts/freeze_modules.py b/Tools/scripts/freeze_modules.py
index 3614262..ccea4e1 100644
--- a/Tools/scripts/freeze_modules.py
+++ b/Tools/scripts/freeze_modules.py
@@ -528,6 +528,7 @@ def regen_frozen(modules):
header = relpath_for_posix_display(src.frozenfile, parentdir)
headerlines.append(f'#include "{header}"')
+ externlines = []
bootstraplines = []
stdliblines = []
testlines = []
@@ -547,17 +548,18 @@ def regen_frozen(modules):
lines.append(f'/* {mod.section} */')
lastsection = mod.section
+ # Also add an extern declaration for the corresponding
+ # deepfreeze-generated function.
+ orig_name = mod.source.id
+ code_name = orig_name.replace(".", "_")
+ get_code_name = "_Py_get_%s_toplevel" % code_name
+ externlines.append("extern PyObject *%s(void);" % get_code_name)
+
symbol = mod.symbol
pkg = '-' if mod.ispkg else ''
- line = ('{"%s", %s, %s(int)sizeof(%s)},'
- ) % (mod.name, symbol, pkg, symbol)
- # TODO: Consider not folding lines
- if len(line) < 80:
- lines.append(line)
- else:
- line1, _, line2 = line.rpartition(' ')
- lines.append(line1)
- lines.append(indent + line2)
+ line = ('{"%s", %s, %s(int)sizeof(%s), GET_CODE(%s)},'
+ ) % (mod.name, symbol, pkg, symbol, code_name)
+ lines.append(line)
if mod.isalias:
if not mod.orig:
@@ -590,6 +592,13 @@ def regen_frozen(modules):
)
lines = replace_block(
lines,
+ "/* Start extern declarations */",
+ "/* End extern declarations */",
+ externlines,
+ FROZEN_FILE,
+ )
+ lines = replace_block(
+ lines,
"static const struct _frozen bootstrap_modules[] =",
"/* bootstrap sentinel */",
bootstraplines,
@@ -622,7 +631,30 @@ def regen_frozen(modules):
def regen_makefile(modules):
pyfiles = []
frozenfiles = []
+ deepfreezefiles = []
rules = ['']
+ deepfreezerules = ['']
+
+ # TODO: Merge the two loops
+ for src in _iter_sources(modules):
+ header = relpath_for_posix_display(src.frozenfile, ROOT_DIR)
+ relfile = header.replace('\\', '/')
+ _pyfile = relpath_for_posix_display(src.pyfile, ROOT_DIR)
+
+ # TODO: This is a bit hackish
+ xfile = relfile.replace("/frozen_modules/", "/deepfreeze/")
+ cfile = xfile[:-2] + ".c"
+ ofile = xfile[:-2] + ".o"
+ deepfreezefiles.append(f"\t\t{ofile} \\")
+
+ # Also add a deepfreeze rule.
+ deepfreezerules.append(f'{cfile}: $(srcdir)/{_pyfile} $(DEEPFREEZE_DEPS)')
+ deepfreezerules.append(f'\t@echo "Deepfreezing {cfile} from {_pyfile}"')
+ deepfreezerules.append(f"\t@./$(BOOTSTRAP) \\")
+ deepfreezerules.append(f"\t\t$(srcdir)/Tools/scripts/deepfreeze.py \\")
+ deepfreezerules.append(f"\t\t$(srcdir)/{_pyfile} -m {src.frozenid} -o {cfile}")
+ deepfreezerules.append('')
+
for src in _iter_sources(modules):
header = relpath_for_posix_display(src.frozenfile, ROOT_DIR)
frozenfiles.append(f'\t\t{header} \\')
@@ -639,6 +671,7 @@ def regen_makefile(modules):
])
pyfiles[-1] = pyfiles[-1].rstrip(" \\")
frozenfiles[-1] = frozenfiles[-1].rstrip(" \\")
+ deepfreezefiles[-1] = deepfreezefiles[-1].rstrip(" \\")
print(f'# Updating {os.path.relpath(MAKEFILE)}')
with updating_file_with_tmpfile(MAKEFILE) as (infile, outfile):
@@ -659,11 +692,25 @@ def regen_makefile(modules):
)
lines = replace_block(
lines,
+ "DEEPFREEZE_OBJS =",
+ "# End DEEPFREEZE_OBJS",
+ deepfreezefiles,
+ MAKEFILE,
+ )
+ lines = replace_block(
+ lines,
"# BEGIN: freezing modules",
"# END: freezing modules",
rules,
MAKEFILE,
)
+ lines = replace_block(
+ lines,
+ "# BEGIN: deepfreeze modules",
+ "# END: deepfreeze modules",
+ deepfreezerules,
+ MAKEFILE,
+ )
outfile.writelines(lines)
@@ -721,7 +768,6 @@ def freeze_module(modname, pyfile=None, destdir=MODULES_DIR):
def _freeze_module(frozenid, pyfile, frozenfile, tmpsuffix):
tmpfile = f'{frozenfile}.{int(time.time())}'
- print(tmpfile)
argv = [TOOL, frozenid, pyfile, tmpfile]
print('#', ' '.join(os.path.relpath(a) for a in argv), flush=True)