author     Victor Stinner <vstinner@python.org>  2022-10-17 10:01:00 (GMT)
committer  GitHub <noreply@github.com>           2022-10-17 10:01:00 (GMT)
commit     1863302d61a7a5dd8b8d345a00f0ee242c7c10bf (patch)
tree       a1e41af02147e2a14155d5b19d7b68bbb31c3f6f /Tools/build/deepfreeze.py
parent     eae7dad40255bad42e4abce53ff8143dcbc66af5 (diff)
gh-97669: Create Tools/build/ directory (#97963)
Create Tools/build/ directory. Move the following scripts from Tools/scripts/ to Tools/build/:

* check_extension_modules.py
* deepfreeze.py
* freeze_modules.py
* generate_global_objects.py
* generate_levenshtein_examples.py
* generate_opcode_h.py
* generate_re_casefix.py
* generate_sre_constants.py
* generate_stdlib_module_names.py
* generate_token.py
* parse_html5_entities.py
* smelly.py
* stable_abi.py
* umarshal.py
* update_file.py
* verify_ensurepip_wheels.py

Update references to these scripts.
Diffstat (limited to 'Tools/build/deepfreeze.py')
-rw-r--r--  Tools/build/deepfreeze.py  504
1 file changed, 504 insertions, 0 deletions
diff --git a/Tools/build/deepfreeze.py b/Tools/build/deepfreeze.py
new file mode 100644
index 0000000..28ac2b1
--- /dev/null
+++ b/Tools/build/deepfreeze.py
@@ -0,0 +1,504 @@
+"""Deep freeze
+
+The script may be executed by the _bootstrap_python interpreter.
+Shared library extension modules are not available in that case.
+On Windows, and in cross-compilation cases, it is executed
+by Python 3.10, and 3.11 features are not available.
+"""
+import argparse
+import ast
+import builtins
+import collections
+import contextlib
+import os
+import re
+import time
+import types
+from typing import Dict, FrozenSet, TextIO, Tuple
+
+import umarshal
+from generate_global_objects import get_identifiers_and_strings
+
+verbose = False
+identifiers, strings = get_identifiers_and_strings()
+
+# This must be kept in sync with opcode.py
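+# RESUME is used below to locate the first traceable instruction of each
+# code object when emitting _co_firsttraceable.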
+RESUME = 151
+
+def isprintable(b: bytes) -> bool:
+ return all(0x20 <= c < 0x7f for c in b)
+
+
+def make_string_literal(b: bytes) -> str:
+ res = ['"']
+ if isprintable(b):
+ res.append(b.decode("ascii").replace("\\", "\\\\").replace("\"", "\\\""))
+ else:
+ for i in b:
+ res.append(f"\\x{i:02x}")
+ res.append('"')
+ return "".join(res)
+
+
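+# Local-variable kind flags; these mirror the _PyLocals_Kind values used on
+# the C side (Include/internal/pycore_code.h).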
+CO_FAST_LOCAL = 0x20
+CO_FAST_CELL = 0x40
+CO_FAST_FREE = 0x80
+
+
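+# Merge co_varnames, co_cellvars and co_freevars into the single "localsplus"
+# layout used by 3.11 code objects, returning the names together with a
+# parallel bytes object of per-name kind flags.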
+def get_localsplus(code: types.CodeType):
+ a = collections.defaultdict(int)
+ for name in code.co_varnames:
+ a[name] |= CO_FAST_LOCAL
+ for name in code.co_cellvars:
+ a[name] |= CO_FAST_CELL
+ for name in code.co_freevars:
+ a[name] |= CO_FAST_FREE
+ return tuple(a.keys()), bytes(a.values())
+
+
+def get_localsplus_counts(code: types.CodeType,
+ names: Tuple[str, ...],
+ kinds: bytes) -> Tuple[int, int, int, int]:
+ nlocals = 0
+ nplaincellvars = 0
+ ncellvars = 0
+ nfreevars = 0
+ assert len(names) == len(kinds)
+ for name, kind in zip(names, kinds):
+ if kind & CO_FAST_LOCAL:
+ nlocals += 1
+ if kind & CO_FAST_CELL:
+ ncellvars += 1
+ elif kind & CO_FAST_CELL:
+ ncellvars += 1
+ nplaincellvars += 1
+ elif kind & CO_FAST_FREE:
+ nfreevars += 1
+ assert nlocals == len(code.co_varnames) == code.co_nlocals, \
+ (nlocals, len(code.co_varnames), code.co_nlocals)
+ assert ncellvars == len(code.co_cellvars)
+ assert nfreevars == len(code.co_freevars)
+ assert len(names) == nlocals + nplaincellvars + nfreevars
+ return nlocals, nplaincellvars, ncellvars, nfreevars
+
+
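+# Storage widths for str objects; these match the PyUnicode_*_KIND values
+# used by the C implementation (1, 2 or 4 bytes per code point).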
+PyUnicode_1BYTE_KIND = 1
+PyUnicode_2BYTE_KIND = 2
+PyUnicode_4BYTE_KIND = 4
+
+
+def analyze_character_width(s: str) -> Tuple[int, bool]:
+ maxchar = ' '
+ for c in s:
+ maxchar = max(maxchar, c)
+ ascii = False
+ if maxchar <= '\xFF':
+ kind = PyUnicode_1BYTE_KIND
+ ascii = maxchar <= '\x7F'
+ elif maxchar <= '\uFFFF':
+ kind = PyUnicode_2BYTE_KIND
+ else:
+ kind = PyUnicode_4BYTE_KIND
+ return kind, ascii
+
+
+def removesuffix(base: str, suffix: str) -> str:
+ if base.endswith(suffix):
+ return base[:len(base) - len(suffix)]
+ return base
+
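+# Printer walks Python objects (code, str, bytes, int, float, complex, tuple,
+# frozenset and the singletons) and writes equivalent statically initialized
+# C structs to the output file, caching results so each distinct constant is
+# emitted only once.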
+class Printer:
+
+ def __init__(self, file: TextIO) -> None:
+ self.level = 0
+ self.file = file
+ self.cache: Dict[tuple[type, object, str], str] = {}
+ self.hits, self.misses = 0, 0
+ self.patchups: list[str] = []
+ self.deallocs: list[str] = []
+ self.interns: list[str] = []
+ self.write('#include "Python.h"')
+ self.write('#include "internal/pycore_gc.h"')
+ self.write('#include "internal/pycore_code.h"')
+ self.write('#include "internal/pycore_frame.h"')
+ self.write('#include "internal/pycore_long.h"')
+ self.write("")
+
+ @contextlib.contextmanager
+ def indent(self) -> None:
+ save_level = self.level
+ try:
+ self.level += 1
+ yield
+ finally:
+ self.level = save_level
+
+ def write(self, arg: str) -> None:
+ self.file.writelines((" "*self.level, arg, "\n"))
+
+ @contextlib.contextmanager
+ def block(self, prefix: str, suffix: str = "") -> None:
+ self.write(prefix + " {")
+ with self.indent():
+ yield
+ self.write("}" + suffix)
+
+ def object_head(self, typename: str) -> None:
+ with self.block(".ob_base =", ","):
+ self.write(f".ob_refcnt = 999999999,")
+ self.write(f".ob_type = &{typename},")
+
+ def object_var_head(self, typename: str, size: int) -> None:
+ with self.block(".ob_base =", ","):
+ self.object_head(typename)
+ self.write(f".ob_size = {size},")
+
+ def field(self, obj: object, name: str) -> None:
+ self.write(f".{name} = {getattr(obj, name)},")
+
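+ # Emit a static struct laid out like PyBytesObject; empty and
+ # single-byte values reuse the interpreter's bytes singletons.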
+ def generate_bytes(self, name: str, b: bytes) -> str:
+ if b == b"":
+ return "(PyObject *)&_Py_SINGLETON(bytes_empty)"
+ if len(b) == 1:
+ return f"(PyObject *)&_Py_SINGLETON(bytes_characters[{b[0]}])"
+ self.write("static")
+ with self.indent():
+ with self.block("struct"):
+ self.write("PyObject_VAR_HEAD")
+ self.write("Py_hash_t ob_shash;")
+ self.write(f"char ob_sval[{len(b) + 1}];")
+ with self.block(f"{name} =", ";"):
+ self.object_var_head("PyBytes_Type", len(b))
+ self.write(".ob_shash = -1,")
+ self.write(f".ob_sval = {make_string_literal(b)},")
+ return f"& {name}.ob_base.ob_base"
+
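+ # Emit a static PyASCIIObject/PyCompactUnicodeObject, reusing the global
+ # strings and identifiers produced by generate_global_objects whenever
+ # possible.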
+ def generate_unicode(self, name: str, s: str) -> str:
+ if s in strings:
+ return f"&_Py_STR({strings[s]})"
+ if s in identifiers:
+ return f"&_Py_ID({s})"
+ if re.match(r'\A[A-Za-z0-9_]+\Z', s):
+ name = f"const_str_{s}"
+ kind, ascii = analyze_character_width(s)
+ if kind == PyUnicode_1BYTE_KIND:
+ datatype = "uint8_t"
+ elif kind == PyUnicode_2BYTE_KIND:
+ datatype = "uint16_t"
+ else:
+ datatype = "uint32_t"
+ self.write("static")
+ with self.indent():
+ with self.block("struct"):
+ if ascii:
+ self.write("PyASCIIObject _ascii;")
+ else:
+ self.write("PyCompactUnicodeObject _compact;")
+ self.write(f"{datatype} _data[{len(s)+1}];")
+ with self.block(f"{name} =", ";"):
+ if ascii:
+ with self.block("._ascii =", ","):
+ self.object_head("PyUnicode_Type")
+ self.write(f".length = {len(s)},")
+ self.write(".hash = -1,")
+ with self.block(".state =", ","):
+ self.write(".kind = 1,")
+ self.write(".compact = 1,")
+ self.write(".ascii = 1,")
+ self.write(f"._data = {make_string_literal(s.encode('ascii'))},")
+ return f"& {name}._ascii.ob_base"
+ else:
+ with self.block("._compact =", ","):
+ with self.block("._base =", ","):
+ self.object_head("PyUnicode_Type")
+ self.write(f".length = {len(s)},")
+ self.write(".hash = -1,")
+ with self.block(".state =", ","):
+ self.write(f".kind = {kind},")
+ self.write(".compact = 1,")
+ self.write(".ascii = 0,")
+ utf8 = s.encode('utf-8')
+ self.write(f'.utf8 = {make_string_literal(utf8)},')
+ self.write(f'.utf8_length = {len(utf8)},')
+ with self.block(f"._data =", ","):
+ for i in range(0, len(s), 16):
+ data = s[i:i+16]
+ self.write(", ".join(map(str, map(ord, data))) + ",")
+ return f"& {name}._compact._base.ob_base"
+
+
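+ # Emit a static code object, generating all of its constituent constants
+ # first, and record the cleanup/intern calls later written into
+ # _Py_Deepfreeze_Fini() and _Py_Deepfreeze_Init().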
+ def generate_code(self, name: str, code: types.CodeType) -> str:
+ # The ordering here matches PyCode_NewWithPosOnlyArgs()
+ # (but see below).
+ co_consts = self.generate(name + "_consts", code.co_consts)
+ co_names = self.generate(name + "_names", code.co_names)
+ co_filename = self.generate(name + "_filename", code.co_filename)
+ co_name = self.generate(name + "_name", code.co_name)
+ co_qualname = self.generate(name + "_qualname", code.co_qualname)
+ co_linetable = self.generate(name + "_linetable", code.co_linetable)
+ co_exceptiontable = self.generate(name + "_exceptiontable", code.co_exceptiontable)
+ # These fields are not directly accessible
+ localsplusnames, localspluskinds = get_localsplus(code)
+ co_localsplusnames = self.generate(name + "_localsplusnames", localsplusnames)
+ co_localspluskinds = self.generate(name + "_localspluskinds", localspluskinds)
+ # Derived values
+ nlocals, nplaincellvars, ncellvars, nfreevars = \
+ get_localsplus_counts(code, localsplusnames, localspluskinds)
+ co_code_adaptive = make_string_literal(code.co_code)
+ self.write("static")
+ with self.indent():
+ self.write(f"struct _PyCode_DEF({len(code.co_code)})")
+ with self.block(f"{name} =", ";"):
+ self.object_var_head("PyCode_Type", len(code.co_code) // 2)
+ # But the ordering here must match that in cpython/code.h
+ # (which is a pain because we tend to reorder those for perf)
+ # otherwise MSVC doesn't like it.
+ self.write(f".co_consts = {co_consts},")
+ self.write(f".co_names = {co_names},")
+ self.write(f".co_exceptiontable = {co_exceptiontable},")
+ self.field(code, "co_flags")
+ self.write(".co_warmup = QUICKENING_INITIAL_WARMUP_VALUE,")
+ self.write("._co_linearray_entry_size = 0,")
+ self.field(code, "co_argcount")
+ self.field(code, "co_posonlyargcount")
+ self.field(code, "co_kwonlyargcount")
+ self.write(f".co_framesize = {code.co_stacksize + len(localsplusnames)} + FRAME_SPECIALS_SIZE,")
+ self.field(code, "co_stacksize")
+ self.field(code, "co_firstlineno")
+ self.write(f".co_nlocalsplus = {len(localsplusnames)},")
+ self.field(code, "co_nlocals")
+ self.write(f".co_nplaincellvars = {nplaincellvars},")
+ self.write(f".co_ncellvars = {ncellvars},")
+ self.write(f".co_nfreevars = {nfreevars},")
+ self.write(f".co_localsplusnames = {co_localsplusnames},")
+ self.write(f".co_localspluskinds = {co_localspluskinds},")
+ self.write(f".co_filename = {co_filename},")
+ self.write(f".co_name = {co_name},")
+ self.write(f".co_qualname = {co_qualname},")
+ self.write(f".co_linetable = {co_linetable},")
+ self.write(f"._co_cached = NULL,")
+ self.write("._co_linearray = NULL,")
+ self.write(f".co_code_adaptive = {co_code_adaptive},")
+ for i, op in enumerate(code.co_code[::2]):
+ if op == RESUME:
+ self.write(f"._co_firsttraceable = {i},")
+ break
+ name_as_code = f"(PyCodeObject *)&{name}"
+ self.deallocs.append(f"_PyStaticCode_Dealloc({name_as_code});")
+ self.interns.append(f"_PyStaticCode_InternStrings({name_as_code})")
+ return f"& {name}.ob_base.ob_base"
+
+ def generate_tuple(self, name: str, t: Tuple[object, ...]) -> str:
+ if len(t) == 0:
+ return f"(PyObject *)& _Py_SINGLETON(tuple_empty)"
+ items = [self.generate(f"{name}_{i}", it) for i, it in enumerate(t)]
+ self.write("static")
+ with self.indent():
+ with self.block("struct"):
+ self.write("PyGC_Head _gc_head;")
+ with self.block("struct", "_object;"):
+ self.write("PyObject_VAR_HEAD")
+ if t:
+ self.write(f"PyObject *ob_item[{len(t)}];")
+ with self.block(f"{name} =", ";"):
+ with self.block("._object =", ","):
+ self.object_var_head("PyTuple_Type", len(t))
+ if items:
+ with self.block(f".ob_item =", ","):
+ for item in items:
+ self.write(item + ",")
+ return f"& {name}._object.ob_base.ob_base"
+
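+ # Emit a static int whose ob_digit array is built for the given digit
+ # base; as in longobject.c, the sign is encoded in ob_size.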
+ def _generate_int_for_bits(self, name: str, i: int, digit: int) -> None:
+ sign = -1 if i < 0 else 0 if i == 0 else +1
+ i = abs(i)
+ digits: list[int] = []
+ while i:
+ i, rem = divmod(i, digit)
+ digits.append(rem)
+ self.write("static")
+ with self.indent():
+ with self.block("struct"):
+ self.write("PyObject_VAR_HEAD")
+ self.write(f"digit ob_digit[{max(1, len(digits))}];")
+ with self.block(f"{name} =", ";"):
+ self.object_var_head("PyLong_Type", sign*len(digits))
+ if digits:
+ ds = ", ".join(map(str, digits))
+ self.write(f".ob_digit = {{ {ds} }},")
+
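+ # Small ints reuse the interpreter's small-int table; values that do not
+ # fit in a single 15-bit digit are emitted once per supported
+ # PYLONG_BITS_IN_DIGIT setting, guarded by #if/#elif.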
+ def generate_int(self, name: str, i: int) -> str:
+ if -5 <= i <= 256:
+ return f"(PyObject *)&_PyLong_SMALL_INTS[_PY_NSMALLNEGINTS + {i}]"
+ if i >= 0:
+ name = f"const_int_{i}"
+ else:
+ name = f"const_int_negative_{abs(i)}"
+ if abs(i) < 2**15:
+ self._generate_int_for_bits(name, i, 2**15)
+ else:
+ connective = "if"
+ for bits_in_digit in 15, 30:
+ self.write(f"#{connective} PYLONG_BITS_IN_DIGIT == {bits_in_digit}")
+ self._generate_int_for_bits(name, i, 2**bits_in_digit)
+ connective = "elif"
+ self.write("#else")
+ self.write('#error "PYLONG_BITS_IN_DIGIT should be 15 or 30"')
+ self.write("#endif")
+ # If neither clause applies, it won't compile
+ return f"& {name}.ob_base.ob_base"
+
+ def generate_float(self, name: str, x: float) -> str:
+ with self.block(f"static PyFloatObject {name} =", ";"):
+ self.object_head("PyFloat_Type")
+ self.write(f".ob_fval = {x},")
+ return f"&{name}.ob_base"
+
+ def generate_complex(self, name: str, z: complex) -> str:
+ with self.block(f"static PyComplexObject {name} =", ";"):
+ self.object_head("PyComplex_Type")
+ self.write(f".cval = {{ {z.real}, {z.imag} }},")
+ return f"&{name}.ob_base"
+
+ def generate_frozenset(self, name: str, fs: FrozenSet[object]) -> str:
+ try:
+ fs = sorted(fs)
+ except TypeError:
+ # frozen set with incompatible types, fallback to repr()
+ fs = sorted(fs, key=repr)
+ ret = self.generate_tuple(name, tuple(fs))
+ self.write("// TODO: The above tuple should be a frozenset")
+ return ret
+
+ def generate_file(self, module: str, code: object) -> None:
+ module = module.replace(".", "_")
+ self.generate(f"{module}_toplevel", code)
+ with self.block(f"static void {module}_do_patchups(void)"):
+ for p in self.patchups:
+ self.write(p)
+ self.patchups.clear()
+ self.write(EPILOGUE.replace("%%NAME%%", module))
+
+ def generate(self, name: str, obj: object) -> str:
+ # Use repr() in the key to distinguish -0.0 from +0.0
+ key = (type(obj), obj, repr(obj))
+ if key in self.cache:
+ self.hits += 1
+ # print(f"Cache hit {key!r:.40}: {self.cache[key]!r:.40}")
+ return self.cache[key]
+ self.misses += 1
+ if isinstance(obj, (types.CodeType, umarshal.Code)):
+ val = self.generate_code(name, obj)
+ elif isinstance(obj, tuple):
+ val = self.generate_tuple(name, obj)
+ elif isinstance(obj, str):
+ val = self.generate_unicode(name, obj)
+ elif isinstance(obj, bytes):
+ val = self.generate_bytes(name, obj)
+ elif obj is True:
+ return "Py_True"
+ elif obj is False:
+ return "Py_False"
+ elif isinstance(obj, int):
+ val = self.generate_int(name, obj)
+ elif isinstance(obj, float):
+ val = self.generate_float(name, obj)
+ elif isinstance(obj, complex):
+ val = self.generate_complex(name, obj)
+ elif isinstance(obj, frozenset):
+ val = self.generate_frozenset(name, obj)
+ elif obj is builtins.Ellipsis:
+ return "Py_Ellipsis"
+ elif obj is None:
+ return "Py_None"
+ else:
+ raise TypeError(
+ f"Cannot generate code for {type(obj).__name__} object")
+ # print(f"Cache store {key!r:.40}: {val!r:.40}")
+ self.cache[key] = val
+ return val
+
+
+EPILOGUE = """
+PyObject *
+_Py_get_%%NAME%%_toplevel(void)
+{
+ %%NAME%%_do_patchups();
+ return Py_NewRef((PyObject *) &%%NAME%%_toplevel);
+}
+"""
+
+FROZEN_COMMENT_C = "/* Auto-generated by Programs/_freeze_module.c */"
+FROZEN_COMMENT_PY = "/* Auto-generated by Programs/_freeze_module.py */"
+
+FROZEN_DATA_LINE = r"\s*(\d+,\s*)+\s*"
+
+
+def is_frozen_header(source: str) -> bool:
+ return source.startswith((FROZEN_COMMENT_C, FROZEN_COMMENT_PY))
+
+
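+# Strip the C boilerplate from a frozen module header, literal_eval the
+# remaining array of byte values, and unmarshal it back into a code object.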
+def decode_frozen_data(source: str) -> types.CodeType:
+ lines = source.splitlines()
+ while lines and re.match(FROZEN_DATA_LINE, lines[0]) is None:
+ del lines[0]
+ while lines and re.match(FROZEN_DATA_LINE, lines[-1]) is None:
+ del lines[-1]
+ values: Tuple[int, ...] = ast.literal_eval("".join(lines).strip())
+ data = bytes(values)
+ return umarshal.loads(data)
+
+
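+# Write a single C file containing every requested module's constants, plus
+# _Py_Deepfreeze_Init() to intern strings and _Py_Deepfreeze_Fini() to
+# release the static code objects at interpreter shutdown.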
+def generate(args: list[str], output: TextIO) -> None:
+ printer = Printer(output)
+ for arg in args:
+ file, modname = arg.rsplit(':', 1)
+ with open(file, "r", encoding="utf8") as fd:
+ source = fd.read()
+ if is_frozen_header(source):
+ code = decode_frozen_data(source)
+ else:
+ code = compile(source, f"<frozen {modname}>", "exec")
+ printer.generate_file(modname, code)
+ with printer.block(f"void\n_Py_Deepfreeze_Fini(void)"):
+ for p in printer.deallocs:
+ printer.write(p)
+ with printer.block(f"int\n_Py_Deepfreeze_Init(void)"):
+ for p in printer.interns:
+ with printer.block(f"if ({p} < 0)"):
+ printer.write("return -1;")
+ printer.write("return 0;")
+ if verbose:
+ print(f"Cache hits: {printer.hits}, misses: {printer.misses}")
+
+
+parser = argparse.ArgumentParser()
+parser.add_argument("-o", "--output", help="Defaults to deepfreeze.c", default="deepfreeze.c")
+parser.add_argument("-v", "--verbose", action="store_true", help="Print diagnostics")
+parser.add_argument('args', nargs="+", help="Input file and module name (required) in file:modname format")
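+# Example invocation (paths and module names are illustrative only):
+#   python Tools/build/deepfreeze.py \
+#       Python/frozen_modules/importlib._bootstrap.h:importlib._bootstrap \
+#       -o Python/deepfreeze/deepfreeze.c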
+
+@contextlib.contextmanager
+def report_time(label: str):
+ t0 = time.time()
+ try:
+ yield
+ finally:
+ t1 = time.time()
+ if verbose:
+ print(f"{label}: {t1-t0:.3f} sec")
+
+
+def main() -> None:
+ global verbose
+ args = parser.parse_args()
+ verbose = args.verbose
+ output = args.output
+ with open(output, "w", encoding="utf-8") as file:
+ with report_time("generate"):
+ generate(args.args, file)
+ if verbose:
+ print(f"Wrote {os.path.getsize(output)} bytes to {output}")
+
+
+if __name__ == "__main__":
+ main()