diff options
author | Guido van Rossum <guido@python.org> | 2021-11-23 16:56:06 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-11-23 16:56:06 (GMT) |
commit | 5be98e57b3c3b36d1a1176b49c73b8822c6380e7 (patch) | |
tree | 4791beb2411c0112b19d8fa6b6e867bb33416299 /Tools/scripts | |
parent | ae1965ccb4b1fad63fab40fe8805d1b8247668d3 (diff) | |
download | cpython-5be98e57b3c3b36d1a1176b49c73b8822c6380e7.zip cpython-5be98e57b3c3b36d1a1176b49c73b8822c6380e7.tar.gz cpython-5be98e57b3c3b36d1a1176b49c73b8822c6380e7.tar.bz2 |
bpo-45873: Get rid of bootstrap_python (#29717)
Instead we use $(PYTHON_FOR_REGEN) .../deepfreeze.py with the
frozen .h file as input, as we did for Windows in bpo-45850.
We also get rid of the code that generates the .h files
when make regen-frozen is run (i.e., .../make_frozen.py),
and the MANIFEST file.
Restore Python 3.8 and 3.9 as Windows host Python.
Co-authored-by: Kumar Aditya <59607654+kumaraditya303@users.noreply.github.com>
Diffstat (limited to 'Tools/scripts')
-rw-r--r-- | Tools/scripts/deepfreeze.py | 87 | ||||
-rw-r--r-- | Tools/scripts/freeze_modules.py | 110 | ||||
-rw-r--r-- | Tools/scripts/umarshal.py | 237 |
3 files changed, 170 insertions, 264 deletions
diff --git a/Tools/scripts/deepfreeze.py b/Tools/scripts/deepfreeze.py index b6d52b7..b840c4b 100644 --- a/Tools/scripts/deepfreeze.py +++ b/Tools/scripts/deepfreeze.py @@ -7,7 +7,7 @@ import os import re import time import types -import typing +from typing import Dict, FrozenSet, Tuple, TextIO import umarshal @@ -42,13 +42,14 @@ def get_localsplus(code: types.CodeType): def get_localsplus_counts(code: types.CodeType, - names: tuple[str, ...], - kinds: bytes) -> tuple[int, int, int, int]: + names: Tuple[str, ...], + kinds: bytes) -> Tuple[int, int, int, int]: nlocals = 0 nplaincellvars = 0 ncellvars = 0 nfreevars = 0 - for name, kind in zip(names, kinds, strict=True): + assert len(names) == len(kinds) + for name, kind in zip(names, kinds): if kind & CO_FAST_LOCAL: nlocals += 1 if kind & CO_FAST_CELL: @@ -71,7 +72,7 @@ PyUnicode_2BYTE_KIND = 2 PyUnicode_4BYTE_KIND = 4 -def analyze_character_width(s: str) -> tuple[int, bool]: +def analyze_character_width(s: str) -> Tuple[int, bool]: maxchar = ' ' for c in s: maxchar = max(maxchar, c) @@ -86,12 +87,17 @@ def analyze_character_width(s: str) -> tuple[int, bool]: return kind, ascii +def removesuffix(base: str, suffix: str) -> str: + if base.endswith(suffix): + return base[:len(base) - len(suffix)] + return base + class Printer: - def __init__(self, file: typing.TextIO): + def __init__(self, file: TextIO): self.level = 0 self.file = file - self.cache: dict[tuple[type, object], str] = {} + self.cache: Dict[Tuple[type, object], str] = {} self.hits, self.misses = 0, 0 self.patchups: list[str] = [] self.write('#include "Python.h"') @@ -231,7 +237,7 @@ class Printer: # otherwise MSVC doesn't like it. 
self.write(f".co_consts = {co_consts},") self.write(f".co_names = {co_names},") - self.write(f".co_firstinstr = (_Py_CODEUNIT *) {co_code.removesuffix('.ob_base.ob_base')}.ob_sval,") + self.write(f".co_firstinstr = (_Py_CODEUNIT *) {removesuffix(co_code, '.ob_base.ob_base')}.ob_sval,") self.write(f".co_exceptiontable = {co_exceptiontable},") self.field(code, "co_flags") self.write(".co_warmup = QUICKENING_INITIAL_WARMUP_VALUE,") @@ -259,7 +265,7 @@ class Printer: self.write(f".co_freevars = {co_freevars},") return f"& {name}.ob_base" - def generate_tuple(self, name: str, t: tuple[object, ...]) -> str: + def generate_tuple(self, name: str, t: Tuple[object, ...]) -> str: items = [self.generate(f"{name}_{i}", it) for i, it in enumerate(t)] self.write("static") with self.indent(): @@ -323,7 +329,7 @@ class Printer: self.write(f".cval = {{ {z.real}, {z.imag} }},") return f"&{name}.ob_base" - def generate_frozenset(self, name: str, fs: frozenset[object]) -> str: + def generate_frozenset(self, name: str, fs: FrozenSet[object]) -> str: ret = self.generate_tuple(name, tuple(sorted(fs))) self.write("// TODO: The above tuple should be a frozenset") return ret @@ -336,34 +342,33 @@ class Printer: # print(f"Cache hit {key!r:.40}: {self.cache[key]!r:.40}") return self.cache[key] self.misses += 1 - match obj: - case types.CodeType() | umarshal.Code() as code: - val = self.generate_code(name, code) - case tuple(t): - val = self.generate_tuple(name, t) - case str(s): - val = self.generate_unicode(name, s) - case bytes(b): - val = self.generate_bytes(name, b) - case True: - return "Py_True" - case False: - return "Py_False" - case int(i): - val = self.generate_int(name, i) - case float(x): - val = self.generate_float(name, x) - case complex() as z: - val = self.generate_complex(name, z) - case frozenset(fs): - val = self.generate_frozenset(name, fs) - case builtins.Ellipsis: - return "Py_Ellipsis" - case None: - return "Py_None" - case _: - raise TypeError( - f"Cannot generate code 
for {type(obj).__name__} object") + if isinstance(obj, types.CodeType) or isinstance(obj, umarshal.Code): + val = self.generate_code(name, obj) + elif isinstance(obj, tuple): + val = self.generate_tuple(name, obj) + elif isinstance(obj, str): + val = self.generate_unicode(name, obj) + elif isinstance(obj, bytes): + val = self.generate_bytes(name, obj) + elif obj is True: + return "Py_True" + elif obj is False: + return "Py_False" + elif isinstance(obj, int): + val = self.generate_int(name, obj) + elif isinstance(obj, float): + val = self.generate_float(name, obj) + elif isinstance(obj, complex): + val = self.generate_complex(name, obj) + elif isinstance(obj, frozenset): + val = self.generate_frozenset(name, obj) + elif obj is builtins.Ellipsis: + return "Py_Ellipsis" + elif obj is None: + return "Py_None" + else: + raise TypeError( + f"Cannot generate code for {type(obj).__name__} object") # print(f"Cache store {key!r:.40}: {val!r:.40}") self.cache[key] = val return val @@ -393,12 +398,12 @@ def decode_frozen_data(source: str) -> types.CodeType: del lines[0] while lines and re.match(FROZEN_DATA_LINE, lines[-1]) is None: del lines[-1] - values: tuple[int, ...] = ast.literal_eval("".join(lines)) + values: Tuple[int, ...] 
= ast.literal_eval("".join(lines).strip()) data = bytes(values) return umarshal.loads(data) -def generate(source: str, filename: str, modname: str, file: typing.TextIO) -> None: +def generate(source: str, filename: str, modname: str, file: TextIO) -> None: if is_frozen_header(source): code = decode_frozen_data(source) else: @@ -439,7 +444,7 @@ def main() -> None: verbose = args.verbose with open(args.file, encoding="utf-8") as f: source = f.read() - modname = args.module or os.path.basename(args.file).removesuffix(".py") + modname = args.module or removesuffix(os.path.basename(args.file), ".py") output = args.output or modname + ".c" with open(output, "w", encoding="utf-8") as file: with report_time("generate"): diff --git a/Tools/scripts/freeze_modules.py b/Tools/scripts/freeze_modules.py index 61ccae6..4281b67 100644 --- a/Tools/scripts/freeze_modules.py +++ b/Tools/scripts/freeze_modules.py @@ -25,31 +25,6 @@ STDLIB_DIR = os.path.join(ROOT_DIR, 'Lib') # need to be updated. MODULES_DIR = os.path.join(ROOT_DIR, 'Python', 'frozen_modules') -if sys.platform != "win32": - TOOL = os.path.join(ROOT_DIR, 'Programs', '_freeze_module') - if not os.path.isfile(TOOL): - # When building out of the source tree, get the tool from directory - # of the Python executable - TOOL = os.path.dirname(sys.executable) - TOOL = os.path.join(TOOL, 'Programs', '_freeze_module') - TOOL = os.path.abspath(TOOL) - if not os.path.isfile(TOOL): - sys.exit("ERROR: missing _freeze_module") -else: - def find_tool(): - archs = ['amd64', 'win32'] - if platform.machine() == "ARM64": - archs.append('arm64') - for arch in archs: - for exe in ['_freeze_module.exe', '_freeze_module_d.exe']: - tool = os.path.join(ROOT_DIR, 'PCbuild', arch, exe) - if os.path.isfile(tool): - return tool - sys.exit("ERROR: missing _freeze_module.exe; you need to run PCbuild/build.bat") - TOOL = find_tool() - del find_tool - -MANIFEST = os.path.join(MODULES_DIR, 'MANIFEST') FROZEN_FILE = os.path.join(ROOT_DIR, 'Python', 
'frozen.c') MAKEFILE = os.path.join(ROOT_DIR, 'Makefile.pre.in') PCBUILD_PROJECT = os.path.join(ROOT_DIR, 'PCbuild', '_freeze_module.vcxproj') @@ -480,45 +455,6 @@ def replace_block(lines, start_marker, end_marker, replacements, file): return lines[:start_pos + 1] + replacements + lines[end_pos:] -def regen_manifest(modules): - header = 'module ispkg source frozen checksum'.split() - widths = [5] * len(header) - rows = [] - for mod in modules: - info = mod.summarize() - row = [] - for i, col in enumerate(header): - value = info[col] - if col == 'checksum': - value = value[:12] - elif col == 'ispkg': - value = 'YES' if value else 'no' - widths[i] = max(widths[i], len(value)) - row.append(value or '-') - rows.append(row) - - modlines = [ - '# The list of frozen modules with key information.', - '# Note that the "check_generated_files" CI job will identify', - '# when source files were changed but regen-frozen wasn\'t run.', - '# This file is auto-generated by Tools/scripts/freeze_modules.py.', - ' '.join(c.center(w) for c, w in zip(header, widths)).rstrip(), - ' '.join('-' * w for w in widths), - ] - for row in rows: - for i, w in enumerate(widths): - if header[i] == 'ispkg': - row[i] = row[i].center(w) - else: - row[i] = row[i].ljust(w) - modlines.append(' '.join(row).rstrip()) - - print(f'# Updating {os.path.relpath(MANIFEST)}') - with open(MANIFEST, 'w', encoding="utf-8") as outfile: - lines = (l + '\n' for l in modlines) - outfile.writelines(lines) - - def regen_frozen(modules): headerlines = [] parentdir = os.path.dirname(FROZEN_FILE) @@ -648,11 +584,11 @@ def regen_makefile(modules): deepfreezefiles.append(f"\t\t{ofile} \\") # Also add a deepfreeze rule. 
- deepfreezerules.append(f'{cfile}: $(srcdir)/{_pyfile} $(DEEPFREEZE_DEPS)') - deepfreezerules.append(f'\t@echo "Deepfreezing {cfile} from {_pyfile}"') - deepfreezerules.append(f"\t@./$(BOOTSTRAP) \\") - deepfreezerules.append(f"\t\t$(srcdir)/Tools/scripts/deepfreeze.py \\") - deepfreezerules.append(f"\t\t$(srcdir)/{_pyfile} -m {src.frozenid} -o {cfile}") + deepfreezerules.append(f'{cfile}: {header} $(DEEPFREEZE_DEPS)') + deepfreezerules.append( + f"\t$(PYTHON_FOR_REGEN) " + f"$(srcdir)/Tools/scripts/deepfreeze.py " + f"{header} -m {src.frozenid} -o {cfile}") deepfreezerules.append('') for src in _iter_sources(modules): @@ -663,7 +599,7 @@ def regen_makefile(modules): pyfiles.append(f'\t\t{pyfile} \\') freeze = (f'Programs/_freeze_module {src.frozenid} ' - f'$(srcdir)/{pyfile} $(srcdir)/{header}') + f'$(srcdir)/{pyfile} {header}') rules.extend([ f'{header}: Programs/_freeze_module {pyfile}', f'\t{freeze}', @@ -775,32 +711,6 @@ def regen_pcbuild(modules): ####################################### -# freezing modules - -def freeze_module(modname, pyfile=None, destdir=MODULES_DIR): - """Generate the frozen module .h file for the given module.""" - tmpsuffix = f'.{int(time.time())}' - for modname, pyfile, ispkg in resolve_modules(modname, pyfile): - frozenfile = resolve_frozen_file(modname, destdir) - _freeze_module(modname, pyfile, frozenfile, tmpsuffix) - - -def _freeze_module(frozenid, pyfile, frozenfile, tmpsuffix): - tmpfile = f'{frozenfile}.{int(time.time())}' - - argv = [TOOL, frozenid, pyfile, tmpfile] - print('#', ' '.join(os.path.relpath(a) for a in argv), flush=True) - try: - subprocess.run(argv, check=True) - except (FileNotFoundError, subprocess.CalledProcessError): - if not os.path.exists(TOOL): - sys.exit(f'ERROR: missing {TOOL}; you need to run "make regen-frozen"') - raise # re-raise - - update_file_with_tmpfile(frozenfile, tmpfile, create=True) - - -####################################### # the script def main(): @@ -810,15 +720,7 @@ def main(): # Regen 
build-related files. regen_makefile(modules) regen_pcbuild(modules) - - # Freeze the target modules. - tmpsuffix = f'.{int(time.time())}' - for src in _iter_sources(modules): - _freeze_module(src.frozenid, src.pyfile, src.frozenfile, tmpsuffix) - - # Regen files dependent of frozen file details. regen_frozen(modules) - regen_manifest(modules) if __name__ == '__main__': diff --git a/Tools/scripts/umarshal.py b/Tools/scripts/umarshal.py index e0d18c8..2eaaa7c 100644 --- a/Tools/scripts/umarshal.py +++ b/Tools/scripts/umarshal.py @@ -2,7 +2,7 @@ import ast -from typing import Any +from typing import Any, Tuple class Type: @@ -55,10 +55,10 @@ class Code: def __repr__(self) -> str: return f"Code(**{self.__dict__})" - co_localsplusnames: tuple[str] - co_localspluskinds: tuple[int] + co_localsplusnames: Tuple[str] + co_localspluskinds: Tuple[int] - def get_localsplus_names(self, select_kind: int) -> tuple[str, ...]: + def get_localsplus_names(self, select_kind: int) -> Tuple[str, ...]: varnames: list[str] = [] for name, kind in zip(self.co_localsplusnames, self.co_localspluskinds): @@ -67,15 +67,15 @@ class Code: return tuple(varnames) @property - def co_varnames(self) -> tuple[str, ...]: + def co_varnames(self) -> Tuple[str, ...]: return self.get_localsplus_names(CO_FAST_LOCAL) @property - def co_cellvars(self) -> tuple[str, ...]: + def co_cellvars(self) -> Tuple[str, ...]: return self.get_localsplus_names(CO_FAST_CELL) @property - def co_freevars(self) -> tuple[str, ...]: + def co_freevars(self) -> Tuple[str, ...]: return self.get_localsplus_names(CO_FAST_FREE) @property @@ -190,118 +190,117 @@ class Reader: obj = self.r_ref(obj, flag) return obj - match type: - case Type.NULL: - return NULL - case Type.NONE: - return None - case Type.ELLIPSIS: - return Ellipsis - case Type.FALSE: - return False - case Type.TRUE: - return True - case Type.INT: - return R_REF(self.r_long()) - case Type.INT64: - return R_REF(self.r_long64()) - case Type.LONG: - return 
R_REF(self.r_PyLong()) - case Type.FLOAT: - return R_REF(self.r_float_str()) - case Type.BINARY_FLOAT: - return R_REF(self.r_float_bin()) - case Type.COMPLEX: - return R_REF(complex(self.r_float_str(), - self.r_float_str())) - case Type.BINARY_COMPLEX: - return R_REF(complex(self.r_float_bin(), - self.r_float_bin())) - case Type.STRING: - n = self.r_long() - return R_REF(self.r_string(n)) - case Type.ASCII_INTERNED | Type.ASCII: - n = self.r_long() - return R_REF(self.r_string(n).decode("ascii")) - case Type.SHORT_ASCII_INTERNED | Type.SHORT_ASCII: - n = self.r_byte() - return R_REF(self.r_string(n).decode("ascii")) - case Type.INTERNED | Type.UNICODE: - n = self.r_long() - return R_REF(self.r_string(n).decode("utf8", "surrogatepass")) - case Type.SMALL_TUPLE: - n = self.r_byte() - idx = self.r_ref_reserve(flag) - retval: Any = tuple(self.r_object() for _ in range(n)) - self.r_ref_insert(retval, idx, flag) - return retval - case Type.TUPLE: - n = self.r_long() - idx = self.r_ref_reserve(flag) - retval = tuple(self.r_object() for _ in range(n)) - self.r_ref_insert(retval, idx, flag) - return retval - case Type.LIST: - n = self.r_long() - retval = R_REF([]) - for _ in range(n): - retval.append(self.r_object()) - return retval - case Type.DICT: - retval = R_REF({}) - while True: - key = self.r_object() - if key == NULL: - break - val = self.r_object() - retval[key] = val - return retval - case Type.SET: - n = self.r_long() - retval = R_REF(set()) - for _ in range(n): - v = self.r_object() - retval.add(v) - return retval - case Type.FROZENSET: - n = self.r_long() - s: set[Any] = set() - idx = self.r_ref_reserve(flag) - for _ in range(n): - v = self.r_object() - s.add(v) - retval = frozenset(s) - self.r_ref_insert(retval, idx, flag) - return retval - case Type.CODE: - retval = R_REF(Code()) - retval.co_argcount = self.r_long() - retval.co_posonlyargcount = self.r_long() - retval.co_kwonlyargcount = self.r_long() - retval.co_stacksize = self.r_long() - retval.co_flags = 
self.r_long() - retval.co_code = self.r_object() - retval.co_consts = self.r_object() - retval.co_names = self.r_object() - retval.co_localsplusnames = self.r_object() - retval.co_localspluskinds = self.r_object() - retval.co_filename = self.r_object() - retval.co_name = self.r_object() - retval.co_qualname = self.r_object() - retval.co_firstlineno = self.r_long() - retval.co_linetable = self.r_object() - retval.co_endlinetable = self.r_object() - retval.co_columntable = self.r_object() - retval.co_exceptiontable = self.r_object() - return retval - case Type.REF: - n = self.r_long() - retval = self.refs[n] - assert retval is not None - return retval - case _: - breakpoint() - raise AssertionError(f"Unknown type {type} {chr(type)!r}") + if type == Type.NULL: + return NULL + elif type == Type.NONE: + return None + elif type == Type.ELLIPSIS: + return Ellipsis + elif type == Type.FALSE: + return False + elif type == Type.TRUE: + return True + elif type == Type.INT: + return R_REF(self.r_long()) + elif type == Type.INT64: + return R_REF(self.r_long64()) + elif type == Type.LONG: + return R_REF(self.r_PyLong()) + elif type == Type.FLOAT: + return R_REF(self.r_float_str()) + elif type == Type.BINARY_FLOAT: + return R_REF(self.r_float_bin()) + elif type == Type.COMPLEX: + return R_REF(complex(self.r_float_str(), + self.r_float_str())) + elif type == Type.BINARY_COMPLEX: + return R_REF(complex(self.r_float_bin(), + self.r_float_bin())) + elif type == Type.STRING: + n = self.r_long() + return R_REF(self.r_string(n)) + elif type == Type.ASCII_INTERNED or type == Type.ASCII: + n = self.r_long() + return R_REF(self.r_string(n).decode("ascii")) + elif type == Type.SHORT_ASCII_INTERNED or type == Type.SHORT_ASCII: + n = self.r_byte() + return R_REF(self.r_string(n).decode("ascii")) + elif type == Type.INTERNED or type == Type.UNICODE: + n = self.r_long() + return R_REF(self.r_string(n).decode("utf8", "surrogatepass")) + elif type == Type.SMALL_TUPLE: + n = self.r_byte() + idx = 
self.r_ref_reserve(flag) + retval: Any = tuple(self.r_object() for _ in range(n)) + self.r_ref_insert(retval, idx, flag) + return retval + elif type == Type.TUPLE: + n = self.r_long() + idx = self.r_ref_reserve(flag) + retval = tuple(self.r_object() for _ in range(n)) + self.r_ref_insert(retval, idx, flag) + return retval + elif type == Type.LIST: + n = self.r_long() + retval = R_REF([]) + for _ in range(n): + retval.append(self.r_object()) + return retval + elif type == Type.DICT: + retval = R_REF({}) + while True: + key = self.r_object() + if key == NULL: + break + val = self.r_object() + retval[key] = val + return retval + elif type == Type.SET: + n = self.r_long() + retval = R_REF(set()) + for _ in range(n): + v = self.r_object() + retval.add(v) + return retval + elif type == Type.FROZENSET: + n = self.r_long() + s: set[Any] = set() + idx = self.r_ref_reserve(flag) + for _ in range(n): + v = self.r_object() + s.add(v) + retval = frozenset(s) + self.r_ref_insert(retval, idx, flag) + return retval + elif type == Type.CODE: + retval = R_REF(Code()) + retval.co_argcount = self.r_long() + retval.co_posonlyargcount = self.r_long() + retval.co_kwonlyargcount = self.r_long() + retval.co_stacksize = self.r_long() + retval.co_flags = self.r_long() + retval.co_code = self.r_object() + retval.co_consts = self.r_object() + retval.co_names = self.r_object() + retval.co_localsplusnames = self.r_object() + retval.co_localspluskinds = self.r_object() + retval.co_filename = self.r_object() + retval.co_name = self.r_object() + retval.co_qualname = self.r_object() + retval.co_firstlineno = self.r_long() + retval.co_linetable = self.r_object() + retval.co_endlinetable = self.r_object() + retval.co_columntable = self.r_object() + retval.co_exceptiontable = self.r_object() + return retval + elif type == Type.REF: + n = self.r_long() + retval = self.refs[n] + assert retval is not None + return retval + else: + breakpoint() + raise AssertionError(f"Unknown type {type} {chr(type)!r}") 
def loads(data: bytes) -> Any: |