summary | refs | log | tree | commit | diff | stats
path: root/Tools/scripts
diff options
context:
space:
mode:
author: Guido van Rossum <guido@python.org> 2021-11-23 16:56:06 (GMT)
committer: GitHub <noreply@github.com> 2021-11-23 16:56:06 (GMT)
commit: 5be98e57b3c3b36d1a1176b49c73b8822c6380e7 (patch)
tree: 4791beb2411c0112b19d8fa6b6e867bb33416299 /Tools/scripts
parent: ae1965ccb4b1fad63fab40fe8805d1b8247668d3 (diff)
download: cpython-5be98e57b3c3b36d1a1176b49c73b8822c6380e7.zip
cpython-5be98e57b3c3b36d1a1176b49c73b8822c6380e7.tar.gz
cpython-5be98e57b3c3b36d1a1176b49c73b8822c6380e7.tar.bz2
bpo-45873: Get rid of bootstrap_python (#29717)
Instead we use $(PYTHON_FOR_REGEN) .../deepfreeze.py with the frozen .h file as input, as we did for Windows in bpo-45850. We also get rid of the code that generates the .h files when make regen-frozen is run (i.e., .../make_frozen.py), and the MANIFEST file.

Restore Python 3.8 and 3.9 as Windows host Python again.

Co-authored-by: Kumar Aditya <59607654+kumaraditya303@users.noreply.github.com>
Diffstat (limited to 'Tools/scripts')
-rw-r--r-- Tools/scripts/deepfreeze.py 87
-rw-r--r-- Tools/scripts/freeze_modules.py 110
-rw-r--r-- Tools/scripts/umarshal.py 237
3 files changed, 170 insertions, 264 deletions
diff --git a/Tools/scripts/deepfreeze.py b/Tools/scripts/deepfreeze.py
index b6d52b7..b840c4b 100644
--- a/Tools/scripts/deepfreeze.py
+++ b/Tools/scripts/deepfreeze.py
@@ -7,7 +7,7 @@ import os
import re
import time
import types
-import typing
+from typing import Dict, FrozenSet, Tuple, TextIO
import umarshal
@@ -42,13 +42,14 @@ def get_localsplus(code: types.CodeType):
def get_localsplus_counts(code: types.CodeType,
- names: tuple[str, ...],
- kinds: bytes) -> tuple[int, int, int, int]:
+ names: Tuple[str, ...],
+ kinds: bytes) -> Tuple[int, int, int, int]:
nlocals = 0
nplaincellvars = 0
ncellvars = 0
nfreevars = 0
- for name, kind in zip(names, kinds, strict=True):
+ assert len(names) == len(kinds)
+ for name, kind in zip(names, kinds):
if kind & CO_FAST_LOCAL:
nlocals += 1
if kind & CO_FAST_CELL:
@@ -71,7 +72,7 @@ PyUnicode_2BYTE_KIND = 2
PyUnicode_4BYTE_KIND = 4
-def analyze_character_width(s: str) -> tuple[int, bool]:
+def analyze_character_width(s: str) -> Tuple[int, bool]:
maxchar = ' '
for c in s:
maxchar = max(maxchar, c)
@@ -86,12 +87,17 @@ def analyze_character_width(s: str) -> tuple[int, bool]:
return kind, ascii
+def removesuffix(base: str, suffix: str) -> str:
+ if base.endswith(suffix):
+ return base[:len(base) - len(suffix)]
+ return base
+
class Printer:
- def __init__(self, file: typing.TextIO):
+ def __init__(self, file: TextIO):
self.level = 0
self.file = file
- self.cache: dict[tuple[type, object], str] = {}
+ self.cache: Dict[Tuple[type, object], str] = {}
self.hits, self.misses = 0, 0
self.patchups: list[str] = []
self.write('#include "Python.h"')
@@ -231,7 +237,7 @@ class Printer:
# otherwise MSVC doesn't like it.
self.write(f".co_consts = {co_consts},")
self.write(f".co_names = {co_names},")
- self.write(f".co_firstinstr = (_Py_CODEUNIT *) {co_code.removesuffix('.ob_base.ob_base')}.ob_sval,")
+ self.write(f".co_firstinstr = (_Py_CODEUNIT *) {removesuffix(co_code, '.ob_base.ob_base')}.ob_sval,")
self.write(f".co_exceptiontable = {co_exceptiontable},")
self.field(code, "co_flags")
self.write(".co_warmup = QUICKENING_INITIAL_WARMUP_VALUE,")
@@ -259,7 +265,7 @@ class Printer:
self.write(f".co_freevars = {co_freevars},")
return f"& {name}.ob_base"
- def generate_tuple(self, name: str, t: tuple[object, ...]) -> str:
+ def generate_tuple(self, name: str, t: Tuple[object, ...]) -> str:
items = [self.generate(f"{name}_{i}", it) for i, it in enumerate(t)]
self.write("static")
with self.indent():
@@ -323,7 +329,7 @@ class Printer:
self.write(f".cval = {{ {z.real}, {z.imag} }},")
return f"&{name}.ob_base"
- def generate_frozenset(self, name: str, fs: frozenset[object]) -> str:
+ def generate_frozenset(self, name: str, fs: FrozenSet[object]) -> str:
ret = self.generate_tuple(name, tuple(sorted(fs)))
self.write("// TODO: The above tuple should be a frozenset")
return ret
@@ -336,34 +342,33 @@ class Printer:
# print(f"Cache hit {key!r:.40}: {self.cache[key]!r:.40}")
return self.cache[key]
self.misses += 1
- match obj:
- case types.CodeType() | umarshal.Code() as code:
- val = self.generate_code(name, code)
- case tuple(t):
- val = self.generate_tuple(name, t)
- case str(s):
- val = self.generate_unicode(name, s)
- case bytes(b):
- val = self.generate_bytes(name, b)
- case True:
- return "Py_True"
- case False:
- return "Py_False"
- case int(i):
- val = self.generate_int(name, i)
- case float(x):
- val = self.generate_float(name, x)
- case complex() as z:
- val = self.generate_complex(name, z)
- case frozenset(fs):
- val = self.generate_frozenset(name, fs)
- case builtins.Ellipsis:
- return "Py_Ellipsis"
- case None:
- return "Py_None"
- case _:
- raise TypeError(
- f"Cannot generate code for {type(obj).__name__} object")
+ if isinstance(obj, types.CodeType) or isinstance(obj, umarshal.Code):
+ val = self.generate_code(name, obj)
+ elif isinstance(obj, tuple):
+ val = self.generate_tuple(name, obj)
+ elif isinstance(obj, str):
+ val = self.generate_unicode(name, obj)
+ elif isinstance(obj, bytes):
+ val = self.generate_bytes(name, obj)
+ elif obj is True:
+ return "Py_True"
+ elif obj is False:
+ return "Py_False"
+ elif isinstance(obj, int):
+ val = self.generate_int(name, obj)
+ elif isinstance(obj, float):
+ val = self.generate_float(name, obj)
+ elif isinstance(obj, complex):
+ val = self.generate_complex(name, obj)
+ elif isinstance(obj, frozenset):
+ val = self.generate_frozenset(name, obj)
+ elif obj is builtins.Ellipsis:
+ return "Py_Ellipsis"
+ elif obj is None:
+ return "Py_None"
+ else:
+ raise TypeError(
+ f"Cannot generate code for {type(obj).__name__} object")
# print(f"Cache store {key!r:.40}: {val!r:.40}")
self.cache[key] = val
return val
@@ -393,12 +398,12 @@ def decode_frozen_data(source: str) -> types.CodeType:
del lines[0]
while lines and re.match(FROZEN_DATA_LINE, lines[-1]) is None:
del lines[-1]
- values: tuple[int, ...] = ast.literal_eval("".join(lines))
+ values: Tuple[int, ...] = ast.literal_eval("".join(lines).strip())
data = bytes(values)
return umarshal.loads(data)
-def generate(source: str, filename: str, modname: str, file: typing.TextIO) -> None:
+def generate(source: str, filename: str, modname: str, file: TextIO) -> None:
if is_frozen_header(source):
code = decode_frozen_data(source)
else:
@@ -439,7 +444,7 @@ def main() -> None:
verbose = args.verbose
with open(args.file, encoding="utf-8") as f:
source = f.read()
- modname = args.module or os.path.basename(args.file).removesuffix(".py")
+ modname = args.module or removesuffix(os.path.basename(args.file), ".py")
output = args.output or modname + ".c"
with open(output, "w", encoding="utf-8") as file:
with report_time("generate"):
diff --git a/Tools/scripts/freeze_modules.py b/Tools/scripts/freeze_modules.py
index 61ccae6..4281b67 100644
--- a/Tools/scripts/freeze_modules.py
+++ b/Tools/scripts/freeze_modules.py
@@ -25,31 +25,6 @@ STDLIB_DIR = os.path.join(ROOT_DIR, 'Lib')
# need to be updated.
MODULES_DIR = os.path.join(ROOT_DIR, 'Python', 'frozen_modules')
-if sys.platform != "win32":
- TOOL = os.path.join(ROOT_DIR, 'Programs', '_freeze_module')
- if not os.path.isfile(TOOL):
- # When building out of the source tree, get the tool from directory
- # of the Python executable
- TOOL = os.path.dirname(sys.executable)
- TOOL = os.path.join(TOOL, 'Programs', '_freeze_module')
- TOOL = os.path.abspath(TOOL)
- if not os.path.isfile(TOOL):
- sys.exit("ERROR: missing _freeze_module")
-else:
- def find_tool():
- archs = ['amd64', 'win32']
- if platform.machine() == "ARM64":
- archs.append('arm64')
- for arch in archs:
- for exe in ['_freeze_module.exe', '_freeze_module_d.exe']:
- tool = os.path.join(ROOT_DIR, 'PCbuild', arch, exe)
- if os.path.isfile(tool):
- return tool
- sys.exit("ERROR: missing _freeze_module.exe; you need to run PCbuild/build.bat")
- TOOL = find_tool()
- del find_tool
-
-MANIFEST = os.path.join(MODULES_DIR, 'MANIFEST')
FROZEN_FILE = os.path.join(ROOT_DIR, 'Python', 'frozen.c')
MAKEFILE = os.path.join(ROOT_DIR, 'Makefile.pre.in')
PCBUILD_PROJECT = os.path.join(ROOT_DIR, 'PCbuild', '_freeze_module.vcxproj')
@@ -480,45 +455,6 @@ def replace_block(lines, start_marker, end_marker, replacements, file):
return lines[:start_pos + 1] + replacements + lines[end_pos:]
-def regen_manifest(modules):
- header = 'module ispkg source frozen checksum'.split()
- widths = [5] * len(header)
- rows = []
- for mod in modules:
- info = mod.summarize()
- row = []
- for i, col in enumerate(header):
- value = info[col]
- if col == 'checksum':
- value = value[:12]
- elif col == 'ispkg':
- value = 'YES' if value else 'no'
- widths[i] = max(widths[i], len(value))
- row.append(value or '-')
- rows.append(row)
-
- modlines = [
- '# The list of frozen modules with key information.',
- '# Note that the "check_generated_files" CI job will identify',
- '# when source files were changed but regen-frozen wasn\'t run.',
- '# This file is auto-generated by Tools/scripts/freeze_modules.py.',
- ' '.join(c.center(w) for c, w in zip(header, widths)).rstrip(),
- ' '.join('-' * w for w in widths),
- ]
- for row in rows:
- for i, w in enumerate(widths):
- if header[i] == 'ispkg':
- row[i] = row[i].center(w)
- else:
- row[i] = row[i].ljust(w)
- modlines.append(' '.join(row).rstrip())
-
- print(f'# Updating {os.path.relpath(MANIFEST)}')
- with open(MANIFEST, 'w', encoding="utf-8") as outfile:
- lines = (l + '\n' for l in modlines)
- outfile.writelines(lines)
-
-
def regen_frozen(modules):
headerlines = []
parentdir = os.path.dirname(FROZEN_FILE)
@@ -648,11 +584,11 @@ def regen_makefile(modules):
deepfreezefiles.append(f"\t\t{ofile} \\")
# Also add a deepfreeze rule.
- deepfreezerules.append(f'{cfile}: $(srcdir)/{_pyfile} $(DEEPFREEZE_DEPS)')
- deepfreezerules.append(f'\t@echo "Deepfreezing {cfile} from {_pyfile}"')
- deepfreezerules.append(f"\t@./$(BOOTSTRAP) \\")
- deepfreezerules.append(f"\t\t$(srcdir)/Tools/scripts/deepfreeze.py \\")
- deepfreezerules.append(f"\t\t$(srcdir)/{_pyfile} -m {src.frozenid} -o {cfile}")
+ deepfreezerules.append(f'{cfile}: {header} $(DEEPFREEZE_DEPS)')
+ deepfreezerules.append(
+ f"\t$(PYTHON_FOR_REGEN) "
+ f"$(srcdir)/Tools/scripts/deepfreeze.py "
+ f"{header} -m {src.frozenid} -o {cfile}")
deepfreezerules.append('')
for src in _iter_sources(modules):
@@ -663,7 +599,7 @@ def regen_makefile(modules):
pyfiles.append(f'\t\t{pyfile} \\')
freeze = (f'Programs/_freeze_module {src.frozenid} '
- f'$(srcdir)/{pyfile} $(srcdir)/{header}')
+ f'$(srcdir)/{pyfile} {header}')
rules.extend([
f'{header}: Programs/_freeze_module {pyfile}',
f'\t{freeze}',
@@ -775,32 +711,6 @@ def regen_pcbuild(modules):
#######################################
-# freezing modules
-
-def freeze_module(modname, pyfile=None, destdir=MODULES_DIR):
- """Generate the frozen module .h file for the given module."""
- tmpsuffix = f'.{int(time.time())}'
- for modname, pyfile, ispkg in resolve_modules(modname, pyfile):
- frozenfile = resolve_frozen_file(modname, destdir)
- _freeze_module(modname, pyfile, frozenfile, tmpsuffix)
-
-
-def _freeze_module(frozenid, pyfile, frozenfile, tmpsuffix):
- tmpfile = f'{frozenfile}.{int(time.time())}'
-
- argv = [TOOL, frozenid, pyfile, tmpfile]
- print('#', ' '.join(os.path.relpath(a) for a in argv), flush=True)
- try:
- subprocess.run(argv, check=True)
- except (FileNotFoundError, subprocess.CalledProcessError):
- if not os.path.exists(TOOL):
- sys.exit(f'ERROR: missing {TOOL}; you need to run "make regen-frozen"')
- raise # re-raise
-
- update_file_with_tmpfile(frozenfile, tmpfile, create=True)
-
-
-#######################################
# the script
def main():
@@ -810,15 +720,7 @@ def main():
# Regen build-related files.
regen_makefile(modules)
regen_pcbuild(modules)
-
- # Freeze the target modules.
- tmpsuffix = f'.{int(time.time())}'
- for src in _iter_sources(modules):
- _freeze_module(src.frozenid, src.pyfile, src.frozenfile, tmpsuffix)
-
- # Regen files dependent of frozen file details.
regen_frozen(modules)
- regen_manifest(modules)
if __name__ == '__main__':
diff --git a/Tools/scripts/umarshal.py b/Tools/scripts/umarshal.py
index e0d18c8..2eaaa7c 100644
--- a/Tools/scripts/umarshal.py
+++ b/Tools/scripts/umarshal.py
@@ -2,7 +2,7 @@
import ast
-from typing import Any
+from typing import Any, Tuple
class Type:
@@ -55,10 +55,10 @@ class Code:
def __repr__(self) -> str:
return f"Code(**{self.__dict__})"
- co_localsplusnames: tuple[str]
- co_localspluskinds: tuple[int]
+ co_localsplusnames: Tuple[str]
+ co_localspluskinds: Tuple[int]
- def get_localsplus_names(self, select_kind: int) -> tuple[str, ...]:
+ def get_localsplus_names(self, select_kind: int) -> Tuple[str, ...]:
varnames: list[str] = []
for name, kind in zip(self.co_localsplusnames,
self.co_localspluskinds):
@@ -67,15 +67,15 @@ class Code:
return tuple(varnames)
@property
- def co_varnames(self) -> tuple[str, ...]:
+ def co_varnames(self) -> Tuple[str, ...]:
return self.get_localsplus_names(CO_FAST_LOCAL)
@property
- def co_cellvars(self) -> tuple[str, ...]:
+ def co_cellvars(self) -> Tuple[str, ...]:
return self.get_localsplus_names(CO_FAST_CELL)
@property
- def co_freevars(self) -> tuple[str, ...]:
+ def co_freevars(self) -> Tuple[str, ...]:
return self.get_localsplus_names(CO_FAST_FREE)
@property
@@ -190,118 +190,117 @@ class Reader:
obj = self.r_ref(obj, flag)
return obj
- match type:
- case Type.NULL:
- return NULL
- case Type.NONE:
- return None
- case Type.ELLIPSIS:
- return Ellipsis
- case Type.FALSE:
- return False
- case Type.TRUE:
- return True
- case Type.INT:
- return R_REF(self.r_long())
- case Type.INT64:
- return R_REF(self.r_long64())
- case Type.LONG:
- return R_REF(self.r_PyLong())
- case Type.FLOAT:
- return R_REF(self.r_float_str())
- case Type.BINARY_FLOAT:
- return R_REF(self.r_float_bin())
- case Type.COMPLEX:
- return R_REF(complex(self.r_float_str(),
- self.r_float_str()))
- case Type.BINARY_COMPLEX:
- return R_REF(complex(self.r_float_bin(),
- self.r_float_bin()))
- case Type.STRING:
- n = self.r_long()
- return R_REF(self.r_string(n))
- case Type.ASCII_INTERNED | Type.ASCII:
- n = self.r_long()
- return R_REF(self.r_string(n).decode("ascii"))
- case Type.SHORT_ASCII_INTERNED | Type.SHORT_ASCII:
- n = self.r_byte()
- return R_REF(self.r_string(n).decode("ascii"))
- case Type.INTERNED | Type.UNICODE:
- n = self.r_long()
- return R_REF(self.r_string(n).decode("utf8", "surrogatepass"))
- case Type.SMALL_TUPLE:
- n = self.r_byte()
- idx = self.r_ref_reserve(flag)
- retval: Any = tuple(self.r_object() for _ in range(n))
- self.r_ref_insert(retval, idx, flag)
- return retval
- case Type.TUPLE:
- n = self.r_long()
- idx = self.r_ref_reserve(flag)
- retval = tuple(self.r_object() for _ in range(n))
- self.r_ref_insert(retval, idx, flag)
- return retval
- case Type.LIST:
- n = self.r_long()
- retval = R_REF([])
- for _ in range(n):
- retval.append(self.r_object())
- return retval
- case Type.DICT:
- retval = R_REF({})
- while True:
- key = self.r_object()
- if key == NULL:
- break
- val = self.r_object()
- retval[key] = val
- return retval
- case Type.SET:
- n = self.r_long()
- retval = R_REF(set())
- for _ in range(n):
- v = self.r_object()
- retval.add(v)
- return retval
- case Type.FROZENSET:
- n = self.r_long()
- s: set[Any] = set()
- idx = self.r_ref_reserve(flag)
- for _ in range(n):
- v = self.r_object()
- s.add(v)
- retval = frozenset(s)
- self.r_ref_insert(retval, idx, flag)
- return retval
- case Type.CODE:
- retval = R_REF(Code())
- retval.co_argcount = self.r_long()
- retval.co_posonlyargcount = self.r_long()
- retval.co_kwonlyargcount = self.r_long()
- retval.co_stacksize = self.r_long()
- retval.co_flags = self.r_long()
- retval.co_code = self.r_object()
- retval.co_consts = self.r_object()
- retval.co_names = self.r_object()
- retval.co_localsplusnames = self.r_object()
- retval.co_localspluskinds = self.r_object()
- retval.co_filename = self.r_object()
- retval.co_name = self.r_object()
- retval.co_qualname = self.r_object()
- retval.co_firstlineno = self.r_long()
- retval.co_linetable = self.r_object()
- retval.co_endlinetable = self.r_object()
- retval.co_columntable = self.r_object()
- retval.co_exceptiontable = self.r_object()
- return retval
- case Type.REF:
- n = self.r_long()
- retval = self.refs[n]
- assert retval is not None
- return retval
- case _:
- breakpoint()
- raise AssertionError(f"Unknown type {type} {chr(type)!r}")
+ if type == Type.NULL:
+ return NULL
+ elif type == Type.NONE:
+ return None
+ elif type == Type.ELLIPSIS:
+ return Ellipsis
+ elif type == Type.FALSE:
+ return False
+ elif type == Type.TRUE:
+ return True
+ elif type == Type.INT:
+ return R_REF(self.r_long())
+ elif type == Type.INT64:
+ return R_REF(self.r_long64())
+ elif type == Type.LONG:
+ return R_REF(self.r_PyLong())
+ elif type == Type.FLOAT:
+ return R_REF(self.r_float_str())
+ elif type == Type.BINARY_FLOAT:
+ return R_REF(self.r_float_bin())
+ elif type == Type.COMPLEX:
+ return R_REF(complex(self.r_float_str(),
+ self.r_float_str()))
+ elif type == Type.BINARY_COMPLEX:
+ return R_REF(complex(self.r_float_bin(),
+ self.r_float_bin()))
+ elif type == Type.STRING:
+ n = self.r_long()
+ return R_REF(self.r_string(n))
+ elif type == Type.ASCII_INTERNED or type == Type.ASCII:
+ n = self.r_long()
+ return R_REF(self.r_string(n).decode("ascii"))
+ elif type == Type.SHORT_ASCII_INTERNED or type == Type.SHORT_ASCII:
+ n = self.r_byte()
+ return R_REF(self.r_string(n).decode("ascii"))
+ elif type == Type.INTERNED or type == Type.UNICODE:
+ n = self.r_long()
+ return R_REF(self.r_string(n).decode("utf8", "surrogatepass"))
+ elif type == Type.SMALL_TUPLE:
+ n = self.r_byte()
+ idx = self.r_ref_reserve(flag)
+ retval: Any = tuple(self.r_object() for _ in range(n))
+ self.r_ref_insert(retval, idx, flag)
+ return retval
+ elif type == Type.TUPLE:
+ n = self.r_long()
+ idx = self.r_ref_reserve(flag)
+ retval = tuple(self.r_object() for _ in range(n))
+ self.r_ref_insert(retval, idx, flag)
+ return retval
+ elif type == Type.LIST:
+ n = self.r_long()
+ retval = R_REF([])
+ for _ in range(n):
+ retval.append(self.r_object())
+ return retval
+ elif type == Type.DICT:
+ retval = R_REF({})
+ while True:
+ key = self.r_object()
+ if key == NULL:
+ break
+ val = self.r_object()
+ retval[key] = val
+ return retval
+ elif type == Type.SET:
+ n = self.r_long()
+ retval = R_REF(set())
+ for _ in range(n):
+ v = self.r_object()
+ retval.add(v)
+ return retval
+ elif type == Type.FROZENSET:
+ n = self.r_long()
+ s: set[Any] = set()
+ idx = self.r_ref_reserve(flag)
+ for _ in range(n):
+ v = self.r_object()
+ s.add(v)
+ retval = frozenset(s)
+ self.r_ref_insert(retval, idx, flag)
+ return retval
+ elif type == Type.CODE:
+ retval = R_REF(Code())
+ retval.co_argcount = self.r_long()
+ retval.co_posonlyargcount = self.r_long()
+ retval.co_kwonlyargcount = self.r_long()
+ retval.co_stacksize = self.r_long()
+ retval.co_flags = self.r_long()
+ retval.co_code = self.r_object()
+ retval.co_consts = self.r_object()
+ retval.co_names = self.r_object()
+ retval.co_localsplusnames = self.r_object()
+ retval.co_localspluskinds = self.r_object()
+ retval.co_filename = self.r_object()
+ retval.co_name = self.r_object()
+ retval.co_qualname = self.r_object()
+ retval.co_firstlineno = self.r_long()
+ retval.co_linetable = self.r_object()
+ retval.co_endlinetable = self.r_object()
+ retval.co_columntable = self.r_object()
+ retval.co_exceptiontable = self.r_object()
+ return retval
+ elif type == Type.REF:
+ n = self.r_long()
+ retval = self.refs[n]
+ assert retval is not None
+ return retval
+ else:
+ breakpoint()
+ raise AssertionError(f"Unknown type {type} {chr(type)!r}")
def loads(data: bytes) -> Any: