Diffstat (limited to 'Tools/build')
-rw-r--r--  Tools/build/check_extension_modules.py  484
-rw-r--r--  Tools/build/deepfreeze.py  504
-rw-r--r--  Tools/build/freeze_modules.py  733
-rw-r--r--  Tools/build/generate_global_objects.py  382
-rw-r--r--  Tools/build/generate_levenshtein_examples.py  70
-rw-r--r--  Tools/build/generate_opcode_h.py  199
-rwxr-xr-x  Tools/build/generate_re_casefix.py  96
-rwxr-xr-x  Tools/build/generate_sre_constants.py  80
-rw-r--r--  Tools/build/generate_stdlib_module_names.py  139
-rwxr-xr-x  Tools/build/generate_token.py  282
-rwxr-xr-x  Tools/build/parse_html5_entities.py  115
-rwxr-xr-x  Tools/build/smelly.py  173
-rw-r--r--  Tools/build/stable_abi.py  757
-rw-r--r--  Tools/build/umarshal.py  325
-rw-r--r--  Tools/build/update_file.py  92
-rwxr-xr-x  Tools/build/verify_ensurepip_wheels.py  98
16 files changed, 4529 insertions, 0 deletions
diff --git a/Tools/build/check_extension_modules.py b/Tools/build/check_extension_modules.py
new file mode 100644
index 0000000..59239c6
--- /dev/null
+++ b/Tools/build/check_extension_modules.py
@@ -0,0 +1,484 @@
+"""Check extension modules
+
+The script checks shared and built-in extension modules. It verifies that the
+modules have been built and that they can be imported successfully. Missing
+modules and failed imports are reported to the user. Shared extension
+files are renamed on failed import.
+
+Module information is parsed from several sources:
+
+- core modules hard-coded in Modules/config.c.in
+- Windows-specific modules that are hard-coded in PC/config.c
+- MODULE_{name}_STATE entries in Makefile (provided through sysconfig)
+- Various makesetup files:
+ - $(srcdir)/Modules/Setup
+ - Modules/Setup.[local|bootstrap|stdlib] files, which are generated
+ from $(srcdir)/Modules/Setup.*.in files
+
+See --help for more information
+"""
+import argparse
+import collections
+import enum
+import logging
+import os
+import pathlib
+import re
+import sys
+import sysconfig
+import warnings
+
+from importlib._bootstrap import _load as bootstrap_load
+from importlib.machinery import BuiltinImporter, ExtensionFileLoader, ModuleSpec
+from importlib.util import spec_from_file_location, spec_from_loader
+from typing import Iterable
+
+SRC_DIR = pathlib.Path(__file__).parent.parent.parent
+
+# core modules, hard-coded in Modules/config.c.in
+CORE_MODULES = {
+ "_ast",
+ "_imp",
+ "_string",
+ "_tokenize",
+ "_warnings",
+ "builtins",
+ "gc",
+ "marshal",
+ "sys",
+}
+
+# Windows-only modules
+WINDOWS_MODULES = {
+ "_msi",
+ "_overlapped",
+ "_testconsole",
+ "_winapi",
+ "msvcrt",
+ "nt",
+ "winreg",
+ "winsound",
+}
+
+
+logger = logging.getLogger(__name__)
+
+parser = argparse.ArgumentParser(
+ prog="check_extension_modules",
+ description=__doc__,
+ formatter_class=argparse.RawDescriptionHelpFormatter,
+)
+
+parser.add_argument(
+ "--verbose",
+ action="store_true",
+ help="Verbose, report builtin, shared, and unavailable modules",
+)
+
+parser.add_argument(
+ "--debug",
+ action="store_true",
+ help="Enable debug logging",
+)
+
+parser.add_argument(
+ "--strict",
+ action=argparse.BooleanOptionalAction,
+ help=(
+ "Strict check, fail when a module is missing or fails to import"
+ "(default: no, unless env var PYTHONSTRICTEXTENSIONBUILD is set)"
+ ),
+ default=bool(os.environ.get("PYTHONSTRICTEXTENSIONBUILD")),
+)
+
+parser.add_argument(
+ "--cross-compiling",
+ action=argparse.BooleanOptionalAction,
+ help=(
+ "Use cross-compiling checks "
+ "(default: no, unless env var _PYTHON_HOST_PLATFORM is set)."
+ ),
+ default="_PYTHON_HOST_PLATFORM" in os.environ,
+)
+
+parser.add_argument(
+ "--list-module-names",
+ action="store_true",
+ help="Print a list of module names to stdout and exit",
+)
+
+
+class ModuleState(enum.Enum):
+ # Makefile state "yes"
+ BUILTIN = "builtin"
+ SHARED = "shared"
+
+ DISABLED = "disabled"
+ MISSING = "missing"
+ NA = "n/a"
+ # disabled by Setup / makesetup rule
+ DISABLED_SETUP = "disabled_setup"
+
+ def __bool__(self):
+ return self.value in {"builtin", "shared"}
+
+
+ModuleInfo = collections.namedtuple("ModuleInfo", "name state")
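+# Example (illustrative): ModuleInfo("_ssl", ModuleState.SHARED) describes a
+# shared extension module; whether it counts as built is determined by
+# truth-testing its state (see ModuleState.__bool__ above).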
+
+
+class ModuleChecker:
+ pybuilddir_txt = "pybuilddir.txt"
+
+ setup_files = (
+ # see end of configure.ac
+ "Modules/Setup.local",
+ "Modules/Setup.stdlib",
+ "Modules/Setup.bootstrap",
+ SRC_DIR / "Modules/Setup",
+ )
+
+ def __init__(self, cross_compiling: bool = False, strict: bool = False):
+ self.cross_compiling = cross_compiling
+ self.strict_extensions_build = strict
+ self.ext_suffix = sysconfig.get_config_var("EXT_SUFFIX")
+ self.platform = sysconfig.get_platform()
+ self.builddir = self.get_builddir()
+ self.modules = self.get_modules()
+
+ self.builtin_ok = []
+ self.shared_ok = []
+ self.failed_on_import = []
+ self.missing = []
+ self.disabled_configure = []
+ self.disabled_setup = []
+ self.notavailable = []
+
+ def check(self):
+ for modinfo in self.modules:
+ logger.debug("Checking '%s' (%s)", modinfo.name, self.get_location(modinfo))
+ if modinfo.state == ModuleState.DISABLED:
+ self.disabled_configure.append(modinfo)
+ elif modinfo.state == ModuleState.DISABLED_SETUP:
+ self.disabled_setup.append(modinfo)
+ elif modinfo.state == ModuleState.MISSING:
+ self.missing.append(modinfo)
+ elif modinfo.state == ModuleState.NA:
+ self.notavailable.append(modinfo)
+ else:
+ try:
+ if self.cross_compiling:
+ self.check_module_cross(modinfo)
+ else:
+ self.check_module_import(modinfo)
+ except (ImportError, FileNotFoundError):
+ self.rename_module(modinfo)
+ self.failed_on_import.append(modinfo)
+ else:
+ if modinfo.state == ModuleState.BUILTIN:
+ self.builtin_ok.append(modinfo)
+ else:
+ assert modinfo.state == ModuleState.SHARED
+ self.shared_ok.append(modinfo)
+
+ def summary(self, *, verbose: bool = False):
+ longest = max([len(e.name) for e in self.modules], default=0)
+
+ def print_three_column(modinfos: list[ModuleInfo]):
+ names = [modinfo.name for modinfo in modinfos]
+ names.sort(key=str.lower)
+ # guarantee zip() doesn't drop anything
+ while len(names) % 3:
+ names.append("")
+ for l, m, r in zip(names[::3], names[1::3], names[2::3]):
+ print("%-*s %-*s %-*s" % (longest, l, longest, m, longest, r))
+
+ if verbose and self.builtin_ok:
+ print("The following *built-in* modules have been successfully built:")
+ print_three_column(self.builtin_ok)
+ print()
+
+ if verbose and self.shared_ok:
+ print("The following *shared* modules have been successfully built:")
+ print_three_column(self.shared_ok)
+ print()
+
+ if self.disabled_configure:
+ print("The following modules are *disabled* in configure script:")
+ print_three_column(self.disabled_configure)
+ print()
+
+ if self.disabled_setup:
+ print("The following modules are *disabled* in Modules/Setup files:")
+ print_three_column(self.disabled_setup)
+ print()
+
+ if verbose and self.notavailable:
+ print(
+ f"The following modules are not available on platform '{self.platform}':"
+ )
+ print_three_column(self.notavailable)
+ print()
+
+ if self.missing:
+ print("The necessary bits to build these optional modules were not found:")
+ print_three_column(self.missing)
+ print("To find the necessary bits, look in configure.ac and config.log.")
+ print()
+
+ if self.failed_on_import:
+ print(
+ "Following modules built successfully "
+ "but were removed because they could not be imported:"
+ )
+ print_three_column(self.failed_on_import)
+ print()
+
+ if any(
+ modinfo.name == "_ssl" for modinfo in self.missing + self.failed_on_import
+ ):
+ print("Could not build the ssl module!")
+ print("Python requires a OpenSSL 1.1.1 or newer")
+ if sysconfig.get_config_var("OPENSSL_LDFLAGS"):
+ print("Custom linker flags may require --with-openssl-rpath=auto")
+ print()
+
+ disabled = len(self.disabled_configure) + len(self.disabled_setup)
+ print(
+ f"Checked {len(self.modules)} modules ("
+ f"{len(self.builtin_ok)} built-in, "
+ f"{len(self.shared_ok)} shared, "
+ f"{len(self.notavailable)} n/a on {self.platform}, "
+ f"{disabled} disabled, "
+ f"{len(self.missing)} missing, "
+ f"{len(self.failed_on_import)} failed on import)"
+ )
+
+ def check_strict_build(self):
+ """Fail if modules are missing and it's a strict build"""
+ if self.strict_extensions_build and (self.failed_on_import or self.missing):
+ raise RuntimeError("Failed to build some stdlib modules")
+
+ def list_module_names(self, *, all: bool = False) -> set:
+ names = {modinfo.name for modinfo in self.modules}
+ if all:
+ names.update(WINDOWS_MODULES)
+ return names
+
+ def get_builddir(self) -> pathlib.Path:
+ try:
+ with open(self.pybuilddir_txt, encoding="utf-8") as f:
+ builddir = f.read()
+ except FileNotFoundError:
+ logger.error("%s must be run from the top build directory", __file__)
+ raise
+ builddir = pathlib.Path(builddir)
+ logger.debug("%s: %s", self.pybuilddir_txt, builddir)
+ return builddir
+
+ def get_modules(self) -> list[ModuleInfo]:
+ """Get module info from sysconfig and Modules/Setup* files"""
+ seen = set()
+ modules = []
+ # parsing order is important, first entry wins
+ for modinfo in self.get_core_modules():
+ modules.append(modinfo)
+ seen.add(modinfo.name)
+ for setup_file in self.setup_files:
+ for modinfo in self.parse_setup_file(setup_file):
+ if modinfo.name not in seen:
+ modules.append(modinfo)
+ seen.add(modinfo.name)
+ for modinfo in self.get_sysconfig_modules():
+ if modinfo.name not in seen:
+ modules.append(modinfo)
+ seen.add(modinfo.name)
+ logger.debug("Found %i modules in total", len(modules))
+ modules.sort()
+ return modules
+
+ def get_core_modules(self) -> Iterable[ModuleInfo]:
+ """Get hard-coded core modules"""
+ for name in CORE_MODULES:
+ modinfo = ModuleInfo(name, ModuleState.BUILTIN)
+ logger.debug("Found core module %s", modinfo)
+ yield modinfo
+
+ def get_sysconfig_modules(self) -> Iterable[ModuleInfo]:
+ """Get modules defined in Makefile through sysconfig
+
+ MODBUILT_NAMES: modules in *static* block
+ MODSHARED_NAMES: modules in *shared* block
+ MODDISABLED_NAMES: modules in *disabled* block
+ """
+ moddisabled = set(sysconfig.get_config_var("MODDISABLED_NAMES").split())
+ if self.cross_compiling:
+ modbuiltin = set(sysconfig.get_config_var("MODBUILT_NAMES").split())
+ else:
+ modbuiltin = set(sys.builtin_module_names)
+
+ for key, value in sysconfig.get_config_vars().items():
+ if not key.startswith("MODULE_") or not key.endswith("_STATE"):
+ continue
+ if value not in {"yes", "disabled", "missing", "n/a"}:
+ raise ValueError(f"Unsupported value '{value}' for {key}")
+
+ modname = key[7:-6].lower()
+ if modname in moddisabled:
+ # Setup "*disabled*" rule
+ state = ModuleState.DISABLED_SETUP
+ elif value in {"disabled", "missing", "n/a"}:
+ state = ModuleState(value)
+ elif modname in modbuiltin:
+ assert value == "yes"
+ state = ModuleState.BUILTIN
+ else:
+ assert value == "yes"
+ state = ModuleState.SHARED
+
+ modinfo = ModuleInfo(modname, state)
+ logger.debug("Found %s in Makefile", modinfo)
+ yield modinfo
+
+ def parse_setup_file(self, setup_file: pathlib.Path) -> Iterable[ModuleInfo]:
+ """Parse a Modules/Setup file"""
+ assign_var = re.compile(r"^\w+=") # EGG_SPAM=foo
+ # default to static module
+ state = ModuleState.BUILTIN
+ logger.debug("Parsing Setup file %s", setup_file)
+ with open(setup_file, encoding="utf-8") as f:
+ for line in f:
+ line = line.strip()
+ if not line or line.startswith("#") or assign_var.match(line):
+ continue
+ match line.split():
+ case ["*shared*"]:
+ state = ModuleState.SHARED
+ case ["*static*"]:
+ state = ModuleState.BUILTIN
+ case ["*disabled*"]:
+ state = ModuleState.DISABLED
+ case ["*noconfig*"]:
+ state = None
+ case [*items]:
+ if state == ModuleState.DISABLED:
+ # *disabled* can disable multiple modules per line
+ for item in items:
+ modinfo = ModuleInfo(item, state)
+ logger.debug("Found %s in %s", modinfo, setup_file)
+ yield modinfo
+ elif state in {ModuleState.SHARED, ModuleState.BUILTIN}:
+ # *shared* and *static*, first item is the name of the module.
+ modinfo = ModuleInfo(items[0], state)
+ logger.debug("Found %s in %s", modinfo, setup_file)
+ yield modinfo
+
+ def get_spec(self, modinfo: ModuleInfo) -> ModuleSpec:
+ """Get ModuleSpec for builtin or extension module"""
+ if modinfo.state == ModuleState.SHARED:
+ location = os.fspath(self.get_location(modinfo))
+ loader = ExtensionFileLoader(modinfo.name, location)
+ return spec_from_file_location(modinfo.name, location, loader=loader)
+ elif modinfo.state == ModuleState.BUILTIN:
+ return spec_from_loader(modinfo.name, loader=BuiltinImporter)
+ else:
+ raise ValueError(modinfo)
+
+ def get_location(self, modinfo: ModuleInfo) -> pathlib.Path:
+ """Get shared library location in build directory"""
+ if modinfo.state == ModuleState.SHARED:
+ return self.builddir / f"{modinfo.name}{self.ext_suffix}"
+ else:
+ return None
+
+ def _check_file(self, modinfo: ModuleInfo, spec: ModuleSpec):
+ """Check that the module file is present and not empty"""
+ if spec.loader is BuiltinImporter:
+ return
+ try:
+ st = os.stat(spec.origin)
+ except FileNotFoundError:
+ logger.error("%s (%s) is missing", modinfo.name, spec.origin)
+ raise
+ if not st.st_size:
+ raise ImportError(f"{spec.origin} is an empty file")
+
+ def check_module_import(self, modinfo: ModuleInfo):
+ """Attempt to import module and report errors"""
+ spec = self.get_spec(modinfo)
+ self._check_file(modinfo, spec)
+ try:
+ with warnings.catch_warnings():
+ # ignore deprecation warning from deprecated modules
+ warnings.simplefilter("ignore", DeprecationWarning)
+ bootstrap_load(spec)
+ except ImportError as e:
+ logger.error("%s failed to import: %s", modinfo.name, e)
+ raise
+ except Exception as e:
+ logger.exception("Importing extension '%s' failed!", modinfo.name)
+ raise
+
+ def check_module_cross(self, modinfo: ModuleInfo):
+ """Sanity check for cross compiling"""
+ spec = self.get_spec(modinfo)
+ self._check_file(modinfo, spec)
+
+ def rename_module(self, modinfo: ModuleInfo) -> None:
+ """Rename module file"""
+ if modinfo.state == ModuleState.BUILTIN:
+ logger.error("Cannot mark builtin module '%s' as failed!", modinfo.name)
+ return
+
+ failed_name = f"{modinfo.name}_failed{self.ext_suffix}"
+ builddir_path = self.get_location(modinfo)
+ if builddir_path.is_symlink():
+ symlink = builddir_path
+ module_path = builddir_path.resolve().relative_to(os.getcwd())
+ failed_path = module_path.parent / failed_name
+ else:
+ symlink = None
+ module_path = builddir_path
+ failed_path = self.builddir / failed_name
+
+ # remove old failed file
+ failed_path.unlink(missing_ok=True)
+ # remove symlink
+ if symlink is not None:
+ symlink.unlink(missing_ok=True)
+ # rename shared extension file
+ try:
+ module_path.rename(failed_path)
+ except FileNotFoundError:
+ logger.debug("Shared extension file '%s' does not exist.", module_path)
+ else:
+ logger.debug("Rename '%s' -> '%s'", module_path, failed_path)
+
+
+def main():
+ args = parser.parse_args()
+ if args.debug:
+ args.verbose = True
+ logging.basicConfig(
+ level=logging.DEBUG if args.debug else logging.INFO,
+ format="[%(levelname)s] %(message)s",
+ )
+
+ checker = ModuleChecker(
+ cross_compiling=args.cross_compiling,
+ strict=args.strict,
+ )
+ if args.list_module_names:
+ names = checker.list_module_names(all=True)
+ for name in sorted(names):
+ print(name)
+ else:
+ checker.check()
+ checker.summary(verbose=args.verbose)
+ try:
+ checker.check_strict_build()
+ except RuntimeError as e:
+ parser.exit(1, f"\nError: {e}\n")
+
+
+if __name__ == "__main__":
+ main()
diff --git a/Tools/build/deepfreeze.py b/Tools/build/deepfreeze.py
new file mode 100644
index 0000000..28ac2b1
--- /dev/null
+++ b/Tools/build/deepfreeze.py
@@ -0,0 +1,504 @@
+"""Deep freeze
+
+The script may be executed by the _bootstrap_python interpreter.
+Shared library extension modules are not available in that case.
+On Windows, and in cross-compilation cases, it is executed
+by Python 3.10, and 3.11 features are not available.
+"""
+import argparse
+import ast
+import builtins
+import collections
+import contextlib
+import os
+import re
+import time
+import types
+from typing import Dict, FrozenSet, TextIO, Tuple
+
+import umarshal
+from generate_global_objects import get_identifiers_and_strings
+
+verbose = False
+identifiers, strings = get_identifiers_and_strings()
+
+# This must be kept in sync with opcode.py
+RESUME = 151
+
+def isprintable(b: bytes) -> bool:
+ return all(0x20 <= c < 0x7f for c in b)
+
+
+def make_string_literal(b: bytes) -> str:
+ res = ['"']
+ if isprintable(b):
+ res.append(b.decode("ascii").replace("\\", "\\\\").replace("\"", "\\\""))
+ else:
+ for i in b:
+ res.append(f"\\x{i:02x}")
+ res.append('"')
+ return "".join(res)
+
+
+CO_FAST_LOCAL = 0x20
+CO_FAST_CELL = 0x40
+CO_FAST_FREE = 0x80
+
+
+def get_localsplus(code: types.CodeType):
+ a = collections.defaultdict(int)
+ for name in code.co_varnames:
+ a[name] |= CO_FAST_LOCAL
+ for name in code.co_cellvars:
+ a[name] |= CO_FAST_CELL
+ for name in code.co_freevars:
+ a[name] |= CO_FAST_FREE
+ return tuple(a.keys()), bytes(a.values())
+
+
+def get_localsplus_counts(code: types.CodeType,
+ names: Tuple[str, ...],
+ kinds: bytes) -> Tuple[int, int, int, int]:
+ nlocals = 0
+ nplaincellvars = 0
+ ncellvars = 0
+ nfreevars = 0
+ assert len(names) == len(kinds)
+ for name, kind in zip(names, kinds):
+ if kind & CO_FAST_LOCAL:
+ nlocals += 1
+ if kind & CO_FAST_CELL:
+ ncellvars += 1
+ elif kind & CO_FAST_CELL:
+ ncellvars += 1
+ nplaincellvars += 1
+ elif kind & CO_FAST_FREE:
+ nfreevars += 1
+ assert nlocals == len(code.co_varnames) == code.co_nlocals, \
+ (nlocals, len(code.co_varnames), code.co_nlocals)
+ assert ncellvars == len(code.co_cellvars)
+ assert nfreevars == len(code.co_freevars)
+ assert len(names) == nlocals + nplaincellvars + nfreevars
+ return nlocals, nplaincellvars, ncellvars, nfreevars
+
+
+PyUnicode_1BYTE_KIND = 1
+PyUnicode_2BYTE_KIND = 2
+PyUnicode_4BYTE_KIND = 4
+
+
+def analyze_character_width(s: str) -> Tuple[int, bool]:
+ maxchar = ' '
+ for c in s:
+ maxchar = max(maxchar, c)
+ ascii = False
+ if maxchar <= '\xFF':
+ kind = PyUnicode_1BYTE_KIND
+ ascii = maxchar <= '\x7F'
+ elif maxchar <= '\uFFFF':
+ kind = PyUnicode_2BYTE_KIND
+ else:
+ kind = PyUnicode_4BYTE_KIND
+ return kind, ascii
+
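+# Illustrative examples for analyze_character_width() (values assumed):
+#   "hello"      -> (PyUnicode_1BYTE_KIND, ascii=True)
+#   "caf\xe9"    -> (PyUnicode_1BYTE_KIND, ascii=False)   # Latin-1 range
+#   "\u20ac"     -> (PyUnicode_2BYTE_KIND, ascii=False)   # needs 16-bit units
+#   "\U0001f600" -> (PyUnicode_4BYTE_KIND, ascii=False)   # needs 32-bit units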
+
+def removesuffix(base: str, suffix: str) -> str:
+ if base.endswith(suffix):
+ return base[:len(base) - len(suffix)]
+ return base
+
+class Printer:
+
+ def __init__(self, file: TextIO) -> None:
+ self.level = 0
+ self.file = file
+ self.cache: Dict[tuple[type, object, str], str] = {}
+ self.hits, self.misses = 0, 0
+ self.patchups: list[str] = []
+ self.deallocs: list[str] = []
+ self.interns: list[str] = []
+ self.write('#include "Python.h"')
+ self.write('#include "internal/pycore_gc.h"')
+ self.write('#include "internal/pycore_code.h"')
+ self.write('#include "internal/pycore_frame.h"')
+ self.write('#include "internal/pycore_long.h"')
+ self.write("")
+
+ @contextlib.contextmanager
+ def indent(self) -> None:
+ save_level = self.level
+ try:
+ self.level += 1
+ yield
+ finally:
+ self.level = save_level
+
+ def write(self, arg: str) -> None:
+ self.file.writelines((" "*self.level, arg, "\n"))
+
+ @contextlib.contextmanager
+ def block(self, prefix: str, suffix: str = "") -> None:
+ self.write(prefix + " {")
+ with self.indent():
+ yield
+ self.write("}" + suffix)
+
+ def object_head(self, typename: str) -> None:
+ with self.block(".ob_base =", ","):
+ self.write(f".ob_refcnt = 999999999,")
+ self.write(f".ob_type = &{typename},")
+
+ def object_var_head(self, typename: str, size: int) -> None:
+ with self.block(".ob_base =", ","):
+ self.object_head(typename)
+ self.write(f".ob_size = {size},")
+
+ def field(self, obj: object, name: str) -> None:
+ self.write(f".{name} = {getattr(obj, name)},")
+
+ def generate_bytes(self, name: str, b: bytes) -> str:
+ if b == b"":
+ return "(PyObject *)&_Py_SINGLETON(bytes_empty)"
+ if len(b) == 1:
+ return f"(PyObject *)&_Py_SINGLETON(bytes_characters[{b[0]}])"
+ self.write("static")
+ with self.indent():
+ with self.block("struct"):
+ self.write("PyObject_VAR_HEAD")
+ self.write("Py_hash_t ob_shash;")
+ self.write(f"char ob_sval[{len(b) + 1}];")
+ with self.block(f"{name} =", ";"):
+ self.object_var_head("PyBytes_Type", len(b))
+ self.write(".ob_shash = -1,")
+ self.write(f".ob_sval = {make_string_literal(b)},")
+ return f"& {name}.ob_base.ob_base"
+
+ def generate_unicode(self, name: str, s: str) -> str:
+ if s in strings:
+ return f"&_Py_STR({strings[s]})"
+ if s in identifiers:
+ return f"&_Py_ID({s})"
+ if re.match(r'\A[A-Za-z0-9_]+\Z', s):
+ name = f"const_str_{s}"
+ kind, ascii = analyze_character_width(s)
+ if kind == PyUnicode_1BYTE_KIND:
+ datatype = "uint8_t"
+ elif kind == PyUnicode_2BYTE_KIND:
+ datatype = "uint16_t"
+ else:
+ datatype = "uint32_t"
+ self.write("static")
+ with self.indent():
+ with self.block("struct"):
+ if ascii:
+ self.write("PyASCIIObject _ascii;")
+ else:
+ self.write("PyCompactUnicodeObject _compact;")
+ self.write(f"{datatype} _data[{len(s)+1}];")
+ with self.block(f"{name} =", ";"):
+ if ascii:
+ with self.block("._ascii =", ","):
+ self.object_head("PyUnicode_Type")
+ self.write(f".length = {len(s)},")
+ self.write(".hash = -1,")
+ with self.block(".state =", ","):
+ self.write(".kind = 1,")
+ self.write(".compact = 1,")
+ self.write(".ascii = 1,")
+ self.write(f"._data = {make_string_literal(s.encode('ascii'))},")
+ return f"& {name}._ascii.ob_base"
+ else:
+ with self.block("._compact =", ","):
+ with self.block("._base =", ","):
+ self.object_head("PyUnicode_Type")
+ self.write(f".length = {len(s)},")
+ self.write(".hash = -1,")
+ with self.block(".state =", ","):
+ self.write(f".kind = {kind},")
+ self.write(".compact = 1,")
+ self.write(".ascii = 0,")
+ utf8 = s.encode('utf-8')
+ self.write(f'.utf8 = {make_string_literal(utf8)},')
+ self.write(f'.utf8_length = {len(utf8)},')
+ with self.block(f"._data =", ","):
+ for i in range(0, len(s), 16):
+ data = s[i:i+16]
+ self.write(", ".join(map(str, map(ord, data))) + ",")
+ return f"& {name}._compact._base.ob_base"
+
+
+ def generate_code(self, name: str, code: types.CodeType) -> str:
+ # The ordering here matches PyCode_NewWithPosOnlyArgs()
+ # (but see below).
+ co_consts = self.generate(name + "_consts", code.co_consts)
+ co_names = self.generate(name + "_names", code.co_names)
+ co_filename = self.generate(name + "_filename", code.co_filename)
+ co_name = self.generate(name + "_name", code.co_name)
+ co_qualname = self.generate(name + "_qualname", code.co_qualname)
+ co_linetable = self.generate(name + "_linetable", code.co_linetable)
+ co_exceptiontable = self.generate(name + "_exceptiontable", code.co_exceptiontable)
+ # These fields are not directly accessible
+ localsplusnames, localspluskinds = get_localsplus(code)
+ co_localsplusnames = self.generate(name + "_localsplusnames", localsplusnames)
+ co_localspluskinds = self.generate(name + "_localspluskinds", localspluskinds)
+ # Derived values
+ nlocals, nplaincellvars, ncellvars, nfreevars = \
+ get_localsplus_counts(code, localsplusnames, localspluskinds)
+ co_code_adaptive = make_string_literal(code.co_code)
+ self.write("static")
+ with self.indent():
+ self.write(f"struct _PyCode_DEF({len(code.co_code)})")
+ with self.block(f"{name} =", ";"):
+ self.object_var_head("PyCode_Type", len(code.co_code) // 2)
+ # But the ordering here must match that in cpython/code.h
+ # (which is a pain because we tend to reorder those for perf)
+ # otherwise MSVC doesn't like it.
+ self.write(f".co_consts = {co_consts},")
+ self.write(f".co_names = {co_names},")
+ self.write(f".co_exceptiontable = {co_exceptiontable},")
+ self.field(code, "co_flags")
+ self.write(".co_warmup = QUICKENING_INITIAL_WARMUP_VALUE,")
+ self.write("._co_linearray_entry_size = 0,")
+ self.field(code, "co_argcount")
+ self.field(code, "co_posonlyargcount")
+ self.field(code, "co_kwonlyargcount")
+ self.write(f".co_framesize = {code.co_stacksize + len(localsplusnames)} + FRAME_SPECIALS_SIZE,")
+ self.field(code, "co_stacksize")
+ self.field(code, "co_firstlineno")
+ self.write(f".co_nlocalsplus = {len(localsplusnames)},")
+ self.field(code, "co_nlocals")
+ self.write(f".co_nplaincellvars = {nplaincellvars},")
+ self.write(f".co_ncellvars = {ncellvars},")
+ self.write(f".co_nfreevars = {nfreevars},")
+ self.write(f".co_localsplusnames = {co_localsplusnames},")
+ self.write(f".co_localspluskinds = {co_localspluskinds},")
+ self.write(f".co_filename = {co_filename},")
+ self.write(f".co_name = {co_name},")
+ self.write(f".co_qualname = {co_qualname},")
+ self.write(f".co_linetable = {co_linetable},")
+ self.write(f"._co_cached = NULL,")
+ self.write("._co_linearray = NULL,")
+ self.write(f".co_code_adaptive = {co_code_adaptive},")
+ for i, op in enumerate(code.co_code[::2]):
+ if op == RESUME:
+ self.write(f"._co_firsttraceable = {i},")
+ break
+ name_as_code = f"(PyCodeObject *)&{name}"
+ self.deallocs.append(f"_PyStaticCode_Dealloc({name_as_code});")
+ self.interns.append(f"_PyStaticCode_InternStrings({name_as_code})")
+ return f"& {name}.ob_base.ob_base"
+
+ def generate_tuple(self, name: str, t: Tuple[object, ...]) -> str:
+ if len(t) == 0:
+ return f"(PyObject *)& _Py_SINGLETON(tuple_empty)"
+ items = [self.generate(f"{name}_{i}", it) for i, it in enumerate(t)]
+ self.write("static")
+ with self.indent():
+ with self.block("struct"):
+ self.write("PyGC_Head _gc_head;")
+ with self.block("struct", "_object;"):
+ self.write("PyObject_VAR_HEAD")
+ if t:
+ self.write(f"PyObject *ob_item[{len(t)}];")
+ with self.block(f"{name} =", ";"):
+ with self.block("._object =", ","):
+ self.object_var_head("PyTuple_Type", len(t))
+ if items:
+ with self.block(f".ob_item =", ","):
+ for item in items:
+ self.write(item + ",")
+ return f"& {name}._object.ob_base.ob_base"
+
+ def _generate_int_for_bits(self, name: str, i: int, digit: int) -> None:
+ sign = -1 if i < 0 else 0 if i == 0 else +1
+ i = abs(i)
+ digits: list[int] = []
+ while i:
+ i, rem = divmod(i, digit)
+ digits.append(rem)
+ self.write("static")
+ with self.indent():
+ with self.block("struct"):
+ self.write("PyObject_VAR_HEAD")
+ self.write(f"digit ob_digit[{max(1, len(digits))}];")
+ with self.block(f"{name} =", ";"):
+ self.object_var_head("PyLong_Type", sign*len(digits))
+ if digits:
+ ds = ", ".join(map(str, digits))
+ self.write(f".ob_digit = {{ {ds} }},")
+
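+    # Worked example (illustrative): with 15-bit digits, 100000 is emitted as
+    # ob_digit = { 1696, 3 } (100000 == 3*2**15 + 1696) with ob_size = 2;
+    # negative values get a negative ob_size.
+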
+ def generate_int(self, name: str, i: int) -> str:
+ if -5 <= i <= 256:
+ return f"(PyObject *)&_PyLong_SMALL_INTS[_PY_NSMALLNEGINTS + {i}]"
+ if i >= 0:
+ name = f"const_int_{i}"
+ else:
+ name = f"const_int_negative_{abs(i)}"
+ if abs(i) < 2**15:
+ self._generate_int_for_bits(name, i, 2**15)
+ else:
+ connective = "if"
+ for bits_in_digit in 15, 30:
+ self.write(f"#{connective} PYLONG_BITS_IN_DIGIT == {bits_in_digit}")
+ self._generate_int_for_bits(name, i, 2**bits_in_digit)
+ connective = "elif"
+ self.write("#else")
+ self.write('#error "PYLONG_BITS_IN_DIGIT should be 15 or 30"')
+ self.write("#endif")
+ # If neither clause applies, it won't compile
+ return f"& {name}.ob_base.ob_base"
+
+ def generate_float(self, name: str, x: float) -> str:
+ with self.block(f"static PyFloatObject {name} =", ";"):
+ self.object_head("PyFloat_Type")
+ self.write(f".ob_fval = {x},")
+ return f"&{name}.ob_base"
+
+ def generate_complex(self, name: str, z: complex) -> str:
+ with self.block(f"static PyComplexObject {name} =", ";"):
+ self.object_head("PyComplex_Type")
+ self.write(f".cval = {{ {z.real}, {z.imag} }},")
+ return f"&{name}.ob_base"
+
+ def generate_frozenset(self, name: str, fs: FrozenSet[object]) -> str:
+ try:
+ fs = sorted(fs)
+ except TypeError:
+ # frozen set with incompatible types, fallback to repr()
+ fs = sorted(fs, key=repr)
+ ret = self.generate_tuple(name, tuple(fs))
+ self.write("// TODO: The above tuple should be a frozenset")
+ return ret
+
+    def generate_file(self, module: str, code: object) -> None:
+ module = module.replace(".", "_")
+ self.generate(f"{module}_toplevel", code)
+ with self.block(f"static void {module}_do_patchups(void)"):
+ for p in self.patchups:
+ self.write(p)
+ self.patchups.clear()
+ self.write(EPILOGUE.replace("%%NAME%%", module))
+
+ def generate(self, name: str, obj: object) -> str:
+ # Use repr() in the key to distinguish -0.0 from +0.0
+ key = (type(obj), obj, repr(obj))
+ if key in self.cache:
+ self.hits += 1
+ # print(f"Cache hit {key!r:.40}: {self.cache[key]!r:.40}")
+ return self.cache[key]
+ self.misses += 1
+        if isinstance(obj, (types.CodeType, umarshal.Code)):
+ val = self.generate_code(name, obj)
+ elif isinstance(obj, tuple):
+ val = self.generate_tuple(name, obj)
+ elif isinstance(obj, str):
+ val = self.generate_unicode(name, obj)
+ elif isinstance(obj, bytes):
+ val = self.generate_bytes(name, obj)
+ elif obj is True:
+ return "Py_True"
+ elif obj is False:
+ return "Py_False"
+ elif isinstance(obj, int):
+ val = self.generate_int(name, obj)
+ elif isinstance(obj, float):
+ val = self.generate_float(name, obj)
+ elif isinstance(obj, complex):
+ val = self.generate_complex(name, obj)
+ elif isinstance(obj, frozenset):
+ val = self.generate_frozenset(name, obj)
+ elif obj is builtins.Ellipsis:
+ return "Py_Ellipsis"
+ elif obj is None:
+ return "Py_None"
+ else:
+ raise TypeError(
+ f"Cannot generate code for {type(obj).__name__} object")
+ # print(f"Cache store {key!r:.40}: {val!r:.40}")
+ self.cache[key] = val
+ return val
+
+
+EPILOGUE = """
+PyObject *
+_Py_get_%%NAME%%_toplevel(void)
+{
+ %%NAME%%_do_patchups();
+ return Py_NewRef((PyObject *) &%%NAME%%_toplevel);
+}
+"""
+
+FROZEN_COMMENT_C = "/* Auto-generated by Programs/_freeze_module.c */"
+FROZEN_COMMENT_PY = "/* Auto-generated by Programs/_freeze_module.py */"
+
+FROZEN_DATA_LINE = r"\s*(\d+,\s*)+\s*"
+
+
+def is_frozen_header(source: str) -> bool:
+ return source.startswith((FROZEN_COMMENT_C, FROZEN_COMMENT_PY))
+
+
+def decode_frozen_data(source: str) -> types.CodeType:
+ lines = source.splitlines()
+ while lines and re.match(FROZEN_DATA_LINE, lines[0]) is None:
+ del lines[0]
+ while lines and re.match(FROZEN_DATA_LINE, lines[-1]) is None:
+ del lines[-1]
+ values: Tuple[int, ...] = ast.literal_eval("".join(lines).strip())
+ data = bytes(values)
+ return umarshal.loads(data)
+
+
+def generate(args: list[str], output: TextIO) -> None:
+ printer = Printer(output)
+ for arg in args:
+ file, modname = arg.rsplit(':', 1)
+ with open(file, "r", encoding="utf8") as fd:
+ source = fd.read()
+ if is_frozen_header(source):
+ code = decode_frozen_data(source)
+ else:
+            code = compile(source, f"<frozen {modname}>", "exec")
+ printer.generate_file(modname, code)
+ with printer.block(f"void\n_Py_Deepfreeze_Fini(void)"):
+ for p in printer.deallocs:
+ printer.write(p)
+ with printer.block(f"int\n_Py_Deepfreeze_Init(void)"):
+ for p in printer.interns:
+ with printer.block(f"if ({p} < 0)"):
+ printer.write("return -1;")
+ printer.write("return 0;")
+ if verbose:
+ print(f"Cache hits: {printer.hits}, misses: {printer.misses}")
+
+
+parser = argparse.ArgumentParser()
+parser.add_argument("-o", "--output", help="Defaults to deepfreeze.c", default="deepfreeze.c")
+parser.add_argument("-v", "--verbose", action="store_true", help="Print diagnostics")
+parser.add_argument('args', nargs="+", help="Input file and module name (required) in file:modname format")
+
+@contextlib.contextmanager
+def report_time(label: str):
+ t0 = time.time()
+ try:
+ yield
+ finally:
+ t1 = time.time()
+ if verbose:
+ print(f"{label}: {t1-t0:.3f} sec")
+
+
+def main() -> None:
+ global verbose
+ args = parser.parse_args()
+ verbose = args.verbose
+ output = args.output
+ with open(output, "w", encoding="utf-8") as file:
+ with report_time("generate"):
+ generate(args.args, file)
+ if verbose:
+ print(f"Wrote {os.path.getsize(output)} bytes to {output}")
+
+
+if __name__ == "__main__":
+ main()
diff --git a/Tools/build/freeze_modules.py b/Tools/build/freeze_modules.py
new file mode 100644
index 0000000..810224b
--- /dev/null
+++ b/Tools/build/freeze_modules.py
@@ -0,0 +1,733 @@
+"""Freeze modules and regen related files (e.g. Python/frozen.c).
+
+See the notes at the top of Python/frozen.c for more info.
+"""
+
+from collections import namedtuple
+import hashlib
+import os
+import ntpath
+import posixpath
+import argparse
+from update_file import updating_file_with_tmpfile
+
+
+ROOT_DIR = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
+ROOT_DIR = os.path.abspath(ROOT_DIR)
+FROZEN_ONLY = os.path.join(ROOT_DIR, 'Tools', 'freeze', 'flag.py')
+
+STDLIB_DIR = os.path.join(ROOT_DIR, 'Lib')
+# If FROZEN_MODULES_DIR or DEEPFROZEN_MODULES_DIR is changed then the
+# .gitattributes and .gitignore files need to be updated.
+FROZEN_MODULES_DIR = os.path.join(ROOT_DIR, 'Python', 'frozen_modules')
+DEEPFROZEN_MODULES_DIR = os.path.join(ROOT_DIR, 'Python', 'deepfreeze')
+
+FROZEN_FILE = os.path.join(ROOT_DIR, 'Python', 'frozen.c')
+MAKEFILE = os.path.join(ROOT_DIR, 'Makefile.pre.in')
+PCBUILD_PROJECT = os.path.join(ROOT_DIR, 'PCbuild', '_freeze_module.vcxproj')
+PCBUILD_FILTERS = os.path.join(ROOT_DIR, 'PCbuild', '_freeze_module.vcxproj.filters')
+PCBUILD_PYTHONCORE = os.path.join(ROOT_DIR, 'PCbuild', 'pythoncore.vcxproj')
+
+
+OS_PATH = 'ntpath' if os.name == 'nt' else 'posixpath'
+
+# These are modules that get frozen.
+TESTS_SECTION = 'Test module'
+FROZEN = [
+ # See parse_frozen_spec() for the format.
+ # In cases where the frozenid is duplicated, the first one is re-used.
+ ('import system', [
+ # These frozen modules are necessary for bootstrapping
+ # the import system.
+ 'importlib._bootstrap : _frozen_importlib',
+ 'importlib._bootstrap_external : _frozen_importlib_external',
+ # This module is important because some Python builds rely
+ # on a builtin zip file instead of a filesystem.
+ 'zipimport',
+ ]),
+ ('stdlib - startup, without site (python -S)', [
+ 'abc',
+ 'codecs',
+        # For now we do not freeze the encodings, due to the noise all
+ # those extra modules add to the text printed during the build.
+ # (See https://github.com/python/cpython/pull/28398#pullrequestreview-756856469.)
+ #'<encodings.*>',
+ 'io',
+ ]),
+ ('stdlib - startup, with site', [
+ '_collections_abc',
+ '_sitebuiltins',
+ 'genericpath',
+ 'ntpath',
+ 'posixpath',
+ # We must explicitly mark os.path as a frozen module
+ # even though it will never be imported.
+ f'{OS_PATH} : os.path',
+ 'os',
+ 'site',
+ 'stat',
+ ]),
+ ('runpy - run module with -m', [
+ "importlib.util",
+ "importlib.machinery",
+ "runpy",
+ ]),
+ (TESTS_SECTION, [
+ '__hello__',
+ '__hello__ : __hello_alias__',
+ '__hello__ : <__phello_alias__>',
+ '__hello__ : __phello_alias__.spam',
+ '<__phello__.**.*>',
+ f'frozen_only : __hello_only__ = {FROZEN_ONLY}',
+ ]),
+]
+BOOTSTRAP = {
+ 'importlib._bootstrap',
+ 'importlib._bootstrap_external',
+ 'zipimport',
+}
+
+
+#######################################
+# platform-specific helpers
+
+if os.path is posixpath:
+ relpath_for_posix_display = os.path.relpath
+
+ def relpath_for_windows_display(path, base):
+ return ntpath.relpath(
+ ntpath.join(*path.split(os.path.sep)),
+ ntpath.join(*base.split(os.path.sep)),
+ )
+
+else:
+ relpath_for_windows_display = ntpath.relpath
+
+ def relpath_for_posix_display(path, base):
+ return posixpath.relpath(
+ posixpath.join(*path.split(os.path.sep)),
+ posixpath.join(*base.split(os.path.sep)),
+ )
+
+
+#######################################
+# specs
+
+def parse_frozen_specs():
+ seen = {}
+ for section, specs in FROZEN:
+ parsed = _parse_specs(specs, section, seen)
+ for item in parsed:
+ frozenid, pyfile, modname, ispkg, section = item
+ try:
+ source = seen[frozenid]
+ except KeyError:
+ source = FrozenSource.from_id(frozenid, pyfile)
+ seen[frozenid] = source
+ else:
+ assert not pyfile or pyfile == source.pyfile, item
+ yield FrozenModule(modname, ispkg, section, source)
+
+
+def _parse_specs(specs, section, seen):
+ for spec in specs:
+ info, subs = _parse_spec(spec, seen, section)
+ yield info
+ for info in subs or ():
+ yield info
+
+
+def _parse_spec(spec, knownids=None, section=None):
+ """Yield an info tuple for each module corresponding to the given spec.
+
+ The info consists of: (frozenid, pyfile, modname, ispkg, section).
+
+ Supported formats:
+
+ frozenid
+ frozenid : modname
+ frozenid : modname = pyfile
+
+ "frozenid" and "modname" must be valid module names (dot-separated
+ identifiers). If "modname" is not provided then "frozenid" is used.
+ If "pyfile" is not provided then the filename of the module
+ corresponding to "frozenid" is used.
+
+    Angle brackets around a frozenid (e.g. '<encodings>') indicate
+ it is a package. This also means it must be an actual module
+ (i.e. "pyfile" cannot have been provided). Such values can have
+ patterns to expand submodules:
+
+ <encodings.*> - also freeze all direct submodules
+ <encodings.**.*> - also freeze the full submodule tree
+
+ As with "frozenid", angle brackets around "modname" indicate
+ it is a package. However, in this case "pyfile" should not
+ have been provided and patterns in "modname" are not supported.
+ Also, if "modname" has brackets then "frozenid" should not,
+ and "pyfile" should have been provided..
+ """
+ frozenid, _, remainder = spec.partition(':')
+ modname, _, pyfile = remainder.partition('=')
+ frozenid = frozenid.strip()
+ modname = modname.strip()
+ pyfile = pyfile.strip()
+
+ submodules = None
+ if modname.startswith('<') and modname.endswith('>'):
+ assert check_modname(frozenid), spec
+ modname = modname[1:-1]
+ assert check_modname(modname), spec
+ if frozenid in knownids:
+ pass
+ elif pyfile:
+ assert not os.path.isdir(pyfile), spec
+ else:
+ pyfile = _resolve_module(frozenid, ispkg=False)
+ ispkg = True
+ elif pyfile:
+ assert check_modname(frozenid), spec
+ assert not knownids or frozenid not in knownids, spec
+ assert check_modname(modname), spec
+ assert not os.path.isdir(pyfile), spec
+ ispkg = False
+ elif knownids and frozenid in knownids:
+ assert check_modname(frozenid), spec
+ assert check_modname(modname), spec
+ ispkg = False
+ else:
+ assert not modname or check_modname(modname), spec
+ resolved = iter(resolve_modules(frozenid))
+ frozenid, pyfile, ispkg = next(resolved)
+ if not modname:
+ modname = frozenid
+ if ispkg:
+ pkgid = frozenid
+ pkgname = modname
+ pkgfiles = {pyfile: pkgid}
+ def iter_subs():
+ for frozenid, pyfile, ispkg in resolved:
+ if pkgname:
+ modname = frozenid.replace(pkgid, pkgname, 1)
+ else:
+ modname = frozenid
+ if pyfile:
+ if pyfile in pkgfiles:
+ frozenid = pkgfiles[pyfile]
+ pyfile = None
+ elif ispkg:
+ pkgfiles[pyfile] = frozenid
+ yield frozenid, pyfile, modname, ispkg, section
+ submodules = iter_subs()
+
+ info = (frozenid, pyfile or None, modname, ispkg, section)
+ return info, submodules
+
+
+#######################################
+# frozen source files
+
+class FrozenSource(namedtuple('FrozenSource', 'id pyfile frozenfile deepfreezefile')):
+
+ @classmethod
+ def from_id(cls, frozenid, pyfile=None):
+ if not pyfile:
+ pyfile = os.path.join(STDLIB_DIR, *frozenid.split('.')) + '.py'
+ #assert os.path.exists(pyfile), (frozenid, pyfile)
+ frozenfile = resolve_frozen_file(frozenid, FROZEN_MODULES_DIR)
+ deepfreezefile = resolve_frozen_file(frozenid, DEEPFROZEN_MODULES_DIR)
+ return cls(frozenid, pyfile, frozenfile, deepfreezefile)
+
+ @property
+ def frozenid(self):
+ return self.id
+
+ @property
+ def modname(self):
+ if self.pyfile.startswith(STDLIB_DIR):
+ return self.id
+ return None
+
+ @property
+ def symbol(self):
+ # This matches what we do in Programs/_freeze_module.c:
+ name = self.frozenid.replace('.', '_')
+ return '_Py_M__' + name
+
+ @property
+ def ispkg(self):
+ if not self.pyfile:
+ return False
+ elif self.frozenid.endswith('.__init__'):
+ return False
+ else:
+ return os.path.basename(self.pyfile) == '__init__.py'
+
+ @property
+ def isbootstrap(self):
+ return self.id in BOOTSTRAP
+
+
+def resolve_frozen_file(frozenid, destdir):
+ """Return the filename corresponding to the given frozen ID.
+
+ For stdlib modules the ID will always be the full name
+ of the source module.
+ """
+ if not isinstance(frozenid, str):
+ try:
+ frozenid = frozenid.frozenid
+ except AttributeError:
+ raise ValueError(f'unsupported frozenid {frozenid!r}')
+ # We use a consistent naming convention for all frozen modules.
+ frozenfile = f'{frozenid}.h'
+ if not destdir:
+ return frozenfile
+ return os.path.join(destdir, frozenfile)
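+
+# For example (illustrative): resolve_frozen_file('importlib._bootstrap',
+# FROZEN_MODULES_DIR) returns <ROOT_DIR>/Python/frozen_modules/importlib._bootstrap.h.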
+
+
+#######################################
+# frozen modules
+
+class FrozenModule(namedtuple('FrozenModule', 'name ispkg section source')):
+
+ def __getattr__(self, name):
+ return getattr(self.source, name)
+
+ @property
+ def modname(self):
+ return self.name
+
+ @property
+ def orig(self):
+ return self.source.modname
+
+ @property
+ def isalias(self):
+ orig = self.source.modname
+ if not orig:
+ return True
+ return self.name != orig
+
+ def summarize(self):
+ source = self.source.modname
+ if source:
+ source = f'<{source}>'
+ else:
+ source = relpath_for_posix_display(self.pyfile, ROOT_DIR)
+ return {
+ 'module': self.name,
+ 'ispkg': self.ispkg,
+ 'source': source,
+ 'frozen': os.path.basename(self.frozenfile),
+ 'checksum': _get_checksum(self.frozenfile),
+ }
+
+
+def _iter_sources(modules):
+ seen = set()
+ for mod in modules:
+ if mod.source not in seen:
+ yield mod.source
+ seen.add(mod.source)
+
+
+#######################################
+# generic helpers
+
+def _get_checksum(filename):
+ with open(filename, "rb") as infile:
+ contents = infile.read()
+ m = hashlib.sha256()
+ m.update(contents)
+ return m.hexdigest()
+
+
+def resolve_modules(modname, pyfile=None):
+ if modname.startswith('<') and modname.endswith('>'):
+ if pyfile:
+ assert os.path.isdir(pyfile) or os.path.basename(pyfile) == '__init__.py', pyfile
+ ispkg = True
+ modname = modname[1:-1]
+ rawname = modname
+ # For now, we only expect match patterns at the end of the name.
+ _modname, sep, match = modname.rpartition('.')
+ if sep:
+ if _modname.endswith('.**'):
+ modname = _modname[:-3]
+ match = f'**.{match}'
+ elif match and not match.isidentifier():
+ modname = _modname
+ # Otherwise it's a plain name so we leave it alone.
+ else:
+ match = None
+ else:
+ ispkg = False
+ rawname = modname
+ match = None
+
+ if not check_modname(modname):
+ raise ValueError(f'not a valid module name ({rawname})')
+
+ if not pyfile:
+ pyfile = _resolve_module(modname, ispkg=ispkg)
+ elif os.path.isdir(pyfile):
+ pyfile = _resolve_module(modname, pyfile, ispkg)
+ yield modname, pyfile, ispkg
+
+ if match:
+ pkgdir = os.path.dirname(pyfile)
+ yield from iter_submodules(modname, pkgdir, match)
+
+
+def check_modname(modname):
+ return all(n.isidentifier() for n in modname.split('.'))
+
+
+def iter_submodules(pkgname, pkgdir=None, match='*'):
+ if not pkgdir:
+ pkgdir = os.path.join(STDLIB_DIR, *pkgname.split('.'))
+ if not match:
+ match = '**.*'
+ match_modname = _resolve_modname_matcher(match, pkgdir)
+
+ def _iter_submodules(pkgname, pkgdir):
+ for entry in sorted(os.scandir(pkgdir), key=lambda e: e.name):
+ matched, recursive = match_modname(entry.name)
+ if not matched:
+ continue
+ modname = f'{pkgname}.{entry.name}'
+ if modname.endswith('.py'):
+ yield modname[:-3], entry.path, False
+ elif entry.is_dir():
+ pyfile = os.path.join(entry.path, '__init__.py')
+ # We ignore namespace packages.
+ if os.path.exists(pyfile):
+ yield modname, pyfile, True
+ if recursive:
+ yield from _iter_submodules(modname, entry.path)
+
+ return _iter_submodules(pkgname, pkgdir)
+
+
+def _resolve_modname_matcher(match, rootdir=None):
+ if isinstance(match, str):
+ if match.startswith('**.'):
+ recursive = True
+ pat = match[3:]
+ assert match
+ else:
+ recursive = False
+ pat = match
+
+ if pat == '*':
+ def match_modname(modname):
+ return True, recursive
+ else:
+ raise NotImplementedError(match)
+ elif callable(match):
+ match_modname = match(rootdir)
+ else:
+ raise ValueError(f'unsupported matcher {match!r}')
+ return match_modname
+
+
+def _resolve_module(modname, pathentry=STDLIB_DIR, ispkg=False):
+ assert pathentry, pathentry
+ pathentry = os.path.normpath(pathentry)
+ assert os.path.isabs(pathentry)
+ if ispkg:
+ return os.path.join(pathentry, *modname.split('.'), '__init__.py')
+ return os.path.join(pathentry, *modname.split('.')) + '.py'
+
+
+#######################################
+# regenerating dependent files
+
+def find_marker(lines, marker, file):
+ for pos, line in enumerate(lines):
+ if marker in line:
+ return pos
+ raise Exception(f"Can't find {marker!r} in file {file}")
+
+
+def replace_block(lines, start_marker, end_marker, replacements, file):
+ start_pos = find_marker(lines, start_marker, file)
+ end_pos = find_marker(lines, end_marker, file)
+ if end_pos <= start_pos:
+ raise Exception(f"End marker {end_marker!r} "
+ f"occurs before start marker {start_marker!r} "
+ f"in file {file}")
+ replacements = [line.rstrip() + '\n' for line in replacements]
+ return lines[:start_pos + 1] + replacements + lines[end_pos:]
+
+
+def regen_frozen(modules, frozen_modules: bool):
+ headerlines = []
+ parentdir = os.path.dirname(FROZEN_FILE)
+ if frozen_modules:
+ for src in _iter_sources(modules):
+ # Adding a comment to separate sections here doesn't add much,
+ # so we don't.
+ header = relpath_for_posix_display(src.frozenfile, parentdir)
+ headerlines.append(f'#include "{header}"')
+
+ externlines = []
+ bootstraplines = []
+ stdliblines = []
+ testlines = []
+ aliaslines = []
+ indent = ' '
+ lastsection = None
+ for mod in modules:
+ if mod.isbootstrap:
+ lines = bootstraplines
+ elif mod.section == TESTS_SECTION:
+ lines = testlines
+ else:
+ lines = stdliblines
+ if mod.section != lastsection:
+ if lastsection is not None:
+ lines.append('')
+ lines.append(f'/* {mod.section} */')
+ lastsection = mod.section
+
+        # Also add an extern declaration for the corresponding
+ # deepfreeze-generated function.
+ orig_name = mod.source.id
+ code_name = orig_name.replace(".", "_")
+ get_code_name = "_Py_get_%s_toplevel" % code_name
+ externlines.append("extern PyObject *%s(void);" % get_code_name)
+
+ symbol = mod.symbol
+ pkg = 'true' if mod.ispkg else 'false'
+ if not frozen_modules:
+ line = ('{"%s", NULL, 0, %s, GET_CODE(%s)},'
+ ) % (mod.name, pkg, code_name)
+ else:
+ line = ('{"%s", %s, (int)sizeof(%s), %s, GET_CODE(%s)},'
+ ) % (mod.name, symbol, symbol, pkg, code_name)
+ lines.append(line)
+
+ if mod.isalias:
+ if not mod.orig:
+ entry = '{"%s", NULL},' % (mod.name,)
+ elif mod.source.ispkg:
+ entry = '{"%s", "<%s"},' % (mod.name, mod.orig)
+ else:
+ entry = '{"%s", "%s"},' % (mod.name, mod.orig)
+ aliaslines.append(indent + entry)
+
+ for lines in (bootstraplines, stdliblines, testlines):
+ # TODO: Is this necessary any more?
+ if not lines[0]:
+ del lines[0]
+ for i, line in enumerate(lines):
+ if line:
+ lines[i] = indent + line
+
+ print(f'# Updating {os.path.relpath(FROZEN_FILE)}')
+ with updating_file_with_tmpfile(FROZEN_FILE) as (infile, outfile):
+ lines = infile.readlines()
+ # TODO: Use more obvious markers, e.g.
+ # $START GENERATED FOOBAR$ / $END GENERATED FOOBAR$
+ lines = replace_block(
+ lines,
+ "/* Includes for frozen modules: */",
+ "/* End includes */",
+ headerlines,
+ FROZEN_FILE,
+ )
+ lines = replace_block(
+ lines,
+ "/* Start extern declarations */",
+ "/* End extern declarations */",
+ externlines,
+ FROZEN_FILE,
+ )
+ lines = replace_block(
+ lines,
+ "static const struct _frozen bootstrap_modules[] =",
+ "/* bootstrap sentinel */",
+ bootstraplines,
+ FROZEN_FILE,
+ )
+ lines = replace_block(
+ lines,
+ "static const struct _frozen stdlib_modules[] =",
+ "/* stdlib sentinel */",
+ stdliblines,
+ FROZEN_FILE,
+ )
+ lines = replace_block(
+ lines,
+ "static const struct _frozen test_modules[] =",
+ "/* test sentinel */",
+ testlines,
+ FROZEN_FILE,
+ )
+ lines = replace_block(
+ lines,
+ "const struct _module_alias aliases[] =",
+ "/* aliases sentinel */",
+ aliaslines,
+ FROZEN_FILE,
+ )
+ outfile.writelines(lines)
+
+
+def regen_makefile(modules):
+ pyfiles = []
+ frozenfiles = []
+ rules = ['']
+ deepfreezerules = ["Python/deepfreeze/deepfreeze.c: $(DEEPFREEZE_DEPS)",
+ "\t$(PYTHON_FOR_FREEZE) $(srcdir)/Tools/build/deepfreeze.py \\"]
+ for src in _iter_sources(modules):
+ frozen_header = relpath_for_posix_display(src.frozenfile, ROOT_DIR)
+ frozenfiles.append(f'\t\t{frozen_header} \\')
+
+ pyfile = relpath_for_posix_display(src.pyfile, ROOT_DIR)
+ pyfiles.append(f'\t\t{pyfile} \\')
+
+ if src.isbootstrap:
+ freezecmd = '$(FREEZE_MODULE_BOOTSTRAP)'
+ freezedep = '$(FREEZE_MODULE_BOOTSTRAP_DEPS)'
+ else:
+ freezecmd = '$(FREEZE_MODULE)'
+ freezedep = '$(FREEZE_MODULE_DEPS)'
+
+ freeze = (f'{freezecmd} {src.frozenid} '
+ f'$(srcdir)/{pyfile} {frozen_header}')
+ rules.extend([
+ f'{frozen_header}: {pyfile} {freezedep}',
+ f'\t{freeze}',
+ '',
+ ])
+ deepfreezerules.append(f"\t{frozen_header}:{src.frozenid} \\")
+ deepfreezerules.append('\t-o Python/deepfreeze/deepfreeze.c')
+ pyfiles[-1] = pyfiles[-1].rstrip(" \\")
+ frozenfiles[-1] = frozenfiles[-1].rstrip(" \\")
+
+ print(f'# Updating {os.path.relpath(MAKEFILE)}')
+ with updating_file_with_tmpfile(MAKEFILE) as (infile, outfile):
+ lines = infile.readlines()
+ lines = replace_block(
+ lines,
+ "FROZEN_FILES_IN =",
+ "# End FROZEN_FILES_IN",
+ pyfiles,
+ MAKEFILE,
+ )
+ lines = replace_block(
+ lines,
+ "FROZEN_FILES_OUT =",
+ "# End FROZEN_FILES_OUT",
+ frozenfiles,
+ MAKEFILE,
+ )
+ lines = replace_block(
+ lines,
+ "# BEGIN: freezing modules",
+ "# END: freezing modules",
+ rules,
+ MAKEFILE,
+ )
+ lines = replace_block(
+ lines,
+ "# BEGIN: deepfreeze modules",
+ "# END: deepfreeze modules",
+ deepfreezerules,
+ MAKEFILE,
+ )
+ outfile.writelines(lines)
+
+
+def regen_pcbuild(modules):
+ projlines = []
+ filterlines = []
+ corelines = []
+ deepfreezerules = ['\t<Exec Command=\'$(PythonForBuild) "$(PySourcePath)Tools\\build\\deepfreeze.py" ^']
+ for src in _iter_sources(modules):
+ pyfile = relpath_for_windows_display(src.pyfile, ROOT_DIR)
+ header = relpath_for_windows_display(src.frozenfile, ROOT_DIR)
+ intfile = ntpath.splitext(ntpath.basename(header))[0] + '.g.h'
+ projlines.append(f' <None Include="..\\{pyfile}">')
+ projlines.append(f' <ModName>{src.frozenid}</ModName>')
+ projlines.append(f' <IntFile>$(IntDir){intfile}</IntFile>')
+ projlines.append(f' <OutFile>$(PySourcePath){header}</OutFile>')
+ projlines.append(f' </None>')
+
+ filterlines.append(f' <None Include="..\\{pyfile}">')
+ filterlines.append(' <Filter>Python Files</Filter>')
+ filterlines.append(' </None>')
+ deepfreezerules.append(f'\t\t "$(PySourcePath){header}:{src.frozenid}" ^')
+ deepfreezerules.append('\t\t "-o" "$(PySourcePath)Python\\deepfreeze\\deepfreeze.c"\'/>' )
+
+ corelines.append(f' <ClCompile Include="..\\Python\\deepfreeze\\deepfreeze.c" />')
+
+ print(f'# Updating {os.path.relpath(PCBUILD_PROJECT)}')
+ with updating_file_with_tmpfile(PCBUILD_PROJECT) as (infile, outfile):
+ lines = infile.readlines()
+ lines = replace_block(
+ lines,
+ '<!-- BEGIN frozen modules -->',
+ '<!-- END frozen modules -->',
+ projlines,
+ PCBUILD_PROJECT,
+ )
+ outfile.writelines(lines)
+ with updating_file_with_tmpfile(PCBUILD_PROJECT) as (infile, outfile):
+ lines = infile.readlines()
+ lines = replace_block(
+ lines,
+ '<!-- BEGIN deepfreeze rule -->',
+ '<!-- END deepfreeze rule -->',
+ deepfreezerules,
+ PCBUILD_PROJECT,
+ )
+ outfile.writelines(lines)
+ print(f'# Updating {os.path.relpath(PCBUILD_FILTERS)}')
+ with updating_file_with_tmpfile(PCBUILD_FILTERS) as (infile, outfile):
+ lines = infile.readlines()
+ lines = replace_block(
+ lines,
+ '<!-- BEGIN frozen modules -->',
+ '<!-- END frozen modules -->',
+ filterlines,
+ PCBUILD_FILTERS,
+ )
+ outfile.writelines(lines)
+ print(f'# Updating {os.path.relpath(PCBUILD_PYTHONCORE)}')
+ with updating_file_with_tmpfile(PCBUILD_PYTHONCORE) as (infile, outfile):
+ lines = infile.readlines()
+ lines = replace_block(
+ lines,
+ '<!-- BEGIN deepfreeze -->',
+ '<!-- END deepfreeze -->',
+ corelines,
+            PCBUILD_PYTHONCORE,
+ )
+ outfile.writelines(lines)
+
+
+#######################################
+# the script
+
+parser = argparse.ArgumentParser()
+parser.add_argument("--frozen-modules", action="store_true",
+ help="Use both frozen and deepfrozen modules. (default: uses only deepfrozen modules)")
+
+def main():
+ args = parser.parse_args()
+ frozen_modules: bool = args.frozen_modules
+ # Expand the raw specs, preserving order.
+ modules = list(parse_frozen_specs())
+
+ # Regen build-related files.
+ regen_makefile(modules)
+ regen_pcbuild(modules)
+ regen_frozen(modules, frozen_modules)
+
+
+if __name__ == '__main__':
+ main()
diff --git a/Tools/build/generate_global_objects.py b/Tools/build/generate_global_objects.py
new file mode 100644
index 0000000..dd67cfe
--- /dev/null
+++ b/Tools/build/generate_global_objects.py
@@ -0,0 +1,382 @@
+import contextlib
+import io
+import os.path
+import re
+
+SCRIPT_NAME = 'Tools/build/generate_global_objects.py'
+__file__ = os.path.abspath(__file__)
+ROOT = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
+INTERNAL = os.path.join(ROOT, 'Include', 'internal')
+
+
+IGNORED = {
+ 'ACTION', # Python/_warnings.c
+ 'ATTR', # Python/_warnings.c and Objects/funcobject.c
+ 'DUNDER', # Objects/typeobject.c
+ 'RDUNDER', # Objects/typeobject.c
+ 'SPECIAL', # Objects/weakrefobject.c
+ 'NAME', # Objects/typeobject.c
+}
+IDENTIFIERS = [
+ # from ADD() Python/_warnings.c
+ 'default',
+ 'ignore',
+
+ # from GET_WARNINGS_ATTR() in Python/_warnings.c
+ 'WarningMessage',
+ '_showwarnmsg',
+ '_warn_unawaited_coroutine',
+ 'defaultaction',
+ 'filters',
+ 'onceregistry',
+
+ # from WRAP_METHOD() in Objects/weakrefobject.c
+ '__bytes__',
+ '__reversed__',
+
+ # from COPY_ATTR() in Objects/funcobject.c
+ '__module__',
+ '__name__',
+ '__qualname__',
+ '__doc__',
+ '__annotations__',
+
+ # from SLOT* in Objects/typeobject.c
+ '__abs__',
+ '__add__',
+ '__aiter__',
+ '__and__',
+ '__anext__',
+ '__await__',
+ '__bool__',
+ '__call__',
+ '__contains__',
+ '__del__',
+ '__delattr__',
+ '__delete__',
+ '__delitem__',
+ '__eq__',
+ '__float__',
+ '__floordiv__',
+ '__ge__',
+ '__get__',
+ '__getattr__',
+ '__getattribute__',
+ '__getitem__',
+ '__gt__',
+ '__hash__',
+ '__iadd__',
+ '__iand__',
+ '__ifloordiv__',
+ '__ilshift__',
+ '__imatmul__',
+ '__imod__',
+ '__imul__',
+ '__index__',
+ '__init__',
+ '__int__',
+ '__invert__',
+ '__ior__',
+ '__ipow__',
+ '__irshift__',
+ '__isub__',
+ '__iter__',
+ '__itruediv__',
+ '__ixor__',
+ '__le__',
+ '__len__',
+ '__lshift__',
+ '__lt__',
+ '__matmul__',
+ '__mod__',
+ '__mul__',
+ '__ne__',
+ '__neg__',
+ '__new__',
+ '__next__',
+ '__or__',
+ '__pos__',
+ '__pow__',
+ '__radd__',
+ '__rand__',
+ '__repr__',
+ '__rfloordiv__',
+ '__rlshift__',
+ '__rmatmul__',
+ '__rmod__',
+ '__rmul__',
+ '__ror__',
+ '__rpow__',
+ '__rrshift__',
+ '__rshift__',
+ '__rsub__',
+ '__rtruediv__',
+ '__rxor__',
+ '__set__',
+ '__setattr__',
+ '__setitem__',
+ '__str__',
+ '__sub__',
+ '__truediv__',
+ '__xor__',
+ '__divmod__',
+ '__rdivmod__',
+]
+
+
+#######################################
+# helpers
+
+def iter_files():
+ for name in ('Modules', 'Objects', 'Parser', 'PC', 'Programs', 'Python'):
+ root = os.path.join(ROOT, name)
+ for dirname, _, files in os.walk(root):
+ for name in files:
+ if not name.endswith(('.c', '.h')):
+ continue
+ yield os.path.join(dirname, name)
+
+
+def iter_global_strings():
+ id_regex = re.compile(r'\b_Py_ID\((\w+)\)')
+ str_regex = re.compile(r'\b_Py_DECLARE_STR\((\w+), "(.*?)"\)')
+ for filename in iter_files():
+ try:
+ infile = open(filename, encoding='utf-8')
+ except FileNotFoundError:
+ # The file must have been a temporary file.
+ continue
+ with infile:
+ for lno, line in enumerate(infile, 1):
+ for m in id_regex.finditer(line):
+ identifier, = m.groups()
+ yield identifier, None, filename, lno, line
+ for m in str_regex.finditer(line):
+ varname, string = m.groups()
+ yield varname, string, filename, lno, line
+
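+# For instance (illustrative): a C line containing _Py_ID(__name__) yields
+# ('__name__', None, filename, lno, line), while a line containing
+# _Py_DECLARE_STR(dot, ".") would yield ('dot', '.', filename, lno, line).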
+
+def iter_to_marker(lines, marker):
+ for line in lines:
+ if line.rstrip() == marker:
+ break
+ yield line
+
+
+class Printer:
+
+ def __init__(self, file):
+ self.level = 0
+ self.file = file
+ self.continuation = [False]
+
+ @contextlib.contextmanager
+ def indent(self):
+ save_level = self.level
+ try:
+ self.level += 1
+ yield
+ finally:
+ self.level = save_level
+
+ def write(self, arg):
+ eol = '\n'
+ if self.continuation[-1]:
+ eol = f' \\{eol}' if arg else f'\\{eol}'
+ self.file.writelines((" "*self.level, arg, eol))
+
+ @contextlib.contextmanager
+ def block(self, prefix, suffix="", *, continuation=None):
+ if continuation is None:
+ continuation = self.continuation[-1]
+ self.continuation.append(continuation)
+
+ self.write(prefix + " {")
+ with self.indent():
+ yield
+ self.continuation.pop()
+ self.write("}" + suffix)
+
+
+@contextlib.contextmanager
+def open_for_changes(filename, orig):
+ """Like open() but only write to the file if it changed."""
+ outfile = io.StringIO()
+ yield outfile
+ text = outfile.getvalue()
+ if text != orig:
+ with open(filename, 'w', encoding='utf-8') as outfile:
+ outfile.write(text)
+ else:
+ print(f'# not changed: {filename}')
+
+
+#######################################
+# the global objects
+
+START = f'/* The following is auto-generated by {SCRIPT_NAME}. */'
+END = '/* End auto-generated code */'
+
+
+def generate_global_strings(identifiers, strings):
+ filename = os.path.join(INTERNAL, 'pycore_global_strings.h')
+
+ # Read the non-generated part of the file.
+ with open(filename) as infile:
+ orig = infile.read()
+ lines = iter(orig.rstrip().splitlines())
+ before = '\n'.join(iter_to_marker(lines, START))
+ for _ in iter_to_marker(lines, END):
+ pass
+ after = '\n'.join(lines)
+
+ # Generate the file.
+ with open_for_changes(filename, orig) as outfile:
+ printer = Printer(outfile)
+ printer.write(before)
+ printer.write(START)
+ with printer.block('struct _Py_global_strings', ';'):
+ with printer.block('struct', ' literals;'):
+ for literal, name in sorted(strings.items(), key=lambda x: x[1]):
+ printer.write(f'STRUCT_FOR_STR({name}, "{literal}")')
+ outfile.write('\n')
+ with printer.block('struct', ' identifiers;'):
+ for name in sorted(identifiers):
+ assert name.isidentifier(), name
+ printer.write(f'STRUCT_FOR_ID({name})')
+ with printer.block('struct', ' ascii[128];'):
+ printer.write("PyASCIIObject _ascii;")
+ printer.write("uint8_t _data[2];")
+ with printer.block('struct', ' latin1[128];'):
+ printer.write("PyCompactUnicodeObject _latin1;")
+ printer.write("uint8_t _data[2];")
+ printer.write(END)
+ printer.write(after)
+
+
+def generate_runtime_init(identifiers, strings):
+ # First get some info from the declarations.
+ nsmallposints = None
+ nsmallnegints = None
+ with open(os.path.join(INTERNAL, 'pycore_global_objects.h')) as infile:
+ for line in infile:
+ if line.startswith('#define _PY_NSMALLPOSINTS'):
+ nsmallposints = int(line.split()[-1])
+ elif line.startswith('#define _PY_NSMALLNEGINTS'):
+ nsmallnegints = int(line.split()[-1])
+ break
+ else:
+ raise NotImplementedError
+ assert nsmallposints and nsmallnegints
+
+ # Then target the runtime initializer.
+ filename = os.path.join(INTERNAL, 'pycore_runtime_init_generated.h')
+
+ # Read the non-generated part of the file.
+ with open(filename) as infile:
+ orig = infile.read()
+ lines = iter(orig.rstrip().splitlines())
+ before = '\n'.join(iter_to_marker(lines, START))
+ for _ in iter_to_marker(lines, END):
+ pass
+ after = '\n'.join(lines)
+
+ # Generate the file.
+ with open_for_changes(filename, orig) as outfile:
+ immortal_objects = []
+ printer = Printer(outfile)
+ printer.write(before)
+ printer.write(START)
+ with printer.block('#define _Py_global_objects_INIT', continuation=True):
+ with printer.block('.singletons =', ','):
+ # Global int objects.
+ with printer.block('.small_ints =', ','):
+ for i in range(-nsmallnegints, nsmallposints):
+ printer.write(f'_PyLong_DIGIT_INIT({i}),')
+ immortal_objects.append(f'(PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + {i}]')
+ printer.write('')
+ # Global bytes objects.
+ printer.write('.bytes_empty = _PyBytes_SIMPLE_INIT(0, 0),')
+ immortal_objects.append(f'(PyObject *)&_Py_SINGLETON(bytes_empty)')
+ with printer.block('.bytes_characters =', ','):
+ for i in range(256):
+ printer.write(f'_PyBytes_CHAR_INIT({i}),')
+ immortal_objects.append(f'(PyObject *)&_Py_SINGLETON(bytes_characters)[{i}]')
+ printer.write('')
+ # Global strings.
+ with printer.block('.strings =', ','):
+ with printer.block('.literals =', ','):
+ for literal, name in sorted(strings.items(), key=lambda x: x[1]):
+ printer.write(f'INIT_STR({name}, "{literal}"),')
+ immortal_objects.append(f'(PyObject *)&_Py_STR({name})')
+ with printer.block('.identifiers =', ','):
+ for name in sorted(identifiers):
+ assert name.isidentifier(), name
+ printer.write(f'INIT_ID({name}),')
+ immortal_objects.append(f'(PyObject *)&_Py_ID({name})')
+ with printer.block('.ascii =', ','):
+ for i in range(128):
+ printer.write(f'_PyASCIIObject_INIT("\\x{i:02x}"),')
+ immortal_objects.append(f'(PyObject *)&_Py_SINGLETON(strings).ascii[{i}]')
+ with printer.block('.latin1 =', ','):
+ for i in range(128, 256):
+ utf8 = ['"']
+ for c in chr(i).encode('utf-8'):
+ utf8.append(f"\\x{c:02x}")
+ utf8.append('"')
+ printer.write(f'_PyUnicode_LATIN1_INIT("\\x{i:02x}", {"".join(utf8)}),')
+ immortal_objects.append(f'(PyObject *)&_Py_SINGLETON(strings).latin1[{i} - 128]')
+ printer.write('')
+ with printer.block('.tuple_empty =', ','):
+ printer.write('.ob_base = _PyVarObject_IMMORTAL_INIT(&PyTuple_Type, 0)')
+ immortal_objects.append(f'(PyObject *)&_Py_SINGLETON(tuple_empty)')
+ printer.write('')
+ printer.write("static inline void")
+ with printer.block("_PyUnicode_InitStaticStrings(void)"):
+ printer.write(f'PyObject *string;')
+ for i in sorted(identifiers):
+ # This use of _Py_ID() is ignored by iter_global_strings()
+                # since iter_files() does not look under the Include/ directory.
+ printer.write(f'string = &_Py_ID({i});')
+ printer.write(f'PyUnicode_InternInPlace(&string);')
+ printer.write('')
+ printer.write('#ifdef Py_DEBUG')
+ printer.write("static inline void")
+ with printer.block("_PyStaticObjects_CheckRefcnt(void)"):
+ for i in immortal_objects:
+ with printer.block(f'if (Py_REFCNT({i}) < _PyObject_IMMORTAL_REFCNT)', ';'):
+ printer.write(f'_PyObject_Dump({i});')
+ printer.write(f'Py_FatalError("immortal object has less refcnt than '
+ 'expected _PyObject_IMMORTAL_REFCNT");')
+ printer.write('#endif')
+ printer.write(END)
+ printer.write(after)
+
+
+def get_identifiers_and_strings() -> 'tuple[set[str], dict[str, str]]':
+ identifiers = set(IDENTIFIERS)
+ strings = {}
+ for name, string, *_ in iter_global_strings():
+ if string is None:
+ if name not in IGNORED:
+ identifiers.add(name)
+ else:
+ if string not in strings:
+ strings[string] = name
+ elif name != strings[string]:
+                raise ValueError(f'string mismatch for {name!r} ({string!r} != {strings[string]!r})')
+ return identifiers, strings
+
+
+#######################################
+# the script
+
+def main() -> None:
+ identifiers, strings = get_identifiers_and_strings()
+
+ generate_global_strings(identifiers, strings)
+ generate_runtime_init(identifiers, strings)
+
+
+if __name__ == '__main__':
+ main()
diff --git a/Tools/build/generate_levenshtein_examples.py b/Tools/build/generate_levenshtein_examples.py
new file mode 100644
index 0000000..5a8360f
--- /dev/null
+++ b/Tools/build/generate_levenshtein_examples.py
@@ -0,0 +1,70 @@
+"""Generate 10,000 unique examples for the Levenshtein short-circuit tests."""
+
+import argparse
+from functools import cache
+import json
+import os.path
+from random import choices, randrange
+
+
+# This should be in sync with Lib/traceback.py. It's not importing those values
+# because this script is being executed by PYTHON_FOR_REGEN and not by the in-tree
+# build of Python.
+_MOVE_COST = 2
+_CASE_COST = 1
+
+
+def _substitution_cost(ch_a, ch_b):
+ if ch_a == ch_b:
+ return 0
+ if ch_a.lower() == ch_b.lower():
+ return _CASE_COST
+ return _MOVE_COST
+
+
+@cache
+def levenshtein(a, b):
+ if not a or not b:
+ return (len(a) + len(b)) * _MOVE_COST
+ option1 = levenshtein(a[:-1], b[:-1]) + _substitution_cost(a[-1], b[-1])
+ option2 = levenshtein(a[:-1], b) + _MOVE_COST
+ option3 = levenshtein(a, b[:-1]) + _MOVE_COST
+ return min(option1, option2, option3)
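+# Quick sanity check of the costs above: levenshtein("a", "A") == 1
+# (a pure case change costs _CASE_COST), while levenshtein("a", "b") == 2
+# (a full substitution costs _MOVE_COST).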
+
+
+def main():
+ parser = argparse.ArgumentParser(description=__doc__)
+ parser.add_argument('output_path', metavar='FILE', type=str)
+ parser.add_argument('--overwrite', dest='overwrite', action='store_const',
+ const=True, default=False,
+ help='overwrite an existing test file')
+
+ args = parser.parse_args()
+ output_path = os.path.realpath(args.output_path)
+ if not args.overwrite and os.path.isfile(output_path):
+ print(f"{output_path} already exists, skipping regeneration.")
+ print(
+ "To force, add --overwrite to the invocation of this tool or"
+ " delete the existing file."
+ )
+ return
+
+ examples = set()
+ # Create a lot of non-empty examples, which should end up with a Gauss-like
+ # distribution for even costs (moves) and odd costs (case substitutions).
+ while len(examples) < 9990:
+ a = ''.join(choices("abcABC", k=randrange(1, 10)))
+ b = ''.join(choices("abcABC", k=randrange(1, 10)))
+ expected = levenshtein(a, b)
+ examples.add((a, b, expected))
+ # Create one empty case each for strings between 0 and 9 in length.
+ for i in range(10):
+ b = ''.join(choices("abcABC", k=i))
+ expected = levenshtein("", b)
+ examples.add(("", b, expected))
+ with open(output_path, "w") as f:
+ json.dump(sorted(examples), f, indent=2)
+
+
+if __name__ == "__main__":
+ main()
diff --git a/Tools/build/generate_opcode_h.py b/Tools/build/generate_opcode_h.py
new file mode 100644
index 0000000..372221a
--- /dev/null
+++ b/Tools/build/generate_opcode_h.py
@@ -0,0 +1,199 @@
+# This script generates the opcode.h header file.
+
+import sys
+import tokenize
+
+SCRIPT_NAME = "Tools/build/generate_opcode_h.py"
+PYTHON_OPCODE = "Lib/opcode.py"
+
+header = f"""
+// Auto-generated by {SCRIPT_NAME} from {PYTHON_OPCODE}
+
+#ifndef Py_OPCODE_H
+#define Py_OPCODE_H
+#ifdef __cplusplus
+extern "C" {{
+#endif
+
+
+/* Instruction opcodes for compiled code */
+""".lstrip()
+
+footer = """
+
+#define IS_PSEUDO_OPCODE(op) (((op) >= MIN_PSEUDO_OPCODE) && ((op) <= MAX_PSEUDO_OPCODE))
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* !Py_OPCODE_H */
+"""
+
+internal_header = f"""
+// Auto-generated by {SCRIPT_NAME} from {PYTHON_OPCODE}
+
+#ifndef Py_INTERNAL_OPCODE_H
+#define Py_INTERNAL_OPCODE_H
+#ifdef __cplusplus
+extern "C" {{
+#endif
+
+#ifndef Py_BUILD_CORE
+# error "this header requires Py_BUILD_CORE define"
+#endif
+
+#include "opcode.h"
+""".lstrip()
+
+internal_footer = """
+#ifdef __cplusplus
+}
+#endif
+#endif // !Py_INTERNAL_OPCODE_H
+"""
+
+DEFINE = "#define {:<38} {:>3}\n"
+
+UINT32_MASK = (1<<32)-1
+
+def write_int_array_from_ops(name, ops, out):
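+    # Pack ``ops`` into a bitset of 9 uint32 words: opcode ``op`` sets bit
+    # ``op % 32`` of word ``op // 32``.  The assert below verifies that every
+    # opcode (pseudo opcodes included) fits within 9 * 32 = 288 bits.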
+ bits = 0
+ for op in ops:
+ bits |= 1<<op
+ out.write(f"static const uint32_t {name}[9] = {{\n")
+ for i in range(9):
+ out.write(f" {bits & UINT32_MASK}U,\n")
+ bits >>= 32
+ assert bits == 0
+ out.write(f"}};\n")
+
+def main(opcode_py, outfile='Include/opcode.h', internaloutfile='Include/internal/pycore_opcode.h'):
+ opcode = {}
+ if hasattr(tokenize, 'open'):
+ fp = tokenize.open(opcode_py) # Python 3.2+
+ else:
+ fp = open(opcode_py) # Python 2.7
+ with fp:
+ code = fp.read()
+ exec(code, opcode)
+ opmap = opcode['opmap']
+ opname = opcode['opname']
+ hasarg = opcode['hasarg']
+ hasconst = opcode['hasconst']
+ hasjrel = opcode['hasjrel']
+ hasjabs = opcode['hasjabs']
+ is_pseudo = opcode['is_pseudo']
+ _pseudo_ops = opcode['_pseudo_ops']
+
+ HAVE_ARGUMENT = opcode["HAVE_ARGUMENT"]
+ MIN_PSEUDO_OPCODE = opcode["MIN_PSEUDO_OPCODE"]
+ MAX_PSEUDO_OPCODE = opcode["MAX_PSEUDO_OPCODE"]
+
+ NUM_OPCODES = len(opname)
+ used = [ False ] * len(opname)
+ next_op = 1
+
+ for name, op in opmap.items():
+ used[op] = True
+
+ specialized_opmap = {}
+ opname_including_specialized = opname.copy()
+ for name in opcode['_specialized_instructions']:
+ while used[next_op]:
+ next_op += 1
+ specialized_opmap[name] = next_op
+ opname_including_specialized[next_op] = name
+ used[next_op] = True
+ specialized_opmap['DO_TRACING'] = 255
+ opname_including_specialized[255] = 'DO_TRACING'
+ used[255] = True
+
+ with (open(outfile, 'w') as fobj, open(internaloutfile, 'w') as iobj):
+ fobj.write(header)
+ iobj.write(internal_header)
+
+ for name in opname:
+ if name in opmap:
+ op = opmap[name]
+ if op == HAVE_ARGUMENT:
+ fobj.write(DEFINE.format("HAVE_ARGUMENT", HAVE_ARGUMENT))
+ if op == MIN_PSEUDO_OPCODE:
+ fobj.write(DEFINE.format("MIN_PSEUDO_OPCODE", MIN_PSEUDO_OPCODE))
+
+ fobj.write(DEFINE.format(name, op))
+
+ if op == MAX_PSEUDO_OPCODE:
+ fobj.write(DEFINE.format("MAX_PSEUDO_OPCODE", MAX_PSEUDO_OPCODE))
+
+
+ for name, op in specialized_opmap.items():
+ fobj.write(DEFINE.format(name, op))
+
+ iobj.write("\nextern const uint8_t _PyOpcode_Caches[256];\n")
+ iobj.write("\nextern const uint8_t _PyOpcode_Deopt[256];\n")
+ iobj.write("\n#ifdef NEED_OPCODE_TABLES\n")
+ write_int_array_from_ops("_PyOpcode_RelativeJump", opcode['hasjrel'], iobj)
+ write_int_array_from_ops("_PyOpcode_Jump", opcode['hasjrel'] + opcode['hasjabs'], iobj)
+
+ iobj.write("\nconst uint8_t _PyOpcode_Caches[256] = {\n")
+ for i, entries in enumerate(opcode["_inline_cache_entries"]):
+ if entries:
+ iobj.write(f" [{opname[i]}] = {entries},\n")
+ iobj.write("};\n")
+
+ deoptcodes = {}
+ for basic, op in opmap.items():
+ if not is_pseudo(op):
+ deoptcodes[basic] = basic
+ for basic, family in opcode["_specializations"].items():
+ for specialized in family:
+ deoptcodes[specialized] = basic
+ iobj.write("\nconst uint8_t _PyOpcode_Deopt[256] = {\n")
+ for opt, deopt in sorted(deoptcodes.items()):
+ iobj.write(f" [{opt}] = {deopt},\n")
+ iobj.write("};\n")
+ iobj.write("#endif // NEED_OPCODE_TABLES\n")
+
+ fobj.write("\n")
+ fobj.write("#define HAS_ARG(op) ((((op) >= HAVE_ARGUMENT) && (!IS_PSEUDO_OPCODE(op)))\\")
+ for op in _pseudo_ops:
+ if opmap[op] in hasarg:
+ fobj.write(f"\n || ((op) == {op}) \\")
+ fobj.write("\n )\n")
+
+ fobj.write("\n")
+ fobj.write("#define HAS_CONST(op) (false\\")
+ for op in hasconst:
+ fobj.write(f"\n || ((op) == {opname[op]}) \\")
+ fobj.write("\n )\n")
+
+ fobj.write("\n")
+ for i, (op, _) in enumerate(opcode["_nb_ops"]):
+ fobj.write(DEFINE.format(op, i))
+
+ iobj.write("\n")
+ iobj.write("#ifdef Py_DEBUG\n")
+ iobj.write(f"static const char *const _PyOpcode_OpName[{NUM_OPCODES}] = {{\n")
+ for op, name in enumerate(opname_including_specialized):
+ if name[0] != "<":
+ op = name
+ iobj.write(f''' [{op}] = "{name}",\n''')
+ iobj.write("};\n")
+ iobj.write("#endif\n")
+
+ iobj.write("\n")
+ iobj.write("#define EXTRA_CASES \\\n")
+ for i, flag in enumerate(used):
+ if not flag:
+ iobj.write(f" case {i}: \\\n")
+ iobj.write(" ;\n")
+
+ fobj.write(footer)
+ iobj.write(internal_footer)
+
+
+ print(f"{outfile} regenerated from {opcode_py}")
+
+
+if __name__ == '__main__':
+ main(sys.argv[1], sys.argv[2], sys.argv[3])
diff --git a/Tools/build/generate_re_casefix.py b/Tools/build/generate_re_casefix.py
new file mode 100755
index 0000000..b57ac07
--- /dev/null
+++ b/Tools/build/generate_re_casefix.py
@@ -0,0 +1,96 @@
+#! /usr/bin/env python3
+# This script generates Lib/re/_casefix.py.
+
+import collections
+import sys
+import unicodedata
+
+SCRIPT_NAME = 'Tools/build/generate_re_casefix.py'
+
+def update_file(file, content):
+ try:
+ with open(file, 'r', encoding='utf-8') as fobj:
+ if fobj.read() == content:
+ return False
+ except (OSError, ValueError):
+ pass
+ with open(file, 'w', encoding='utf-8') as fobj:
+ fobj.write(content)
+ return True
+
+re_casefix_template = f"""\
+# Auto-generated by {SCRIPT_NAME}.
+
+# Maps the code of lowercased character to codes of different lowercased
+# characters which have the same uppercase.
+_EXTRA_CASES = {{
+%s
+}}
+"""
+
+def uname(i):
+ return unicodedata.name(chr(i), r'U+%04X' % i)
+
+class hexint(int):
+ def __repr__(self):
+ return '%#06x' % self
+
+def alpha(i):
+ c = chr(i)
+ return c if c.isalpha() else ascii(c)[1:-1]
+
+
+def main(outfile='Lib/re/_casefix.py'):
+ # Find sets of characters which have the same uppercase.
+ equivalent_chars = collections.defaultdict(str)
+ for c in map(chr, range(sys.maxunicode + 1)):
+ equivalent_chars[c.upper()] += c
+ equivalent_chars = [t for t in equivalent_chars.values() if len(t) > 1]
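+    # For example, 's' (U+0073) and 'ſ' (U+017F, LATIN SMALL LETTER LONG S)
+    # both uppercase to 'S', so they end up in the same equivalence set here.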
+
+ # List of codes of lowercased characters which have the same uppercase.
+ equivalent_lower_codes = [sorted(t)
+ for s in equivalent_chars
+ for t in [set(ord(c.lower()) for c in s)]
+ if len(t) > 1]
+
+ bad_codes = []
+ for t in equivalent_lower_codes:
+ for i in t:
+ if i > 0xffff:
+ bad_codes.extend(t)
+ try:
+ bad_codes.append(ord(chr(i).upper()))
+ except (ValueError, TypeError):
+ pass
+ break
+ if bad_codes:
+        print('Case-insensitive matching may not work correctly for characters:',
+ file=sys.stderr)
+ for i in sorted(bad_codes):
+ print(" '%s' (U+%04x, %s)" % (alpha(i), i, uname(i)),
+ file=sys.stderr)
+ sys.exit(1)
+
+ mapping = {i: tuple(j for j in t if i != j)
+ for t in equivalent_lower_codes
+ for i in t}
+
+ items = []
+ for i, t in sorted(mapping.items()):
+ items.append(' # %s: %s' % (
+ uname(i),
+ ', '.join(map(uname, t)),
+ ))
+ items.append(" %r: %r, # '%s': '%s'" % (
+ hexint(i),
+ tuple(map(hexint, t)),
+ alpha(i),
+ ''.join(map(alpha, t)),
+ ))
+
+ update_file(outfile, re_casefix_template % '\n'.join(items))
+
+
+if __name__ == '__main__':
+ import sys
+ main(*sys.argv[1:])
diff --git a/Tools/build/generate_sre_constants.py b/Tools/build/generate_sre_constants.py
new file mode 100755
index 0000000..abea069
--- /dev/null
+++ b/Tools/build/generate_sre_constants.py
@@ -0,0 +1,80 @@
+#! /usr/bin/env python3
+# This script generates Modules/_sre/sre_constants.h from Lib/re/_constants.py.
+
+SCRIPT_NAME = 'Tools/build/generate_sre_constants.py'
+
+
+def update_file(file, content):
+ try:
+ with open(file, 'r') as fobj:
+ if fobj.read() == content:
+ return False
+ except (OSError, ValueError):
+ pass
+ with open(file, 'w') as fobj:
+ fobj.write(content)
+ return True
+
+sre_constants_header = f"""\
+/*
+ * Secret Labs' Regular Expression Engine
+ *
+ * regular expression matching engine
+ *
+ * Auto-generated by {SCRIPT_NAME} from
+ * Lib/re/_constants.py.
+ *
+ * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved.
+ *
+ * See the sre.c file for information on usage and redistribution.
+ */
+
+"""
+
+def main(
+ infile="Lib/re/_constants.py",
+ outfile_constants="Modules/_sre/sre_constants.h",
+ outfile_targets="Modules/_sre/sre_targets.h",
+):
+ ns = {}
+ with open(infile) as fp:
+ code = fp.read()
+ exec(code, ns)
+
+ def dump(d, prefix):
+ items = sorted(d)
+ for item in items:
+ yield "#define %s_%s %d\n" % (prefix, item, item)
+
+ def dump2(d, prefix):
+ items = [(value, name) for name, value in d.items()
+ if name.startswith(prefix)]
+ for value, name in sorted(items):
+ yield "#define %s %d\n" % (name, value)
+
+ def dump_gotos(d, prefix):
+ for i, item in enumerate(sorted(d)):
+ assert i == item
+ yield f" &&{prefix}_{item},\n"
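+    # The helpers above emit plain #define and goto-table lines; for instance
+    # dump() over OPCODES yields lines such as "#define SRE_OP_FAILURE 0"
+    # (illustrative; the actual values come from Lib/re/_constants.py).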
+
+ content = [sre_constants_header]
+ content.append("#define SRE_MAGIC %d\n" % ns["MAGIC"])
+ content.extend(dump(ns["OPCODES"], "SRE_OP"))
+ content.extend(dump(ns["ATCODES"], "SRE"))
+ content.extend(dump(ns["CHCODES"], "SRE"))
+ content.extend(dump2(ns, "SRE_FLAG_"))
+ content.extend(dump2(ns, "SRE_INFO_"))
+
+ update_file(outfile_constants, ''.join(content))
+
+ content = [sre_constants_header]
+ content.append(f"static void *sre_targets[{len(ns['OPCODES'])}] = {{\n")
+ content.extend(dump_gotos(ns["OPCODES"], "TARGET_SRE_OP"))
+ content.append("};\n")
+
+ update_file(outfile_targets, ''.join(content))
+
+
+if __name__ == '__main__':
+ import sys
+ main(*sys.argv[1:])
diff --git a/Tools/build/generate_stdlib_module_names.py b/Tools/build/generate_stdlib_module_names.py
new file mode 100644
index 0000000..e4f09f8
--- /dev/null
+++ b/Tools/build/generate_stdlib_module_names.py
@@ -0,0 +1,139 @@
+# This script lists the names of standard library modules
+# to update Python/stdlib_module_names.h
+import _imp
+import os.path
+import re
+import subprocess
+import sys
+import sysconfig
+
+from check_extension_modules import ModuleChecker
+
+
+SCRIPT_NAME = 'Tools/build/generate_stdlib_module_names.py'
+
+SRC_DIR = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
+STDLIB_PATH = os.path.join(SRC_DIR, 'Lib')
+
+IGNORE = {
+ '__init__',
+ '__pycache__',
+ 'site-packages',
+
+ # Test modules and packages
+ '__hello__',
+ '__phello__',
+ '__hello_alias__',
+ '__phello_alias__',
+ '__hello_only__',
+ '_ctypes_test',
+ '_testbuffer',
+ '_testcapi',
+ '_testconsole',
+ '_testimportmultiple',
+ '_testinternalcapi',
+ '_testmultiphase',
+ '_xxsubinterpreters',
+ '_xxtestfuzz',
+ 'distutils.tests',
+ 'idlelib.idle_test',
+ 'test',
+ 'xxlimited',
+ 'xxlimited_35',
+ 'xxsubtype',
+}
+
+# Pure Python modules (Lib/*.py)
+def list_python_modules(names):
+ for filename in os.listdir(STDLIB_PATH):
+ if not filename.endswith(".py"):
+ continue
+ name = filename.removesuffix(".py")
+ names.add(name)
+
+
+# Packages in Lib/
+def list_packages(names):
+ for name in os.listdir(STDLIB_PATH):
+ if name in IGNORE:
+ continue
+ package_path = os.path.join(STDLIB_PATH, name)
+ if not os.path.isdir(package_path):
+ continue
+ if any(package_file.endswith(".py")
+ for package_file in os.listdir(package_path)):
+ names.add(name)
+
+
+# Built-in and extension modules built by Modules/Setup*
+# includes Windows and macOS extensions.
+def list_modules_setup_extensions(names):
+ checker = ModuleChecker()
+ names.update(checker.list_module_names(all=True))
+
+
+# List frozen modules of the PyImport_FrozenModules list (Python/frozen.c).
+# Use the "./Programs/_testembed list_frozen" command.
+def list_frozen(names):
+ submodules = set()
+ for name in _imp._frozen_module_names():
+ # To skip __hello__, __hello_alias__ and etc.
+ if name.startswith('__'):
+ continue
+ if '.' in name:
+ submodules.add(name)
+ else:
+ names.add(name)
+ # Make sure all frozen submodules have a known parent.
+ for name in list(submodules):
+ if name.partition('.')[0] in names:
+ submodules.remove(name)
+ if submodules:
+ raise Exception(f'unexpected frozen submodules: {sorted(submodules)}')
+
+
+def list_modules():
+ names = set(sys.builtin_module_names)
+ list_modules_setup_extensions(names)
+ list_packages(names)
+ list_python_modules(names)
+ list_frozen(names)
+
+ # Remove ignored packages and modules
+ for name in list(names):
+ package_name = name.split('.')[0]
+ # package_name can be equal to name
+ if package_name in IGNORE:
+ names.discard(name)
+
+ for name in names:
+ if "." in name:
+ raise Exception("sub-modules must not be listed")
+
+ return names
+
+
+def write_modules(fp, names):
+ print(f"// Auto-generated by {SCRIPT_NAME}.",
+ file=fp)
+ print("// List used to create sys.stdlib_module_names.", file=fp)
+ print(file=fp)
+ print("static const char* _Py_stdlib_module_names[] = {", file=fp)
+ for name in sorted(names):
+ print(f'"{name}",', file=fp)
+ print("};", file=fp)
+
+
+def main():
+ if not sysconfig.is_python_build():
+ print(f"ERROR: {sys.executable} is not a Python build",
+ file=sys.stderr)
+ sys.exit(1)
+
+ fp = sys.stdout
+ names = list_modules()
+ write_modules(fp, names)
+
+
+if __name__ == "__main__":
+ main()
diff --git a/Tools/build/generate_token.py b/Tools/build/generate_token.py
new file mode 100755
index 0000000..fc12835
--- /dev/null
+++ b/Tools/build/generate_token.py
@@ -0,0 +1,282 @@
+#! /usr/bin/env python3
+# This script generates token related files from Grammar/Tokens:
+#
+# Doc/library/token-list.inc
+# Include/token.h
+# Parser/token.c
+# Lib/token.py
+
+
+SCRIPT_NAME = 'Tools/build/generate_token.py'
+AUTO_GENERATED_BY_SCRIPT = f'Auto-generated by {SCRIPT_NAME}'
+NT_OFFSET = 256
+
+def load_tokens(path):
+ tok_names = []
+ string_to_tok = {}
+ ERRORTOKEN = None
+ with open(path) as fp:
+ for line in fp:
+ line = line.strip()
+ # strip comments
+ i = line.find('#')
+ if i >= 0:
+ line = line[:i].strip()
+ if not line:
+ continue
+ fields = line.split()
+ name = fields[0]
+ value = len(tok_names)
+ if name == 'ERRORTOKEN':
+ ERRORTOKEN = value
+ string = fields[1] if len(fields) > 1 else None
+ if string:
+ string = eval(string)
+ string_to_tok[string] = value
+ tok_names.append(name)
+ return tok_names, ERRORTOKEN, string_to_tok
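+# load_tokens() expects each Grammar/Tokens line to be a token name optionally
+# followed by its quoted string; '#' starts a comment.  Illustrative examples:
+#
+#     ENDMARKER
+#     LPAR                    '('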
+
+
+def update_file(file, content):
+ try:
+ with open(file, 'r') as fobj:
+ if fobj.read() == content:
+ return False
+ except (OSError, ValueError):
+ pass
+ with open(file, 'w') as fobj:
+ fobj.write(content)
+ return True
+
+
+token_h_template = f"""\
+/* {AUTO_GENERATED_BY_SCRIPT} */
+"""
+token_h_template += """\
+
+/* Token types */
+#ifndef Py_INTERNAL_TOKEN_H
+#define Py_INTERNAL_TOKEN_H
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifndef Py_BUILD_CORE
+# error "this header requires Py_BUILD_CORE define"
+#endif
+
+#undef TILDE /* Prevent clash of our definition with system macro. Ex AIX, ioctl.h */
+
+%s\
+#define N_TOKENS %d
+#define NT_OFFSET %d
+
+/* Special definitions for cooperation with parser */
+
+#define ISTERMINAL(x) ((x) < NT_OFFSET)
+#define ISNONTERMINAL(x) ((x) >= NT_OFFSET)
+#define ISEOF(x) ((x) == ENDMARKER)
+#define ISWHITESPACE(x) ((x) == ENDMARKER || \\
+ (x) == NEWLINE || \\
+ (x) == INDENT || \\
+ (x) == DEDENT)
+
+
+// Symbols exported for test_peg_generator
+PyAPI_DATA(const char * const) _PyParser_TokenNames[]; /* Token names */
+PyAPI_FUNC(int) _PyToken_OneChar(int);
+PyAPI_FUNC(int) _PyToken_TwoChars(int, int);
+PyAPI_FUNC(int) _PyToken_ThreeChars(int, int, int);
+
+#ifdef __cplusplus
+}
+#endif
+#endif // !Py_INTERNAL_TOKEN_H
+"""
+
+def make_h(infile, outfile='Include/internal/pycore_token.h'):
+ tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)
+
+ defines = []
+ for value, name in enumerate(tok_names[:ERRORTOKEN + 1]):
+ defines.append("#define %-15s %d\n" % (name, value))
+
+ if update_file(outfile, token_h_template % (
+ ''.join(defines),
+ len(tok_names),
+ NT_OFFSET
+ )):
+ print("%s regenerated from %s" % (outfile, infile))
+
+
+token_c_template = f"""\
+/* {AUTO_GENERATED_BY_SCRIPT} */
+"""
+token_c_template += """\
+
+#include "Python.h"
+#include "pycore_token.h"
+
+/* Token names */
+
+const char * const _PyParser_TokenNames[] = {
+%s\
+};
+
+/* Return the token corresponding to a single character */
+
+int
+_PyToken_OneChar(int c1)
+{
+%s\
+ return OP;
+}
+
+int
+_PyToken_TwoChars(int c1, int c2)
+{
+%s\
+ return OP;
+}
+
+int
+_PyToken_ThreeChars(int c1, int c2, int c3)
+{
+%s\
+ return OP;
+}
+"""
+
+def generate_chars_to_token(mapping, n=1):
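+    # Emit a nested C switch over c1/c2/c3; e.g. the one-character table
+    # produces lines like ``case '(': return LPAR;`` (illustrative output,
+    # the real mapping is driven by Grammar/Tokens).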
+ result = []
+ write = result.append
+ indent = ' ' * n
+ write(indent)
+ write('switch (c%d) {\n' % (n,))
+ for c in sorted(mapping):
+ write(indent)
+ value = mapping[c]
+ if isinstance(value, dict):
+ write("case '%s':\n" % (c,))
+ write(generate_chars_to_token(value, n + 1))
+ write(indent)
+ write(' break;\n')
+ else:
+ write("case '%s': return %s;\n" % (c, value))
+ write(indent)
+ write('}\n')
+ return ''.join(result)
+
+def make_c(infile, outfile='Parser/token.c'):
+ tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)
+ string_to_tok['<>'] = string_to_tok['!=']
+ chars_to_token = {}
+ for string, value in string_to_tok.items():
+ assert 1 <= len(string) <= 3
+ name = tok_names[value]
+ m = chars_to_token.setdefault(len(string), {})
+ for c in string[:-1]:
+ m = m.setdefault(c, {})
+ m[string[-1]] = name
+
+ names = []
+ for value, name in enumerate(tok_names):
+ if value >= ERRORTOKEN:
+ name = '<%s>' % name
+ names.append(' "%s",\n' % name)
+ names.append(' "<N_TOKENS>",\n')
+
+ if update_file(outfile, token_c_template % (
+ ''.join(names),
+ generate_chars_to_token(chars_to_token[1]),
+ generate_chars_to_token(chars_to_token[2]),
+ generate_chars_to_token(chars_to_token[3])
+ )):
+ print("%s regenerated from %s" % (outfile, infile))
+
+
+token_inc_template = f"""\
+.. {AUTO_GENERATED_BY_SCRIPT}
+%s
+.. data:: N_TOKENS
+
+.. data:: NT_OFFSET
+"""
+
+def make_rst(infile, outfile='Doc/library/token-list.inc'):
+ tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)
+ tok_to_string = {value: s for s, value in string_to_tok.items()}
+
+ names = []
+ for value, name in enumerate(tok_names[:ERRORTOKEN + 1]):
+ names.append('.. data:: %s' % (name,))
+ if value in tok_to_string:
+ names.append('')
+ names.append(' Token value for ``"%s"``.' % tok_to_string[value])
+ names.append('')
+
+ if update_file(outfile, token_inc_template % '\n'.join(names)):
+ print("%s regenerated from %s" % (outfile, infile))
+
+
+token_py_template = f'''\
+"""Token constants."""
+# {AUTO_GENERATED_BY_SCRIPT}
+'''
+token_py_template += '''
+__all__ = ['tok_name', 'ISTERMINAL', 'ISNONTERMINAL', 'ISEOF']
+
+%s
+N_TOKENS = %d
+# Special definitions for cooperation with parser
+NT_OFFSET = %d
+
+tok_name = {value: name
+ for name, value in globals().items()
+ if isinstance(value, int) and not name.startswith('_')}
+__all__.extend(tok_name.values())
+
+EXACT_TOKEN_TYPES = {
+%s
+}
+
+def ISTERMINAL(x):
+ return x < NT_OFFSET
+
+def ISNONTERMINAL(x):
+ return x >= NT_OFFSET
+
+def ISEOF(x):
+ return x == ENDMARKER
+'''
+
+def make_py(infile, outfile='Lib/token.py'):
+ tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)
+
+ constants = []
+ for value, name in enumerate(tok_names):
+ constants.append('%s = %d' % (name, value))
+ constants.insert(ERRORTOKEN,
+ "# These aren't used by the C tokenizer but are needed for tokenize.py")
+
+ token_types = []
+ for s, value in sorted(string_to_tok.items()):
+ token_types.append(' %r: %s,' % (s, tok_names[value]))
+
+ if update_file(outfile, token_py_template % (
+ '\n'.join(constants),
+ len(tok_names),
+ NT_OFFSET,
+ '\n'.join(token_types),
+ )):
+ print("%s regenerated from %s" % (outfile, infile))
+
+
+def main(op, infile='Grammar/Tokens', *args):
+ make = globals()['make_' + op]
+ make(infile, *args)
+
+
+if __name__ == '__main__':
+ import sys
+ main(*sys.argv[1:])
diff --git a/Tools/build/parse_html5_entities.py b/Tools/build/parse_html5_entities.py
new file mode 100755
index 0000000..d2bf290
--- /dev/null
+++ b/Tools/build/parse_html5_entities.py
@@ -0,0 +1,115 @@
+#!/usr/bin/env python3
+"""
+Utility for parsing HTML5 entity definitions available from:
+
+ https://html.spec.whatwg.org/entities.json
+ https://html.spec.whatwg.org/multipage/named-characters.html
+
+The page now contains the following note:
+
+ "This list is static and will not be expanded or changed in the future."
+
+Written by Ezio Melotti and Iuliia Proskurnia.
+"""
+
+import os
+import sys
+import json
+from urllib.request import urlopen
+from html.entities import html5
+
+SCRIPT_NAME = 'Tools/build/parse_html5_entities.py'
+PAGE_URL = 'https://html.spec.whatwg.org/multipage/named-characters.html'
+ENTITIES_URL = 'https://html.spec.whatwg.org/entities.json'
+HTML5_SECTION_START = '# HTML5 named character references'
+
+def get_json(url):
+ """Download the json file from the url and returns a decoded object."""
+ with urlopen(url) as f:
+ data = f.read().decode('utf-8')
+ return json.loads(data)
+
+def create_dict(entities):
+ """Create the html5 dict from the decoded json object."""
+ new_html5 = {}
+ for name, value in entities.items():
+ new_html5[name.lstrip('&')] = value['characters']
+ return new_html5
+
+def compare_dicts(old, new):
+ """Compare the old and new dicts and print the differences."""
+ added = new.keys() - old.keys()
+ if added:
+        print('{} entity(ies) have been added:'.format(len(added)))
+ for name in sorted(added):
+ print(' {!r}: {!r}'.format(name, new[name]))
+ removed = old.keys() - new.keys()
+ if removed:
+        print('{} entity(ies) have been removed:'.format(len(removed)))
+ for name in sorted(removed):
+ print(' {!r}: {!r}'.format(name, old[name]))
+ changed = set()
+ for name in (old.keys() & new.keys()):
+ if old[name] != new[name]:
+ changed.add((name, old[name], new[name]))
+ if changed:
+        print('{} entity(ies) have been modified:'.format(len(changed)))
+ for item in sorted(changed):
+ print(' {!r}: {!r} -> {!r}'.format(*item))
+
+def write_items(entities, file=sys.stdout):
+ """Write the items of the dictionary in the specified file."""
+ # The keys in the generated dictionary should be sorted
+ # in a case-insensitive way, however, when two keys are equal,
+ # the uppercase version should come first so that the result
+ # looks like: ['Aacute', 'aacute', 'Aacute;', 'aacute;', ...]
+ # To do this we first sort in a case-sensitive way (so all the
+ # uppercase chars come first) and then sort with key=str.lower.
+ # Since the sorting is stable the uppercase keys will eventually
+ # be before their equivalent lowercase version.
+ keys = sorted(entities.keys())
+ keys = sorted(keys, key=str.lower)
+ print(HTML5_SECTION_START, file=file)
+ print(f'# Generated by {SCRIPT_NAME}\n'
+ f'# from {ENTITIES_URL} and\n'
+ f'# {PAGE_URL}.\n'
+ f'# Map HTML5 named character references to the '
+ f'equivalent Unicode character(s).', file=file)
+ print('html5 = {', file=file)
+ for name in keys:
+ print(f' {name!r}: {entities[name]!a},', file=file)
+ print('}', file=file)
+
+
+if __name__ == '__main__':
+ # without args print a diff between html.entities.html5 and new_html5
+ # with --create print the new html5 dict
+ # with --patch patch the Lib/html/entities.py file
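+    # For example (illustrative):
+    #   ./python Tools/build/parse_html5_entities.py --patch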
+ new_html5 = create_dict(get_json(ENTITIES_URL))
+ if '--create' in sys.argv:
+ write_items(new_html5)
+ elif '--patch' in sys.argv:
+ fname = 'Lib/html/entities.py'
+ temp_fname = fname + '.temp'
+ with open(fname) as f1, open(temp_fname, 'w') as f2:
+ skip = False
+ for line in f1:
+ if line.startswith(HTML5_SECTION_START):
+ write_items(new_html5, file=f2)
+ skip = True
+ continue
+ if skip:
+ # skip the old items until the }
+ if line.startswith('}'):
+ skip = False
+ continue
+ f2.write(line)
+ os.remove(fname)
+ os.rename(temp_fname, fname)
+ else:
+ if html5 == new_html5:
+        print('The current dictionary is up to date.')
+ else:
+ compare_dicts(html5, new_html5)
+ print('Run "./python {0} --patch" to update Lib/html/entities.html '
+ 'or "./python {0} --create" to see the generated ' 'dictionary.'.format(__file__))
diff --git a/Tools/build/smelly.py b/Tools/build/smelly.py
new file mode 100755
index 0000000..276a5ab
--- /dev/null
+++ b/Tools/build/smelly.py
@@ -0,0 +1,173 @@
+#!/usr/bin/env python
+# Script checking that all symbols exported by libpython start with Py or _Py
+
+import os.path
+import subprocess
+import sys
+import sysconfig
+
+
+ALLOWED_PREFIXES = ('Py', '_Py')
+if sys.platform == 'darwin':
+ ALLOWED_PREFIXES += ('__Py',)
+
+IGNORED_EXTENSION = "_ctypes_test"
+# Ignore constructor and destructor functions
+IGNORED_SYMBOLS = {'_init', '_fini'}
+
+
+def is_local_symbol_type(symtype):
+ # Ignore local symbols.
+
+ # If lowercase, the symbol is usually local; if uppercase, the symbol
+ # is global (external). There are however a few lowercase symbols that
+ # are shown for special global symbols ("u", "v" and "w").
+ if symtype.islower() and symtype not in "uvw":
+ return True
+
+ # Ignore the initialized data section (d and D) and the BSS data
+ # section. For example, ignore "__bss_start (type: B)"
+ # and "_edata (type: D)".
+ if symtype in "bBdD":
+ return True
+
+ return False
+
+
+def get_exported_symbols(library, dynamic=False):
+ print(f"Check that {library} only exports symbols starting with Py or _Py")
+
+ # Only look at dynamic symbols
+ args = ['nm', '--no-sort']
+ if dynamic:
+ args.append('--dynamic')
+ args.append(library)
+ print("+ %s" % ' '.join(args))
+ proc = subprocess.run(args, stdout=subprocess.PIPE, universal_newlines=True)
+ if proc.returncode:
+ sys.stdout.write(proc.stdout)
+ sys.exit(proc.returncode)
+
+ stdout = proc.stdout.rstrip()
+ if not stdout:
+ raise Exception("command output is empty")
+ return stdout
+
+
+def get_smelly_symbols(stdout):
+ smelly_symbols = []
+ python_symbols = []
+ local_symbols = []
+
+ for line in stdout.splitlines():
+ # Split line '0000000000001b80 D PyTextIOWrapper_Type'
+ if not line:
+ continue
+
+ parts = line.split(maxsplit=2)
+ if len(parts) < 3:
+ continue
+
+ symtype = parts[1].strip()
+ symbol = parts[-1]
+ result = '%s (type: %s)' % (symbol, symtype)
+
+ if symbol.startswith(ALLOWED_PREFIXES):
+ python_symbols.append(result)
+ continue
+
+ if is_local_symbol_type(symtype):
+ local_symbols.append(result)
+ elif symbol in IGNORED_SYMBOLS:
+ local_symbols.append(result)
+ else:
+ smelly_symbols.append(result)
+
+ if local_symbols:
+ print(f"Ignore {len(local_symbols)} local symbols")
+ return smelly_symbols, python_symbols
+
+
+def check_library(library, dynamic=False):
+ nm_output = get_exported_symbols(library, dynamic)
+ smelly_symbols, python_symbols = get_smelly_symbols(nm_output)
+
+ if not smelly_symbols:
+ print(f"OK: no smelly symbol found ({len(python_symbols)} Python symbols)")
+ return 0
+
+ print()
+ smelly_symbols.sort()
+ for symbol in smelly_symbols:
+ print("Smelly symbol: %s" % symbol)
+
+ print()
+ print("ERROR: Found %s smelly symbols!" % len(smelly_symbols))
+ return len(smelly_symbols)
+
+
+def check_extensions():
+ print(__file__)
+    # This assumes pybuilddir.txt is in the same directory as pyconfig.h.
+ # In the case of out-of-tree builds, we can't assume pybuilddir.txt is
+ # in the source folder.
+ config_dir = os.path.dirname(sysconfig.get_config_h_filename())
+ filename = os.path.join(config_dir, "pybuilddir.txt")
+ try:
+ with open(filename, encoding="utf-8") as fp:
+ pybuilddir = fp.readline()
+ except FileNotFoundError:
+ print(f"Cannot check extensions because {filename} does not exist")
+ return True
+
+ print(f"Check extension modules from {pybuilddir} directory")
+ builddir = os.path.join(config_dir, pybuilddir)
+ nsymbol = 0
+ for name in os.listdir(builddir):
+ if not name.endswith(".so"):
+ continue
+ if IGNORED_EXTENSION in name:
+ print()
+ print(f"Ignore extension: {name}")
+ continue
+
+ print()
+ filename = os.path.join(builddir, name)
+ nsymbol += check_library(filename, dynamic=True)
+
+ return nsymbol
+
+
+def main():
+ nsymbol = 0
+
+ # static library
+ LIBRARY = sysconfig.get_config_var('LIBRARY')
+ if not LIBRARY:
+ raise Exception("failed to get LIBRARY variable from sysconfig")
+ if os.path.exists(LIBRARY):
+ nsymbol += check_library(LIBRARY)
+
+ # dynamic library
+ LDLIBRARY = sysconfig.get_config_var('LDLIBRARY')
+ if not LDLIBRARY:
+ raise Exception("failed to get LDLIBRARY variable from sysconfig")
+ if LDLIBRARY != LIBRARY:
+ print()
+ nsymbol += check_library(LDLIBRARY, dynamic=True)
+
+ # Check extension modules like _ssl.cpython-310d-x86_64-linux-gnu.so
+ nsymbol += check_extensions()
+
+ if nsymbol:
+ print()
+ print(f"ERROR: Found {nsymbol} smelly symbols in total!")
+ sys.exit(1)
+
+ print()
+ print(f"OK: all exported symbols of all libraries "
+ f"are prefixed with {' or '.join(map(repr, ALLOWED_PREFIXES))}")
+
+
+if __name__ == "__main__":
+ main()
diff --git a/Tools/build/stable_abi.py b/Tools/build/stable_abi.py
new file mode 100644
index 0000000..88db93e
--- /dev/null
+++ b/Tools/build/stable_abi.py
@@ -0,0 +1,757 @@
+"""Check the stable ABI manifest or generate files from it
+
+By default, the tool only checks existing files/libraries.
+Pass --generate to recreate auto-generated files instead.
+
+For actions that take a FILENAME, the filename can be left out to use a default
+(relative to the manifest file, as they appear in the CPython codebase).
+"""
+
+from functools import partial
+from pathlib import Path
+import dataclasses
+import subprocess
+import sysconfig
+import argparse
+import textwrap
+import tomllib
+import difflib
+import pprint
+import sys
+import os
+import os.path
+import io
+import re
+import csv
+
+SCRIPT_NAME = 'Tools/build/stable_abi.py'
+MISSING = object()
+
+EXCLUDED_HEADERS = {
+ "bytes_methods.h",
+ "cellobject.h",
+ "classobject.h",
+ "code.h",
+ "compile.h",
+ "datetime.h",
+ "dtoa.h",
+ "frameobject.h",
+ "genobject.h",
+ "longintrepr.h",
+ "parsetok.h",
+ "pyatomic.h",
+ "pytime.h",
+ "token.h",
+ "ucnhash.h",
+}
+MACOS = (sys.platform == "darwin")
+UNIXY = MACOS or (sys.platform == "linux") # XXX should this be "not Windows"?
+
+
+# The stable ABI manifest (Misc/stable_abi.toml) exists only to fill the
+# following dataclasses.
+# Feel free to change its syntax (and the `parse_manifest` function)
+# to better serve that purpose (while keeping it human-readable).
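+#
+# An entry looks roughly like this (illustrative example, not copied verbatim
+# from the manifest):
+#
+#   [function.PyObject_Repr]
+#       added = '3.2'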
+
+class Manifest:
+ """Collection of `ABIItem`s forming the stable ABI/limited API."""
+ def __init__(self):
+ self.contents = dict()
+
+ def add(self, item):
+ if item.name in self.contents:
+ # We assume that stable ABI items do not share names,
+ # even if they're different kinds (e.g. function vs. macro).
+ raise ValueError(f'duplicate ABI item {item.name}')
+ self.contents[item.name] = item
+
+ def select(self, kinds, *, include_abi_only=True, ifdef=None):
+ """Yield selected items of the manifest
+
+ kinds: set of requested kinds, e.g. {'function', 'macro'}
+ include_abi_only: if True (default), include all items of the
+ stable ABI.
+ If False, include only items from the limited API
+ (i.e. items people should use today)
+ ifdef: set of feature macros (e.g. {'HAVE_FORK', 'MS_WINDOWS'}).
+ If None (default), items are not filtered by this. (This is
+ different from the empty set, which filters out all such
+ conditional items.)
+ """
+ for name, item in sorted(self.contents.items()):
+ if item.kind not in kinds:
+ continue
+ if item.abi_only and not include_abi_only:
+ continue
+ if (ifdef is not None
+ and item.ifdef is not None
+ and item.ifdef not in ifdef):
+ continue
+ yield item
+
+ def dump(self):
+ """Yield lines to recreate the manifest file (sans comments/newlines)"""
+ for item in self.contents.values():
+ fields = dataclasses.fields(item)
+ yield f"[{item.kind}.{item.name}]"
+ for field in fields:
+ if field.name in {'name', 'value', 'kind'}:
+ continue
+ value = getattr(item, field.name)
+ if value == field.default:
+ pass
+ elif value is True:
+ yield f" {field.name} = true"
+ elif value:
+ yield f" {field.name} = {value!r}"
+
+
+itemclasses = {}
+def itemclass(kind):
+ """Register the decorated class in `itemclasses`"""
+ def decorator(cls):
+ itemclasses[kind] = cls
+ return cls
+ return decorator
+
+@itemclass('function')
+@itemclass('macro')
+@itemclass('data')
+@itemclass('const')
+@itemclass('typedef')
+@dataclasses.dataclass
+class ABIItem:
+ """Information on one item (function, macro, struct, etc.)"""
+
+ name: str
+ kind: str
+ added: str = None
+ abi_only: bool = False
+ ifdef: str = None
+
+@itemclass('feature_macro')
+@dataclasses.dataclass(kw_only=True)
+class FeatureMacro(ABIItem):
+ name: str
+ doc: str
+ windows: bool = False
+ abi_only: bool = True
+
+@itemclass('struct')
+@dataclasses.dataclass(kw_only=True)
+class Struct(ABIItem):
+ struct_abi_kind: str
+ members: list = None
+
+
+def parse_manifest(file):
+ """Parse the given file (iterable of lines) to a Manifest"""
+
+ manifest = Manifest()
+
+ data = tomllib.load(file)
+
+ for kind, itemclass in itemclasses.items():
+ for name, item_data in data[kind].items():
+ try:
+ item = itemclass(name=name, kind=kind, **item_data)
+ manifest.add(item)
+ except BaseException as exc:
+ exc.add_note(f'in {kind} {name}')
+ raise
+
+ return manifest
+
+# The tool can run individual "actions".
+# Most actions are "generators", which generate a single file from the
+# manifest. (Checking works by generating a temp file & comparing.)
+# Other actions, like "--unixy-check", don't work on a single file.
+
+generators = []
+def generator(var_name, default_path):
+ """Decorates a file generator: function that writes to a file"""
+ def _decorator(func):
+ func.var_name = var_name
+ func.arg_name = '--' + var_name.replace('_', '-')
+ func.default_path = default_path
+ generators.append(func)
+ return func
+ return _decorator
+
+
+@generator("python3dll", 'PC/python3dll.c')
+def gen_python3dll(manifest, args, outfile):
+ """Generate/check the source for the Windows stable ABI library"""
+ write = partial(print, file=outfile)
+ content = f"""
+ /* Re-export stable Python ABI */
+
+ /* Generated by {SCRIPT_NAME} */
+ """
+ content += r"""
+ #ifdef _M_IX86
+ #define DECORATE "_"
+ #else
+ #define DECORATE
+ #endif
+
+ #define EXPORT_FUNC(name) \
+ __pragma(comment(linker, "/EXPORT:" DECORATE #name "=" PYTHON_DLL_NAME "." #name))
+ #define EXPORT_DATA(name) \
+ __pragma(comment(linker, "/EXPORT:" DECORATE #name "=" PYTHON_DLL_NAME "." #name ",DATA"))
+ """
+ write(textwrap.dedent(content))
+
+ def sort_key(item):
+ return item.name.lower()
+
+ windows_feature_macros = {
+ item.name for item in manifest.select({'feature_macro'}) if item.windows
+ }
+ for item in sorted(
+ manifest.select(
+ {'function'},
+ include_abi_only=True,
+ ifdef=windows_feature_macros),
+ key=sort_key):
+ write(f'EXPORT_FUNC({item.name})')
+
+ write()
+
+ for item in sorted(
+ manifest.select(
+ {'data'},
+ include_abi_only=True,
+ ifdef=windows_feature_macros),
+ key=sort_key):
+ write(f'EXPORT_DATA({item.name})')
+
+REST_ROLES = {
+ 'function': 'function',
+ 'data': 'var',
+ 'struct': 'type',
+ 'macro': 'macro',
+ # 'const': 'const', # all undocumented
+ 'typedef': 'type',
+}
+
+@generator("doc_list", 'Doc/data/stable_abi.dat')
+def gen_doc_annotations(manifest, args, outfile):
+ """Generate/check the stable ABI list for documentation annotations"""
+ writer = csv.DictWriter(
+ outfile,
+ ['role', 'name', 'added', 'ifdef_note', 'struct_abi_kind'],
+ lineterminator='\n')
+ writer.writeheader()
+ for item in manifest.select(REST_ROLES.keys(), include_abi_only=False):
+ if item.ifdef:
+ ifdef_note = manifest.contents[item.ifdef].doc
+ else:
+ ifdef_note = None
+ row = {
+ 'role': REST_ROLES[item.kind],
+ 'name': item.name,
+ 'added': item.added,
+ 'ifdef_note': ifdef_note}
+ rows = [row]
+ if item.kind == 'struct':
+ row['struct_abi_kind'] = item.struct_abi_kind
+ for member_name in item.members or ():
+ rows.append({
+ 'role': 'member',
+ 'name': f'{item.name}.{member_name}',
+ 'added': item.added})
+ writer.writerows(rows)
+
+@generator("ctypes_test", 'Lib/test/test_stable_abi_ctypes.py')
+def gen_ctypes_test(manifest, args, outfile):
+ """Generate/check the ctypes-based test for exported symbols"""
+ write = partial(print, file=outfile)
+ write(textwrap.dedent('''
+        # Generated by Tools/build/stable_abi.py
+
+ """Test that all symbols of the Stable ABI are accessible using ctypes
+ """
+
+ import sys
+ import unittest
+ from test.support.import_helper import import_module
+ from _testcapi import get_feature_macros
+
+ feature_macros = get_feature_macros()
+ ctypes_test = import_module('ctypes')
+
+ class TestStableABIAvailability(unittest.TestCase):
+ def test_available_symbols(self):
+
+ for symbol_name in SYMBOL_NAMES:
+ with self.subTest(symbol_name):
+ ctypes_test.pythonapi[symbol_name]
+
+ def test_feature_macros(self):
+ self.assertEqual(
+ set(get_feature_macros()), EXPECTED_FEATURE_MACROS)
+
+ # The feature macros for Windows are used in creating the DLL
+ # definition, so they must be known on all platforms.
+ # If we are on Windows, we check that the hardcoded data matches
+ # the reality.
+ @unittest.skipIf(sys.platform != "win32", "Windows specific test")
+ def test_windows_feature_macros(self):
+ for name, value in WINDOWS_FEATURE_MACROS.items():
+ if value != 'maybe':
+ with self.subTest(name):
+ self.assertEqual(feature_macros[name], value)
+
+ SYMBOL_NAMES = (
+ '''))
+ items = manifest.select(
+ {'function', 'data'},
+ include_abi_only=True,
+ )
+ optional_items = {}
+ for item in items:
+ if item.name in (
+ # Some symbols aren't exported on all platforms.
+ # This is a bug: https://bugs.python.org/issue44133
+ 'PyModule_Create2', 'PyModule_FromDefAndSpec2',
+ ):
+ continue
+ if item.ifdef:
+ optional_items.setdefault(item.ifdef, []).append(item.name)
+ else:
+ write(f' "{item.name}",')
+ write(")")
+ for ifdef, names in optional_items.items():
+ write(f"if feature_macros[{ifdef!r}]:")
+ write(f" SYMBOL_NAMES += (")
+ for name in names:
+ write(f" {name!r},")
+ write(" )")
+ write("")
+ feature_macros = list(manifest.select({'feature_macro'}))
+ feature_names = sorted(m.name for m in feature_macros)
+ write(f"EXPECTED_FEATURE_MACROS = set({pprint.pformat(feature_names)})")
+
+ windows_feature_macros = {m.name: m.windows for m in feature_macros}
+ write(f"WINDOWS_FEATURE_MACROS = {pprint.pformat(windows_feature_macros)}")
+
+
+@generator("testcapi_feature_macros", 'Modules/_testcapi_feature_macros.inc')
+def gen_testcapi_feature_macros(manifest, args, outfile):
+ """Generate/check the stable ABI list for documentation annotations"""
+ write = partial(print, file=outfile)
+    write('// Generated by Tools/build/stable_abi.py')
+ write()
+ write('// Add an entry in dict `result` for each Stable ABI feature macro.')
+ write()
+ for macro in manifest.select({'feature_macro'}):
+ name = macro.name
+ write(f'#ifdef {name}')
+ write(f' res = PyDict_SetItemString(result, "{name}", Py_True);')
+ write('#else')
+ write(f' res = PyDict_SetItemString(result, "{name}", Py_False);')
+ write('#endif')
+ write('if (res) {')
+ write(' Py_DECREF(result); return NULL;')
+ write('}')
+ write()
+
+
+def generate_or_check(manifest, args, path, func):
+ """Generate/check a file with a single generator
+
+ Return True if successful; False if a comparison failed.
+ """
+
+ outfile = io.StringIO()
+ func(manifest, args, outfile)
+ generated = outfile.getvalue()
+ existing = path.read_text()
+
+ if generated != existing:
+ if args.generate:
+ path.write_text(generated)
+ else:
+ print(f'File {path} differs from expected!')
+ diff = difflib.unified_diff(
+ generated.splitlines(), existing.splitlines(),
+ str(path), '<expected>',
+ lineterm='',
+ )
+ for line in diff:
+ print(line)
+ return False
+ return True
+
+
+def do_unixy_check(manifest, args):
+ """Check headers & library using "Unixy" tools (GCC/clang, binutils)"""
+ okay = True
+
+ # Get all macros first: we'll need feature macros like HAVE_FORK and
+ # MS_WINDOWS for everything else
+ present_macros = gcc_get_limited_api_macros(['Include/Python.h'])
+ feature_macros = set(m.name for m in manifest.select({'feature_macro'}))
+ feature_macros &= present_macros
+
+ # Check that we have all needed macros
+ expected_macros = set(
+ item.name for item in manifest.select({'macro'})
+ )
+ missing_macros = expected_macros - present_macros
+ okay &= _report_unexpected_items(
+ missing_macros,
+        'Some macros are not defined in "Include/Python.h" '
+        + 'with Py_LIMITED_API:')
+
+ expected_symbols = set(item.name for item in manifest.select(
+ {'function', 'data'}, include_abi_only=True, ifdef=feature_macros,
+ ))
+
+ # Check the static library (*.a)
+ LIBRARY = sysconfig.get_config_var("LIBRARY")
+ if not LIBRARY:
+ raise Exception("failed to get LIBRARY variable from sysconfig")
+ if os.path.exists(LIBRARY):
+ okay &= binutils_check_library(
+ manifest, LIBRARY, expected_symbols, dynamic=False)
+
+ # Check the dynamic library (*.so)
+ LDLIBRARY = sysconfig.get_config_var("LDLIBRARY")
+ if not LDLIBRARY:
+ raise Exception("failed to get LDLIBRARY variable from sysconfig")
+ okay &= binutils_check_library(
+ manifest, LDLIBRARY, expected_symbols, dynamic=False)
+
+ # Check definitions in the header files
+ expected_defs = set(item.name for item in manifest.select(
+ {'function', 'data'}, include_abi_only=False, ifdef=feature_macros,
+ ))
+ found_defs = gcc_get_limited_api_definitions(['Include/Python.h'])
+ missing_defs = expected_defs - found_defs
+ okay &= _report_unexpected_items(
+ missing_defs,
+ 'Some expected declarations were not declared in '
+ + '"Include/Python.h" with Py_LIMITED_API:')
+
+ # Some Limited API macros are defined in terms of private symbols.
+ # These are not part of Limited API (even though they're defined with
+ # Py_LIMITED_API). They must be part of the Stable ABI, though.
+ private_symbols = {n for n in expected_symbols if n.startswith('_')}
+ extra_defs = found_defs - expected_defs - private_symbols
+ okay &= _report_unexpected_items(
+ extra_defs,
+ 'Some extra declarations were found in "Include/Python.h" '
+ + 'with Py_LIMITED_API:')
+
+ return okay
+
+
+def _report_unexpected_items(items, msg):
+ """If there are any `items`, report them using "msg" and return false"""
+ if items:
+ print(msg, file=sys.stderr)
+ for item in sorted(items):
+ print(' -', item, file=sys.stderr)
+ return False
+ return True
+
+
+def binutils_get_exported_symbols(library, dynamic=False):
+ """Retrieve exported symbols using the nm(1) tool from binutils"""
+ # Only look at dynamic symbols
+ args = ["nm", "--no-sort"]
+ if dynamic:
+ args.append("--dynamic")
+ args.append(library)
+ proc = subprocess.run(args, stdout=subprocess.PIPE, universal_newlines=True)
+ if proc.returncode:
+ sys.stdout.write(proc.stdout)
+ sys.exit(proc.returncode)
+
+ stdout = proc.stdout.rstrip()
+ if not stdout:
+ raise Exception("command output is empty")
+
+ for line in stdout.splitlines():
+ # Split line '0000000000001b80 D PyTextIOWrapper_Type'
+ if not line:
+ continue
+
+ parts = line.split(maxsplit=2)
+ if len(parts) < 3:
+ continue
+
+ symbol = parts[-1]
+ if MACOS and symbol.startswith("_"):
+ yield symbol[1:]
+ else:
+ yield symbol
+
+
+def binutils_check_library(manifest, library, expected_symbols, dynamic):
+ """Check that library exports all expected_symbols"""
+ available_symbols = set(binutils_get_exported_symbols(library, dynamic))
+ missing_symbols = expected_symbols - available_symbols
+ if missing_symbols:
+ print(textwrap.dedent(f"""\
+ Some symbols from the limited API are missing from {library}:
+ {', '.join(missing_symbols)}
+
+ This error means that there are some missing symbols among the
+ ones exported in the library.
+ This normally means that some symbol, function implementation or
+ a prototype belonging to a symbol in the limited API has been
+ deleted or is missing.
+ """), file=sys.stderr)
+ return False
+ return True
+
+
+def gcc_get_limited_api_macros(headers):
+ """Get all limited API macros from headers.
+
+ Runs the preprocessor over all the header files in "Include" setting
+ "-DPy_LIMITED_API" to the correct value for the running version of the
+ interpreter and extracting all macro definitions (via adding -dM to the
+ compiler arguments).
+
+ Requires Python built with a GCC-compatible compiler. (clang might work)
+ """
+
+ api_hexversion = sys.version_info.major << 24 | sys.version_info.minor << 16
+
+    preprocessor_output_with_macros = subprocess.check_output(
+ sysconfig.get_config_var("CC").split()
+ + [
+ # Prevent the expansion of the exported macros so we can
+ # capture them later
+ "-DSIZEOF_WCHAR_T=4", # The actual value is not important
+ f"-DPy_LIMITED_API={api_hexversion}",
+ "-I.",
+ "-I./Include",
+ "-dM",
+ "-E",
+ ]
+ + [str(file) for file in headers],
+ text=True,
+ )
+
+ return {
+ target
+ for target in re.findall(
+ r"#define (\w+)", preprocesor_output_with_macros
+ )
+ }
+
+
+def gcc_get_limited_api_definitions(headers):
+ """Get all limited API definitions from headers.
+
+ Run the preprocessor over all the header files in "Include" setting
+ "-DPy_LIMITED_API" to the correct value for the running version of the
+ interpreter.
+
+ The limited API symbols will be extracted from the output of this command
+ as it includes the prototypes and definitions of all the exported symbols
+ that are in the limited api.
+
+    This function does *NOT* extract the macros defined by the limited API.
+
+ Requires Python built with a GCC-compatible compiler. (clang might work)
+ """
+ api_hexversion = sys.version_info.major << 24 | sys.version_info.minor << 16
+    preprocessor_output = subprocess.check_output(
+ sysconfig.get_config_var("CC").split()
+ + [
+ # Prevent the expansion of the exported macros so we can capture
+ # them later
+ "-DPyAPI_FUNC=__PyAPI_FUNC",
+ "-DPyAPI_DATA=__PyAPI_DATA",
+ "-DEXPORT_DATA=__EXPORT_DATA",
+ "-D_Py_NO_RETURN=",
+ "-DSIZEOF_WCHAR_T=4", # The actual value is not important
+ f"-DPy_LIMITED_API={api_hexversion}",
+ "-I.",
+ "-I./Include",
+ "-E",
+ ]
+ + [str(file) for file in headers],
+ text=True,
+ stderr=subprocess.DEVNULL,
+ )
+ stable_functions = set(
+ re.findall(r"__PyAPI_FUNC\(.*?\)\s*(.*?)\s*\(", preprocesor_output)
+ )
+ stable_exported_data = set(
+ re.findall(r"__EXPORT_DATA\((.*?)\)", preprocesor_output)
+ )
+ stable_data = set(
+ re.findall(r"__PyAPI_DATA\(.*?\)[\s\*\(]*([^);]*)\)?.*;", preprocesor_output)
+ )
+ return stable_data | stable_exported_data | stable_functions
+
+def check_private_names(manifest):
+ """Ensure limited API doesn't contain private names
+
+ Names prefixed by an underscore are private by definition.
+ """
+ for name, item in manifest.contents.items():
+ if name.startswith('_') and not item.abi_only:
+ raise ValueError(
+ f'`{name}` is private (underscore-prefixed) and should be '
+                + 'removed from the stable ABI list or marked `abi_only`')
+
+def check_dump(manifest, filename):
+ """Check that manifest.dump() corresponds to the data.
+
+ Mainly useful when debugging this script.
+ """
+ dumped = tomllib.loads('\n'.join(manifest.dump()))
+ with filename.open('rb') as file:
+ from_file = tomllib.load(file)
+ if dumped != from_file:
+ print(f'Dump differs from loaded data!', file=sys.stderr)
+ diff = difflib.unified_diff(
+ pprint.pformat(dumped).splitlines(),
+ pprint.pformat(from_file).splitlines(),
+ '<dumped>', str(filename),
+ lineterm='',
+ )
+ for line in diff:
+ print(line, file=sys.stderr)
+ return False
+ else:
+ return True
+
+def main():
+ parser = argparse.ArgumentParser(
+ description=__doc__,
+ formatter_class=argparse.RawDescriptionHelpFormatter,
+ )
+ parser.add_argument(
+ "file", type=Path, metavar='FILE',
+ help="file with the stable abi manifest",
+ )
+ parser.add_argument(
+ "--generate", action='store_true',
+ help="generate file(s), rather than just checking them",
+ )
+ parser.add_argument(
+ "--generate-all", action='store_true',
+ help="as --generate, but generate all file(s) using default filenames."
+ + " (unlike --all, does not run any extra checks)",
+ )
+ parser.add_argument(
+ "-a", "--all", action='store_true',
+ help="run all available checks using default filenames",
+ )
+ parser.add_argument(
+ "-l", "--list", action='store_true',
+ help="list available generators and their default filenames; then exit",
+ )
+ parser.add_argument(
+ "--dump", action='store_true',
+ help="dump the manifest contents (used for debugging the parser)",
+ )
+
+ actions_group = parser.add_argument_group('actions')
+ for gen in generators:
+ actions_group.add_argument(
+ gen.arg_name, dest=gen.var_name,
+ type=str, nargs="?", default=MISSING,
+ metavar='FILENAME',
+ help=gen.__doc__,
+ )
+ actions_group.add_argument(
+ '--unixy-check', action='store_true',
+ help=do_unixy_check.__doc__,
+ )
+ args = parser.parse_args()
+
+ base_path = args.file.parent.parent
+
+ if args.list:
+ for gen in generators:
+ print(f'{gen.arg_name}: {base_path / gen.default_path}')
+ sys.exit(0)
+
+ run_all_generators = args.generate_all
+
+ if args.generate_all:
+ args.generate = True
+
+ if args.all:
+ run_all_generators = True
+ args.unixy_check = True
+
+ try:
+ file = args.file.open('rb')
+ except FileNotFoundError as err:
+ if args.file.suffix == '.txt':
+ # Provide a better error message
+ suggestion = args.file.with_suffix('.toml')
+ raise FileNotFoundError(
+ f'{args.file} not found. Did you mean {suggestion} ?') from err
+ raise
+ with file:
+ manifest = parse_manifest(file)
+
+ check_private_names(manifest)
+
+ # Remember results of all actions (as booleans).
+ # At the end we'll check that at least one action was run,
+ # and also fail if any are false.
+ results = {}
+
+ if args.dump:
+ for line in manifest.dump():
+ print(line)
+ results['dump'] = check_dump(manifest, args.file)
+
+ for gen in generators:
+ filename = getattr(args, gen.var_name)
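+        # getattr() returns MISSING when the option was not given, None when
+        # it was given without a filename (use the default path), or a string
+        # for an explicit filename.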
+ if filename is None or (run_all_generators and filename is MISSING):
+ filename = base_path / gen.default_path
+ elif filename is MISSING:
+ continue
+
+ results[gen.var_name] = generate_or_check(manifest, args, filename, gen)
+
+ if args.unixy_check:
+ results['unixy_check'] = do_unixy_check(manifest, args)
+
+ if not results:
+ if args.generate:
+ parser.error('No file specified. Use --help for usage.')
+ parser.error('No check specified. Use --help for usage.')
+
+ failed_results = [name for name, result in results.items() if not result]
+
+ if failed_results:
+ raise Exception(f"""
+ These checks related to the stable ABI did not succeed:
+ {', '.join(failed_results)}
+
+        If you see diffs in the output, files derived from the stable
+        ABI manifest were not regenerated.
+ Run `make regen-limited-abi` to fix this.
+
+ Otherwise, see the error(s) above.
+
+ The stable ABI manifest is at: {args.file}
+ Note that there is a process to follow when modifying it.
+
+ You can read more about the limited API and its contracts at:
+
+ https://docs.python.org/3/c-api/stable.html
+
+ And in PEP 384:
+
+ https://peps.python.org/pep-0384/
+ """)
+
+
+if __name__ == "__main__":
+ main()
diff --git a/Tools/build/umarshal.py b/Tools/build/umarshal.py
new file mode 100644
index 0000000..f61570c
--- /dev/null
+++ b/Tools/build/umarshal.py
@@ -0,0 +1,325 @@
+# Implementation of marshal.loads() in pure Python
+
+import ast
+
+from typing import Any, Tuple
+
+
+class Type:
+ # Adapted from marshal.c
+ NULL = ord('0')
+ NONE = ord('N')
+ FALSE = ord('F')
+ TRUE = ord('T')
+ STOPITER = ord('S')
+ ELLIPSIS = ord('.')
+ INT = ord('i')
+ INT64 = ord('I')
+ FLOAT = ord('f')
+ BINARY_FLOAT = ord('g')
+ COMPLEX = ord('x')
+ BINARY_COMPLEX = ord('y')
+ LONG = ord('l')
+ STRING = ord('s')
+ INTERNED = ord('t')
+ REF = ord('r')
+ TUPLE = ord('(')
+ LIST = ord('[')
+ DICT = ord('{')
+ CODE = ord('c')
+ UNICODE = ord('u')
+ UNKNOWN = ord('?')
+ SET = ord('<')
+ FROZENSET = ord('>')
+ ASCII = ord('a')
+ ASCII_INTERNED = ord('A')
+ SMALL_TUPLE = ord(')')
+ SHORT_ASCII = ord('z')
+ SHORT_ASCII_INTERNED = ord('Z')
+
+
+FLAG_REF = 0x80 # with a type, add obj to index
+
+NULL = object() # marker
+
+# Cell kinds
+CO_FAST_LOCAL = 0x20
+CO_FAST_CELL = 0x40
+CO_FAST_FREE = 0x80
+
+
+class Code:
+ def __init__(self, **kwds: Any):
+ self.__dict__.update(kwds)
+
+ def __repr__(self) -> str:
+ return f"Code(**{self.__dict__})"
+
+    co_localsplusnames: Tuple[str, ...]
+    co_localspluskinds: Tuple[int, ...]
+
+ def get_localsplus_names(self, select_kind: int) -> Tuple[str, ...]:
+ varnames: list[str] = []
+ for name, kind in zip(self.co_localsplusnames,
+ self.co_localspluskinds):
+ if kind & select_kind:
+ varnames.append(name)
+ return tuple(varnames)
+
+ @property
+ def co_varnames(self) -> Tuple[str, ...]:
+ return self.get_localsplus_names(CO_FAST_LOCAL)
+
+ @property
+ def co_cellvars(self) -> Tuple[str, ...]:
+ return self.get_localsplus_names(CO_FAST_CELL)
+
+ @property
+ def co_freevars(self) -> Tuple[str, ...]:
+ return self.get_localsplus_names(CO_FAST_FREE)
+
+ @property
+ def co_nlocals(self) -> int:
+ return len(self.co_varnames)
+
+
+class Reader:
+ # A fairly literal translation of the marshal reader.
+
+ def __init__(self, data: bytes):
+ self.data: bytes = data
+ self.end: int = len(self.data)
+ self.pos: int = 0
+ self.refs: list[Any] = []
+ self.level: int = 0
+
+ def r_string(self, n: int) -> bytes:
+ assert 0 <= n <= self.end - self.pos
+ buf = self.data[self.pos : self.pos + n]
+ self.pos += n
+ return buf
+
+ def r_byte(self) -> int:
+ buf = self.r_string(1)
+ return buf[0]
+
+ def r_short(self) -> int:
+ buf = self.r_string(2)
+ x = buf[0]
+ x |= buf[1] << 8
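+        # Two's-complement sign extension: e.g. 0xFFFF becomes -1,
+        # while 0x7FFF stays 32767.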
+ x |= -(x & (1<<15)) # Sign-extend
+ return x
+
+ def r_long(self) -> int:
+ buf = self.r_string(4)
+ x = buf[0]
+ x |= buf[1] << 8
+ x |= buf[2] << 16
+ x |= buf[3] << 24
+ x |= -(x & (1<<31)) # Sign-extend
+ return x
+
+ def r_long64(self) -> int:
+ buf = self.r_string(8)
+ x = buf[0]
+ x |= buf[1] << 8
+ x |= buf[2] << 16
+ x |= buf[3] << 24
+        x |= buf[4] << 32
+        x |= buf[5] << 40
+        x |= buf[6] << 48
+        x |= buf[7] << 56
+ x |= -(x & (1<<63)) # Sign-extend
+ return x
+
+ def r_PyLong(self) -> int:
+ n = self.r_long()
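+        # marshal stores arbitrary-precision ints as abs(n) 15-bit digits,
+        # least significant first; the sign of n gives the sign of the value.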
+ size = abs(n)
+ x = 0
+ # Pray this is right
+ for i in range(size):
+ x |= self.r_short() << i*15
+ if n < 0:
+ x = -x
+ return x
+
+ def r_float_bin(self) -> float:
+ buf = self.r_string(8)
+ import struct # Lazy import to avoid breaking UNIX build
+ return struct.unpack("d", buf)[0]
+
+ def r_float_str(self) -> float:
+ n = self.r_byte()
+ buf = self.r_string(n)
+ return ast.literal_eval(buf.decode("ascii"))
+
+ def r_ref_reserve(self, flag: int) -> int:
+ if flag:
+ idx = len(self.refs)
+ self.refs.append(None)
+ return idx
+ else:
+ return 0
+
+ def r_ref_insert(self, obj: Any, idx: int, flag: int) -> Any:
+ if flag:
+ self.refs[idx] = obj
+ return obj
+
+ def r_ref(self, obj: Any, flag: int) -> Any:
+ assert flag & FLAG_REF
+ self.refs.append(obj)
+ return obj
+
+ def r_object(self) -> Any:
+ old_level = self.level
+ try:
+ return self._r_object()
+ finally:
+ self.level = old_level
+
+ def _r_object(self) -> Any:
+ code = self.r_byte()
+ flag = code & FLAG_REF
+ type = code & ~FLAG_REF
+ # print(" "*self.level + f"{code} {flag} {type} {chr(type)!r}")
+ self.level += 1
+
+ def R_REF(obj: Any) -> Any:
+ if flag:
+ obj = self.r_ref(obj, flag)
+ return obj
+
+ if type == Type.NULL:
+ return NULL
+ elif type == Type.NONE:
+ return None
+ elif type == Type.ELLIPSIS:
+ return Ellipsis
+ elif type == Type.FALSE:
+ return False
+ elif type == Type.TRUE:
+ return True
+ elif type == Type.INT:
+ return R_REF(self.r_long())
+ elif type == Type.INT64:
+ return R_REF(self.r_long64())
+ elif type == Type.LONG:
+ return R_REF(self.r_PyLong())
+ elif type == Type.FLOAT:
+ return R_REF(self.r_float_str())
+ elif type == Type.BINARY_FLOAT:
+ return R_REF(self.r_float_bin())
+ elif type == Type.COMPLEX:
+ return R_REF(complex(self.r_float_str(),
+ self.r_float_str()))
+ elif type == Type.BINARY_COMPLEX:
+ return R_REF(complex(self.r_float_bin(),
+ self.r_float_bin()))
+ elif type == Type.STRING:
+ n = self.r_long()
+ return R_REF(self.r_string(n))
+ elif type == Type.ASCII_INTERNED or type == Type.ASCII:
+ n = self.r_long()
+ return R_REF(self.r_string(n).decode("ascii"))
+ elif type == Type.SHORT_ASCII_INTERNED or type == Type.SHORT_ASCII:
+ n = self.r_byte()
+ return R_REF(self.r_string(n).decode("ascii"))
+ elif type == Type.INTERNED or type == Type.UNICODE:
+ n = self.r_long()
+ return R_REF(self.r_string(n).decode("utf8", "surrogatepass"))
+ elif type == Type.SMALL_TUPLE:
+ n = self.r_byte()
+ idx = self.r_ref_reserve(flag)
+ retval: Any = tuple(self.r_object() for _ in range(n))
+ self.r_ref_insert(retval, idx, flag)
+ return retval
+ elif type == Type.TUPLE:
+ n = self.r_long()
+ idx = self.r_ref_reserve(flag)
+ retval = tuple(self.r_object() for _ in range(n))
+ self.r_ref_insert(retval, idx, flag)
+ return retval
+ elif type == Type.LIST:
+ n = self.r_long()
+ retval = R_REF([])
+ for _ in range(n):
+ retval.append(self.r_object())
+ return retval
+ elif type == Type.DICT:
+ retval = R_REF({})
+ while True:
+ key = self.r_object()
+                if key is NULL:
+ break
+ val = self.r_object()
+ retval[key] = val
+ return retval
+ elif type == Type.SET:
+ n = self.r_long()
+ retval = R_REF(set())
+ for _ in range(n):
+ v = self.r_object()
+ retval.add(v)
+ return retval
+ elif type == Type.FROZENSET:
+ n = self.r_long()
+ s: set[Any] = set()
+ idx = self.r_ref_reserve(flag)
+ for _ in range(n):
+ v = self.r_object()
+ s.add(v)
+ retval = frozenset(s)
+ self.r_ref_insert(retval, idx, flag)
+ return retval
+ elif type == Type.CODE:
+ retval = R_REF(Code())
+ retval.co_argcount = self.r_long()
+ retval.co_posonlyargcount = self.r_long()
+ retval.co_kwonlyargcount = self.r_long()
+ retval.co_stacksize = self.r_long()
+ retval.co_flags = self.r_long()
+ retval.co_code = self.r_object()
+ retval.co_consts = self.r_object()
+ retval.co_names = self.r_object()
+ retval.co_localsplusnames = self.r_object()
+ retval.co_localspluskinds = self.r_object()
+ retval.co_filename = self.r_object()
+ retval.co_name = self.r_object()
+ retval.co_qualname = self.r_object()
+ retval.co_firstlineno = self.r_long()
+ retval.co_linetable = self.r_object()
+ retval.co_exceptiontable = self.r_object()
+ return retval
+ elif type == Type.REF:
+ n = self.r_long()
+ retval = self.refs[n]
+ assert retval is not None
+ return retval
+ else:
+ breakpoint()
+ raise AssertionError(f"Unknown type {type} {chr(type)!r}")
+
+
+def loads(data: bytes) -> Any:
+ assert isinstance(data, bytes)
+ r = Reader(data)
+ return r.r_object()
+
+
+def main():
+ # Test
+ import marshal, pprint
+ sample = {'foo': {(42, "bar", 3.14)}}
+ data = marshal.dumps(sample)
+ retval = loads(data)
+ assert retval == sample, retval
+ sample = main.__code__
+ data = marshal.dumps(sample)
+ retval = loads(data)
+ assert isinstance(retval, Code), retval
+ pprint.pprint(retval.__dict__)
+
+
+if __name__ == "__main__":
+ main()
diff --git a/Tools/build/update_file.py b/Tools/build/update_file.py
new file mode 100644
index 0000000..b4182c1
--- /dev/null
+++ b/Tools/build/update_file.py
@@ -0,0 +1,92 @@
+"""
+A script that replaces an old file with a new one, only if the contents
+actually changed. If not, the new file is simply deleted.
+
+This avoids wholesale rebuilds when a code (re)generation phase does not
+actually change the in-tree generated code.
+"""
+
+import contextlib
+import os
+import os.path
+import sys
+
+
+@contextlib.contextmanager
+def updating_file_with_tmpfile(filename, tmpfile=None):
+ """A context manager for updating a file via a temp file.
+
+ The context manager provides two open files: the source file open
+ for reading, and the temp file, open for writing.
+
+ Upon exiting: both files are closed, and the source file is replaced
+ with the temp file.
+ """
+ # XXX Optionally use tempfile.TemporaryFile?
+ if not tmpfile:
+ tmpfile = filename + '.tmp'
+ elif os.path.isdir(tmpfile):
+ tmpfile = os.path.join(tmpfile, filename + '.tmp')
+
+ with open(filename, 'rb') as infile:
+ line = infile.readline()
+
+ if line.endswith(b'\r\n'):
+ newline = "\r\n"
+ elif line.endswith(b'\r'):
+ newline = "\r"
+ elif line.endswith(b'\n'):
+ newline = "\n"
+ else:
+ raise ValueError(f"unknown end of line: {filename}: {line!a}")
+
+ with open(tmpfile, 'w', newline=newline) as outfile:
+ with open(filename) as infile:
+ yield infile, outfile
+ update_file_with_tmpfile(filename, tmpfile)
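+
+# A minimal usage sketch (hypothetical caller; the file path and the
+# `transform` helper are illustrative, not part of this module):
+#
+#     with updating_file_with_tmpfile("Python/generated_file.c") as (infile, outfile):
+#         for line in infile:
+#             outfile.write(transform(line))
+#
+# The target file is only replaced when the regenerated contents differ.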
+
+
+def update_file_with_tmpfile(filename, tmpfile, *, create=False):
+ try:
+ targetfile = open(filename, 'rb')
+ except FileNotFoundError:
+ if not create:
+ raise # re-raise
+ outcome = 'created'
+ os.replace(tmpfile, filename)
+ else:
+ with targetfile:
+ old_contents = targetfile.read()
+ with open(tmpfile, 'rb') as f:
+ new_contents = f.read()
+ # Now compare!
+ if old_contents != new_contents:
+ outcome = 'updated'
+ os.replace(tmpfile, filename)
+ else:
+ outcome = 'same'
+ os.unlink(tmpfile)
+ return outcome
+
+
+if __name__ == '__main__':
+ import argparse
+ parser = argparse.ArgumentParser()
+ parser.add_argument('--create', action='store_true')
+ parser.add_argument('--exitcode', action='store_true')
+ parser.add_argument('filename', help='path to be updated')
+ parser.add_argument('tmpfile', help='path with new contents')
+ args = parser.parse_args()
+ kwargs = vars(args)
+ setexitcode = kwargs.pop('exitcode')
+
+ outcome = update_file_with_tmpfile(**kwargs)
+ if setexitcode:
+ if outcome == 'same':
+ sys.exit(0)
+ elif outcome == 'updated':
+ sys.exit(1)
+ elif outcome == 'created':
+ sys.exit(2)
+ else:
+ raise NotImplementedError
diff --git a/Tools/build/verify_ensurepip_wheels.py b/Tools/build/verify_ensurepip_wheels.py
new file mode 100755
index 0000000..044d1fd
--- /dev/null
+++ b/Tools/build/verify_ensurepip_wheels.py
@@ -0,0 +1,98 @@
+#! /usr/bin/env python3
+
+"""
+Compare checksums for wheels in :mod:`ensurepip` against the Cheeseshop.
+
+When GitHub Actions executes the script, output is formatted accordingly.
+https://docs.github.com/en/actions/using-workflows/workflow-commands-for-github-actions#setting-a-notice-message
+"""
+
+import hashlib
+import json
+import os
+import re
+from pathlib import Path
+from urllib.request import urlopen
+
+PACKAGE_NAMES = ("pip", "setuptools")
+ENSURE_PIP_ROOT = Path(__file__).parent.parent.parent / "Lib/ensurepip"
+WHEEL_DIR = ENSURE_PIP_ROOT / "_bundled"
+ENSURE_PIP_INIT_PY_TEXT = (ENSURE_PIP_ROOT / "__init__.py").read_text(encoding="utf-8")
+GITHUB_ACTIONS = os.getenv("GITHUB_ACTIONS") == "true"
+
+
+def print_notice(file_path: str, message: str) -> None:
+ if GITHUB_ACTIONS:
+ message = f"::notice file={file_path}::{message}"
+ print(message, end="\n\n")
+
+
+def print_error(file_path: str, message: str) -> None:
+ if GITHUB_ACTIONS:
+ message = f"::error file={file_path}::{message}"
+ print(message, end="\n\n")
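+
+# Under GitHub Actions, the helpers above emit workflow commands of the form
+# (illustrative):
+#
+#     ::error file=<wheel path>::<message>
+#
+# which GitHub renders as inline annotations on the workflow run.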
+
+
+def verify_wheel(package_name: str) -> bool:
+ # Find the package on disk
+ package_path = next(WHEEL_DIR.glob(f"{package_name}*.whl"), None)
+ if not package_path:
+ print_error("", f"Could not find a {package_name} wheel on disk.")
+ return False
+
+ print(f"Verifying checksum for {package_path}.")
+
+ # Find the version of the package used by ensurepip
+ package_version_match = re.search(
+ f'_{package_name.upper()}_VERSION = "([^"]+)', ENSURE_PIP_INIT_PY_TEXT
+ )
+ if not package_version_match:
+ print_error(
+ package_path,
+ f"No {package_name} version found in Lib/ensurepip/__init__.py.",
+ )
+ return False
+ package_version = package_version_match[1]
+
+ # Get the SHA 256 digest from the Cheeseshop
+ try:
+ raw_text = urlopen(f"https://pypi.org/pypi/{package_name}/json").read()
+ except (OSError, ValueError):
+ print_error(package_path, f"Could not fetch JSON metadata for {package_name}.")
+ return False
+
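+    # The PyPI JSON payload maps each release version to a list of file
+    # entries, each with a "filename" and a "digests" mapping; abridged shape:
+    #
+    #     {"releases": {"<version>": [{"filename": ..., "digests": {"sha256": ...}}]}}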
+ release_files = json.loads(raw_text)["releases"][package_version]
+ for release_info in release_files:
+ if package_path.name != release_info["filename"]:
+ continue
+ expected_digest = release_info["digests"].get("sha256", "")
+ break
+ else:
+ print_error(package_path, f"No digest for {package_name} found from PyPI.")
+ return False
+
+ # Compute the SHA 256 digest of the wheel on disk
+ actual_digest = hashlib.sha256(package_path.read_bytes()).hexdigest()
+
+ print(f"Expected digest: {expected_digest}")
+ print(f"Actual digest: {actual_digest}")
+
+ if actual_digest != expected_digest:
+ print_error(
+ package_path, f"Failed to verify the checksum of the {package_name} wheel."
+ )
+ return False
+
+ print_notice(
+ package_path,
+ f"Successfully verified the checksum of the {package_name} wheel.",
+ )
+ return True
+
+
+if __name__ == "__main__":
+ exit_status = 0
+ for package_name in PACKAGE_NAMES:
+ if not verify_wheel(package_name):
+ exit_status = 1
+ raise SystemExit(exit_status)