Diffstat (limited to 'Tools/build')
-rw-r--r--  Tools/build/check_extension_modules.py  484
-rw-r--r--  Tools/build/deepfreeze.py  504
-rw-r--r--  Tools/build/freeze_modules.py  733
-rw-r--r--  Tools/build/generate_global_objects.py  382
-rw-r--r--  Tools/build/generate_levenshtein_examples.py  70
-rw-r--r--  Tools/build/generate_opcode_h.py  199
-rwxr-xr-x  Tools/build/generate_re_casefix.py  96
-rwxr-xr-x  Tools/build/generate_sre_constants.py  80
-rw-r--r--  Tools/build/generate_stdlib_module_names.py  139
-rwxr-xr-x  Tools/build/generate_token.py  282
-rwxr-xr-x  Tools/build/parse_html5_entities.py  115
-rwxr-xr-x  Tools/build/smelly.py  173
-rw-r--r--  Tools/build/stable_abi.py  757
-rw-r--r--  Tools/build/umarshal.py  325
-rw-r--r--  Tools/build/update_file.py  92
-rwxr-xr-x  Tools/build/verify_ensurepip_wheels.py  98
16 files changed, 4529 insertions, 0 deletions
diff --git a/Tools/build/check_extension_modules.py b/Tools/build/check_extension_modules.py
new file mode 100644
index 0000000..59239c6
--- /dev/null
+++ b/Tools/build/check_extension_modules.py
@@ -0,0 +1,484 @@
+"""Check extension modules
+
+The script checks shared and built-in extension modules. It verifies that the
+modules have been built and that they can be imported successfully. Missing
+modules and failed imports are reported to the user. Shared extension
+files are renamed on failed import.
+
+Module information is parsed from several sources:
+
+- core modules hard-coded in Modules/config.c.in
+- Windows-specific modules that are hard-coded in PC/config.c
+- MODULE_{name}_STATE entries in Makefile (provided through sysconfig)
+- Various makesetup files:
+ - $(srcdir)/Modules/Setup
+ - Modules/Setup.[local|bootstrap|stdlib] files, which are generated
+ from $(srcdir)/Modules/Setup.*.in files
+
+See --help for more information
+"""
+import argparse
+import collections
+import enum
+import logging
+import os
+import pathlib
+import re
+import sys
+import sysconfig
+import warnings
+
+from importlib._bootstrap import _load as bootstrap_load
+from importlib.machinery import BuiltinImporter, ExtensionFileLoader, ModuleSpec
+from importlib.util import spec_from_file_location, spec_from_loader
+from typing import Iterable
+
+SRC_DIR = pathlib.Path(__file__).parent.parent.parent
+
+# core modules, hard-coded in Modules/config.c.in
+CORE_MODULES = {
+ "_ast",
+ "_imp",
+ "_string",
+ "_tokenize",
+ "_warnings",
+ "builtins",
+ "gc",
+ "marshal",
+ "sys",
+}
+
+# Windows-only modules
+WINDOWS_MODULES = {
+ "_msi",
+ "_overlapped",
+ "_testconsole",
+ "_winapi",
+ "msvcrt",
+ "nt",
+ "winreg",
+ "winsound",
+}
+
+
+logger = logging.getLogger(__name__)
+
+parser = argparse.ArgumentParser(
+ prog="check_extension_modules",
+ description=__doc__,
+ formatter_class=argparse.RawDescriptionHelpFormatter,
+)
+
+parser.add_argument(
+ "--verbose",
+ action="store_true",
+ help="Verbose, report builtin, shared, and unavailable modules",
+)
+
+parser.add_argument(
+ "--debug",
+ action="store_true",
+ help="Enable debug logging",
+)
+
+parser.add_argument(
+ "--strict",
+ action=argparse.BooleanOptionalAction,
+ help=(
+ "Strict check, fail when a module is missing or fails to import"
+ "(default: no, unless env var PYTHONSTRICTEXTENSIONBUILD is set)"
+ ),
+ default=bool(os.environ.get("PYTHONSTRICTEXTENSIONBUILD")),
+)
+
+parser.add_argument(
+ "--cross-compiling",
+ action=argparse.BooleanOptionalAction,
+ help=(
+ "Use cross-compiling checks "
+ "(default: no, unless env var _PYTHON_HOST_PLATFORM is set)."
+ ),
+ default="_PYTHON_HOST_PLATFORM" in os.environ,
+)
+
+parser.add_argument(
+ "--list-module-names",
+ action="store_true",
+ help="Print a list of module names to stdout and exit",
+)
+
+
+class ModuleState(enum.Enum):
+ # Makefile state "yes"
+ BUILTIN = "builtin"
+ SHARED = "shared"
+
+ DISABLED = "disabled"
+ MISSING = "missing"
+ NA = "n/a"
+ # disabled by Setup / makesetup rule
+ DISABLED_SETUP = "disabled_setup"
+
+ def __bool__(self):
+ return self.value in {"builtin", "shared"}
+
+
+ModuleInfo = collections.namedtuple("ModuleInfo", "name state")
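+# Example (illustrative): ModuleInfo("_ssl", ModuleState.SHARED) describes a
+# shared extension module; whether it counts as built is determined by
+# truth-testing its state (see ModuleState.__bool__ above).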
+
+
+class ModuleChecker:
+ pybuilddir_txt = "pybuilddir.txt"
+
+ setup_files = (
+ # see end of configure.ac
+ "Modules/Setup.local",
+ "Modules/Setup.stdlib",
+ "Modules/Setup.bootstrap",
+ SRC_DIR / "Modules/Setup",
+ )
+
+ def __init__(self, cross_compiling: bool = False, strict: bool = False):
+ self.cross_compiling = cross_compiling
+ self.strict_extensions_build = strict
+ self.ext_suffix = sysconfig.get_config_var("EXT_SUFFIX")
+ self.platform = sysconfig.get_platform()
+ self.builddir = self.get_builddir()
+ self.modules = self.get_modules()
+
+ self.builtin_ok = []
+ self.shared_ok = []
+ self.failed_on_import = []
+ self.missing = []
+ self.disabled_configure = []
+ self.disabled_setup = []
+ self.notavailable = []
+
+ def check(self):
+ for modinfo in self.modules:
+ logger.debug("Checking '%s' (%s)", modinfo.name, self.get_location(modinfo))
+ if modinfo.state == ModuleState.DISABLED:
+ self.disabled_configure.append(modinfo)
+ elif modinfo.state == ModuleState.DISABLED_SETUP:
+ self.disabled_setup.append(modinfo)
+ elif modinfo.state == ModuleState.MISSING:
+ self.missing.append(modinfo)
+ elif modinfo.state == ModuleState.NA:
+ self.notavailable.append(modinfo)
+ else:
+ try:
+ if self.cross_compiling:
+ self.check_module_cross(modinfo)
+ else:
+ self.check_module_import(modinfo)
+ except (ImportError, FileNotFoundError):
+ self.rename_module(modinfo)
+ self.failed_on_import.append(modinfo)
+ else:
+ if modinfo.state == ModuleState.BUILTIN:
+ self.builtin_ok.append(modinfo)
+ else:
+ assert modinfo.state == ModuleState.SHARED
+ self.shared_ok.append(modinfo)
+
+ def summary(self, *, verbose: bool = False):
+ longest = max([len(e.name) for e in self.modules], default=0)
+
+ def print_three_column(modinfos: list[ModuleInfo]):
+ names = [modinfo.name for modinfo in modinfos]
+ names.sort(key=str.lower)
+ # guarantee zip() doesn't drop anything
+ while len(names) % 3:
+ names.append("")
+ for l, m, r in zip(names[::3], names[1::3], names[2::3]):
+ print("%-*s %-*s %-*s" % (longest, l, longest, m, longest, r))
+
+ if verbose and self.builtin_ok:
+ print("The following *built-in* modules have been successfully built:")
+ print_three_column(self.builtin_ok)
+ print()
+
+ if verbose and self.shared_ok:
+ print("The following *shared* modules have been successfully built:")
+ print_three_column(self.shared_ok)
+ print()
+
+ if self.disabled_configure:
+ print("The following modules are *disabled* in configure script:")
+ print_three_column(self.disabled_configure)
+ print()
+
+ if self.disabled_setup:
+ print("The following modules are *disabled* in Modules/Setup files:")
+ print_three_column(self.disabled_setup)
+ print()
+
+ if verbose and self.notavailable:
+ print(
+ f"The following modules are not available on platform '{self.platform}':"
+ )
+ print_three_column(self.notavailable)
+ print()
+
+ if self.missing:
+ print("The necessary bits to build these optional modules were not found:")
+ print_three_column(self.missing)
+ print("To find the necessary bits, look in configure.ac and config.log.")
+ print()
+
+ if self.failed_on_import:
+ print(
+ "Following modules built successfully "
+ "but were removed because they could not be imported:"
+ )
+ print_three_column(self.failed_on_import)
+ print()
+
+ if any(
+ modinfo.name == "_ssl" for modinfo in self.missing + self.failed_on_import
+ ):
+ print("Could not build the ssl module!")
+ print("Python requires a OpenSSL 1.1.1 or newer")
+ if sysconfig.get_config_var("OPENSSL_LDFLAGS"):
+ print("Custom linker flags may require --with-openssl-rpath=auto")
+ print()
+
+ disabled = len(self.disabled_configure) + len(self.disabled_setup)
+ print(
+ f"Checked {len(self.modules)} modules ("
+ f"{len(self.builtin_ok)} built-in, "
+ f"{len(self.shared_ok)} shared, "
+ f"{len(self.notavailable)} n/a on {self.platform}, "
+ f"{disabled} disabled, "
+ f"{len(self.missing)} missing, "
+ f"{len(self.failed_on_import)} failed on import)"
+ )
+
+ def check_strict_build(self):
+ """Fail if modules are missing and it's a strict build"""
+ if self.strict_extensions_build and (self.failed_on_import or self.missing):
+ raise RuntimeError("Failed to build some stdlib modules")
+
+ def list_module_names(self, *, all: bool = False) -> set:
+ names = {modinfo.name for modinfo in self.modules}
+ if all:
+ names.update(WINDOWS_MODULES)
+ return names
+
+ def get_builddir(self) -> pathlib.Path:
+ try:
+ with open(self.pybuilddir_txt, encoding="utf-8") as f:
+ builddir = f.read()
+ except FileNotFoundError:
+ logger.error("%s must be run from the top build directory", __file__)
+ raise
+ builddir = pathlib.Path(builddir)
+ logger.debug("%s: %s", self.pybuilddir_txt, builddir)
+ return builddir
+
+ def get_modules(self) -> list[ModuleInfo]:
+ """Get module info from sysconfig and Modules/Setup* files"""
+ seen = set()
+ modules = []
+ # parsing order is important, first entry wins
+ for modinfo in self.get_core_modules():
+ modules.append(modinfo)
+ seen.add(modinfo.name)
+ for setup_file in self.setup_files:
+ for modinfo in self.parse_setup_file(setup_file):
+ if modinfo.name not in seen:
+ modules.append(modinfo)
+ seen.add(modinfo.name)
+ for modinfo in self.get_sysconfig_modules():
+ if modinfo.name not in seen:
+ modules.append(modinfo)
+ seen.add(modinfo.name)
+ logger.debug("Found %i modules in total", len(modules))
+ modules.sort()
+ return modules
+
+ def get_core_modules(self) -> Iterable[ModuleInfo]:
+ """Get hard-coded core modules"""
+ for name in CORE_MODULES:
+ modinfo = ModuleInfo(name, ModuleState.BUILTIN)
+ logger.debug("Found core module %s", modinfo)
+ yield modinfo
+
+ def get_sysconfig_modules(self) -> Iterable[ModuleInfo]:
+ """Get modules defined in Makefile through sysconfig
+
+ MODBUILT_NAMES: modules in *static* block
+ MODSHARED_NAMES: modules in *shared* block
+ MODDISABLED_NAMES: modules in *disabled* block
+ """
+ moddisabled = set(sysconfig.get_config_var("MODDISABLED_NAMES").split())
+ if self.cross_compiling:
+ modbuiltin = set(sysconfig.get_config_var("MODBUILT_NAMES").split())
+ else:
+ modbuiltin = set(sys.builtin_module_names)
+
+ for key, value in sysconfig.get_config_vars().items():
+ if not key.startswith("MODULE_") or not key.endswith("_STATE"):
+ continue
+ if value not in {"yes", "disabled", "missing", "n/a"}:
+ raise ValueError(f"Unsupported value '{value}' for {key}")
+
+ modname = key[7:-6].lower()
+ if modname in moddisabled:
+ # Setup "*disabled*" rule
+ state = ModuleState.DISABLED_SETUP
+ elif value in {"disabled", "missing", "n/a"}:
+ state = ModuleState(value)
+ elif modname in modbuiltin:
+ assert value == "yes"
+ state = ModuleState.BUILTIN
+ else:
+ assert value == "yes"
+ state = ModuleState.SHARED
+
+ modinfo = ModuleInfo(modname, state)
+ logger.debug("Found %s in Makefile", modinfo)
+ yield modinfo
+
+ def parse_setup_file(self, setup_file: pathlib.Path) -> Iterable[ModuleInfo]:
+ """Parse a Modules/Setup file"""
+ assign_var = re.compile(r"^\w+=") # EGG_SPAM=foo
+ # default to static module
+ state = ModuleState.BUILTIN
+ logger.debug("Parsing Setup file %s", setup_file)
+ with open(setup_file, encoding="utf-8") as f:
+ for line in f:
+ line = line.strip()
+ if not line or line.startswith("#") or assign_var.match(line):
+ continue
+ match line.split():
+ case ["*shared*"]:
+ state = ModuleState.SHARED
+ case ["*static*"]:
+ state = ModuleState.BUILTIN
+ case ["*disabled*"]:
+ state = ModuleState.DISABLED
+ case ["*noconfig*"]:
+ state = None
+ case [*items]:
+ if state == ModuleState.DISABLED:
+ # *disabled* can disable multiple modules per line
+ for item in items:
+ modinfo = ModuleInfo(item, state)
+ logger.debug("Found %s in %s", modinfo, setup_file)
+ yield modinfo
+ elif state in {ModuleState.SHARED, ModuleState.BUILTIN}:
+ # *shared* and *static*, first item is the name of the module.
+ modinfo = ModuleInfo(items[0], state)
+ logger.debug("Found %s in %s", modinfo, setup_file)
+ yield modinfo
+
+ def get_spec(self, modinfo: ModuleInfo) -> ModuleSpec:
+ """Get ModuleSpec for builtin or extension module"""
+ if modinfo.state == ModuleState.SHARED:
+ location = os.fspath(self.get_location(modinfo))
+ loader = ExtensionFileLoader(modinfo.name, location)
+ return spec_from_file_location(modinfo.name, location, loader=loader)
+ elif modinfo.state == ModuleState.BUILTIN:
+ return spec_from_loader(modinfo.name, loader=BuiltinImporter)
+ else:
+ raise ValueError(modinfo)
+
+ def get_location(self, modinfo: ModuleInfo) -> pathlib.Path:
+ """Get shared library location in build directory"""
+ if modinfo.state == ModuleState.SHARED:
+ return self.builddir / f"{modinfo.name}{self.ext_suffix}"
+ else:
+ return None
+
+ def _check_file(self, modinfo: ModuleInfo, spec: ModuleSpec):
+ """Check that the module file is present and not empty"""
+ if spec.loader is BuiltinImporter:
+ return
+ try:
+ st = os.stat(spec.origin)
+ except FileNotFoundError:
+ logger.error("%s (%s) is missing", modinfo.name, spec.origin)
+ raise
+ if not st.st_size:
+ raise ImportError(f"{spec.origin} is an empty file")
+
+ def check_module_import(self, modinfo: ModuleInfo):
+ """Attempt to import module and report errors"""
+ spec = self.get_spec(modinfo)
+ self._check_file(modinfo, spec)
+ try:
+ with warnings.catch_warnings():
+ # ignore deprecation warning from deprecated modules
+ warnings.simplefilter("ignore", DeprecationWarning)
+ bootstrap_load(spec)
+ except ImportError as e:
+ logger.error("%s failed to import: %s", modinfo.name, e)
+ raise
+ except Exception as e:
+ logger.exception("Importing extension '%s' failed!", modinfo.name)
+ raise
+
+ def check_module_cross(self, modinfo: ModuleInfo):
+ """Sanity check for cross compiling"""
+ spec = self.get_spec(modinfo)
+ self._check_file(modinfo, spec)
+
+ def rename_module(self, modinfo: ModuleInfo) -> None:
+ """Rename module file"""
+ if modinfo.state == ModuleState.BUILTIN:
+ logger.error("Cannot mark builtin module '%s' as failed!", modinfo.name)
+ return
+
+ failed_name = f"{modinfo.name}_failed{self.ext_suffix}"
+ builddir_path = self.get_location(modinfo)
+ if builddir_path.is_symlink():
+ symlink = builddir_path
+ module_path = builddir_path.resolve().relative_to(os.getcwd())
+ failed_path = module_path.parent / failed_name
+ else:
+ symlink = None
+ module_path = builddir_path
+ failed_path = self.builddir / failed_name
+
+ # remove old failed file
+ failed_path.unlink(missing_ok=True)
+ # remove symlink
+ if symlink is not None:
+ symlink.unlink(missing_ok=True)
+ # rename shared extension file
+ try:
+ module_path.rename(failed_path)
+ except FileNotFoundError:
+ logger.debug("Shared extension file '%s' does not exist.", module_path)
+ else:
+ logger.debug("Rename '%s' -> '%s'", module_path, failed_path)
+
+
+def main():
+ args = parser.parse_args()
+ if args.debug:
+ args.verbose = True
+ logging.basicConfig(
+ level=logging.DEBUG if args.debug else logging.INFO,
+ format="[%(levelname)s] %(message)s",
+ )
+
+ checker = ModuleChecker(
+ cross_compiling=args.cross_compiling,
+ strict=args.strict,
+ )
+ if args.list_module_names:
+ names = checker.list_module_names(all=True)
+ for name in sorted(names):
+ print(name)
+ else:
+ checker.check()
+ checker.summary(verbose=args.verbose)
+ try:
+ checker.check_strict_build()
+ except RuntimeError as e:
+ parser.exit(1, f"\nError: {e}\n")
+
+
+if __name__ == "__main__":
+ main()
diff --git a/Tools/build/deepfreeze.py b/Tools/build/deepfreeze.py
new file mode 100644
index 0000000..28ac2b1
--- /dev/null
+++ b/Tools/build/deepfreeze.py
@@ -0,0 +1,504 @@
+"""Deep freeze
+
+The script may be executed by the _bootstrap_python interpreter.
+Shared library extension modules are not available in that case.
+On Windows, and in cross-compilation cases, it is executed
+by Python 3.10, and 3.11 features are not available.
+"""
+import argparse
+import ast
+import builtins
+import collections
+import contextlib
+import os
+import re
+import time
+import types
+from typing import Dict, FrozenSet, TextIO, Tuple
+
+import umarshal
+from generate_global_objects import get_identifiers_and_strings
+
+verbose = False
+identifiers, strings = get_identifiers_and_strings()
+
+# This must be kept in sync with opcode.py
+RESUME = 151
+
+def isprintable(b: bytes) -> bool:
+ return all(0x20 <= c < 0x7f for c in b)
+
+
+def make_string_literal(b: bytes) -> str:
+ res = ['"']
+ if isprintable(b):
+ res.append(b.decode("ascii").replace("\\", "\\\\").replace("\"", "\\\""))
+ else:
+ for i in b:
+ res.append(f"\\x{i:02x}")
+ res.append('"')
+ return "".join(res)
+
+
+CO_FAST_LOCAL = 0x20
+CO_FAST_CELL = 0x40
+CO_FAST_FREE = 0x80
+
+
+def get_localsplus(code: types.CodeType):
+ a = collections.defaultdict(int)
+ for name in code.co_varnames:
+ a[name] |= CO_FAST_LOCAL
+ for name in code.co_cellvars:
+ a[name] |= CO_FAST_CELL
+ for name in code.co_freevars:
+ a[name] |= CO_FAST_FREE
+ return tuple(a.keys()), bytes(a.values())
+
+
+def get_localsplus_counts(code: types.CodeType,
+ names: Tuple[str, ...],
+ kinds: bytes) -> Tuple[int, int, int, int]:
+ nlocals = 0
+ nplaincellvars = 0
+ ncellvars = 0
+ nfreevars = 0
+ assert len(names) == len(kinds)
+ for name, kind in zip(names, kinds):
+ if kind & CO_FAST_LOCAL:
+ nlocals += 1
+ if kind & CO_FAST_CELL:
+ ncellvars += 1
+ elif kind & CO_FAST_CELL:
+ ncellvars += 1
+ nplaincellvars += 1
+ elif kind & CO_FAST_FREE:
+ nfreevars += 1
+ assert nlocals == len(code.co_varnames) == code.co_nlocals, \
+ (nlocals, len(code.co_varnames), code.co_nlocals)
+ assert ncellvars == len(code.co_cellvars)
+ assert nfreevars == len(code.co_freevars)
+ assert len(names) == nlocals + nplaincellvars + nfreevars
+ return nlocals, nplaincellvars, ncellvars, nfreevars
+
+
+PyUnicode_1BYTE_KIND = 1
+PyUnicode_2BYTE_KIND = 2
+PyUnicode_4BYTE_KIND = 4
+
+
+def analyze_character_width(s: str) -> Tuple[int, bool]:
+ maxchar = ' '
+ for c in s:
+ maxchar = max(maxchar, c)
+ ascii = False
+ if maxchar <= '\xFF':
+ kind = PyUnicode_1BYTE_KIND
+ ascii = maxchar <= '\x7F'
+ elif maxchar <= '\uFFFF':
+ kind = PyUnicode_2BYTE_KIND
+ else:
+ kind = PyUnicode_4BYTE_KIND
+ return kind, ascii
+
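+# Illustrative examples for analyze_character_width() (values assumed):
+#   "hello"      -> (PyUnicode_1BYTE_KIND, ascii=True)
+#   "caf\xe9"    -> (PyUnicode_1BYTE_KIND, ascii=False)   # Latin-1 range
+#   "\u20ac"     -> (PyUnicode_2BYTE_KIND, ascii=False)   # needs 16-bit units
+#   "\U0001f600" -> (PyUnicode_4BYTE_KIND, ascii=False)   # needs 32-bit units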
+
+def removesuffix(base: str, suffix: str) -> str:
+ if base.endswith(suffix):
+ return base[:len(base) - len(suffix)]
+ return base
+
+class Printer:
+
+ def __init__(self, file: TextIO) -> None:
+ self.level = 0
+ self.file = file
+ self.cache: Dict[tuple[type, object, str], str] = {}
+ self.hits, self.misses = 0, 0
+ self.patchups: list[str] = []
+ self.deallocs: list[str] = []
+ self.interns: list[str] = []
+ self.write('#include "Python.h"')
+ self.write('#include "internal/pycore_gc.h"')
+ self.write('#include "internal/pycore_code.h"')
+ self.write('#include "internal/pycore_frame.h"')
+ self.write('#include "internal/pycore_long.h"')
+ self.write("")
+
+ @contextlib.contextmanager
+ def indent(self) -> None:
+ save_level = self.level
+ try:
+ self.level += 1
+ yield
+ finally:
+ self.level = save_level
+
+ def write(self, arg: str) -> None:
+ self.file.writelines((" "*self.level, arg, "\n"))
+
+ @contextlib.contextmanager
+ def block(self, prefix: str, suffix: str = "") -> None:
+ self.write(prefix + " {")
+ with self.indent():
+ yield
+ self.write("}" + suffix)
+
+ def object_head(self, typename: str) -> None:
+ with self.block(".ob_base =", ","):
+ self.write(f".ob_refcnt = 999999999,")
+ self.write(f".ob_type = &{typename},")
+
+ def object_var_head(self, typename: str, size: int) -> None:
+ with self.block(".ob_base =", ","):
+ self.object_head(typename)
+ self.write(f".ob_size = {size},")
+
+ def field(self, obj: object, name: str) -> None:
+ self.write(f".{name} = {getattr(obj, name)},")
+
+ def generate_bytes(self, name: str, b: bytes) -> str:
+ if b == b"":
+ return "(PyObject *)&_Py_SINGLETON(bytes_empty)"
+ if len(b) == 1:
+ return f"(PyObject *)&_Py_SINGLETON(bytes_characters[{b[0]}])"
+ self.write("static")
+ with self.indent():
+ with self.block("struct"):
+ self.write("PyObject_VAR_HEAD")
+ self.write("Py_hash_t ob_shash;")
+ self.write(f"char ob_sval[{len(b) + 1}];")
+ with self.block(f"{name} =", ";"):
+ self.object_var_head("PyBytes_Type", len(b))
+ self.write(".ob_shash = -1,")
+ self.write(f".ob_sval = {make_string_literal(b)},")
+ return f"& {name}.ob_base.ob_base"
+
+ def generate_unicode(self, name: str, s: str) -> str:
+ if s in strings:
+ return f"&_Py_STR({strings[s]})"
+ if s in identifiers:
+ return f"&_Py_ID({s})"
+ if re.match(r'\A[A-Za-z0-9_]+\Z', s):
+ name = f"const_str_{s}"
+ kind, ascii = analyze_character_width(s)
+ if kind == PyUnicode_1BYTE_KIND:
+ datatype = "uint8_t"
+ elif kind == PyUnicode_2BYTE_KIND:
+ datatype = "uint16_t"
+ else:
+ datatype = "uint32_t"
+ self.write("static")
+ with self.indent():
+ with self.block("struct"):
+ if ascii:
+ self.write("PyASCIIObject _ascii;")
+ else:
+ self.write("PyCompactUnicodeObject _compact;")
+ self.write(f"{datatype} _data[{len(s)+1}];")
+ with self.block(f"{name} =", ";"):
+ if ascii:
+ with self.block("._ascii =", ","):
+ self.object_head("PyUnicode_Type")
+ self.write(f".length = {len(s)},")
+ self.write(".hash = -1,")
+ with self.block(".state =", ","):
+ self.write(".kind = 1,")
+ self.write(".compact = 1,")
+ self.write(".ascii = 1,")
+ self.write(f"._data = {make_string_literal(s.encode('ascii'))},")
+ return f"& {name}._ascii.ob_base"
+ else:
+ with self.block("._compact =", ","):
+ with self.block("._base =", ","):
+ self.object_head("PyUnicode_Type")
+ self.write(f".length = {len(s)},")
+ self.write(".hash = -1,")
+ with self.block(".state =", ","):
+ self.write(f".kind = {kind},")
+ self.write(".compact = 1,")
+ self.write(".ascii = 0,")
+ utf8 = s.encode('utf-8')
+ self.write(f'.utf8 = {make_string_literal(utf8)},')
+ self.write(f'.utf8_length = {len(utf8)},')
+ with self.block(f"._data =", ","):
+ for i in range(0, len(s), 16):
+ data = s[i:i+16]
+ self.write(", ".join(map(str, map(ord, data))) + ",")
+ return f"& {name}._compact._base.ob_base"
+
+
+ def generate_code(self, name: str, code: types.CodeType) -> str:
+ # The ordering here matches PyCode_NewWithPosOnlyArgs()
+ # (but see below).
+ co_consts = self.generate(name + "_consts", code.co_consts)
+ co_names = self.generate(name + "_names", code.co_names)
+ co_filename = self.generate(name + "_filename", code.co_filename)
+ co_name = self.generate(name + "_name", code.co_name)
+ co_qualname = self.generate(name + "_qualname", code.co_qualname)
+ co_linetable = self.generate(name + "_linetable", code.co_linetable)
+ co_exceptiontable = self.generate(name + "_exceptiontable", code.co_exceptiontable)
+ # These fields are not directly accessible
+ localsplusnames, localspluskinds = get_localsplus(code)
+ co_localsplusnames = self.generate(name + "_localsplusnames", localsplusnames)
+ co_localspluskinds = self.generate(name + "_localspluskinds", localspluskinds)
+ # Derived values
+ nlocals, nplaincellvars, ncellvars, nfreevars = \
+ get_localsplus_counts(code, localsplusnames, localspluskinds)
+ co_code_adaptive = make_string_literal(code.co_code)
+ self.write("static")
+ with self.indent():
+ self.write(f"struct _PyCode_DEF({len(code.co_code)})")
+ with self.block(f"{name} =", ";"):
+ self.object_var_head("PyCode_Type", len(code.co_code) // 2)
+ # But the ordering here must match that in cpython/code.h
+ # (which is a pain because we tend to reorder those for perf)
+ # otherwise MSVC doesn't like it.
+ self.write(f".co_consts = {co_consts},")
+ self.write(f".co_names = {co_names},")
+ self.write(f".co_exceptiontable = {co_exceptiontable},")
+ self.field(code, "co_flags")
+ self.write(".co_warmup = QUICKENING_INITIAL_WARMUP_VALUE,")
+ self.write("._co_linearray_entry_size = 0,")
+ self.field(code, "co_argcount")
+ self.field(code, "co_posonlyargcount")
+ self.field(code, "co_kwonlyargcount")
+ self.write(f".co_framesize = {code.co_stacksize + len(localsplusnames)} + FRAME_SPECIALS_SIZE,")
+ self.field(code, "co_stacksize")
+ self.field(code, "co_firstlineno")
+ self.write(f".co_nlocalsplus = {len(localsplusnames)},")
+ self.field(code, "co_nlocals")
+ self.write(f".co_nplaincellvars = {nplaincellvars},")
+ self.write(f".co_ncellvars = {ncellvars},")
+ self.write(f".co_nfreevars = {nfreevars},")
+ self.write(f".co_localsplusnames = {co_localsplusnames},")
+ self.write(f".co_localspluskinds = {co_localspluskinds},")
+ self.write(f".co_filename = {co_filename},")
+ self.write(f".co_name = {co_name},")
+ self.write(f".co_qualname = {co_qualname},")
+ self.write(f".co_linetable = {co_linetable},")
+ self.write(f"._co_cached = NULL,")
+ self.write("._co_linearray = NULL,")
+ self.write(f".co_code_adaptive = {co_code_adaptive},")
+ for i, op in enumerate(code.co_code[::2]):
+ if op == RESUME:
+ self.write(f"._co_firsttraceable = {i},")
+ break
+ name_as_code = f"(PyCodeObject *)&{name}"
+ self.deallocs.append(f"_PyStaticCode_Dealloc({name_as_code});")
+ self.interns.append(f"_PyStaticCode_InternStrings({name_as_code})")
+ return f"& {name}.ob_base.ob_base"
+
+ def generate_tuple(self, name: str, t: Tuple[object, ...]) -> str:
+ if len(t) == 0:
+ return f"(PyObject *)& _Py_SINGLETON(tuple_empty)"
+ items = [self.generate(f"{name}_{i}", it) for i, it in enumerate(t)]
+ self.write("static")
+ with self.indent():
+ with self.block("struct"):
+ self.write("PyGC_Head _gc_head;")
+ with self.block("struct", "_object;"):
+ self.write("PyObject_VAR_HEAD")
+ if t:
+ self.write(f"PyObject *ob_item[{len(t)}];")
+ with self.block(f"{name} =", ";"):
+ with self.block("._object =", ","):
+ self.object_var_head("PyTuple_Type", len(t))
+ if items:
+ with self.block(f".ob_item =", ","):
+ for item in items:
+ self.write(item + ",")
+ return f"& {name}._object.ob_base.ob_base"
+
+ def _generate_int_for_bits(self, name: str, i: int, digit: int) -> None:
+ sign = -1 if i < 0 else 0 if i == 0 else +1
+ i = abs(i)
+ digits: list[int] = []
+ while i:
+ i, rem = divmod(i, digit)
+ digits.append(rem)
+ self.write("static")
+ with self.indent():
+ with self.block("struct"):
+ self.write("PyObject_VAR_HEAD")
+ self.write(f"digit ob_digit[{max(1, len(digits))}];")
+ with self.block(f"{name} =", ";"):
+ self.object_var_head("PyLong_Type", sign*len(digits))
+ if digits:
+ ds = ", ".join(map(str, digits))
+ self.write(f".ob_digit = {{ {ds} }},")
+
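+    # Worked example (illustrative): with 15-bit digits, 100000 is emitted as
+    # ob_digit = { 1696, 3 } (100000 == 3*2**15 + 1696) with ob_size = 2;
+    # negative values get a negative ob_size.
+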
+ def generate_int(self, name: str, i: int) -> str:
+ if -5 <= i <= 256:
+ return f"(PyObject *)&_PyLong_SMALL_INTS[_PY_NSMALLNEGINTS + {i}]"
+ if i >= 0:
+ name = f"const_int_{i}"
+ else:
+ name = f"const_int_negative_{abs(i)}"
+ if abs(i) < 2**15:
+ self._generate_int_for_bits(name, i, 2**15)
+ else:
+ connective = "if"
+ for bits_in_digit in 15, 30:
+ self.write(f"#{connective} PYLONG_BITS_IN_DIGIT == {bits_in_digit}")
+ self._generate_int_for_bits(name, i, 2**bits_in_digit)
+ connective = "elif"
+ self.write("#else")
+ self.write('#error "PYLONG_BITS_IN_DIGIT should be 15 or 30"')
+ self.write("#endif")
+ # If neither clause applies, it won't compile
+ return f"& {name}.ob_base.ob_base"
+
+ def generate_float(self, name: str, x: float) -> str:
+ with self.block(f"static PyFloatObject {name} =", ";"):
+ self.object_head("PyFloat_Type")
+ self.write(f".ob_fval = {x},")
+ return f"&{name}.ob_base"
+
+ def generate_complex(self, name: str, z: complex) -> str:
+ with self.block(f"static PyComplexObject {name} =", ";"):
+ self.object_head("PyComplex_Type")
+ self.write(f".cval = {{ {z.real}, {z.imag} }},")
+ return f"&{name}.ob_base"
+
+ def generate_frozenset(self, name: str, fs: FrozenSet[object]) -> str:
+ try:
+ fs = sorted(fs)
+ except TypeError:
+ # frozen set with incompatible types, fallback to repr()
+ fs = sorted(fs, key=repr)
+ ret = self.generate_tuple(name, tuple(fs))
+ self.write("// TODO: The above tuple should be a frozenset")
+ return ret
+
+    def generate_file(self, module: str, code: object) -> None:
+ module = module.replace(".", "_")
+ self.generate(f"{module}_toplevel", code)
+ with self.block(f"static void {module}_do_patchups(void)"):
+ for p in self.patchups:
+ self.write(p)
+ self.patchups.clear()
+ self.write(EPILOGUE.replace("%%NAME%%", module))
+
+ def generate(self, name: str, obj: object) -> str:
+ # Use repr() in the key to distinguish -0.0 from +0.0
+ key = (type(obj), obj, repr(obj))
+ if key in self.cache:
+ self.hits += 1
+ # print(f"Cache hit {key!r:.40}: {self.cache[key]!r:.40}")
+ return self.cache[key]
+ self.misses += 1
+        if isinstance(obj, (types.CodeType, umarshal.Code)):
+ val = self.generate_code(name, obj)
+ elif isinstance(obj, tuple):
+ val = self.generate_tuple(name, obj)
+ elif isinstance(obj, str):
+ val = self.generate_unicode(name, obj)
+ elif isinstance(obj, bytes):
+ val = self.generate_bytes(name, obj)
+ elif obj is True:
+ return "Py_True"
+ elif obj is False:
+ return "Py_False"
+ elif isinstance(obj, int):
+ val = self.generate_int(name, obj)
+ elif isinstance(obj, float):
+ val = self.generate_float(name, obj)
+ elif isinstance(obj, complex):
+ val = self.generate_complex(name, obj)
+ elif isinstance(obj, frozenset):
+ val = self.generate_frozenset(name, obj)
+ elif obj is builtins.Ellipsis:
+ return "Py_Ellipsis"
+ elif obj is None:
+ return "Py_None"
+ else:
+ raise TypeError(
+ f"Cannot generate code for {type(obj).__name__} object")
+ # print(f"Cache store {key!r:.40}: {val!r:.40}")
+ self.cache[key] = val
+ return val
+
+
+EPILOGUE = """
+PyObject *
+_Py_get_%%NAME%%_toplevel(void)
+{
+ %%NAME%%_do_patchups();
+ return Py_NewRef((PyObject *) &%%NAME%%_toplevel);
+}
+"""
+
+FROZEN_COMMENT_C = "/* Auto-generated by Programs/_freeze_module.c */"
+FROZEN_COMMENT_PY = "/* Auto-generated by Programs/_freeze_module.py */"
+
+FROZEN_DATA_LINE = r"\s*(\d+,\s*)+\s*"
+
+
+def is_frozen_header(source: str) -> bool:
+ return source.startswith((FROZEN_COMMENT_C, FROZEN_COMMENT_PY))
+
+
+def decode_frozen_data(source: str) -> types.CodeType:
+ lines = source.splitlines()
+ while lines and re.match(FROZEN_DATA_LINE, lines[0]) is None:
+ del lines[0]
+ while lines and re.match(FROZEN_DATA_LINE, lines[-1]) is None:
+ del lines[-1]
+ values: Tuple[int, ...] = ast.literal_eval("".join(lines).strip())
+ data = bytes(values)
+ return umarshal.loads(data)
+
+
+def generate(args: list[str], output: TextIO) -> None:
+ printer = Printer(output)
+ for arg in args:
+ file, modname = arg.rsplit(':', 1)
+ with open(file, "r", encoding="utf8") as fd:
+ source = fd.read()
+ if is_frozen_header(source):
+ code = decode_frozen_data(source)
+ else:
+            code = compile(source, f"<frozen {modname}>", "exec")
+ printer.generate_file(modname, code)
+ with printer.block(f"void\n_Py_Deepfreeze_Fini(void)"):
+ for p in printer.deallocs:
+ printer.write(p)
+ with printer.block(f"int\n_Py_Deepfreeze_Init(void)"):
+ for p in printer.interns:
+ with printer.block(f"if ({p} < 0)"):
+ printer.write("return -1;")
+ printer.write("return 0;")
+ if verbose:
+ print(f"Cache hits: {printer.hits}, misses: {printer.misses}")
+
+
+parser = argparse.ArgumentParser()
+parser.add_argument("-o", "--output", help="Defaults to deepfreeze.c", default="deepfreeze.c")
+parser.add_argument("-v", "--verbose", action="store_true", help="Print diagnostics")
+parser.add_argument('args', nargs="+", help="Input file and module name (required) in file:modname format")
+
+@contextlib.contextmanager
+def report_time(label: str):
+ t0 = time.time()
+ try:
+ yield
+ finally:
+ t1 = time.time()
+ if verbose:
+ print(f"{label}: {t1-t0:.3f} sec")
+
+
+def main() -> None:
+ global verbose
+ args = parser.parse_args()
+ verbose = args.verbose
+ output = args.output
+ with open(output, "w", encoding="utf-8") as file:
+ with report_time("generate"):
+ generate(args.args, file)
+ if verbose:
+ print(f"Wrote {os.path.getsize(output)} bytes to {output}")
+
+
+if __name__ == "__main__":
+ main()
diff --git a/Tools/build/freeze_modules.py b/Tools/build/freeze_modules.py
new file mode 100644
index 0000000..810224b
--- /dev/null
+++ b/Tools/build/freeze_modules.py
@@ -0,0 +1,733 @@
+"""Freeze modules and regen related files (e.g. Python/frozen.c).
+
+See the notes at the top of Python/frozen.c for more info.
+"""
+
+from collections import namedtuple
+import hashlib
+import os
+import ntpath
+import posixpath
+import argparse
+from update_file import updating_file_with_tmpfile
+
+
+ROOT_DIR = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
+ROOT_DIR = os.path.abspath(ROOT_DIR)
+FROZEN_ONLY = os.path.join(ROOT_DIR, 'Tools', 'freeze', 'flag.py')
+
+STDLIB_DIR = os.path.join(ROOT_DIR, 'Lib')
+# If FROZEN_MODULES_DIR or DEEPFROZEN_MODULES_DIR is changed then the
+# .gitattributes and .gitignore files need to be updated.
+FROZEN_MODULES_DIR = os.path.join(ROOT_DIR, 'Python', 'frozen_modules')
+DEEPFROZEN_MODULES_DIR = os.path.join(ROOT_DIR, 'Python', 'deepfreeze')
+
+FROZEN_FILE = os.path.join(ROOT_DIR, 'Python', 'frozen.c')
+MAKEFILE = os.path.join(ROOT_DIR, 'Makefile.pre.in')
+PCBUILD_PROJECT = os.path.join(ROOT_DIR, 'PCbuild', '_freeze_module.vcxproj')
+PCBUILD_FILTERS = os.path.join(ROOT_DIR, 'PCbuild', '_freeze_module.vcxproj.filters')
+PCBUILD_PYTHONCORE = os.path.join(ROOT_DIR, 'PCbuild', 'pythoncore.vcxproj')
+
+
+OS_PATH = 'ntpath' if os.name == 'nt' else 'posixpath'
+
+# These are modules that get frozen.
+TESTS_SECTION = 'Test module'
+FROZEN = [
+ # See parse_frozen_spec() for the format.
+ # In cases where the frozenid is duplicated, the first one is re-used.
+ ('import system', [
+ # These frozen modules are necessary for bootstrapping
+ # the import system.
+ 'importlib._bootstrap : _frozen_importlib',
+ 'importlib._bootstrap_external : _frozen_importlib_external',
+ # This module is important because some Python builds rely
+ # on a builtin zip file instead of a filesystem.
+ 'zipimport',
+ ]),
+ ('stdlib - startup, without site (python -S)', [
+ 'abc',
+ 'codecs',
+        # For now we do not freeze the encodings, due to the noise all
+ # those extra modules add to the text printed during the build.
+ # (See https://github.com/python/cpython/pull/28398#pullrequestreview-756856469.)
+ #'<encodings.*>',
+ 'io',
+ ]),
+ ('stdlib - startup, with site', [
+ '_collections_abc',
+ '_sitebuiltins',
+ 'genericpath',
+ 'ntpath',
+ 'posixpath',
+ # We must explicitly mark os.path as a frozen module
+ # even though it will never be imported.
+ f'{OS_PATH} : os.path',
+ 'os',
+ 'site',
+ 'stat',
+ ]),
+ ('runpy - run module with -m', [
+ "importlib.util",
+ "importlib.machinery",
+ "runpy",
+ ]),
+ (TESTS_SECTION, [
+ '__hello__',
+ '__hello__ : __hello_alias__',
+ '__hello__ : <__phello_alias__>',
+ '__hello__ : __phello_alias__.spam',
+ '<__phello__.**.*>',
+ f'frozen_only : __hello_only__ = {FROZEN_ONLY}',
+ ]),
+]
+BOOTSTRAP = {
+ 'importlib._bootstrap',
+ 'importlib._bootstrap_external',
+ 'zipimport',
+}
+
+
+#######################################
+# platform-specific helpers
+
+if os.path is posixpath:
+ relpath_for_posix_display = os.path.relpath
+
+ def relpath_for_windows_display(path, base):
+ return ntpath.relpath(
+ ntpath.join(*path.split(os.path.sep)),
+ ntpath.join(*base.split(os.path.sep)),
+ )
+
+else:
+ relpath_for_windows_display = ntpath.relpath
+
+ def relpath_for_posix_display(path, base):
+ return posixpath.relpath(
+ posixpath.join(*path.split(os.path.sep)),
+ posixpath.join(*base.split(os.path.sep)),
+ )
+
+
+#######################################
+# specs
+
+def parse_frozen_specs():
+ seen = {}
+ for section, specs in FROZEN:
+ parsed = _parse_specs(specs, section, seen)
+ for item in parsed:
+ frozenid, pyfile, modname, ispkg, section = item
+ try:
+ source = seen[frozenid]
+ except KeyError:
+ source = FrozenSource.from_id(frozenid, pyfile)
+ seen[frozenid] = source
+ else:
+ assert not pyfile or pyfile == source.pyfile, item
+ yield FrozenModule(modname, ispkg, section, source)
+
+
+def _parse_specs(specs, section, seen):
+ for spec in specs:
+ info, subs = _parse_spec(spec, seen, section)
+ yield info
+ for info in subs or ():
+ yield info
+
+
+def _parse_spec(spec, knownids=None, section=None):
+ """Yield an info tuple for each module corresponding to the given spec.
+
+ The info consists of: (frozenid, pyfile, modname, ispkg, section).
+
+ Supported formats:
+
+ frozenid
+ frozenid : modname
+ frozenid : modname = pyfile
+
+ "frozenid" and "modname" must be valid module names (dot-separated
+ identifiers). If "modname" is not provided then "frozenid" is used.
+ If "pyfile" is not provided then the filename of the module
+ corresponding to "frozenid" is used.
+
+    Angle brackets around a frozenid (e.g. '<encodings>') indicate
+ it is a package. This also means it must be an actual module
+ (i.e. "pyfile" cannot have been provided). Such values can have
+ patterns to expand submodules:
+
+ <encodings.*> - also freeze all direct submodules
+ <encodings.**.*> - also freeze the full submodule tree
+
+ As with "frozenid", angle brackets around "modname" indicate
+ it is a package. However, in this case "pyfile" should not
+ have been provided and patterns in "modname" are not supported.
+ Also, if "modname" has brackets then "frozenid" should not,
+ and "pyfile" should have been provided..
+ """
+ frozenid, _, remainder = spec.partition(':')
+ modname, _, pyfile = remainder.partition('=')
+ frozenid = frozenid.strip()
+ modname = modname.strip()
+ pyfile = pyfile.strip()
+
+ submodules = None
+ if modname.startswith('<') and modname.endswith('>'):
+ assert check_modname(frozenid), spec
+ modname = modname[1:-1]
+ assert check_modname(modname), spec
+ if frozenid in knownids:
+ pass
+ elif pyfile:
+ assert not os.path.isdir(pyfile), spec
+ else:
+ pyfile = _resolve_module(frozenid, ispkg=False)
+ ispkg = True
+ elif pyfile:
+ assert check_modname(frozenid), spec
+ assert not knownids or frozenid not in knownids, spec
+ assert check_modname(modname), spec
+ assert not os.path.isdir(pyfile), spec
+ ispkg = False
+ elif knownids and frozenid in knownids:
+ assert check_modname(frozenid), spec
+ assert check_modname(modname), spec
+ ispkg = False
+ else:
+ assert not modname or check_modname(modname), spec
+ resolved = iter(resolve_modules(frozenid))
+ frozenid, pyfile, ispkg = next(resolved)
+ if not modname:
+ modname = frozenid
+ if ispkg:
+ pkgid = frozenid
+ pkgname = modname
+ pkgfiles = {pyfile: pkgid}
+ def iter_subs():
+ for frozenid, pyfile, ispkg in resolved:
+ if pkgname:
+ modname = frozenid.replace(pkgid, pkgname, 1)
+ else:
+ modname = frozenid
+ if pyfile:
+ if pyfile in pkgfiles:
+ frozenid = pkgfiles[pyfile]
+ pyfile = None
+ elif ispkg:
+ pkgfiles[pyfile] = frozenid
+ yield frozenid, pyfile, modname, ispkg, section
+ submodules = iter_subs()
+
+ info = (frozenid, pyfile or None, modname, ispkg, section)
+ return info, submodules
+
+
+#######################################
+# frozen source files
+
+class FrozenSource(namedtuple('FrozenSource', 'id pyfile frozenfile deepfreezefile')):
+
+ @classmethod
+ def from_id(cls, frozenid, pyfile=None):
+ if not pyfile:
+ pyfile = os.path.join(STDLIB_DIR, *frozenid.split('.')) + '.py'
+ #assert os.path.exists(pyfile), (frozenid, pyfile)
+ frozenfile = resolve_frozen_file(frozenid, FROZEN_MODULES_DIR)
+ deepfreezefile = resolve_frozen_file(frozenid, DEEPFROZEN_MODULES_DIR)
+ return cls(frozenid, pyfile, frozenfile, deepfreezefile)
+
+ @property
+ def frozenid(self):
+ return self.id
+
+ @property
+ def modname(self):
+ if self.pyfile.startswith(STDLIB_DIR):
+ return self.id
+ return None
+
+ @property
+ def symbol(self):
+ # This matches what we do in Programs/_freeze_module.c:
+ name = self.frozenid.replace('.', '_')
+ return '_Py_M__' + name
+
+ @property
+ def ispkg(self):
+ if not self.pyfile:
+ return False
+ elif self.frozenid.endswith('.__init__'):
+ return False
+ else:
+ return os.path.basename(self.pyfile) == '__init__.py'
+
+ @property
+ def isbootstrap(self):
+ return self.id in BOOTSTRAP
+
+
+def resolve_frozen_file(frozenid, destdir):
+ """Return the filename corresponding to the given frozen ID.
+
+ For stdlib modules the ID will always be the full name
+ of the source module.
+ """
+ if not isinstance(frozenid, str):
+ try:
+ frozenid = frozenid.frozenid
+ except AttributeError:
+ raise ValueError(f'unsupported frozenid {frozenid!r}')
+ # We use a consistent naming convention for all frozen modules.
+ frozenfile = f'{frozenid}.h'
+ if not destdir:
+ return frozenfile
+ return os.path.join(destdir, frozenfile)
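+
+# For example (illustrative): resolve_frozen_file('importlib._bootstrap',
+# FROZEN_MODULES_DIR) returns <ROOT_DIR>/Python/frozen_modules/importlib._bootstrap.h.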
+
+
+#######################################
+# frozen modules
+
+class FrozenModule(namedtuple('FrozenModule', 'name ispkg section source')):
+
+ def __getattr__(self, name):
+ return getattr(self.source, name)
+
+ @property
+ def modname(self):
+ return self.name
+
+ @property
+ def orig(self):
+ return self.source.modname
+
+ @property
+ def isalias(self):
+ orig = self.source.modname
+ if not orig:
+ return True
+ return self.name != orig
+
+ def summarize(self):
+ source = self.source.modname
+ if source:
+ source = f'<{source}>'
+ else:
+ source = relpath_for_posix_display(self.pyfile, ROOT_DIR)
+ return {
+ 'module': self.name,
+ 'ispkg': self.ispkg,
+ 'source': source,
+ 'frozen': os.path.basename(self.frozenfile),
+ 'checksum': _get_checksum(self.frozenfile),
+ }
+
+
+def _iter_sources(modules):
+ seen = set()
+ for mod in modules:
+ if mod.source not in seen:
+ yield mod.source
+ seen.add(mod.source)
+
+
+#######################################
+# generic helpers
+
+def _get_checksum(filename):
+ with open(filename, "rb") as infile:
+ contents = infile.read()
+ m = hashlib.sha256()
+ m.update(contents)
+ return m.hexdigest()
+
+
+def resolve_modules(modname, pyfile=None):
+ if modname.startswith('<') and modname.endswith('>'):
+ if pyfile:
+ assert os.path.isdir(pyfile) or os.path.basename(pyfile) == '__init__.py', pyfile
+ ispkg = True
+ modname = modname[1:-1]
+ rawname = modname
+ # For now, we only expect match patterns at the end of the name.
+ _modname, sep, match = modname.rpartition('.')
+ if sep:
+ if _modname.endswith('.**'):
+ modname = _modname[:-3]
+ match = f'**.{match}'
+ elif match and not match.isidentifier():
+ modname = _modname
+ # Otherwise it's a plain name so we leave it alone.
+ else:
+ match = None
+ else:
+ ispkg = False
+ rawname = modname
+ match = None
+
+ if not check_modname(modname):
+ raise ValueError(f'not a valid module name ({rawname})')
+
+ if not pyfile:
+ pyfile = _resolve_module(modname, ispkg=ispkg)
+ elif os.path.isdir(pyfile):
+ pyfile = _resolve_module(modname, pyfile, ispkg)
+ yield modname, pyfile, ispkg
+
+ if match:
+ pkgdir = os.path.dirname(pyfile)
+ yield from iter_submodules(modname, pkgdir, match)
+
+
+def check_modname(modname):
+ return all(n.isidentifier() for n in modname.split('.'))
+
+
+def iter_submodules(pkgname, pkgdir=None, match='*'):
+ if not pkgdir:
+ pkgdir = os.path.join(STDLIB_DIR, *pkgname.split('.'))
+ if not match:
+ match = '**.*'
+ match_modname = _resolve_modname_matcher(match, pkgdir)
+
+ def _iter_submodules(pkgname, pkgdir):
+ for entry in sorted(os.scandir(pkgdir), key=lambda e: e.name):
+ matched, recursive = match_modname(entry.name)
+ if not matched:
+ continue
+ modname = f'{pkgname}.{entry.name}'
+ if modname.endswith('.py'):
+ yield modname[:-3], entry.path, False
+ elif entry.is_dir():
+ pyfile = os.path.join(entry.path, '__init__.py')
+ # We ignore namespace packages.
+ if os.path.exists(pyfile):
+ yield modname, pyfile, True
+ if recursive:
+ yield from _iter_submodules(modname, entry.path)
+
+ return _iter_submodules(pkgname, pkgdir)
+
+
+def _resolve_modname_matcher(match, rootdir=None):
+ if isinstance(match, str):
+ if match.startswith('**.'):
+ recursive = True
+ pat = match[3:]
+ assert match
+ else:
+ recursive = False
+ pat = match
+
+ if pat == '*':
+ def match_modname(modname):
+ return True, recursive
+ else:
+ raise NotImplementedError(match)
+ elif callable(match):
+ match_modname = match(rootdir)
+ else:
+ raise ValueError(f'unsupported matcher {match!r}')
+ return match_modname
+
+
+def _resolve_module(modname, pathentry=STDLIB_DIR, ispkg=False):
+ assert pathentry, pathentry
+ pathentry = os.path.normpath(pathentry)
+ assert os.path.isabs(pathentry)
+ if ispkg:
+ return os.path.join(pathentry, *modname.split('.'), '__init__.py')
+ return os.path.join(pathentry, *modname.split('.')) + '.py'
+
+
+#######################################
+# regenerating dependent files
+
+def find_marker(lines, marker, file):
+ for pos, line in enumerate(lines):
+ if marker in line:
+ return pos
+ raise Exception(f"Can't find {marker!r} in file {file}")
+
+
+def replace_block(lines, start_marker, end_marker, replacements, file):
+ start_pos = find_marker(lines, start_marker, file)
+ end_pos = find_marker(lines, end_marker, file)
+ if end_pos <= start_pos:
+ raise Exception(f"End marker {end_marker!r} "
+ f"occurs before start marker {start_marker!r} "
+ f"in file {file}")
+ replacements = [line.rstrip() + '\n' for line in replacements]
+ return lines[:start_pos + 1] + replacements + lines[end_pos:]
+
+
+def regen_frozen(modules, frozen_modules: bool):
+ headerlines = []
+ parentdir = os.path.dirname(FROZEN_FILE)
+ if frozen_modules:
+ for src in _iter_sources(modules):
+ # Adding a comment to separate sections here doesn't add much,
+ # so we don't.
+ header = relpath_for_posix_display(src.frozenfile, parentdir)
+ headerlines.append(f'#include "{header}"')
+
+ externlines = []
+ bootstraplines = []
+ stdliblines = []
+ testlines = []
+ aliaslines = []
+ indent = ' '
+ lastsection = None
+ for mod in modules:
+ if mod.isbootstrap:
+ lines = bootstraplines
+ elif mod.section == TESTS_SECTION:
+ lines = testlines
+ else:
+ lines = stdliblines
+ if mod.section != lastsection:
+ if lastsection is not None:
+ lines.append('')
+ lines.append(f'/* {mod.section} */')
+ lastsection = mod.section
+
+        # Also add an extern declaration for the corresponding
+ # deepfreeze-generated function.
+ orig_name = mod.source.id
+ code_name = orig_name.replace(".", "_")
+ get_code_name = "_Py_get_%s_toplevel" % code_name
+ externlines.append("extern PyObject *%s(void);" % get_code_name)
+
+ symbol = mod.symbol
+ pkg = 'true' if mod.ispkg else 'false'
+ if not frozen_modules:
+ line = ('{"%s", NULL, 0, %s, GET_CODE(%s)},'
+ ) % (mod.name, pkg, code_name)
+ else:
+ line = ('{"%s", %s, (int)sizeof(%s), %s, GET_CODE(%s)},'
+ ) % (mod.name, symbol, symbol, pkg, code_name)
+ lines.append(line)
+
+ if mod.isalias:
+ if not mod.orig:
+ entry = '{"%s", NULL},' % (mod.name,)
+ elif mod.source.ispkg:
+ entry = '{"%s", "<%s"},' % (mod.name, mod.orig)
+ else:
+ entry = '{"%s", "%s"},' % (mod.name, mod.orig)
+ aliaslines.append(indent + entry)
+
+ for lines in (bootstraplines, stdliblines, testlines):
+ # TODO: Is this necessary any more?
+ if not lines[0]:
+ del lines[0]
+ for i, line in enumerate(lines):
+ if line:
+ lines[i] = indent + line
+
+ print(f'# Updating {os.path.relpath(FROZEN_FILE)}')
+ with updating_file_with_tmpfile(FROZEN_FILE) as (infile, outfile):
+ lines = infile.readlines()
+ # TODO: Use more obvious markers, e.g.
+ # $START GENERATED FOOBAR$ / $END GENERATED FOOBAR$
+ lines = replace_block(
+ lines,
+ "/* Includes for frozen modules: */",
+ "/* End includes */",
+ headerlines,
+ FROZEN_FILE,
+ )
+ lines = replace_block(
+ lines,
+ "/* Start extern declarations */",
+ "/* End extern declarations */",
+ externlines,
+ FROZEN_FILE,
+ )
+ lines = replace_block(
+ lines,
+ "static const struct _frozen bootstrap_modules[] =",
+ "/* bootstrap sentinel */",
+ bootstraplines,
+ FROZEN_FILE,
+ )
+ lines = replace_block(
+ lines,
+ "static const struct _frozen stdlib_modules[] =",
+ "/* stdlib sentinel */",
+ stdliblines,
+ FROZEN_FILE,
+ )
+ lines = replace_block(
+ lines,
+ "static const struct _frozen test_modules[] =",
+ "/* test sentinel */",
+ testlines,
+ FROZEN_FILE,
+ )
+ lines = replace_block(
+ lines,
+ "const struct _module_alias aliases[] =",
+ "/* aliases sentinel */",
+ aliaslines,
+ FROZEN_FILE,
+ )
+ outfile.writelines(lines)
+
+
+def regen_makefile(modules):
+ pyfiles = []
+ frozenfiles = []
+ rules = ['']
+ deepfreezerules = ["Python/deepfreeze/deepfreeze.c: $(DEEPFREEZE_DEPS)",
+ "\t$(PYTHON_FOR_FREEZE) $(srcdir)/Tools/build/deepfreeze.py \\"]
+ for src in _iter_sources(modules):
+ frozen_header = relpath_for_posix_display(src.frozenfile, ROOT_DIR)
+ frozenfiles.append(f'\t\t{frozen_header} \\')
+
+ pyfile = relpath_for_posix_display(src.pyfile, ROOT_DIR)
+ pyfiles.append(f'\t\t{pyfile} \\')
+
+ if src.isbootstrap:
+ freezecmd = '$(FREEZE_MODULE_BOOTSTRAP)'
+ freezedep = '$(FREEZE_MODULE_BOOTSTRAP_DEPS)'
+ else:
+ freezecmd = '$(FREEZE_MODULE)'
+ freezedep = '$(FREEZE_MODULE_DEPS)'
+
+ freeze = (f'{freezecmd} {src.frozenid} '
+ f'$(srcdir)/{pyfile} {frozen_header}')
+ rules.extend([
+ f'{frozen_header}: {pyfile} {freezedep}',
+ f'\t{freeze}',
+ '',
+ ])
+ deepfreezerules.append(f"\t{frozen_header}:{src.frozenid} \\")
+ deepfreezerules.append('\t-o Python/deepfreeze/deepfreeze.c')
+ pyfiles[-1] = pyfiles[-1].rstrip(" \\")
+ frozenfiles[-1] = frozenfiles[-1].rstrip(" \\")
+
+ print(f'# Updating {os.path.relpath(MAKEFILE)}')
+ with updating_file_with_tmpfile(MAKEFILE) as (infile, outfile):
+ lines = infile.readlines()
+ lines = replace_block(
+ lines,
+ "FROZEN_FILES_IN =",
+ "# End FROZEN_FILES_IN",
+ pyfiles,
+ MAKEFILE,
+ )
+ lines = replace_block(
+ lines,
+ "FROZEN_FILES_OUT =",
+ "# End FROZEN_FILES_OUT",
+ frozenfiles,
+ MAKEFILE,
+ )
+ lines = replace_block(
+ lines,
+ "# BEGIN: freezing modules",
+ "# END: freezing modules",
+ rules,
+ MAKEFILE,
+ )
+ lines = replace_block(
+ lines,
+ "# BEGIN: deepfreeze modules",
+ "# END: deepfreeze modules",
+ deepfreezerules,
+ MAKEFILE,
+ )
+ outfile.writelines(lines)
+
+
+def regen_pcbuild(modules):
+ projlines = []
+ filterlines = []
+ corelines = []
+ deepfreezerules = ['\t<Exec Command=\'$(PythonForBuild) "$(PySourcePath)Tools\\build\\deepfreeze.py" ^']
+ for src in _iter_sources(modules):
+ pyfile = relpath_for_windows_display(src.pyfile, ROOT_DIR)
+ header = relpath_for_windows_display(src.frozenfile, ROOT_DIR)
+ intfile = ntpath.splitext(ntpath.basename(header))[0] + '.g.h'
+ projlines.append(f' <None Include="..\\{pyfile}">')
+ projlines.append(f' <ModName>{src.frozenid}</ModName>')
+ projlines.append(f' <IntFile>$(IntDir){intfile}</IntFile>')
+ projlines.append(f' <OutFile>$(PySourcePath){header}</OutFile>')
+ projlines.append(f' </None>')
+
+ filterlines.append(f' <None Include="..\\{pyfile}">')
+ filterlines.append(' <Filter>Python Files</Filter>')
+ filterlines.append(' </None>')
+ deepfreezerules.append(f'\t\t "$(PySourcePath){header}:{src.frozenid}" ^')
+ deepfreezerules.append('\t\t "-o" "$(PySourcePath)Python\\deepfreeze\\deepfreeze.c"\'/>' )
+
+ corelines.append(f' <ClCompile Include="..\\Python\\deepfreeze\\deepfreeze.c" />')
+
+ print(f'# Updating {os.path.relpath(PCBUILD_PROJECT)}')
+ with updating_file_with_tmpfile(PCBUILD_PROJECT) as (infile, outfile):
+ lines = infile.readlines()
+ lines = replace_block(
+ lines,
+ '<!-- BEGIN frozen modules -->',
+ '<!-- END frozen modules -->',
+ projlines,
+ PCBUILD_PROJECT,
+ )
+ outfile.writelines(lines)
+ with updating_file_with_tmpfile(PCBUILD_PROJECT) as (infile, outfile):
+ lines = infile.readlines()
+ lines = replace_block(
+ lines,
+ '<!-- BEGIN deepfreeze rule -->',
+ '<!-- END deepfreeze rule -->',
+ deepfreezerules,
+ PCBUILD_PROJECT,
+ )
+ outfile.writelines(lines)
+ print(f'# Updating {os.path.relpath(PCBUILD_FILTERS)}')
+ with updating_file_with_tmpfile(PCBUILD_FILTERS) as (infile, outfile):
+ lines = infile.readlines()
+ lines = replace_block(
+ lines,
+ '<!-- BEGIN frozen modules -->',
+ '<!-- END frozen modules -->',
+ filterlines,
+ PCBUILD_FILTERS,
+ )
+ outfile.writelines(lines)
+ print(f'# Updating {os.path.relpath(PCBUILD_PYTHONCORE)}')
+ with updating_file_with_tmpfile(PCBUILD_PYTHONCORE) as (infile, outfile):
+ lines = infile.readlines()
+ lines = replace_block(
+ lines,
+ '<!-- BEGIN deepfreeze -->',
+ '<!-- END deepfreeze -->',
+ corelines,
+            PCBUILD_PYTHONCORE,
+ )
+ outfile.writelines(lines)
+
+
+#######################################
+# the script
+
+parser = argparse.ArgumentParser()
+parser.add_argument("--frozen-modules", action="store_true",
+ help="Use both frozen and deepfrozen modules. (default: uses only deepfrozen modules)")
+
+def main():
+ args = parser.parse_args()
+ frozen_modules: bool = args.frozen_modules
+ # Expand the raw specs, preserving order.
+ modules = list(parse_frozen_specs())
+
+ # Regen build-related files.
+ regen_makefile(modules)
+ regen_pcbuild(modules)
+ regen_frozen(modules, frozen_modules)
+
+
+if __name__ == '__main__':
+ main()
diff --git a/Tools/build/generate_global_objects.py b/Tools/build/generate_global_objects.py
new file mode 100644
index 0000000..dd67cfe
--- /dev/null
+++ b/Tools/build/generate_global_objects.py
@@ -0,0 +1,382 @@
+import contextlib
+import io
+import os.path
+import re
+
+SCRIPT_NAME = 'Tools/build/generate_global_objects.py'
+__file__ = os.path.abspath(__file__)
+ROOT = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
+INTERNAL = os.path.join(ROOT, 'Include', 'internal')
+
+
+IGNORED = {
+ 'ACTION', # Python/_warnings.c
+ 'ATTR', # Python/_warnings.c and Objects/funcobject.c
+ 'DUNDER', # Objects/typeobject.c
+ 'RDUNDER', # Objects/typeobject.c
+ 'SPECIAL', # Objects/weakrefobject.c
+ 'NAME', # Objects/typeobject.c
+}
+IDENTIFIERS = [
+ # from ADD() Python/_warnings.c
+ 'default',
+ 'ignore',
+
+ # from GET_WARNINGS_ATTR() in Python/_warnings.c
+ 'WarningMessage',
+ '_showwarnmsg',
+ '_warn_unawaited_coroutine',
+ 'defaultaction',
+ 'filters',
+ 'onceregistry',
+
+ # from WRAP_METHOD() in Objects/weakrefobject.c
+ '__bytes__',
+ '__reversed__',
+
+ # from COPY_ATTR() in Objects/funcobject.c
+ '__module__',
+ '__name__',
+ '__qualname__',
+ '__doc__',
+ '__annotations__',
+
+ # from SLOT* in Objects/typeobject.c
+ '__abs__',
+ '__add__',
+ '__aiter__',
+ '__and__',
+ '__anext__',
+ '__await__',
+ '__bool__',
+ '__call__',
+ '__contains__',
+ '__del__',
+ '__delattr__',
+ '__delete__',
+ '__delitem__',
+ '__eq__',
+ '__float__',
+ '__floordiv__',
+ '__ge__',
+ '__get__',
+ '__getattr__',
+ '__getattribute__',
+ '__getitem__',
+ '__gt__',
+ '__hash__',
+ '__iadd__',
+ '__iand__',
+ '__ifloordiv__',
+ '__ilshift__',
+ '__imatmul__',
+ '__imod__',
+ '__imul__',
+ '__index__',
+ '__init__',
+ '__int__',
+ '__invert__',
+ '__ior__',
+ '__ipow__',
+ '__irshift__',
+ '__isub__',
+ '__iter__',
+ '__itruediv__',
+ '__ixor__',
+ '__le__',
+ '__len__',
+ '__lshift__',
+ '__lt__',
+ '__matmul__',
+ '__mod__',
+ '__mul__',
+ '__ne__',
+ '__neg__',
+ '__new__',
+ '__next__',
+ '__or__',
+ '__pos__',
+ '__pow__',
+ '__radd__',
+ '__rand__',
+ '__repr__',
+ '__rfloordiv__',
+ '__rlshift__',
+ '__rmatmul__',
+ '__rmod__',
+ '__rmul__',
+ '__ror__',
+ '__rpow__',
+ '__rrshift__',
+ '__rshift__',
+ '__rsub__',
+ '__rtruediv__',
+ '__rxor__',
+ '__set__',
+ '__setattr__',
+ '__setitem__',
+ '__str__',
+ '__sub__',
+ '__truediv__',
+ '__xor__',
+ '__divmod__',
+ '__rdivmod__',
+]
+
+
+#######################################
+# helpers
+
+def iter_files():
+ for name in ('Modules', 'Objects', 'Parser', 'PC', 'Programs', 'Python'):
+ root = os.path.join(ROOT, name)
+ for dirname, _, files in os.walk(root):
+ for name in files:
+ if not name.endswith(('.c', '.h')):
+ continue
+ yield os.path.join(dirname, name)
+
+
+def iter_global_strings():
+ id_regex = re.compile(r'\b_Py_ID\((\w+)\)')
+ str_regex = re.compile(r'\b_Py_DECLARE_STR\((\w+), "(.*?)"\)')
+ for filename in iter_files():
+ try:
+ infile = open(filename, encoding='utf-8')
+ except FileNotFoundError:
+ # The file must have been a temporary file.
+ continue
+ with infile:
+ for lno, line in enumerate(infile, 1):
+ for m in id_regex.finditer(line):
+ identifier, = m.groups()
+ yield identifier, None, filename, lno, line
+ for m in str_regex.finditer(line):
+ varname, string = m.groups()
+ yield varname, string, filename, lno, line
+
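+# For instance (illustrative): a C line containing _Py_ID(__name__) yields
+# ('__name__', None, filename, lno, line), while a line containing
+# _Py_DECLARE_STR(dot, ".") would yield ('dot', '.', filename, lno, line).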
+
+def iter_to_marker(lines, marker):
+ for line in lines:
+ if line.rstrip() == marker:
+ break
+ yield line
+
+
+class Printer:
+
+ def __init__(self, file):
+ self.level = 0
+ self.file = file
+ self.continuation = [False]
+
+ @contextlib.contextmanager
+ def indent(self):
+ save_level = self.level
+ try:
+ self.level += 1
+ yield
+ finally:
+ self.level = save_level
+
+ def write(self, arg):
+ eol = '\n'
+ if self.continuation[-1]:
+ eol = f' \\{eol}' if arg else f'\\{eol}'
+ self.file.writelines((" "*self.level, arg, eol))
+
+ @contextlib.contextmanager
+ def block(self, prefix, suffix="", *, continuation=None):
+ if continuation is None:
+ continuation = self.continuation[-1]
+ self.continuation.append(continuation)
+
+ self.write(prefix + " {")
+ with self.indent():
+ yield
+ self.continuation.pop()
+ self.write("}" + suffix)
+
+
+@contextlib.contextmanager
+def open_for_changes(filename, orig):
+ """Like open() but only write to the file if it changed."""
+ outfile = io.StringIO()
+ yield outfile
+ text = outfile.getvalue()
+ if text != orig:
+ with open(filename, 'w', encoding='utf-8') as outfile:
+ outfile.write(text)
+ else:
+ print(f'# not changed: {filename}')
+
+
+#######################################
+# the global objects
+
+START = f'/* The following is auto-generated by {SCRIPT_NAME}. */'
+END = '/* End auto-generated code */'
+
+
+def generate_global_strings(identifiers, strings):
+ filename = os.path.join(INTERNAL, 'pycore_global_strings.h')
+
+ # Read the non-generated part of the file.
+ with open(filename) as infile:
+ orig = infile.read()
+ lines = iter(orig.rstrip().splitlines())
+ before = '\n'.join(iter_to_marker(lines, START))
+ for _ in iter_to_marker(lines, END):
+ pass
+ after = '\n'.join(lines)
+
+ # Generate the file.
+ with open_for_changes(filename, orig) as outfile:
+ printer = Printer(outfile)
+ printer.write(before)
+ printer.write(START)
+ with printer.block('struct _Py_global_strings', ';'):
+ with printer.block('struct', ' literals;'):
+ for literal, name in sorted(strings.items(), key=lambda x: x[1]):
+ printer.write(f'STRUCT_FOR_STR({name}, "{literal}")')
+ outfile.write('\n')
+ with printer.block('struct', ' identifiers;'):
+ for name in sorted(identifiers):
+ assert name.isidentifier(), name
+ printer.write(f'STRUCT_FOR_ID({name})')
+ with printer.block('struct', ' ascii[128];'):
+ printer.write("PyASCIIObject _ascii;")
+ printer.write("uint8_t _data[2];")
+ with printer.block('struct', ' latin1[128];'):
+ printer.write("PyCompactUnicodeObject _latin1;")
+ printer.write("uint8_t _data[2];")
+ printer.write(END)
+ printer.write(after)
+
+
+def generate_runtime_init(identifiers, strings):
+ # First get some info from the declarations.
+ nsmallposints = None
+ nsmallnegints = None
+ with open(os.path.join(INTERNAL, 'pycore_global_objects.h')) as infile:
+ for line in infile:
+ if line.startswith('#define _PY_NSMALLPOSINTS'):
+ nsmallposints = int(line.split()[-1])
+ elif line.startswith('#define _PY_NSMALLNEGINTS'):
+ nsmallnegints = int(line.split()[-1])
+ break
+ else:
+ raise NotImplementedError
+ assert nsmallposints and nsmallnegints
+
+ # Then target the runtime initializer.
+ filename = os.path.join(INTERNAL, 'pycore_runtime_init_generated.h')
+
+ # Read the non-generated part of the file.
+ with open(filename) as infile:
+ orig = infile.read()
+ lines = iter(orig.rstrip().splitlines())
+ before = '\n'.join(iter_to_marker(lines, START))
+ for _ in iter_to_marker(lines, END):
+ pass
+ after = '\n'.join(lines)
+
+ # Generate the file.
+ with open_for_changes(filename, orig) as outfile:
+ immortal_objects = []
+ printer = Printer(outfile)
+ printer.write(before)
+ printer.write(START)
+ with printer.block('#define _Py_global_objects_INIT', continuation=True):
+ with printer.block('.singletons =', ','):
+ # Global int objects.
+ with printer.block('.small_ints =', ','):
+ for i in range(-nsmallnegints, nsmallposints):
+ printer.write(f'_PyLong_DIGIT_INIT({i}),')
+ immortal_objects.append(f'(PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + {i}]')
+ printer.write('')
+ # Global bytes objects.
+ printer.write('.bytes_empty = _PyBytes_SIMPLE_INIT(0, 0),')
+ immortal_objects.append(f'(PyObject *)&_Py_SINGLETON(bytes_empty)')
+ with printer.block('.bytes_characters =', ','):
+ for i in range(256):
+ printer.write(f'_PyBytes_CHAR_INIT({i}),')
+ immortal_objects.append(f'(PyObject *)&_Py_SINGLETON(bytes_characters)[{i}]')
+ printer.write('')
+ # Global strings.
+ with printer.block('.strings =', ','):
+ with printer.block('.literals =', ','):
+ for literal, name in sorted(strings.items(), key=lambda x: x[1]):
+ printer.write(f'INIT_STR({name}, "{literal}"),')
+ immortal_objects.append(f'(PyObject *)&_Py_STR({name})')
+ with printer.block('.identifiers =', ','):
+ for name in sorted(identifiers):
+ assert name.isidentifier(), name
+ printer.write(f'INIT_ID({name}),')
+ immortal_objects.append(f'(PyObject *)&_Py_ID({name})')
+ with printer.block('.ascii =', ','):
+ for i in range(128):
+ printer.write(f'_PyASCIIObject_INIT("\\x{i:02x}"),')
+ immortal_objects.append(f'(PyObject *)&_Py_SINGLETON(strings).ascii[{i}]')
+ with printer.block('.latin1 =', ','):
+ for i in range(128, 256):
+ utf8 = ['"']
+ for c in chr(i).encode('utf-8'):
+ utf8.append(f"\\x{c:02x}")
+ utf8.append('"')
+ printer.write(f'_PyUnicode_LATIN1_INIT("\\x{i:02x}", {"".join(utf8)}),')
+ immortal_objects.append(f'(PyObject *)&_Py_SINGLETON(strings).latin1[{i} - 128]')
+ printer.write('')
+ with printer.block('.tuple_empty =', ','):
+ printer.write('.ob_base = _PyVarObject_IMMORTAL_INIT(&PyTuple_Type, 0)')
+ immortal_objects.append(f'(PyObject *)&_Py_SINGLETON(tuple_empty)')
+ printer.write('')
+ printer.write("static inline void")
+ with printer.block("_PyUnicode_InitStaticStrings(void)"):
+ printer.write(f'PyObject *string;')
+ for i in sorted(identifiers):
+ # This use of _Py_ID() is ignored by iter_global_strings()
+                # since iter_files() does not look under the Include/ directory.
+ printer.write(f'string = &_Py_ID({i});')
+ printer.write(f'PyUnicode_InternInPlace(&string);')
+ printer.write('')
+ printer.write('#ifdef Py_DEBUG')
+ printer.write("static inline void")
+ with printer.block("_PyStaticObjects_CheckRefcnt(void)"):
+ for i in immortal_objects:
+ with printer.block(f'if (Py_REFCNT({i}) < _PyObject_IMMORTAL_REFCNT)', ';'):
+ printer.write(f'_PyObject_Dump({i});')
+ printer.write(f'Py_FatalError("immortal object has less refcnt than '
+ 'expected _PyObject_IMMORTAL_REFCNT");')
+ printer.write('#endif')
+ printer.write(END)
+ printer.write(after)
+
+
+def get_identifiers_and_strings() -> 'tuple[set[str], dict[str, str]]':
+ identifiers = set(IDENTIFIERS)
+ strings = {}
+ for name, string, *_ in iter_global_strings():
+ if string is None:
+ if name not in IGNORED:
+ identifiers.add(name)
+ else:
+ if string not in strings:
+ strings[string] = name
+ elif name != strings[string]:
+                raise ValueError(f'string mismatch for {name!r} ({string!r} != {strings[string]!r})')
+ return identifiers, strings
+
+
+#######################################
+# the script
+
+def main() -> None:
+ identifiers, strings = get_identifiers_and_strings()
+
+ generate_global_strings(identifiers, strings)
+ generate_runtime_init(identifiers, strings)
+
+
+if __name__ == '__main__':
+ main()
diff --git a/Tools/build/generate_levenshtein_examples.py b/Tools/build/generate_levenshtein_examples.py
new file mode 100644
index 0000000..5a8360f
--- /dev/null
+++ b/Tools/build/generate_levenshtein_examples.py
@@ -0,0 +1,70 @@
+"""Generate 10,000 unique examples for the Levenshtein short-circuit tests."""
+
+import argparse
+from functools import cache
+import json
+import os.path
+from random import choices, randrange
+
+
+# This should be in sync with Lib/traceback.py. It's not importing those values
+# because this script is being executed by PYTHON_FOR_REGEN and not by the in-tree
+# build of Python.
+_MOVE_COST = 2
+_CASE_COST = 1
+
+
+def _substitution_cost(ch_a, ch_b):
+ if ch_a == ch_b:
+ return 0
+ if ch_a.lower() == ch_b.lower():
+ return _CASE_COST
+ return _MOVE_COST
+
+
+@cache
+def levenshtein(a, b):
+ if not a or not b:
+ return (len(a) + len(b)) * _MOVE_COST
+ option1 = levenshtein(a[:-1], b[:-1]) + _substitution_cost(a[-1], b[-1])
+ option2 = levenshtein(a[:-1], b) + _MOVE_COST
+ option3 = levenshtein(a, b[:-1]) + _MOVE_COST
+ return min(option1, option2, option3)
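+# Quick sanity check of the costs above: levenshtein("a", "A") == 1
+# (a pure case change costs _CASE_COST), while levenshtein("a", "b") == 2
+# (a full substitution costs _MOVE_COST).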
+
+
+def main():
+ parser = argparse.ArgumentParser(description=__doc__)
+ parser.add_argument('output_path', metavar='FILE', type=str)
+ parser.add_argument('--overwrite', dest='overwrite', action='store_const',
+ const=True, default=False,
+ help='overwrite an existing test file')
+
+ args = parser.parse_args()
+ output_path = os.path.realpath(args.output_path)
+ if not args.overwrite and os.path.isfile(output_path):
+ print(f"{output_path} already exists, skipping regeneration.")
+ print(
+ "To force, add --overwrite to the invocation of this tool or"
+ " delete the existing file."
+ )
+ return
+
+ examples = set()
+ # Create a lot of non-empty examples, which should end up with a Gauss-like
+ # distribution for even costs (moves) and odd costs (case substitutions).
+ while len(examples) < 9990:
+ a = ''.join(choices("abcABC", k=randrange(1, 10)))
+ b = ''.join(choices("abcABC", k=randrange(1, 10)))
+ expected = levenshtein(a, b)
+ examples.add((a, b, expected))
+ # Create one empty case each for strings between 0 and 9 in length.
+ for i in range(10):
+ b = ''.join(choices("abcABC", k=i))
+ expected = levenshtein("", b)
+ examples.add(("", b, expected))
+ with open(output_path, "w") as f:
+ json.dump(sorted(examples), f, indent=2)
+
+
+if __name__ == "__main__":
+ main()
diff --git a/Tools/build/generate_opcode_h.py b/Tools/build/generate_opcode_h.py
new file mode 100644
index 0000000..372221a
--- /dev/null
+++ b/Tools/build/generate_opcode_h.py
@@ -0,0 +1,199 @@
+# This script generates the opcode.h header file.
+
+import sys
+import tokenize
+
+SCRIPT_NAME = "Tools/build/generate_opcode_h.py"
+PYTHON_OPCODE = "Lib/opcode.py"
+
+header = f"""
+// Auto-generated by {SCRIPT_NAME} from {PYTHON_OPCODE}
+
+#ifndef Py_OPCODE_H
+#define Py_OPCODE_H
+#ifdef __cplusplus
+extern "C" {{
+#endif
+
+
+/* Instruction opcodes for compiled code */
+""".lstrip()
+
+footer = """
+
+#define IS_PSEUDO_OPCODE(op) (((op) >= MIN_PSEUDO_OPCODE) && ((op) <= MAX_PSEUDO_OPCODE))
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* !Py_OPCODE_H */
+"""
+
+internal_header = f"""
+// Auto-generated by {SCRIPT_NAME} from {PYTHON_OPCODE}
+
+#ifndef Py_INTERNAL_OPCODE_H
+#define Py_INTERNAL_OPCODE_H
+#ifdef __cplusplus
+extern "C" {{
+#endif
+
+#ifndef Py_BUILD_CORE
+# error "this header requires Py_BUILD_CORE define"
+#endif
+
+#include "opcode.h"
+""".lstrip()
+
+internal_footer = """
+#ifdef __cplusplus
+}
+#endif
+#endif // !Py_INTERNAL_OPCODE_H
+"""
+
+DEFINE = "#define {:<38} {:>3}\n"
+
+UINT32_MASK = (1<<32)-1
+
+def write_int_array_from_ops(name, ops, out):
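+    # Pack ``ops`` into a bitset of 9 uint32 words: opcode ``op`` sets bit
+    # ``op % 32`` of word ``op // 32``.  The assert below verifies that every
+    # opcode (pseudo opcodes included) fits within 9 * 32 = 288 bits.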
+ bits = 0
+ for op in ops:
+ bits |= 1<<op
+ out.write(f"static const uint32_t {name}[9] = {{\n")
+ for i in range(9):
+ out.write(f" {bits & UINT32_MASK}U,\n")
+ bits >>= 32
+ assert bits == 0
+ out.write(f"}};\n")
+
+def main(opcode_py, outfile='Include/opcode.h', internaloutfile='Include/internal/pycore_opcode.h'):
+ opcode = {}
+ if hasattr(tokenize, 'open'):
+ fp = tokenize.open(opcode_py) # Python 3.2+
+ else:
+ fp = open(opcode_py) # Python 2.7
+ with fp:
+ code = fp.read()
+ exec(code, opcode)
+ opmap = opcode['opmap']
+ opname = opcode['opname']
+ hasarg = opcode['hasarg']
+ hasconst = opcode['hasconst']
+ hasjrel = opcode['hasjrel']
+ hasjabs = opcode['hasjabs']
+ is_pseudo = opcode['is_pseudo']
+ _pseudo_ops = opcode['_pseudo_ops']
+
+ HAVE_ARGUMENT = opcode["HAVE_ARGUMENT"]
+ MIN_PSEUDO_OPCODE = opcode["MIN_PSEUDO_OPCODE"]
+ MAX_PSEUDO_OPCODE = opcode["MAX_PSEUDO_OPCODE"]
+
+ NUM_OPCODES = len(opname)
+ used = [ False ] * len(opname)
+ next_op = 1
+
+ for name, op in opmap.items():
+ used[op] = True
+
+ specialized_opmap = {}
+ opname_including_specialized = opname.copy()
+ for name in opcode['_specialized_instructions']:
+ while used[next_op]:
+ next_op += 1
+ specialized_opmap[name] = next_op
+ opname_including_specialized[next_op] = name
+ used[next_op] = True
+ specialized_opmap['DO_TRACING'] = 255
+ opname_including_specialized[255] = 'DO_TRACING'
+ used[255] = True
+
+ with (open(outfile, 'w') as fobj, open(internaloutfile, 'w') as iobj):
+ fobj.write(header)
+ iobj.write(internal_header)
+
+ for name in opname:
+ if name in opmap:
+ op = opmap[name]
+ if op == HAVE_ARGUMENT:
+ fobj.write(DEFINE.format("HAVE_ARGUMENT", HAVE_ARGUMENT))
+ if op == MIN_PSEUDO_OPCODE:
+ fobj.write(DEFINE.format("MIN_PSEUDO_OPCODE", MIN_PSEUDO_OPCODE))
+
+ fobj.write(DEFINE.format(name, op))
+
+ if op == MAX_PSEUDO_OPCODE:
+ fobj.write(DEFINE.format("MAX_PSEUDO_OPCODE", MAX_PSEUDO_OPCODE))
+
+
+ for name, op in specialized_opmap.items():
+ fobj.write(DEFINE.format(name, op))
+
+ iobj.write("\nextern const uint8_t _PyOpcode_Caches[256];\n")
+ iobj.write("\nextern const uint8_t _PyOpcode_Deopt[256];\n")
+ iobj.write("\n#ifdef NEED_OPCODE_TABLES\n")
+ write_int_array_from_ops("_PyOpcode_RelativeJump", opcode['hasjrel'], iobj)
+ write_int_array_from_ops("_PyOpcode_Jump", opcode['hasjrel'] + opcode['hasjabs'], iobj)
+
+ iobj.write("\nconst uint8_t _PyOpcode_Caches[256] = {\n")
+ for i, entries in enumerate(opcode["_inline_cache_entries"]):
+ if entries:
+ iobj.write(f" [{opname[i]}] = {entries},\n")
+ iobj.write("};\n")
+
+ deoptcodes = {}
+ for basic, op in opmap.items():
+ if not is_pseudo(op):
+ deoptcodes[basic] = basic
+ for basic, family in opcode["_specializations"].items():
+ for specialized in family:
+ deoptcodes[specialized] = basic
+ iobj.write("\nconst uint8_t _PyOpcode_Deopt[256] = {\n")
+ for opt, deopt in sorted(deoptcodes.items()):
+ iobj.write(f" [{opt}] = {deopt},\n")
+ iobj.write("};\n")
+ iobj.write("#endif // NEED_OPCODE_TABLES\n")
+
+ fobj.write("\n")
+ fobj.write("#define HAS_ARG(op) ((((op) >= HAVE_ARGUMENT) && (!IS_PSEUDO_OPCODE(op)))\\")
+ for op in _pseudo_ops:
+ if opmap[op] in hasarg:
+ fobj.write(f"\n || ((op) == {op}) \\")
+ fobj.write("\n )\n")
+
+ fobj.write("\n")
+ fobj.write("#define HAS_CONST(op) (false\\")
+ for op in hasconst:
+ fobj.write(f"\n || ((op) == {opname[op]}) \\")
+ fobj.write("\n )\n")
+
+ fobj.write("\n")
+ for i, (op, _) in enumerate(opcode["_nb_ops"]):
+ fobj.write(DEFINE.format(op, i))
+
+ iobj.write("\n")
+ iobj.write("#ifdef Py_DEBUG\n")
+ iobj.write(f"static const char *const _PyOpcode_OpName[{NUM_OPCODES}] = {{\n")
+ for op, name in enumerate(opname_including_specialized):
+ if name[0] != "<":
+ op = name
+ iobj.write(f''' [{op}] = "{name}",\n''')
+ iobj.write("};\n")
+ iobj.write("#endif\n")
+
+ iobj.write("\n")
+ iobj.write("#define EXTRA_CASES \\\n")
+ for i, flag in enumerate(used):
+ if not flag:
+ iobj.write(f" case {i}: \\\n")
+ iobj.write(" ;\n")
+
+ fobj.write(footer)
+ iobj.write(internal_footer)
+
+
+ print(f"{outfile} regenerated from {opcode_py}")
+
+
+if __name__ == '__main__':
+ main(sys.argv[1], sys.argv[2], sys.argv[3])
diff --git a/Tools/build/generate_re_casefix.py b/Tools/build/generate_re_casefix.py
new file mode 100755
index 0000000..b57ac07
--- /dev/null
+++ b/Tools/build/generate_re_casefix.py
@@ -0,0 +1,96 @@
+#! /usr/bin/env python3
+# This script generates Lib/re/_casefix.py.
+
+import collections
+import sys
+import unicodedata
+
+SCRIPT_NAME = 'Tools/build/generate_re_casefix.py'
+
+def update_file(file, content):
+ try:
+ with open(file, 'r', encoding='utf-8') as fobj:
+ if fobj.read() == content:
+ return False
+ except (OSError, ValueError):
+ pass
+ with open(file, 'w', encoding='utf-8') as fobj:
+ fobj.write(content)
+ return True
+
+re_casefix_template = f"""\
+# Auto-generated by {SCRIPT_NAME}.
+
+# Maps the code of lowercased character to codes of different lowercased
+# characters which have the same uppercase.
+_EXTRA_CASES = {{
+%s
+}}
+"""
+
+def uname(i):
+ return unicodedata.name(chr(i), r'U+%04X' % i)
+
+class hexint(int):
+ def __repr__(self):
+ return '%#06x' % self
+
+def alpha(i):
+ c = chr(i)
+ return c if c.isalpha() else ascii(c)[1:-1]
+
+
+def main(outfile='Lib/re/_casefix.py'):
+ # Find sets of characters which have the same uppercase.
+ equivalent_chars = collections.defaultdict(str)
+ for c in map(chr, range(sys.maxunicode + 1)):
+ equivalent_chars[c.upper()] += c
+ equivalent_chars = [t for t in equivalent_chars.values() if len(t) > 1]
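+    # For example, 's' (U+0073) and 'ſ' (U+017F, LATIN SMALL LETTER LONG S)
+    # both uppercase to 'S', so they end up in the same equivalence set here.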
+
+ # List of codes of lowercased characters which have the same uppercase.
+ equivalent_lower_codes = [sorted(t)
+ for s in equivalent_chars
+ for t in [set(ord(c.lower()) for c in s)]
+ if len(t) > 1]
+
+ bad_codes = []
+ for t in equivalent_lower_codes:
+ for i in t:
+ if i > 0xffff:
+ bad_codes.extend(t)
+ try:
+ bad_codes.append(ord(chr(i).upper()))
+ except (ValueError, TypeError):
+ pass
+ break
+ if bad_codes:
+        print('Case-insensitive matching may not work correctly for characters:',
+ file=sys.stderr)
+ for i in sorted(bad_codes):
+ print(" '%s' (U+%04x, %s)" % (alpha(i), i, uname(i)),
+ file=sys.stderr)
+ sys.exit(1)
+
+ mapping = {i: tuple(j for j in t if i != j)
+ for t in equivalent_lower_codes
+ for i in t}
+
+ items = []
+ for i, t in sorted(mapping.items()):
+ items.append(' # %s: %s' % (
+ uname(i),
+ ', '.join(map(uname, t)),
+ ))
+ items.append(" %r: %r, # '%s': '%s'" % (
+ hexint(i),
+ tuple(map(hexint, t)),
+ alpha(i),
+ ''.join(map(alpha, t)),
+ ))
+
+ update_file(outfile, re_casefix_template % '\n'.join(items))
+
+
+if __name__ == '__main__':
+ import sys
+ main(*sys.argv[1:])
diff --git a/Tools/build/generate_sre_constants.py b/Tools/build/generate_sre_constants.py
new file mode 100755
index 0000000..abea069
--- /dev/null
+++ b/Tools/build/generate_sre_constants.py
@@ -0,0 +1,80 @@
+#! /usr/bin/env python3
+# This script generates Modules/_sre/sre_constants.h from Lib/re/_constants.py.
+
+SCRIPT_NAME = 'Tools/build/generate_sre_constants.py'
+
+
+def update_file(file, content):
+ try:
+ with open(file, 'r') as fobj:
+ if fobj.read() == content:
+ return False
+ except (OSError, ValueError):
+ pass
+ with open(file, 'w') as fobj:
+ fobj.write(content)
+ return True
+
+sre_constants_header = f"""\
+/*
+ * Secret Labs' Regular Expression Engine
+ *
+ * regular expression matching engine
+ *
+ * Auto-generated by {SCRIPT_NAME} from
+ * Lib/re/_constants.py.
+ *
+ * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved.
+ *
+ * See the sre.c file for information on usage and redistribution.
+ */
+
+"""
+
+def main(
+ infile="Lib/re/_constants.py",
+ outfile_constants="Modules/_sre/sre_constants.h",
+ outfile_targets="Modules/_sre/sre_targets.h",
+):
+ ns = {}
+ with open(infile) as fp:
+ code = fp.read()
+ exec(code, ns)
+
+ def dump(d, prefix):
+ items = sorted(d)
+ for item in items:
+ yield "#define %s_%s %d\n" % (prefix, item, item)
+
+ def dump2(d, prefix):
+ items = [(value, name) for name, value in d.items()
+ if name.startswith(prefix)]
+ for value, name in sorted(items):
+ yield "#define %s %d\n" % (name, value)
+
+ def dump_gotos(d, prefix):
+ for i, item in enumerate(sorted(d)):
+ assert i == item
+ yield f" &&{prefix}_{item},\n"
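+    # The helpers above emit plain #define and goto-table lines; for instance
+    # dump() over OPCODES yields lines such as "#define SRE_OP_FAILURE 0"
+    # (illustrative; the actual values come from Lib/re/_constants.py).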
+
+ content = [sre_constants_header]
+ content.append("#define SRE_MAGIC %d\n" % ns["MAGIC"])
+ content.extend(dump(ns["OPCODES"], "SRE_OP"))
+ content.extend(dump(ns["ATCODES"], "SRE"))
+ content.extend(dump(ns["CHCODES"], "SRE"))
+ content.extend(dump2(ns, "SRE_FLAG_"))
+ content.extend(dump2(ns, "SRE_INFO_"))
+
+ update_file(outfile_constants, ''.join(content))
+
+ content = [sre_constants_header]
+ content.append(f"static void *sre_targets[{len(ns['OPCODES'])}] = {{\n")
+ content.extend(dump_gotos(ns["OPCODES"], "TARGET_SRE_OP"))
+ content.append("};\n")
+
+ update_file(outfile_targets, ''.join(content))
+
+
+if __name__ == '__main__':
+ import sys
+ main(*sys.argv[1:])
diff --git a/Tools/build/generate_stdlib_module_names.py b/Tools/build/generate_stdlib_module_names.py
new file mode 100644
index 0000000..e4f09f8
--- /dev/null
+++ b/Tools/build/generate_stdlib_module_names.py
@@ -0,0 +1,139 @@
+# This script lists the names of standard library modules
+# to update Python/stdlib_module_names.h
+import _imp
+import os.path
+import re
+import subprocess
+import sys
+import sysconfig
+
+from check_extension_modules import ModuleChecker
+
+
+SCRIPT_NAME = 'Tools/build/generate_stdlib_module_names.py'
+
+SRC_DIR = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
+STDLIB_PATH = os.path.join(SRC_DIR, 'Lib')
+
+IGNORE = {
+ '__init__',
+ '__pycache__',
+ 'site-packages',
+
+ # Test modules and packages
+ '__hello__',
+ '__phello__',
+ '__hello_alias__',
+ '__phello_alias__',
+ '__hello_only__',
+ '_ctypes_test',
+ '_testbuffer',
+ '_testcapi',
+ '_testconsole',
+ '_testimportmultiple',
+ '_testinternalcapi',
+ '_testmultiphase',
+ '_xxsubinterpreters',
+ '_xxtestfuzz',
+ 'distutils.tests',
+ 'idlelib.idle_test',
+ 'test',
+ 'xxlimited',
+ 'xxlimited_35',
+ 'xxsubtype',
+}
+
+# Pure Python modules (Lib/*.py)
+def list_python_modules(names):
+ for filename in os.listdir(STDLIB_PATH):
+ if not filename.endswith(".py"):
+ continue
+ name = filename.removesuffix(".py")
+ names.add(name)
+
+
+# Packages in Lib/
+def list_packages(names):
+ for name in os.listdir(STDLIB_PATH):
+ if name in IGNORE:
+ continue
+ package_path = os.path.join(STDLIB_PATH, name)
+ if not os.path.isdir(package_path):
+ continue
+ if any(package_file.endswith(".py")
+ for package_file in os.listdir(package_path)):
+ names.add(name)
+
+
+# Built-in and extension modules built by Modules/Setup*
+# includes Windows and macOS extensions.
+def list_modules_setup_extensions(names):
+ checker = ModuleChecker()
+ names.update(checker.list_module_names(all=True))
+
+
+# List frozen modules of the PyImport_FrozenModules list (Python/frozen.c).
+# Use the "./Programs/_testembed list_frozen" command.
+def list_frozen(names):
+ submodules = set()
+ for name in _imp._frozen_module_names():
+ # To skip __hello__, __hello_alias__ and etc.
+ if name.startswith('__'):
+ continue
+ if '.' in name:
+ submodules.add(name)
+ else:
+ names.add(name)
+ # Make sure all frozen submodules have a known parent.
+ for name in list(submodules):
+ if name.partition('.')[0] in names:
+ submodules.remove(name)
+ if submodules:
+ raise Exception(f'unexpected frozen submodules: {sorted(submodules)}')
+
+
+def list_modules():
+ names = set(sys.builtin_module_names)
+ list_modules_setup_extensions(names)
+ list_packages(names)
+ list_python_modules(names)
+ list_frozen(names)
+
+ # Remove ignored packages and modules
+ for name in list(names):
+ package_name = name.split('.')[0]
+ # package_name can be equal to name
+ if package_name in IGNORE:
+ names.discard(name)
+
+ for name in names:
+ if "." in name:
+ raise Exception("sub-modules must not be listed")
+
+ return names
+
+
+def write_modules(fp, names):
+ print(f"// Auto-generated by {SCRIPT_NAME}.",
+ file=fp)
+ print("// List used to create sys.stdlib_module_names.", file=fp)
+ print(file=fp)
+ print("static const char* _Py_stdlib_module_names[] = {", file=fp)
+ for name in sorted(names):
+ print(f'"{name}",', file=fp)
+ print("};", file=fp)
+
+
+def main():
+ if not sysconfig.is_python_build():
+ print(f"ERROR: {sys.executable} is not a Python build",
+ file=sys.stderr)
+ sys.exit(1)
+
+ fp = sys.stdout
+ names = list_modules()
+ write_modules(fp, names)
+
+
+if __name__ == "__main__":
+ main()
diff --git a/Tools/build/generate_token.py b/Tools/build/generate_token.py
new file mode 100755
index 0000000..fc12835
--- /dev/null
+++ b/Tools/build/generate_token.py
@@ -0,0 +1,282 @@
+#! /usr/bin/env python3
+# This script generates token related files from Grammar/Tokens:
+#
+# Doc/library/token-list.inc
+# Include/token.h
+# Parser/token.c
+# Lib/token.py
+
+
+SCRIPT_NAME = 'Tools/build/generate_token.py'
+AUTO_GENERATED_BY_SCRIPT = f'Auto-generated by {SCRIPT_NAME}'
+NT_OFFSET = 256
+
+def load_tokens(path):
+ tok_names = []
+ string_to_tok = {}
+ ERRORTOKEN = None
+ with open(path) as fp:
+ for line in fp:
+ line = line.strip()
+ # strip comments
+ i = line.find('#')
+ if i >= 0:
+ line = line[:i].strip()
+ if not line:
+ continue
+ fields = line.split()
+ name = fields[0]
+ value = len(tok_names)
+ if name == 'ERRORTOKEN':
+ ERRORTOKEN = value
+ string = fields[1] if len(fields) > 1 else None
+ if string:
+ string = eval(string)
+ string_to_tok[string] = value
+ tok_names.append(name)
+ return tok_names, ERRORTOKEN, string_to_tok
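+# load_tokens() expects each Grammar/Tokens line to be a token name optionally
+# followed by its quoted string; '#' starts a comment.  Illustrative examples:
+#
+#     ENDMARKER
+#     LPAR                    '('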
+
+
+def update_file(file, content):
+ try:
+ with open(file, 'r') as fobj:
+ if fobj.read() == content:
+ return False
+ except (OSError, ValueError):
+ pass
+ with open(file, 'w') as fobj:
+ fobj.write(content)
+ return True
+
+
+token_h_template = f"""\
+/* {AUTO_GENERATED_BY_SCRIPT} */
+"""
+token_h_template += """\
+
+/* Token types */
+#ifndef Py_INTERNAL_TOKEN_H
+#define Py_INTERNAL_TOKEN_H
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifndef Py_BUILD_CORE
+# error "this header requires Py_BUILD_CORE define"
+#endif
+
+#undef TILDE /* Prevent clash of our definition with system macro. Ex AIX, ioctl.h */
+
+%s\
+#define N_TOKENS %d
+#define NT_OFFSET %d
+
+/* Special definitions for cooperation with parser */
+
+#define ISTERMINAL(x) ((x) < NT_OFFSET)
+#define ISNONTERMINAL(x) ((x) >= NT_OFFSET)
+#define ISEOF(x) ((x) == ENDMARKER)
+#define ISWHITESPACE(x) ((x) == ENDMARKER || \\
+ (x) == NEWLINE || \\
+ (x) == INDENT || \\
+ (x) == DEDENT)
+
+
+// Symbols exported for test_peg_generator
+PyAPI_DATA(const char * const) _PyParser_TokenNames[]; /* Token names */
+PyAPI_FUNC(int) _PyToken_OneChar(int);
+PyAPI_FUNC(int) _PyToken_TwoChars(int, int);
+PyAPI_FUNC(int) _PyToken_ThreeChars(int, int, int);
+
+#ifdef __cplusplus
+}
+#endif
+#endif // !Py_INTERNAL_TOKEN_H
+"""
+
+def make_h(infile, outfile='Include/internal/pycore_token.h'):
+ tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)
+
+ defines = []
+ for value, name in enumerate(tok_names[:ERRORTOKEN + 1]):
+ defines.append("#define %-15s %d\n" % (name, value))
+
+ if update_file(outfile, token_h_template % (
+ ''.join(defines),
+ len(tok_names),
+ NT_OFFSET
+ )):
+ print("%s regenerated from %s" % (outfile, infile))
+
+
+token_c_template = f"""\
+/* {AUTO_GENERATED_BY_SCRIPT} */
+"""
+token_c_template += """\
+
+#include "Python.h"
+#include "pycore_token.h"
+
+/* Token names */
+
+const char * const _PyParser_TokenNames[] = {
+%s\
+};
+
+/* Return the token corresponding to a single character */
+
+int
+_PyToken_OneChar(int c1)
+{
+%s\
+ return OP;
+}
+
+int
+_PyToken_TwoChars(int c1, int c2)
+{
+%s\
+ return OP;
+}
+
+int
+_PyToken_ThreeChars(int c1, int c2, int c3)
+{
+%s\
+ return OP;
+}
+"""
+
+def generate_chars_to_token(mapping, n=1):
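+    # Emit a nested C switch over c1/c2/c3; e.g. the one-character table
+    # produces lines like ``case '(': return LPAR;`` (illustrative output,
+    # the real mapping is driven by Grammar/Tokens).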
+ result = []
+ write = result.append
+ indent = ' ' * n
+ write(indent)
+ write('switch (c%d) {\n' % (n,))
+ for c in sorted(mapping):
+ write(indent)
+ value = mapping[c]
+ if isinstance(value, dict):
+ write("case '%s':\n" % (c,))
+ write(generate_chars_to_token(value, n + 1))
+ write(indent)
+ write(' break;\n')
+ else:
+ write("case '%s': return %s;\n" % (c, value))
+ write(indent)
+ write('}\n')
+ return ''.join(result)
+
+def make_c(infile, outfile='Parser/token.c'):
+ tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)
+ string_to_tok['<>'] = string_to_tok['!=']
+ chars_to_token = {}
+ for string, value in string_to_tok.items():
+ assert 1 <= len(string) <= 3
+ name = tok_names[value]
+ m = chars_to_token.setdefault(len(string), {})
+ for c in string[:-1]:
+ m = m.setdefault(c, {})
+ m[string[-1]] = name
+
+ names = []
+ for value, name in enumerate(tok_names):
+ if value >= ERRORTOKEN:
+ name = '<%s>' % name
+ names.append(' "%s",\n' % name)
+ names.append(' "<N_TOKENS>",\n')
+
+ if update_file(outfile, token_c_template % (
+ ''.join(names),
+ generate_chars_to_token(chars_to_token[1]),
+ generate_chars_to_token(chars_to_token[2]),
+ generate_chars_to_token(chars_to_token[3])
+ )):
+ print("%s regenerated from %s" % (outfile, infile))
+
+
+token_inc_template = f"""\
+.. {AUTO_GENERATED_BY_SCRIPT}
+%s
+.. data:: N_TOKENS
+
+.. data:: NT_OFFSET
+"""
+
+def make_rst(infile, outfile='Doc/library/token-list.inc'):
+ tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)
+ tok_to_string = {value: s for s, value in string_to_tok.items()}
+
+ names = []
+ for value, name in enumerate(tok_names[:ERRORTOKEN + 1]):
+ names.append('.. data:: %s' % (name,))
+ if value in tok_to_string:
+ names.append('')
+ names.append(' Token value for ``"%s"``.' % tok_to_string[value])
+ names.append('')
+
+ if update_file(outfile, token_inc_template % '\n'.join(names)):
+ print("%s regenerated from %s" % (outfile, infile))
+
+
+token_py_template = f'''\
+"""Token constants."""
+# {AUTO_GENERATED_BY_SCRIPT}
+'''
+token_py_template += '''
+__all__ = ['tok_name', 'ISTERMINAL', 'ISNONTERMINAL', 'ISEOF']
+
+%s
+N_TOKENS = %d
+# Special definitions for cooperation with parser
+NT_OFFSET = %d
+
+tok_name = {value: name
+ for name, value in globals().items()
+ if isinstance(value, int) and not name.startswith('_')}
+__all__.extend(tok_name.values())
+
+EXACT_TOKEN_TYPES = {
+%s
+}
+
+def ISTERMINAL(x):
+ return x < NT_OFFSET
+
+def ISNONTERMINAL(x):
+ return x >= NT_OFFSET
+
+def ISEOF(x):
+ return x == ENDMARKER
+'''
+
+def make_py(infile, outfile='Lib/token.py'):
+ tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)
+
+ constants = []
+ for value, name in enumerate(tok_names):
+ constants.append('%s = %d' % (name, value))
+ constants.insert(ERRORTOKEN,
+ "# These aren't used by the C tokenizer but are needed for tokenize.py")
+
+ token_types = []
+ for s, value in sorted(string_to_tok.items()):
+ token_types.append(' %r: %s,' % (s, tok_names[value]))
+
+ if update_file(outfile, token_py_template % (
+ '\n'.join(constants),
+ len(tok_names),
+ NT_OFFSET,
+ '\n'.join(token_types),
+ )):
+ print("%s regenerated from %s" % (outfile, infile))
+
+
+def main(op, infile='Grammar/Tokens', *args):
+ make = globals()['make_' + op]
+ make(infile, *args)
+
+
+if __name__ == '__main__':
+ import sys
+ main(*sys.argv[1:])
diff --git a/Tools/build/parse_html5_entities.py b/Tools/build/parse_html5_entities.py
new file mode 100755
index 0000000..d2bf290
--- /dev/null
+++ b/Tools/build/parse_html5_entities.py
@@ -0,0 +1,115 @@
+#!/usr/bin/env python3
+"""
+Utility for parsing HTML5 entity definitions available from:
+
+ https://html.spec.whatwg.org/entities.json
+ https://html.spec.whatwg.org/multipage/named-characters.html
+
+The page now contains the following note:
+
+ "This list is static and will not be expanded or changed in the future."
+
+Written by Ezio Melotti and Iuliia Proskurnia.
+"""
+
+import os
+import sys
+import json
+from urllib.request import urlopen
+from html.entities import html5
+
+SCRIPT_NAME = 'Tools/build/parse_html5_entities.py'
+PAGE_URL = 'https://html.spec.whatwg.org/multipage/named-characters.html'
+ENTITIES_URL = 'https://html.spec.whatwg.org/entities.json'
+HTML5_SECTION_START = '# HTML5 named character references'
+
+def get_json(url):
+ """Download the json file from the url and returns a decoded object."""
+ with urlopen(url) as f:
+ data = f.read().decode('utf-8')
+ return json.loads(data)
+
+def create_dict(entities):
+ """Create the html5 dict from the decoded json object."""
+ new_html5 = {}
+ for name, value in entities.items():
+ new_html5[name.lstrip('&')] = value['characters']
+ return new_html5
+
+def compare_dicts(old, new):
+ """Compare the old and new dicts and print the differences."""
+ added = new.keys() - old.keys()
+ if added:
+        print('{} entity(ies) have been added:'.format(len(added)))
+ for name in sorted(added):
+ print(' {!r}: {!r}'.format(name, new[name]))
+ removed = old.keys() - new.keys()
+ if removed:
+        print('{} entity(ies) have been removed:'.format(len(removed)))
+ for name in sorted(removed):
+ print(' {!r}: {!r}'.format(name, old[name]))
+ changed = set()
+ for name in (old.keys() & new.keys()):
+ if old[name] != new[name]:
+ changed.add((name, old[name], new[name]))
+ if changed:
+        print('{} entity(ies) have been modified:'.format(len(changed)))
+ for item in sorted(changed):
+ print(' {!r}: {!r} -> {!r}'.format(*item))
+
+def write_items(entities, file=sys.stdout):
+ """Write the items of the dictionary in the specified file."""
+ # The keys in the generated dictionary should be sorted
+ # in a case-insensitive way, however, when two keys are equal,
+ # the uppercase version should come first so that the result
+ # looks like: ['Aacute', 'aacute', 'Aacute;', 'aacute;', ...]
+ # To do this we first sort in a case-sensitive way (so all the
+ # uppercase chars come first) and then sort with key=str.lower.
+ # Since the sorting is stable the uppercase keys will eventually
+ # be before their equivalent lowercase version.
+ keys = sorted(entities.keys())
+ keys = sorted(keys, key=str.lower)
+ print(HTML5_SECTION_START, file=file)
+ print(f'# Generated by {SCRIPT_NAME}\n'
+ f'# from {ENTITIES_URL} and\n'
+ f'# {PAGE_URL}.\n'
+ f'# Map HTML5 named character references to the '
+ f'equivalent Unicode character(s).', file=file)
+ print('html5 = {', file=file)
+ for name in keys:
+ print(f' {name!r}: {entities[name]!a},', file=file)
+ print('}', file=file)
+
+
+if __name__ == '__main__':
+ # without args print a diff between html.entities.html5 and new_html5
+ # with --create print the new html5 dict
+ # with --patch patch the Lib/html/entities.py file
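+    # For example (illustrative):
+    #   ./python Tools/build/parse_html5_entities.py --patch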
+ new_html5 = create_dict(get_json(ENTITIES_URL))
+ if '--create' in sys.argv:
+ write_items(new_html5)
+ elif '--patch' in sys.argv:
+ fname = 'Lib/html/entities.py'
+ temp_fname = fname + '.temp'
+ with open(fname) as f1, open(temp_fname, 'w') as f2:
+ skip = False
+ for line in f1:
+ if line.startswith(HTML5_SECTION_START):
+ write_items(new_html5, file=f2)
+ skip = True
+ continue
+ if skip:
+ # skip the old items until the }
+ if line.startswith('}'):
+ skip = False
+ continue
+ f2.write(line)
+ os.remove(fname)
+ os.rename(temp_fname, fname)
+ else:
+ if html5 == new_html5:
+        print('The current dictionary is up to date.')
+ else:
+ compare_dicts(html5, new_html5)
+ print('Run "./python {0} --patch" to update Lib/html/entities.html '
+ 'or "./python {0} --create" to see the generated ' 'dictionary.'.format(__file__))
diff --git a/Tools/build/smelly.py b/Tools/build/smelly.py
new file mode 100755
index 0000000..276a5ab
--- /dev/null
+++ b/Tools/build/smelly.py
@@ -0,0 +1,173 @@
+#!/usr/bin/env python
+# Script checking that all symbols exported by libpython start with Py or _Py
+
+import os.path
+import subprocess
+import sys
+import sysconfig
+
+
+ALLOWED_PREFIXES = ('Py', '_Py')
+if sys.platform == 'darwin':
+ ALLOWED_PREFIXES += ('__Py',)
+
+IGNORED_EXTENSION = "_ctypes_test"
+# Ignore constructor and destructor functions
+IGNORED_SYMBOLS = {'_init', '_fini'}
+
+
+def is_local_symbol_type(symtype):
+ # Ignore local symbols.
+
+ # If lowercase, the symbol is usually local; if uppercase, the symbol
+ # is global (external). There are however a few lowercase symbols that
+ # are shown for special global symbols ("u", "v" and "w").
+ if symtype.islower() and symtype not in "uvw":
+ return True
+
+ # Ignore the initialized data section (d and D) and the BSS data
+ # section. For example, ignore "__bss_start (type: B)"
+ # and "_edata (type: D)".
+ if symtype in "bBdD":
+ return True
+
+ return False
+
+
+def get_exported_symbols(library, dynamic=False):
+ print(f"Check that {library} only exports symbols starting with Py or _Py")
+
+ # Only look at dynamic symbols
+ args = ['nm', '--no-sort']
+ if dynamic:
+ args.append('--dynamic')
+ args.append(library)
+ print("+ %s" % ' '.join(args))
+ proc = subprocess.run(args, stdout=subprocess.PIPE, universal_newlines=True)
+ if proc.returncode:
+ sys.stdout.write(proc.stdout)
+ sys.exit(proc.returncode)
+
+ stdout = proc.stdout.rstrip()
+ if not stdout:
+ raise Exception("command output is empty")
+ return stdout
+
+
+def get_smelly_symbols(stdout):
+ smelly_symbols = []
+ python_symbols = []
+ local_symbols = []
+
+ for line in stdout.splitlines():
+ # Split line '0000000000001b80 D PyTextIOWrapper_Type'
+ if not line:
+ continue
+
+ parts = line.split(maxsplit=2)
+ if len(parts) < 3:
+ continue
+
+ symtype = parts[1].strip()
+ symbol = parts[-1]
+ result = '%s (type: %s)' % (symbol, symtype)
+
+ if symbol.startswith(ALLOWED_PREFIXES):
+ python_symbols.append(result)
+ continue
+
+ if is_local_symbol_type(symtype):
+ local_symbols.append(result)
+ elif symbol in IGNORED_SYMBOLS:
+ local_symbols.append(result)
+ else:
+ smelly_symbols.append(result)
+
+ if local_symbols:
+ print(f"Ignore {len(local_symbols)} local symbols")
+ return smelly_symbols, python_symbols
+
+
+def check_library(library, dynamic=False):
+ nm_output = get_exported_symbols(library, dynamic)
+ smelly_symbols, python_symbols = get_smelly_symbols(nm_output)
+
+ if not smelly_symbols:
+ print(f"OK: no smelly symbol found ({len(python_symbols)} Python symbols)")
+ return 0
+
+ print()
+ smelly_symbols.sort()
+ for symbol in smelly_symbols:
+ print("Smelly symbol: %s" % symbol)
+
+ print()
+ print("ERROR: Found %s smelly symbols!" % len(smelly_symbols))
+ return len(smelly_symbols)
+
+
+def check_extensions():
+ print(__file__)
+    # This assumes pybuilddir.txt is in the same directory as pyconfig.h.
+ # In the case of out-of-tree builds, we can't assume pybuilddir.txt is
+ # in the source folder.
+ config_dir = os.path.dirname(sysconfig.get_config_h_filename())
+ filename = os.path.join(config_dir, "pybuilddir.txt")
+ try:
+ with open(filename, encoding="utf-8") as fp:
+ pybuilddir = fp.readline()
+ except FileNotFoundError:
+ print(f"Cannot check extensions because {filename} does not exist")
+ return True
+
+ print(f"Check extension modules from {pybuilddir} directory")
+ builddir = os.path.join(config_dir, pybuilddir)
+ nsymbol = 0
+ for name in os.listdir(builddir):
+ if not name.endswith(".so"):
+ continue
+ if IGNORED_EXTENSION in name:
+ print()
+ print(f"Ignore extension: {name}")
+ continue
+
+ print()
+ filename = os.path.join(builddir, name)
+ nsymbol += check_library(filename, dynamic=True)
+
+ return nsymbol
+
+
+def main():
+ nsymbol = 0
+
+ # static library
+ LIBRARY = sysconfig.get_config_var('LIBRARY')
+ if not LIBRARY:
+ raise Exception("failed to get LIBRARY variable from sysconfig")
+ if os.path.exists(LIBRARY):
+ nsymbol += check_library(LIBRARY)
+
+ # dynamic library
+ LDLIBRARY = sysconfig.get_config_var('LDLIBRARY')
+ if not LDLIBRARY:
+ raise Exception("failed to get LDLIBRARY variable from sysconfig")
+ if LDLIBRARY != LIBRARY:
+ print()
+ nsymbol += check_library(LDLIBRARY, dynamic=True)
+
+ # Check extension modules like _ssl.cpython-310d-x86_64-linux-gnu.so
+ nsymbol += check_extensions()
+
+ if nsymbol:
+ print()
+ print(f"ERROR: Found {nsymbol} smelly symbols in total!")
+ sys.exit(1)
+
+ print()
+ print(f"OK: all exported symbols of all libraries "
+ f"are prefixed with {' or '.join(map(repr, ALLOWED_PREFIXES))}")
+
+
+if __name__ == "__main__":
+ main()
diff --git a/Tools/build/stable_abi.py b/Tools/build/stable_abi.py
new file mode 100644
index 0000000..88db93e
--- /dev/null
+++ b/Tools/build/stable_abi.py
@@ -0,0 +1,757 @@
+"""Check the stable ABI manifest or generate files from it
+
+By default, the tool only checks existing files/libraries.
+Pass --generate to recreate auto-generated files instead.
+
+For actions that take a FILENAME, the filename can be left out to use a default
+(relative to the manifest file, as they appear in the CPython codebase).
+"""
+
+from functools import partial
+from pathlib import Path
+import dataclasses
+import subprocess
+import sysconfig
+import argparse
+import textwrap
+import tomllib
+import difflib
+import pprint
+import sys
+import os
+import os.path
+import io
+import re
+import csv
+
+SCRIPT_NAME = 'Tools/build/stable_abi.py'
+MISSING = object()
+
+EXCLUDED_HEADERS = {
+ "bytes_methods.h",
+ "cellobject.h",
+ "classobject.h",
+ "code.h",
+ "compile.h",
+ "datetime.h",
+ "dtoa.h",
+ "frameobject.h",
+ "genobject.h",
+ "longintrepr.h",
+ "parsetok.h",
+ "pyatomic.h",
+ "pytime.h",
+ "token.h",
+ "ucnhash.h",
+}
+MACOS = (sys.platform == "darwin")
+UNIXY = MACOS or (sys.platform == "linux") # XXX should this be "not Windows"?
+
+
+# The stable ABI manifest (Misc/stable_abi.toml) exists only to fill the
+# following dataclasses.
+# Feel free to change its syntax (and the `parse_manifest` function)
+# to better serve that purpose (while keeping it human-readable).
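+#
+# An entry looks roughly like this (illustrative example, not copied verbatim
+# from the manifest):
+#
+#   [function.PyObject_Repr]
+#       added = '3.2'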
+
+class Manifest:
+ """Collection of `ABIItem`s forming the stable ABI/limited API."""
+ def __init__(self):
+ self.contents = dict()
+
+ def add(self, item):
+ if item.name in self.contents:
+ # We assume that stable ABI items do not share names,
+ # even if they're different kinds (e.g. function vs. macro).
+ raise ValueError(f'duplicate ABI item {item.name}')
+ self.contents[item.name] = item
+
+ def select(self, kinds, *, include_abi_only=True, ifdef=None):
+ """Yield selected items of the manifest
+
+ kinds: set of requested kinds, e.g. {'function', 'macro'}
+ include_abi_only: if True (default), include all items of the
+ stable ABI.
+ If False, include only items from the limited API
+ (i.e. items people should use today)
+ ifdef: set of feature macros (e.g. {'HAVE_FORK', 'MS_WINDOWS'}).
+ If None (default), items are not filtered by this. (This is
+ different from the empty set, which filters out all such
+ conditional items.)
+ """
+ for name, item in sorted(self.contents.items()):
+ if item.kind not in kinds:
+ continue
+ if item.abi_only and not include_abi_only:
+ continue
+ if (ifdef is not None
+ and item.ifdef is not None
+ and item.ifdef not in ifdef):
+ continue
+ yield item
+
+ def dump(self):
+ """Yield lines to recreate the manifest file (sans comments/newlines)"""
+ for item in self.contents.values():
+ fields = dataclasses.fields(item)
+ yield f"[{item.kind}.{item.name}]"
+ for field in fields:
+ if field.name in {'name', 'value', 'kind'}:
+ continue
+ value = getattr(item, field.name)
+ if value == field.default:
+ pass
+ elif value is True:
+ yield f" {field.name} = true"
+ elif value:
+ yield f" {field.name} = {value!r}"
+
+
+itemclasses = {}
+def itemclass(kind):
+ """Register the decorated class in `itemclasses`"""
+ def decorator(cls):
+ itemclasses[kind] = cls
+ return cls
+ return decorator
+
+@itemclass('function')
+@itemclass('macro')
+@itemclass('data')
+@itemclass('const')
+@itemclass('typedef')
+@dataclasses.dataclass
+class ABIItem:
+ """Information on one item (function, macro, struct, etc.)"""
+
+ name: str
+ kind: str
+ added: str = None
+ abi_only: bool = False
+ ifdef: str = None
+
+@itemclass('feature_macro')
+@dataclasses.dataclass(kw_only=True)
+class FeatureMacro(ABIItem):
+ name: str
+ doc: str
+ windows: bool = False
+ abi_only: bool = True
+
+@itemclass('struct')
+@dataclasses.dataclass(kw_only=True)
+class Struct(ABIItem):
+ struct_abi_kind: str
+ members: list = None
+
+
+def parse_manifest(file):
+ """Parse the given file (iterable of lines) to a Manifest"""
+
+ manifest = Manifest()
+
+ data = tomllib.load(file)
+
+ for kind, itemclass in itemclasses.items():
+ for name, item_data in data[kind].items():
+ try:
+ item = itemclass(name=name, kind=kind, **item_data)
+ manifest.add(item)
+ except BaseException as exc:
+ exc.add_note(f'in {kind} {name}')
+ raise
+
+ return manifest
+
+# The tool can run individual "actions".
+# Most actions are "generators", which generate a single file from the
+# manifest. (Checking works by generating a temp file & comparing.)
+# Other actions, like "--unixy-check", don't work on a single file.
+
+generators = []
+def generator(var_name, default_path):
+ """Decorates a file generator: function that writes to a file"""
+ def _decorator(func):
+ func.var_name = var_name
+ func.arg_name = '--' + var_name.replace('_', '-')
+ func.default_path = default_path
+ generators.append(func)
+ return func
+ return _decorator
+
+
+@generator("python3dll", 'PC/python3dll.c')
+def gen_python3dll(manifest, args, outfile):
+ """Generate/check the source for the Windows stable ABI library"""
+ write = partial(print, file=outfile)
+ content = f"""
+ /* Re-export stable Python ABI */
+
+ /* Generated by {SCRIPT_NAME} */
+ """
+ content += r"""
+ #ifdef _M_IX86
+ #define DECORATE "_"
+ #else
+ #define DECORATE
+ #endif
+
+ #define EXPORT_FUNC(name) \
+ __pragma(comment(linker, "/EXPORT:" DECORATE #name "=" PYTHON_DLL_NAME "." #name))
+ #define EXPORT_DATA(name) \
+ __pragma(comment(linker, "/EXPORT:" DECORATE #name "=" PYTHON_DLL_NAME "." #name ",DATA"))
+ """
+ write(textwrap.dedent(content))
+
+ def sort_key(item):
+ return item.name.lower()
+
+ windows_feature_macros = {
+ item.name for item in manifest.select({'feature_macro'}) if item.windows
+ }
+ for item in sorted(
+ manifest.select(
+ {'function'},
+ include_abi_only=True,
+ ifdef=windows_feature_macros),
+ key=sort_key):
+ write(f'EXPORT_FUNC({item.name})')
+
+ write()
+
+ for item in sorted(
+ manifest.select(
+ {'data'},
+ include_abi_only=True,
+ ifdef=windows_feature_macros),
+ key=sort_key):
+ write(f'EXPORT_DATA({item.name})')
+
+REST_ROLES = {
+ 'function': 'function',
+ 'data': 'var',
+ 'struct': 'type',
+ 'macro': 'macro',
+ # 'const': 'const', # all undocumented
+ 'typedef': 'type',
+}
+
+@generator("doc_list", 'Doc/data/stable_abi.dat')
+def gen_doc_annotations(manifest, args, outfile):
+ """Generate/check the stable ABI list for documentation annotations"""
+ writer = csv.DictWriter(
+ outfile,
+ ['role', 'name', 'added', 'ifdef_note', 'struct_abi_kind'],
+ lineterminator='\n')
+ writer.writeheader()
+ for item in manifest.select(REST_ROLES.keys(), include_abi_only=False):
+ if item.ifdef:
+ ifdef_note = manifest.contents[item.ifdef].doc
+ else:
+ ifdef_note = None
+ row = {
+ 'role': REST_ROLES[item.kind],
+ 'name': item.name,
+ 'added': item.added,
+ 'ifdef_note': ifdef_note}
+ rows = [row]
+ if item.kind == 'struct':
+ row['struct_abi_kind'] = item.struct_abi_kind
+ for member_name in item.members or ():
+ rows.append({
+ 'role': 'member',
+ 'name': f'{item.name}.{member_name}',
+ 'added': item.added})
+ writer.writerows(rows)
+
+@generator("ctypes_test", 'Lib/test/test_stable_abi_ctypes.py')
+def gen_ctypes_test(manifest, args, outfile):
+ """Generate/check the ctypes-based test for exported symbols"""
+ write = partial(print, file=outfile)
+ write(textwrap.dedent('''
+        # Generated by Tools/build/stable_abi.py
+
+ """Test that all symbols of the Stable ABI are accessible using ctypes
+ """
+
+ import sys
+ import unittest
+ from test.support.import_helper import import_module
+ from _testcapi import get_feature_macros
+
+ feature_macros = get_feature_macros()
+ ctypes_test = import_module('ctypes')
+
+ class TestStableABIAvailability(unittest.TestCase):
+ def test_available_symbols(self):
+
+ for symbol_name in SYMBOL_NAMES:
+ with self.subTest(symbol_name):
+ ctypes_test.pythonapi[symbol_name]
+
+ def test_feature_macros(self):
+ self.assertEqual(
+ set(get_feature_macros()), EXPECTED_FEATURE_MACROS)
+
+ # The feature macros for Windows are used in creating the DLL
+ # definition, so they must be known on all platforms.
+ # If we are on Windows, we check that the hardcoded data matches
+ # the reality.
+ @unittest.skipIf(sys.platform != "win32", "Windows specific test")
+ def test_windows_feature_macros(self):
+ for name, value in WINDOWS_FEATURE_MACROS.items():
+ if value != 'maybe':
+ with self.subTest(name):
+ self.assertEqual(feature_macros[name], value)
+
+ SYMBOL_NAMES = (
+ '''))
+ items = manifest.select(
+ {'function', 'data'},
+ include_abi_only=True,
+ )
+ optional_items = {}
+ for item in items:
+ if item.name in (
+ # Some symbols aren't exported on all platforms.
+ # This is a bug: https://bugs.python.org/issue44133
+ 'PyModule_Create2', 'PyModule_FromDefAndSpec2',
+ ):
+ continue
+ if item.ifdef:
+ optional_items.setdefault(item.ifdef, []).append(item.name)
+ else:
+ write(f' "{item.name}",')
+ write(")")
+ for ifdef, names in optional_items.items():
+ write(f"if feature_macros[{ifdef!r}]:")
+ write(f" SYMBOL_NAMES += (")
+ for name in names:
+ write(f" {name!r},")
+ write(" )")
+ write("")
+ feature_macros = list(manifest.select({'feature_macro'}))
+ feature_names = sorted(m.name for m in feature_macros)
+ write(f"EXPECTED_FEATURE_MACROS = set({pprint.pformat(feature_names)})")
+
+ windows_feature_macros = {m.name: m.windows for m in feature_macros}
+ write(f"WINDOWS_FEATURE_MACROS = {pprint.pformat(windows_feature_macros)}")
+
+
+@generator("testcapi_feature_macros", 'Modules/_testcapi_feature_macros.inc')
+def gen_testcapi_feature_macros(manifest, args, outfile):
+ """Generate/check the stable ABI list for documentation annotations"""
+ write = partial(print, file=outfile)
+    write('// Generated by Tools/build/stable_abi.py')
+ write()
+ write('// Add an entry in dict `result` for each Stable ABI feature macro.')
+ write()
+ for macro in manifest.select({'feature_macro'}):
+ name = macro.name
+ write(f'#ifdef {name}')
+ write(f' res = PyDict_SetItemString(result, "{name}", Py_True);')
+ write('#else')
+ write(f' res = PyDict_SetItemString(result, "{name}", Py_False);')
+ write('#endif')
+ write('if (res) {')
+ write(' Py_DECREF(result); return NULL;')
+ write('}')
+ write()
+
+
+def generate_or_check(manifest, args, path, func):
+ """Generate/check a file with a single generator
+
+ Return True if successful; False if a comparison failed.
+ """
+
+ outfile = io.StringIO()
+ func(manifest, args, outfile)
+ generated = outfile.getvalue()
+ existing = path.read_text()
+
+ if generated != existing:
+ if args.generate:
+ path.write_text(generated)
+ else:
+ print(f'File {path} differs from expected!')
+ diff = difflib.unified_diff(
+ generated.splitlines(), existing.splitlines(),
+ str(path), '<expected>',
+ lineterm='',
+ )
+ for line in diff:
+ print(line)
+ return False
+ return True
+
+
+def do_unixy_check(manifest, args):
+ """Check headers & library using "Unixy" tools (GCC/clang, binutils)"""
+ okay = True
+
+ # Get all macros first: we'll need feature macros like HAVE_FORK and
+ # MS_WINDOWS for everything else
+ present_macros = gcc_get_limited_api_macros(['Include/Python.h'])
+ feature_macros = set(m.name for m in manifest.select({'feature_macro'}))
+ feature_macros &= present_macros
+
+ # Check that we have all needed macros
+ expected_macros = set(
+ item.name for item in manifest.select({'macro'})
+ )
+ missing_macros = expected_macros - present_macros
+ okay &= _report_unexpected_items(
+ missing_macros,
+        'Some macros are not defined in "Include/Python.h" '
+        + 'with Py_LIMITED_API:')
+
+ expected_symbols = set(item.name for item in manifest.select(
+ {'function', 'data'}, include_abi_only=True, ifdef=feature_macros,
+ ))
+
+ # Check the static library (*.a)
+ LIBRARY = sysconfig.get_config_var("LIBRARY")
+ if not LIBRARY:
+ raise Exception("failed to get LIBRARY variable from sysconfig")
+ if os.path.exists(LIBRARY):
+ okay &= binutils_check_library(
+ manifest, LIBRARY, expected_symbols, dynamic=False)
+
+ # Check the dynamic library (*.so)
+ LDLIBRARY = sysconfig.get_config_var("LDLIBRARY")
+ if not LDLIBRARY:
+ raise Exception("failed to get LDLIBRARY variable from sysconfig")
+ okay &= binutils_check_library(
+ manifest, LDLIBRARY, expected_symbols, dynamic=False)
+
+ # Check definitions in the header files
+ expected_defs = set(item.name for item in manifest.select(
+ {'function', 'data'}, include_abi_only=False, ifdef=feature_macros,
+ ))
+ found_defs = gcc_get_limited_api_definitions(['Include/Python.h'])
+ missing_defs = expected_defs - found_defs
+ okay &= _report_unexpected_items(
+ missing_defs,
+ 'Some expected declarations were not declared in '
+ + '"Include/Python.h" with Py_LIMITED_API:')
+
+ # Some Limited API macros are defined in terms of private symbols.
+ # These are not part of Limited API (even though they're defined with
+ # Py_LIMITED_API). They must be part of the Stable ABI, though.
+ private_symbols = {n for n in expected_symbols if n.startswith('_')}
+ extra_defs = found_defs - expected_defs - private_symbols
+ okay &= _report_unexpected_items(
+ extra_defs,
+ 'Some extra declarations were found in "Include/Python.h" '
+ + 'with Py_LIMITED_API:')
+
+ return okay
+
+
+def _report_unexpected_items(items, msg):
+ """If there are any `items`, report them using "msg" and return false"""
+ if items:
+ print(msg, file=sys.stderr)
+ for item in sorted(items):
+ print(' -', item, file=sys.stderr)
+ return False
+ return True
+
+
+def binutils_get_exported_symbols(library, dynamic=False):
+ """Retrieve exported symbols using the nm(1) tool from binutils"""
+ # Only look at dynamic symbols
+ args = ["nm", "--no-sort"]
+ if dynamic:
+ args.append("--dynamic")
+ args.append(library)
+ proc = subprocess.run(args, stdout=subprocess.PIPE, universal_newlines=True)
+ if proc.returncode:
+ sys.stdout.write(proc.stdout)
+ sys.exit(proc.returncode)
+
+ stdout = proc.stdout.rstrip()
+ if not stdout:
+ raise Exception("command output is empty")
+
+ for line in stdout.splitlines():
+ # Split line '0000000000001b80 D PyTextIOWrapper_Type'
+ if not line:
+ continue
+
+ parts = line.split(maxsplit=2)
+ if len(parts) < 3:
+ continue
+
+ symbol = parts[-1]
+ if MACOS and symbol.startswith("_"):
+ yield symbol[1:]
+ else:
+ yield symbol
+
+
+def binutils_check_library(manifest, library, expected_symbols, dynamic):
+ """Check that library exports all expected_symbols"""
+ available_symbols = set(binutils_get_exported_symbols(library, dynamic))
+ missing_symbols = expected_symbols - available_symbols
+ if missing_symbols:
+ print(textwrap.dedent(f"""\
+ Some symbols from the limited API are missing from {library}:
+ {', '.join(missing_symbols)}
+
+ This error means that there are some missing symbols among the
+ ones exported in the library.
+ This normally means that some symbol, function implementation or
+ a prototype belonging to a symbol in the limited API has been
+ deleted or is missing.
+ """), file=sys.stderr)
+ return False
+ return True
+
+
+def gcc_get_limited_api_macros(headers):
+ """Get all limited API macros from headers.
+
+ Runs the preprocessor over all the header files in "Include" setting
+ "-DPy_LIMITED_API" to the correct value for the running version of the
+ interpreter and extracting all macro definitions (via adding -dM to the
+ compiler arguments).
+
+ Requires Python built with a GCC-compatible compiler. (clang might work)
+ """
+
+ api_hexversion = sys.version_info.major << 24 | sys.version_info.minor << 16
+
+    preprocessor_output_with_macros = subprocess.check_output(
+ sysconfig.get_config_var("CC").split()
+ + [
+ # Prevent the expansion of the exported macros so we can
+ # capture them later
+ "-DSIZEOF_WCHAR_T=4", # The actual value is not important
+ f"-DPy_LIMITED_API={api_hexversion}",
+ "-I.",
+ "-I./Include",
+ "-dM",
+ "-E",
+ ]
+ + [str(file) for file in headers],
+ text=True,
+ )
+
+ return {
+ target
+ for target in re.findall(
+ r"#define (\w+)", preprocesor_output_with_macros
+ )
+ }
+
+
+def gcc_get_limited_api_definitions(headers):
+ """Get all limited API definitions from headers.
+
+ Run the preprocessor over all the header files in "Include" setting
+ "-DPy_LIMITED_API" to the correct value for the running version of the
+ interpreter.
+
+ The limited API symbols will be extracted from the output of this command
+ as it includes the prototypes and definitions of all the exported symbols
+ that are in the limited api.
+
+    This function does *NOT* extract the macros defined by the limited API.
+
+ Requires Python built with a GCC-compatible compiler. (clang might work)
+ """
+ api_hexversion = sys.version_info.major << 24 | sys.version_info.minor << 16
+    preprocessor_output = subprocess.check_output(
+ sysconfig.get_config_var("CC").split()
+ + [
+ # Prevent the expansion of the exported macros so we can capture
+ # them later
+ "-DPyAPI_FUNC=__PyAPI_FUNC",
+ "-DPyAPI_DATA=__PyAPI_DATA",
+ "-DEXPORT_DATA=__EXPORT_DATA",
+ "-D_Py_NO_RETURN=",
+ "-DSIZEOF_WCHAR_T=4", # The actual value is not important
+ f"-DPy_LIMITED_API={api_hexversion}",
+ "-I.",
+ "-I./Include",
+ "-E",
+ ]
+ + [str(file) for file in headers],
+ text=True,
+ stderr=subprocess.DEVNULL,
+ )
+ stable_functions = set(
+ re.findall(r"__PyAPI_FUNC\(.*?\)\s*(.*?)\s*\(", preprocesor_output)
+ )
+ stable_exported_data = set(
+ re.findall(r"__EXPORT_DATA\((.*?)\)", preprocesor_output)
+ )
+ stable_data = set(
+ re.findall(r"__PyAPI_DATA\(.*?\)[\s\*\(]*([^);]*)\)?.*;", preprocesor_output)
+ )
+ return stable_data | stable_exported_data | stable_functions
+
+def check_private_names(manifest):
+ """Ensure limited API doesn't contain private names
+
+ Names prefixed by an underscore are private by definition.
+ """
+ for name, item in manifest.contents.items():
+ if name.startswith('_') and not item.abi_only:
+ raise ValueError(
+ f'`{name}` is private (underscore-prefixed) and should be '
+                + 'removed from the stable ABI list or marked `abi_only`')
+
+def check_dump(manifest, filename):
+ """Check that manifest.dump() corresponds to the data.
+
+ Mainly useful when debugging this script.
+ """
+ dumped = tomllib.loads('\n'.join(manifest.dump()))
+ with filename.open('rb') as file:
+ from_file = tomllib.load(file)
+ if dumped != from_file:
+ print(f'Dump differs from loaded data!', file=sys.stderr)
+ diff = difflib.unified_diff(
+ pprint.pformat(dumped).splitlines(),
+ pprint.pformat(from_file).splitlines(),
+ '<dumped>', str(filename),
+ lineterm='',
+ )
+ for line in diff:
+ print(line, file=sys.stderr)
+ return False
+ else:
+ return True
+
+def main():
+ parser = argparse.ArgumentParser(
+ description=__doc__,
+ formatter_class=argparse.RawDescriptionHelpFormatter,
+ )
+ parser.add_argument(
+ "file", type=Path, metavar='FILE',
+ help="file with the stable abi manifest",
+ )
+ parser.add_argument(
+ "--generate", action='store_true',
+ help="generate file(s), rather than just checking them",
+ )
+ parser.add_argument(
+ "--generate-all", action='store_true',
+ help="as --generate, but generate all file(s) using default filenames."
+ + " (unlike --all, does not run any extra checks)",
+ )
+ parser.add_argument(
+ "-a", "--all", action='store_true',
+ help="run all available checks using default filenames",
+ )
+ parser.add_argument(
+ "-l", "--list", action='store_true',
+ help="list available generators and their default filenames; then exit",
+ )
+ parser.add_argument(
+ "--dump", action='store_true',
+ help="dump the manifest contents (used for debugging the parser)",
+ )
+
+ actions_group = parser.add_argument_group('actions')
+ for gen in generators:
+ actions_group.add_argument(
+ gen.arg_name, dest=gen.var_name,
+ type=str, nargs="?", default=MISSING,
+ metavar='FILENAME',
+ help=gen.__doc__,
+ )
+ actions_group.add_argument(
+ '--unixy-check', action='store_true',
+ help=do_unixy_check.__doc__,
+ )
+ args = parser.parse_args()
+
+ base_path = args.file.parent.parent
+
+ if args.list:
+ for gen in generators:
+ print(f'{gen.arg_name}: {base_path / gen.default_path}')
+ sys.exit(0)
+
+ run_all_generators = args.generate_all
+
+ if args.generate_all:
+ args.generate = True
+
+ if args.all:
+ run_all_generators = True
+ args.unixy_check = True
+
+ try:
+ file = args.file.open('rb')
+ except FileNotFoundError as err:
+ if args.file.suffix == '.txt':
+ # Provide a better error message
+ suggestion = args.file.with_suffix('.toml')
+ raise FileNotFoundError(
+ f'{args.file} not found. Did you mean {suggestion} ?') from err
+ raise
+ with file:
+ manifest = parse_manifest(file)
+
+ check_private_names(manifest)
+
+ # Remember results of all actions (as booleans).
+ # At the end we'll check that at least one action was run,
+ # and also fail if any are false.
+ results = {}
+
+ if args.dump:
+ for line in manifest.dump():
+ print(line)
+ results['dump'] = check_dump(manifest, args.file)
+
+ for gen in generators:
+ filename = getattr(args, gen.var_name)
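+        # getattr() returns MISSING when the option was not given, None when
+        # it was given without a filename (use the default path), or a string
+        # for an explicit filename.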
+ if filename is None or (run_all_generators and filename is MISSING):
+ filename = base_path / gen.default_path
+ elif filename is MISSING:
+ continue
+
+ results[gen.var_name] = generate_or_check(manifest, args, filename, gen)
+
+ if args.unixy_check:
+ results['unixy_check'] = do_unixy_check(manifest, args)
+
+ if not results:
+ if args.generate:
+ parser.error('No file specified. Use --help for usage.')
+ parser.error('No check specified. Use --help for usage.')
+
+ failed_results = [name for name, result in results.items() if not result]
+
+ if failed_results:
+ raise Exception(f"""
+ These checks related to the stable ABI did not succeed:
+ {', '.join(failed_results)}
+
+        If you see diffs in the output, files derived from the stable
+        ABI manifest were not regenerated.
+ Run `make regen-limited-abi` to fix this.
+
+ Otherwise, see the error(s) above.
+
+ The stable ABI manifest is at: {args.file}
+ Note that there is a process to follow when modifying it.
+
+ You can read more about the limited API and its contracts at:
+
+ https://docs.python.org/3/c-api/stable.html
+
+ And in PEP 384:
+
+ https://peps.python.org/pep-0384/
+ """)
+
+
+if __name__ == "__main__":
+ main()
diff --git a/Tools/build/umarshal.py b/Tools/build/umarshal.py
new file mode 100644
index 0000000..f61570c
--- /dev/null
+++ b/Tools/build/umarshal.py
@@ -0,0 +1,325 @@
+# Implementation of marshal.loads() in pure Python
+
+import ast
+
+from typing import Any, Tuple
+
+
+class Type:
+ # Adapted from marshal.c
+ NULL = ord('0')
+ NONE = ord('N')
+ FALSE = ord('F')
+ TRUE = ord('T')
+ STOPITER = ord('S')
+ ELLIPSIS = ord('.')
+ INT = ord('i')
+ INT64 = ord('I')
+ FLOAT = ord('f')
+ BINARY_FLOAT = ord('g')
+ COMPLEX = ord('x')
+ BINARY_COMPLEX = ord('y')
+ LONG = ord('l')
+ STRING = ord('s')
+ INTERNED = ord('t')
+ REF = ord('r')
+ TUPLE = ord('(')
+ LIST = ord('[')
+ DICT = ord('{')
+ CODE = ord('c')
+ UNICODE = ord('u')
+ UNKNOWN = ord('?')
+ SET = ord('<')
+ FROZENSET = ord('>')
+ ASCII = ord('a')
+ ASCII_INTERNED = ord('A')
+ SMALL_TUPLE = ord(')')
+ SHORT_ASCII = ord('z')
+ SHORT_ASCII_INTERNED = ord('Z')
+
+
+FLAG_REF = 0x80 # with a type, add obj to index
+
+NULL = object() # marker
+
+# Cell kinds
+CO_FAST_LOCAL = 0x20
+CO_FAST_CELL = 0x40
+CO_FAST_FREE = 0x80
+
+
+class Code:
+ def __init__(self, **kwds: Any):
+ self.__dict__.update(kwds)
+
+ def __repr__(self) -> str:
+ return f"Code(**{self.__dict__})"
+
+    co_localsplusnames: Tuple[str, ...]
+    co_localspluskinds: Tuple[int, ...]
+
+ def get_localsplus_names(self, select_kind: int) -> Tuple[str, ...]:
+ varnames: list[str] = []
+ for name, kind in zip(self.co_localsplusnames,
+ self.co_localspluskinds):
+ if kind & select_kind:
+ varnames.append(name)
+ return tuple(varnames)
+
+ @property
+ def co_varnames(self) -> Tuple[str, ...]:
+ return self.get_localsplus_names(CO_FAST_LOCAL)
+
+ @property
+ def co_cellvars(self) -> Tuple[str, ...]:
+ return self.get_localsplus_names(CO_FAST_CELL)
+
+ @property
+ def co_freevars(self) -> Tuple[str, ...]:
+ return self.get_localsplus_names(CO_FAST_FREE)
+
+ @property
+ def co_nlocals(self) -> int:
+ return len(self.co_varnames)
+
+
+class Reader:
+ # A fairly literal translation of the marshal reader.
+
+ def __init__(self, data: bytes):
+ self.data: bytes = data
+ self.end: int = len(self.data)
+ self.pos: int = 0
+ self.refs: list[Any] = []
+ self.level: int = 0
+
+ def r_string(self, n: int) -> bytes:
+ assert 0 <= n <= self.end - self.pos
+ buf = self.data[self.pos : self.pos + n]
+ self.pos += n
+ return buf
+
+ def r_byte(self) -> int:
+ buf = self.r_string(1)
+ return buf[0]
+
+ def r_short(self) -> int:
+ buf = self.r_string(2)
+ x = buf[0]
+ x |= buf[1] << 8
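+        # Two's-complement sign extension: e.g. 0xFFFF becomes -1,
+        # while 0x7FFF stays 32767.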
+ x |= -(x & (1<<15)) # Sign-extend
+ return x
+
+ def r_long(self) -> int:
+ buf = self.r_string(4)
+ x = buf[0]
+ x |= buf[1] << 8
+ x |= buf[2] << 16
+ x |= buf[3] << 24
+ x |= -(x & (1<<31)) # Sign-extend
+ return x
+
+ def r_long64(self) -> int:
+ buf = self.r_string(8)
+ x = buf[0]
+ x |= buf[1] << 8
+ x |= buf[2] << 16
+ x |= buf[3] << 24
+        x |= buf[4] << 32
+        x |= buf[5] << 40
+        x |= buf[6] << 48
+        x |= buf[7] << 56
+ x |= -(x & (1<<63)) # Sign-extend
+ return x
+
+ def r_PyLong(self) -> int:
+ n = self.r_long()
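+        # marshal stores arbitrary-precision ints as abs(n) 15-bit digits,
+        # least significant first; the sign of n gives the sign of the value.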
+ size = abs(n)
+ x = 0
+ # Pray this is right
+ for i in range(size):
+ x |= self.r_short() << i*15
+ if n < 0:
+ x = -x
+ return x
+
+ def r_float_bin(self) -> float:
+ buf = self.r_string(8)
+ import struct # Lazy import to avoid breaking UNIX build
+ return struct.unpack("d", buf)[0]
+
+ def r_float_str(self) -> float:
+ n = self.r_byte()
+ buf = self.r_string(n)
+ return ast.literal_eval(buf.decode("ascii"))
+
+ def r_ref_reserve(self, flag: int) -> int:
+ if flag:
+ idx = len(self.refs)
+ self.refs.append(None)
+ return idx
+ else:
+ return 0
+
+ def r_ref_insert(self, obj: Any, idx: int, flag: int) -> Any:
+ if flag:
+ self.refs[idx] = obj
+ return obj
+
+ def r_ref(self, obj: Any, flag: int) -> Any:
+ assert flag & FLAG_REF
+ self.refs.append(obj)
+ return obj
+
+ def r_object(self) -> Any:
+ old_level = self.level
+ try:
+ return self._r_object()
+ finally:
+ self.level = old_level
+
+ def _r_object(self) -> Any:
+ code = self.r_byte()
+ flag = code & FLAG_REF
+ type = code & ~FLAG_REF
+ # print(" "*self.level + f"{code} {flag} {type} {chr(type)!r}")
+ self.level += 1
+
+ def R_REF(obj: Any) -> Any:
+ if flag:
+ obj = self.r_ref(obj, flag)
+ return obj
+
+ if type == Type.NULL:
+ return NULL
+ elif type == Type.NONE:
+ return None
+ elif type == Type.ELLIPSIS:
+ return Ellipsis
+ elif type == Type.FALSE:
+ return False
+ elif type == Type.TRUE:
+ return True
+ elif type == Type.INT:
+ return R_REF(self.r_long())
+ elif type == Type.INT64:
+ return R_REF(self.r_long64())
+ elif type == Type.LONG:
+ return R_REF(self.r_PyLong())
+ elif type == Type.FLOAT:
+ return R_REF(self.r_float_str())
+ elif type == Type.BINARY_FLOAT:
+ return R_REF(self.r_float_bin())
+ elif type == Type.COMPLEX:
+ return R_REF(complex(self.r_float_str(),
+ self.r_float_str()))
+ elif type == Type.BINARY_COMPLEX:
+ return R_REF(complex(self.r_float_bin(),
+ self.r_float_bin()))
+ elif type == Type.STRING:
+ n = self.r_long()
+ return R_REF(self.r_string(n))
+ elif type == Type.ASCII_INTERNED or type == Type.ASCII:
+ n = self.r_long()
+ return R_REF(self.r_string(n).decode("ascii"))
+ elif type == Type.SHORT_ASCII_INTERNED or type == Type.SHORT_ASCII:
+ n = self.r_byte()
+ return R_REF(self.r_string(n).decode("ascii"))
+ elif type == Type.INTERNED or type == Type.UNICODE:
+ n = self.r_long()
+ return R_REF(self.r_string(n).decode("utf8", "surrogatepass"))
+ elif type == Type.SMALL_TUPLE:
+ n = self.r_byte()
+ idx = self.r_ref_reserve(flag)
+ retval: Any = tuple(self.r_object() for _ in range(n))
+ self.r_ref_insert(retval, idx, flag)
+ return retval
+ elif type == Type.TUPLE:
+ n = self.r_long()
+ idx = self.r_ref_reserve(flag)
+ retval = tuple(self.r_object() for _ in range(n))
+ self.r_ref_insert(retval, idx, flag)
+ return retval
+ elif type == Type.LIST:
+ n = self.r_long()
+ retval = R_REF([])
+ for _ in range(n):
+ retval.append(self.r_object())
+ return retval
+ elif type == Type.DICT:
+ retval = R_REF({})
+ while True:
+ key = self.r_object()
+                if key is NULL:
+ break
+ val = self.r_object()
+ retval[key] = val
+ return retval
+ elif type == Type.SET:
+ n = self.r_long()
+ retval = R_REF(set())
+ for _ in range(n):
+ v = self.r_object()
+ retval.add(v)
+ return retval
+ elif type == Type.FROZENSET:
+ n = self.r_long()
+ s: set[Any] = set()
+ idx = self.r_ref_reserve(flag)
+ for _ in range(n):
+ v = self.r_object()
+ s.add(v)
+ retval = frozenset(s)
+ self.r_ref_insert(retval, idx, flag)
+ return retval
+ elif type == Type.CODE:
+ retval = R_REF(Code())
+ retval.co_argcount = self.r_long()
+ retval.co_posonlyargcount = self.r_long()
+ retval.co_kwonlyargcount = self.r_long()
+ retval.co_stacksize = self.r_long()
+ retval.co_flags = self.r_long()
+ retval.co_code = self.r_object()
+ retval.co_consts = self.r_object()
+ retval.co_names = self.r_object()
+ retval.co_localsplusnames = self.r_object()
+ retval.co_localspluskinds = self.r_object()
+ retval.co_filename = self.r_object()
+ retval.co_name = self.r_object()
+ retval.co_qualname = self.r_object()
+ retval.co_firstlineno = self.r_long()
+ retval.co_linetable = self.r_object()
+ retval.co_exceptiontable = self.r_object()
+ return retval
+ elif type == Type.REF:
+ n = self.r_long()
+ retval = self.refs[n]
+ assert retval is not None
+ return retval
+ else:
+ breakpoint()
+ raise AssertionError(f"Unknown type {type} {chr(type)!r}")
+
+
+def loads(data: bytes) -> Any:
+ assert isinstance(data, bytes)
+ r = Reader(data)
+ return r.r_object()
+
+
+def main():
+ # Test
+ import marshal, pprint
+ sample = {'foo': {(42, "bar", 3.14)}}
+ data = marshal.dumps(sample)
+ retval = loads(data)
+ assert retval == sample, retval
+ sample = main.__code__
+ data = marshal.dumps(sample)
+ retval = loads(data)
+ assert isinstance(retval, Code), retval
+ pprint.pprint(retval.__dict__)
+
+
+if __name__ == "__main__":
+ main()
diff --git a/Tools/build/update_file.py b/Tools/build/update_file.py
new file mode 100644
index 0000000..b4182c1
--- /dev/null
+++ b/Tools/build/update_file.py
@@ -0,0 +1,92 @@
+"""
+A script that replaces an old file with a new one, only if the contents
+actually changed. If not, the new file is simply deleted.
+
+This avoids wholesale rebuilds when a code (re)generation phase does not
+actually change the in-tree generated code.
+"""
+
+import contextlib
+import os
+import os.path
+import sys
+
+
+@contextlib.contextmanager
+def updating_file_with_tmpfile(filename, tmpfile=None):
+ """A context manager for updating a file via a temp file.
+
+ The context manager provides two open files: the source file open
+ for reading, and the temp file, open for writing.
+
+ Upon exiting: both files are closed, and the source file is replaced
+ with the temp file.
+ """
+ # XXX Optionally use tempfile.TemporaryFile?
+ if not tmpfile:
+ tmpfile = filename + '.tmp'
+ elif os.path.isdir(tmpfile):
+ tmpfile = os.path.join(tmpfile, filename + '.tmp')
+
+ with open(filename, 'rb') as infile:
+ line = infile.readline()
+
+ if line.endswith(b'\r\n'):
+ newline = "\r\n"
+ elif line.endswith(b'\r'):
+ newline = "\r"
+ elif line.endswith(b'\n'):
+ newline = "\n"
+ else:
+ raise ValueError(f"unknown end of line: {filename}: {line!a}")
+
+ with open(tmpfile, 'w', newline=newline) as outfile:
+ with open(filename) as infile:
+ yield infile, outfile
+ update_file_with_tmpfile(filename, tmpfile)
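+
+# A minimal usage sketch (hypothetical caller; the file path and the
+# `transform` helper are illustrative, not part of this module):
+#
+#     with updating_file_with_tmpfile("Python/generated_file.c") as (infile, outfile):
+#         for line in infile:
+#             outfile.write(transform(line))
+#
+# The target file is only replaced when the regenerated contents differ.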
+
+
+def update_file_with_tmpfile(filename, tmpfile, *, create=False):
+ try:
+ targetfile = open(filename, 'rb')
+ except FileNotFoundError:
+ if not create:
+ raise # re-raise
+ outcome = 'created'
+ os.replace(tmpfile, filename)
+ else:
+ with targetfile:
+ old_contents = targetfile.read()
+ with open(tmpfile, 'rb') as f:
+ new_contents = f.read()
+ # Now compare!
+ if old_contents != new_contents:
+ outcome = 'updated'
+ os.replace(tmpfile, filename)
+ else:
+ outcome = 'same'
+ os.unlink(tmpfile)
+ return outcome
+
+
+if __name__ == '__main__':
+ import argparse
+ parser = argparse.ArgumentParser()
+ parser.add_argument('--create', action='store_true')
+ parser.add_argument('--exitcode', action='store_true')
+ parser.add_argument('filename', help='path to be updated')
+ parser.add_argument('tmpfile', help='path with new contents')
+ args = parser.parse_args()
+ kwargs = vars(args)
+ setexitcode = kwargs.pop('exitcode')
+
+ outcome = update_file_with_tmpfile(**kwargs)
+ if setexitcode:
+ if outcome == 'same':
+ sys.exit(0)
+ elif outcome == 'updated':
+ sys.exit(1)
+ elif outcome == 'created':
+ sys.exit(2)
+ else:
+ raise NotImplementedError
diff --git a/Tools/build/verify_ensurepip_wheels.py b/Tools/build/verify_ensurepip_wheels.py
new file mode 100755
index 0000000..044d1fd
--- /dev/null
+++ b/Tools/build/verify_ensurepip_wheels.py
@@ -0,0 +1,98 @@
+#! /usr/bin/env python3
+
+"""
+Compare checksums for wheels in :mod:`ensurepip` against the Cheeseshop.
+
+When GitHub Actions executes the script, output is formatted accordingly.
+https://docs.github.com/en/actions/using-workflows/workflow-commands-for-github-actions#setting-a-notice-message
+"""
+
+import hashlib
+import json
+import os
+import re
+from pathlib import Path
+from urllib.request import urlopen
+
+PACKAGE_NAMES = ("pip", "setuptools")
+ENSURE_PIP_ROOT = Path(__file__).parent.parent.parent / "Lib/ensurepip"
+WHEEL_DIR = ENSURE_PIP_ROOT / "_bundled"
+ENSURE_PIP_INIT_PY_TEXT = (ENSURE_PIP_ROOT / "__init__.py").read_text(encoding="utf-8")
+GITHUB_ACTIONS = os.getenv("GITHUB_ACTIONS") == "true"
+
+
+def print_notice(file_path: str, message: str) -> None:
+ if GITHUB_ACTIONS:
+ message = f"::notice file={file_path}::{message}"
+ print(message, end="\n\n")
+
+
+def print_error(file_path: str, message: str) -> None:
+ if GITHUB_ACTIONS:
+ message = f"::error file={file_path}::{message}"
+ print(message, end="\n\n")
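+
+# Under GitHub Actions, the helpers above emit workflow commands of the form
+# (illustrative):
+#
+#     ::error file=<wheel path>::<message>
+#
+# which GitHub renders as inline annotations on the workflow run.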
+
+
+def verify_wheel(package_name: str) -> bool:
+ # Find the package on disk
+ package_path = next(WHEEL_DIR.glob(f"{package_name}*.whl"), None)
+ if not package_path:
+ print_error("", f"Could not find a {package_name} wheel on disk.")
+ return False
+
+ print(f"Verifying checksum for {package_path}.")
+
+ # Find the version of the package used by ensurepip
+ package_version_match = re.search(
+ f'_{package_name.upper()}_VERSION = "([^"]+)', ENSURE_PIP_INIT_PY_TEXT
+ )
+ if not package_version_match:
+ print_error(
+ package_path,
+ f"No {package_name} version found in Lib/ensurepip/__init__.py.",
+ )
+ return False
+ package_version = package_version_match[1]
+
+ # Get the SHA 256 digest from the Cheeseshop
+ try:
+ raw_text = urlopen(f"https://pypi.org/pypi/{package_name}/json").read()
+ except (OSError, ValueError):
+ print_error(package_path, f"Could not fetch JSON metadata for {package_name}.")
+ return False
+
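+    # The PyPI JSON payload maps each release version to a list of file
+    # entries, each with a "filename" and a "digests" mapping; abridged shape:
+    #
+    #     {"releases": {"<version>": [{"filename": ..., "digests": {"sha256": ...}}]}}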
+ release_files = json.loads(raw_text)["releases"][package_version]
+ for release_info in release_files:
+ if package_path.name != release_info["filename"]:
+ continue
+ expected_digest = release_info["digests"].get("sha256", "")
+ break
+ else:
+ print_error(package_path, f"No digest for {package_name} found from PyPI.")
+ return False
+
+ # Compute the SHA 256 digest of the wheel on disk
+ actual_digest = hashlib.sha256(package_path.read_bytes()).hexdigest()
+
+ print(f"Expected digest: {expected_digest}")
+ print(f"Actual digest: {actual_digest}")
+
+ if actual_digest != expected_digest:
+ print_error(
+ package_path, f"Failed to verify the checksum of the {package_name} wheel."
+ )
+ return False
+
+ print_notice(
+ package_path,
+ f"Successfully verified the checksum of the {package_name} wheel.",
+ )
+ return True
+
+
+if __name__ == "__main__":
+ exit_status = 0
+ for package_name in PACKAGE_NAMES:
+ if not verify_wheel(package_name):
+ exit_status = 1
+ raise SystemExit(exit_status)