author     Victor Stinner <vstinner@python.org>    2022-10-17 10:01:00 (GMT)
committer  GitHub <noreply@github.com>             2022-10-17 10:01:00 (GMT)
commit     1863302d61a7a5dd8b8d345a00f0ee242c7c10bf (patch)
tree       a1e41af02147e2a14155d5b19d7b68bbb31c3f6f /Tools/scripts
parent     eae7dad40255bad42e4abce53ff8143dcbc66af5 (diff)
gh-97669: Create Tools/build/ directory (#97963)
Create Tools/build/ directory. Move the following scripts from
Tools/scripts/ to Tools/build/:

* check_extension_modules.py
* deepfreeze.py
* freeze_modules.py
* generate_global_objects.py
* generate_levenshtein_examples.py
* generate_opcode_h.py
* generate_re_casefix.py
* generate_sre_constants.py
* generate_stdlib_module_names.py
* generate_token.py
* parse_html5_entities.py
* smelly.py
* stable_abi.py
* umarshal.py
* update_file.py
* verify_ensurepip_wheels.py

Update references to these scripts.
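For example, the deepfreeze rule that freeze_modules.py writes into Makefile.pre.in (the old path is still visible in the deleted freeze_modules.py below) is updated along these lines (illustrative):

    -	$(PYTHON_FOR_FREEZE) $(srcdir)/Tools/scripts/deepfreeze.py \
    +	$(PYTHON_FOR_FREEZE) $(srcdir)/Tools/build/deepfreeze.py \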
Diffstat (limited to 'Tools/scripts')
-rw-r--r--  Tools/scripts/check_extension_modules.py       484
-rw-r--r--  Tools/scripts/deepfreeze.py                     504
-rw-r--r--  Tools/scripts/freeze_modules.py                 733
-rw-r--r--  Tools/scripts/generate_global_objects.py        381
-rw-r--r--  Tools/scripts/generate_levenshtein_examples.py   70
-rw-r--r--  Tools/scripts/generate_opcode_h.py              199
-rwxr-xr-x  Tools/scripts/generate_re_casefix.py             94
-rwxr-xr-x  Tools/scripts/generate_sre_constants.py          78
-rw-r--r--  Tools/scripts/generate_stdlib_module_names.py   137
-rwxr-xr-x  Tools/scripts/generate_token.py                 275
-rwxr-xr-x  Tools/scripts/parse_html5_entities.py           114
-rwxr-xr-x  Tools/scripts/smelly.py                         173
-rwxr-xr-x  Tools/scripts/stable_abi.py                     754
-rw-r--r--  Tools/scripts/umarshal.py                       325
-rw-r--r--  Tools/scripts/update_file.py                     92
-rwxr-xr-x  Tools/scripts/verify_ensurepip_wheels.py         98
16 files changed, 0 insertions, 4511 deletions
diff --git a/Tools/scripts/check_extension_modules.py b/Tools/scripts/check_extension_modules.py
deleted file mode 100644
index 59239c6..0000000
--- a/Tools/scripts/check_extension_modules.py
+++ /dev/null
@@ -1,484 +0,0 @@
-"""Check extension modules
-
-The script checks shared and built-in extension modules. It verifies that the
-modules have been built and that they can be imported successfully. Missing
-modules and failed imports are reported to the user. Shared extension
-files are renamed on failed import.
-
-Module information is parsed from several sources:
-
-- core modules hard-coded in Modules/config.c.in
-- Windows-specific modules that are hard-coded in PC/config.c
-- MODULE_{name}_STATE entries in Makefile (provided through sysconfig)
-- Various makesetup files:
- - $(srcdir)/Modules/Setup
- - Modules/Setup.[local|bootstrap|stdlib] files, which are generated
- from $(srcdir)/Modules/Setup.*.in files
-
-See --help for more information
-"""
-import argparse
-import collections
-import enum
-import logging
-import os
-import pathlib
-import re
-import sys
-import sysconfig
-import warnings
-
-from importlib._bootstrap import _load as bootstrap_load
-from importlib.machinery import BuiltinImporter, ExtensionFileLoader, ModuleSpec
-from importlib.util import spec_from_file_location, spec_from_loader
-from typing import Iterable
-
-SRC_DIR = pathlib.Path(__file__).parent.parent.parent
-
-# core modules, hard-coded in Modules/config.c.in
-CORE_MODULES = {
- "_ast",
- "_imp",
- "_string",
- "_tokenize",
- "_warnings",
- "builtins",
- "gc",
- "marshal",
- "sys",
-}
-
-# Windows-only modules
-WINDOWS_MODULES = {
- "_msi",
- "_overlapped",
- "_testconsole",
- "_winapi",
- "msvcrt",
- "nt",
- "winreg",
- "winsound",
-}
-
-
-logger = logging.getLogger(__name__)
-
-parser = argparse.ArgumentParser(
- prog="check_extension_modules",
- description=__doc__,
- formatter_class=argparse.RawDescriptionHelpFormatter,
-)
-
-parser.add_argument(
- "--verbose",
- action="store_true",
- help="Verbose, report builtin, shared, and unavailable modules",
-)
-
-parser.add_argument(
- "--debug",
- action="store_true",
- help="Enable debug logging",
-)
-
-parser.add_argument(
- "--strict",
- action=argparse.BooleanOptionalAction,
- help=(
- "Strict check, fail when a module is missing or fails to import"
- "(default: no, unless env var PYTHONSTRICTEXTENSIONBUILD is set)"
- ),
- default=bool(os.environ.get("PYTHONSTRICTEXTENSIONBUILD")),
-)
-
-parser.add_argument(
- "--cross-compiling",
- action=argparse.BooleanOptionalAction,
- help=(
- "Use cross-compiling checks "
- "(default: no, unless env var _PYTHON_HOST_PLATFORM is set)."
- ),
- default="_PYTHON_HOST_PLATFORM" in os.environ,
-)
-
-parser.add_argument(
- "--list-module-names",
- action="store_true",
- help="Print a list of module names to stdout and exit",
-)
-
-
-class ModuleState(enum.Enum):
- # Makefile state "yes"
- BUILTIN = "builtin"
- SHARED = "shared"
-
- DISABLED = "disabled"
- MISSING = "missing"
- NA = "n/a"
- # disabled by Setup / makesetup rule
- DISABLED_SETUP = "disabled_setup"
-
- def __bool__(self):
- return self.value in {"builtin", "shared"}
-
-
-ModuleInfo = collections.namedtuple("ModuleInfo", "name state")
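Truthiness of a ModuleState maps directly to "the module was built"; for example:

    >>> bool(ModuleState.BUILTIN), bool(ModuleState.DISABLED)
    (True, False)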
-
-
-class ModuleChecker:
- pybuilddir_txt = "pybuilddir.txt"
-
- setup_files = (
- # see end of configure.ac
- "Modules/Setup.local",
- "Modules/Setup.stdlib",
- "Modules/Setup.bootstrap",
- SRC_DIR / "Modules/Setup",
- )
-
- def __init__(self, cross_compiling: bool = False, strict: bool = False):
- self.cross_compiling = cross_compiling
- self.strict_extensions_build = strict
- self.ext_suffix = sysconfig.get_config_var("EXT_SUFFIX")
- self.platform = sysconfig.get_platform()
- self.builddir = self.get_builddir()
- self.modules = self.get_modules()
-
- self.builtin_ok = []
- self.shared_ok = []
- self.failed_on_import = []
- self.missing = []
- self.disabled_configure = []
- self.disabled_setup = []
- self.notavailable = []
-
- def check(self):
- for modinfo in self.modules:
- logger.debug("Checking '%s' (%s)", modinfo.name, self.get_location(modinfo))
- if modinfo.state == ModuleState.DISABLED:
- self.disabled_configure.append(modinfo)
- elif modinfo.state == ModuleState.DISABLED_SETUP:
- self.disabled_setup.append(modinfo)
- elif modinfo.state == ModuleState.MISSING:
- self.missing.append(modinfo)
- elif modinfo.state == ModuleState.NA:
- self.notavailable.append(modinfo)
- else:
- try:
- if self.cross_compiling:
- self.check_module_cross(modinfo)
- else:
- self.check_module_import(modinfo)
- except (ImportError, FileNotFoundError):
- self.rename_module(modinfo)
- self.failed_on_import.append(modinfo)
- else:
- if modinfo.state == ModuleState.BUILTIN:
- self.builtin_ok.append(modinfo)
- else:
- assert modinfo.state == ModuleState.SHARED
- self.shared_ok.append(modinfo)
-
- def summary(self, *, verbose: bool = False):
- longest = max([len(e.name) for e in self.modules], default=0)
-
- def print_three_column(modinfos: list[ModuleInfo]):
- names = [modinfo.name for modinfo in modinfos]
- names.sort(key=str.lower)
- # guarantee zip() doesn't drop anything
- while len(names) % 3:
- names.append("")
- for l, m, r in zip(names[::3], names[1::3], names[2::3]):
- print("%-*s %-*s %-*s" % (longest, l, longest, m, longest, r))
-
- if verbose and self.builtin_ok:
- print("The following *built-in* modules have been successfully built:")
- print_three_column(self.builtin_ok)
- print()
-
- if verbose and self.shared_ok:
- print("The following *shared* modules have been successfully built:")
- print_three_column(self.shared_ok)
- print()
-
- if self.disabled_configure:
- print("The following modules are *disabled* in configure script:")
- print_three_column(self.disabled_configure)
- print()
-
- if self.disabled_setup:
- print("The following modules are *disabled* in Modules/Setup files:")
- print_three_column(self.disabled_setup)
- print()
-
- if verbose and self.notavailable:
- print(
- f"The following modules are not available on platform '{self.platform}':"
- )
- print_three_column(self.notavailable)
- print()
-
- if self.missing:
- print("The necessary bits to build these optional modules were not found:")
- print_three_column(self.missing)
- print("To find the necessary bits, look in configure.ac and config.log.")
- print()
-
- if self.failed_on_import:
- print(
- "Following modules built successfully "
- "but were removed because they could not be imported:"
- )
- print_three_column(self.failed_on_import)
- print()
-
- if any(
- modinfo.name == "_ssl" for modinfo in self.missing + self.failed_on_import
- ):
- print("Could not build the ssl module!")
- print("Python requires a OpenSSL 1.1.1 or newer")
- if sysconfig.get_config_var("OPENSSL_LDFLAGS"):
- print("Custom linker flags may require --with-openssl-rpath=auto")
- print()
-
- disabled = len(self.disabled_configure) + len(self.disabled_setup)
- print(
- f"Checked {len(self.modules)} modules ("
- f"{len(self.builtin_ok)} built-in, "
- f"{len(self.shared_ok)} shared, "
- f"{len(self.notavailable)} n/a on {self.platform}, "
- f"{disabled} disabled, "
- f"{len(self.missing)} missing, "
- f"{len(self.failed_on_import)} failed on import)"
- )
-
- def check_strict_build(self):
- """Fail if modules are missing and it's a strict build"""
- if self.strict_extensions_build and (self.failed_on_import or self.missing):
- raise RuntimeError("Failed to build some stdlib modules")
-
- def list_module_names(self, *, all: bool = False) -> set:
- names = {modinfo.name for modinfo in self.modules}
- if all:
- names.update(WINDOWS_MODULES)
- return names
-
- def get_builddir(self) -> pathlib.Path:
- try:
- with open(self.pybuilddir_txt, encoding="utf-8") as f:
- builddir = f.read()
- except FileNotFoundError:
- logger.error("%s must be run from the top build directory", __file__)
- raise
- builddir = pathlib.Path(builddir)
- logger.debug("%s: %s", self.pybuilddir_txt, builddir)
- return builddir
-
- def get_modules(self) -> list[ModuleInfo]:
- """Get module info from sysconfig and Modules/Setup* files"""
- seen = set()
- modules = []
- # parsing order is important, first entry wins
- for modinfo in self.get_core_modules():
- modules.append(modinfo)
- seen.add(modinfo.name)
- for setup_file in self.setup_files:
- for modinfo in self.parse_setup_file(setup_file):
- if modinfo.name not in seen:
- modules.append(modinfo)
- seen.add(modinfo.name)
- for modinfo in self.get_sysconfig_modules():
- if modinfo.name not in seen:
- modules.append(modinfo)
- seen.add(modinfo.name)
- logger.debug("Found %i modules in total", len(modules))
- modules.sort()
- return modules
-
- def get_core_modules(self) -> Iterable[ModuleInfo]:
- """Get hard-coded core modules"""
- for name in CORE_MODULES:
- modinfo = ModuleInfo(name, ModuleState.BUILTIN)
- logger.debug("Found core module %s", modinfo)
- yield modinfo
-
- def get_sysconfig_modules(self) -> Iterable[ModuleInfo]:
- """Get modules defined in Makefile through sysconfig
-
- MODBUILT_NAMES: modules in *static* block
- MODSHARED_NAMES: modules in *shared* block
- MODDISABLED_NAMES: modules in *disabled* block
- """
- moddisabled = set(sysconfig.get_config_var("MODDISABLED_NAMES").split())
- if self.cross_compiling:
- modbuiltin = set(sysconfig.get_config_var("MODBUILT_NAMES").split())
- else:
- modbuiltin = set(sys.builtin_module_names)
-
- for key, value in sysconfig.get_config_vars().items():
- if not key.startswith("MODULE_") or not key.endswith("_STATE"):
- continue
- if value not in {"yes", "disabled", "missing", "n/a"}:
- raise ValueError(f"Unsupported value '{value}' for {key}")
-
- modname = key[7:-6].lower()
- if modname in moddisabled:
- # Setup "*disabled*" rule
- state = ModuleState.DISABLED_SETUP
- elif value in {"disabled", "missing", "n/a"}:
- state = ModuleState(value)
- elif modname in modbuiltin:
- assert value == "yes"
- state = ModuleState.BUILTIN
- else:
- assert value == "yes"
- state = ModuleState.SHARED
-
- modinfo = ModuleInfo(modname, state)
- logger.debug("Found %s in Makefile", modinfo)
- yield modinfo
-
- def parse_setup_file(self, setup_file: pathlib.Path) -> Iterable[ModuleInfo]:
- """Parse a Modules/Setup file"""
- assign_var = re.compile(r"^\w+=") # EGG_SPAM=foo
- # default to static module
- state = ModuleState.BUILTIN
- logger.debug("Parsing Setup file %s", setup_file)
- with open(setup_file, encoding="utf-8") as f:
- for line in f:
- line = line.strip()
- if not line or line.startswith("#") or assign_var.match(line):
- continue
- match line.split():
- case ["*shared*"]:
- state = ModuleState.SHARED
- case ["*static*"]:
- state = ModuleState.BUILTIN
- case ["*disabled*"]:
- state = ModuleState.DISABLED
- case ["*noconfig*"]:
- state = None
- case [*items]:
- if state == ModuleState.DISABLED:
- # *disabled* can disable multiple modules per line
- for item in items:
- modinfo = ModuleInfo(item, state)
- logger.debug("Found %s in %s", modinfo, setup_file)
- yield modinfo
- elif state in {ModuleState.SHARED, ModuleState.BUILTIN}:
- # *shared* and *static*, first item is the name of the module.
- modinfo = ModuleInfo(items[0], state)
- logger.debug("Found %s in %s", modinfo, setup_file)
- yield modinfo
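For illustration, a Setup fragment like this (module names illustrative):

    *shared*
    _testcapi _testcapimodule.c
    *disabled*
    _sqlite3 audioop

makes parse_setup_file() yield ModuleInfo("_testcapi", ModuleState.SHARED), then ModuleInfo("_sqlite3", ModuleState.DISABLED) and ModuleInfo("audioop", ModuleState.DISABLED): for *shared*/*static* lines only the first token names the module, while a single *disabled* line may list several modules.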
-
- def get_spec(self, modinfo: ModuleInfo) -> ModuleSpec:
- """Get ModuleSpec for builtin or extension module"""
- if modinfo.state == ModuleState.SHARED:
- location = os.fspath(self.get_location(modinfo))
- loader = ExtensionFileLoader(modinfo.name, location)
- return spec_from_file_location(modinfo.name, location, loader=loader)
- elif modinfo.state == ModuleState.BUILTIN:
- return spec_from_loader(modinfo.name, loader=BuiltinImporter)
- else:
- raise ValueError(modinfo)
-
- def get_location(self, modinfo: ModuleInfo) -> pathlib.Path:
- """Get shared library location in build directory"""
- if modinfo.state == ModuleState.SHARED:
- return self.builddir / f"{modinfo.name}{self.ext_suffix}"
- else:
- return None
-
- def _check_file(self, modinfo: ModuleInfo, spec: ModuleSpec):
- """Check that the module file is present and not empty"""
- if spec.loader is BuiltinImporter:
- return
- try:
- st = os.stat(spec.origin)
- except FileNotFoundError:
- logger.error("%s (%s) is missing", modinfo.name, spec.origin)
- raise
- if not st.st_size:
- raise ImportError(f"{spec.origin} is an empty file")
-
- def check_module_import(self, modinfo: ModuleInfo):
- """Attempt to import module and report errors"""
- spec = self.get_spec(modinfo)
- self._check_file(modinfo, spec)
- try:
- with warnings.catch_warnings():
- # ignore deprecation warning from deprecated modules
- warnings.simplefilter("ignore", DeprecationWarning)
- bootstrap_load(spec)
- except ImportError as e:
- logger.error("%s failed to import: %s", modinfo.name, e)
- raise
- except Exception as e:
- logger.exception("Importing extension '%s' failed!", modinfo.name)
- raise
-
- def check_module_cross(self, modinfo: ModuleInfo):
- """Sanity check for cross compiling"""
- spec = self.get_spec(modinfo)
- self._check_file(modinfo, spec)
-
- def rename_module(self, modinfo: ModuleInfo) -> None:
- """Rename module file"""
- if modinfo.state == ModuleState.BUILTIN:
- logger.error("Cannot mark builtin module '%s' as failed!", modinfo.name)
- return
-
- failed_name = f"{modinfo.name}_failed{self.ext_suffix}"
- builddir_path = self.get_location(modinfo)
- if builddir_path.is_symlink():
- symlink = builddir_path
- module_path = builddir_path.resolve().relative_to(os.getcwd())
- failed_path = module_path.parent / failed_name
- else:
- symlink = None
- module_path = builddir_path
- failed_path = self.builddir / failed_name
-
- # remove old failed file
- failed_path.unlink(missing_ok=True)
- # remove symlink
- if symlink is not None:
- symlink.unlink(missing_ok=True)
- # rename shared extension file
- try:
- module_path.rename(failed_path)
- except FileNotFoundError:
- logger.debug("Shared extension file '%s' does not exist.", module_path)
- else:
- logger.debug("Rename '%s' -> '%s'", module_path, failed_path)
-
-
-def main():
- args = parser.parse_args()
- if args.debug:
- args.verbose = True
- logging.basicConfig(
- level=logging.DEBUG if args.debug else logging.INFO,
- format="[%(levelname)s] %(message)s",
- )
-
- checker = ModuleChecker(
- cross_compiling=args.cross_compiling,
- strict=args.strict,
- )
- if args.list_module_names:
- names = checker.list_module_names(all=True)
- for name in sorted(names):
- print(name)
- else:
- checker.check()
- checker.summary(verbose=args.verbose)
- try:
- checker.check_strict_build()
- except RuntimeError as e:
- parser.exit(1, f"\nError: {e}\n")
-
-
-if __name__ == "__main__":
- main()
diff --git a/Tools/scripts/deepfreeze.py b/Tools/scripts/deepfreeze.py
deleted file mode 100644
index 28ac2b1..0000000
--- a/Tools/scripts/deepfreeze.py
+++ /dev/null
@@ -1,504 +0,0 @@
-"""Deep freeze
-
-The script may be executed by the _bootstrap_python interpreter.
-Shared library extension modules are not available in that case.
-On Windows, and in cross-compilation cases, it is executed
-by Python 3.10, and 3.11 features are not available.
-"""
-import argparse
-import ast
-import builtins
-import collections
-import contextlib
-import os
-import re
-import time
-import types
-from typing import Dict, FrozenSet, TextIO, Tuple
-
-import umarshal
-from generate_global_objects import get_identifiers_and_strings
-
-verbose = False
-identifiers, strings = get_identifiers_and_strings()
-
-# This must be kept in sync with opcode.py
-RESUME = 151
-
-def isprintable(b: bytes) -> bool:
- return all(0x20 <= c < 0x7f for c in b)
-
-
-def make_string_literal(b: bytes) -> str:
- res = ['"']
- if isprintable(b):
- res.append(b.decode("ascii").replace("\\", "\\\\").replace("\"", "\\\""))
- else:
- for i in b:
- res.append(f"\\x{i:02x}")
- res.append('"')
- return "".join(res)
-
-
-CO_FAST_LOCAL = 0x20
-CO_FAST_CELL = 0x40
-CO_FAST_FREE = 0x80
-
-
-def get_localsplus(code: types.CodeType):
- a = collections.defaultdict(int)
- for name in code.co_varnames:
- a[name] |= CO_FAST_LOCAL
- for name in code.co_cellvars:
- a[name] |= CO_FAST_CELL
- for name in code.co_freevars:
- a[name] |= CO_FAST_FREE
- return tuple(a.keys()), bytes(a.values())
-
-
-def get_localsplus_counts(code: types.CodeType,
- names: Tuple[str, ...],
- kinds: bytes) -> Tuple[int, int, int, int]:
- nlocals = 0
- nplaincellvars = 0
- ncellvars = 0
- nfreevars = 0
- assert len(names) == len(kinds)
- for name, kind in zip(names, kinds):
- if kind & CO_FAST_LOCAL:
- nlocals += 1
- if kind & CO_FAST_CELL:
- ncellvars += 1
- elif kind & CO_FAST_CELL:
- ncellvars += 1
- nplaincellvars += 1
- elif kind & CO_FAST_FREE:
- nfreevars += 1
- assert nlocals == len(code.co_varnames) == code.co_nlocals, \
- (nlocals, len(code.co_varnames), code.co_nlocals)
- assert ncellvars == len(code.co_cellvars)
- assert nfreevars == len(code.co_freevars)
- assert len(names) == nlocals + nplaincellvars + nfreevars
- return nlocals, nplaincellvars, ncellvars, nfreevars
-
-
-PyUnicode_1BYTE_KIND = 1
-PyUnicode_2BYTE_KIND = 2
-PyUnicode_4BYTE_KIND = 4
-
-
-def analyze_character_width(s: str) -> Tuple[int, bool]:
- maxchar = ' '
- for c in s:
- maxchar = max(maxchar, c)
- ascii = False
- if maxchar <= '\xFF':
- kind = PyUnicode_1BYTE_KIND
- ascii = maxchar <= '\x7F'
- elif maxchar <= '\uFFFF':
- kind = PyUnicode_2BYTE_KIND
- else:
- kind = PyUnicode_4BYTE_KIND
- return kind, ascii
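The returned pair is (PyUnicode kind, is_ascii); for example:

    >>> analyze_character_width("spam")
    (1, True)
    >>> analyze_character_width("caf\xe9")
    (1, False)
    >>> analyze_character_width("\u0394")
    (2, False)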
-
-
-def removesuffix(base: str, suffix: str) -> str:
- if base.endswith(suffix):
- return base[:len(base) - len(suffix)]
- return base
-
-class Printer:
-
- def __init__(self, file: TextIO) -> None:
- self.level = 0
- self.file = file
- self.cache: Dict[tuple[type, object, str], str] = {}
- self.hits, self.misses = 0, 0
- self.patchups: list[str] = []
- self.deallocs: list[str] = []
- self.interns: list[str] = []
- self.write('#include "Python.h"')
- self.write('#include "internal/pycore_gc.h"')
- self.write('#include "internal/pycore_code.h"')
- self.write('#include "internal/pycore_frame.h"')
- self.write('#include "internal/pycore_long.h"')
- self.write("")
-
- @contextlib.contextmanager
- def indent(self) -> None:
- save_level = self.level
- try:
- self.level += 1
- yield
- finally:
- self.level = save_level
-
- def write(self, arg: str) -> None:
- self.file.writelines((" "*self.level, arg, "\n"))
-
- @contextlib.contextmanager
- def block(self, prefix: str, suffix: str = "") -> None:
- self.write(prefix + " {")
- with self.indent():
- yield
- self.write("}" + suffix)
-
- def object_head(self, typename: str) -> None:
- with self.block(".ob_base =", ","):
- self.write(f".ob_refcnt = 999999999,")
- self.write(f".ob_type = &{typename},")
-
- def object_var_head(self, typename: str, size: int) -> None:
- with self.block(".ob_base =", ","):
- self.object_head(typename)
- self.write(f".ob_size = {size},")
-
- def field(self, obj: object, name: str) -> None:
- self.write(f".{name} = {getattr(obj, name)},")
-
- def generate_bytes(self, name: str, b: bytes) -> str:
- if b == b"":
- return "(PyObject *)&_Py_SINGLETON(bytes_empty)"
- if len(b) == 1:
- return f"(PyObject *)&_Py_SINGLETON(bytes_characters[{b[0]}])"
- self.write("static")
- with self.indent():
- with self.block("struct"):
- self.write("PyObject_VAR_HEAD")
- self.write("Py_hash_t ob_shash;")
- self.write(f"char ob_sval[{len(b) + 1}];")
- with self.block(f"{name} =", ";"):
- self.object_var_head("PyBytes_Type", len(b))
- self.write(".ob_shash = -1,")
- self.write(f".ob_sval = {make_string_literal(b)},")
- return f"& {name}.ob_base.ob_base"
-
- def generate_unicode(self, name: str, s: str) -> str:
- if s in strings:
- return f"&_Py_STR({strings[s]})"
- if s in identifiers:
- return f"&_Py_ID({s})"
- if re.match(r'\A[A-Za-z0-9_]+\Z', s):
- name = f"const_str_{s}"
- kind, ascii = analyze_character_width(s)
- if kind == PyUnicode_1BYTE_KIND:
- datatype = "uint8_t"
- elif kind == PyUnicode_2BYTE_KIND:
- datatype = "uint16_t"
- else:
- datatype = "uint32_t"
- self.write("static")
- with self.indent():
- with self.block("struct"):
- if ascii:
- self.write("PyASCIIObject _ascii;")
- else:
- self.write("PyCompactUnicodeObject _compact;")
- self.write(f"{datatype} _data[{len(s)+1}];")
- with self.block(f"{name} =", ";"):
- if ascii:
- with self.block("._ascii =", ","):
- self.object_head("PyUnicode_Type")
- self.write(f".length = {len(s)},")
- self.write(".hash = -1,")
- with self.block(".state =", ","):
- self.write(".kind = 1,")
- self.write(".compact = 1,")
- self.write(".ascii = 1,")
- self.write(f"._data = {make_string_literal(s.encode('ascii'))},")
- return f"& {name}._ascii.ob_base"
- else:
- with self.block("._compact =", ","):
- with self.block("._base =", ","):
- self.object_head("PyUnicode_Type")
- self.write(f".length = {len(s)},")
- self.write(".hash = -1,")
- with self.block(".state =", ","):
- self.write(f".kind = {kind},")
- self.write(".compact = 1,")
- self.write(".ascii = 0,")
- utf8 = s.encode('utf-8')
- self.write(f'.utf8 = {make_string_literal(utf8)},')
- self.write(f'.utf8_length = {len(utf8)},')
- with self.block(f"._data =", ","):
- for i in range(0, len(s), 16):
- data = s[i:i+16]
- self.write(", ".join(map(str, map(ord, data))) + ",")
- return f"& {name}._compact._base.ob_base"
-
-
- def generate_code(self, name: str, code: types.CodeType) -> str:
- # The ordering here matches PyCode_NewWithPosOnlyArgs()
- # (but see below).
- co_consts = self.generate(name + "_consts", code.co_consts)
- co_names = self.generate(name + "_names", code.co_names)
- co_filename = self.generate(name + "_filename", code.co_filename)
- co_name = self.generate(name + "_name", code.co_name)
- co_qualname = self.generate(name + "_qualname", code.co_qualname)
- co_linetable = self.generate(name + "_linetable", code.co_linetable)
- co_exceptiontable = self.generate(name + "_exceptiontable", code.co_exceptiontable)
- # These fields are not directly accessible
- localsplusnames, localspluskinds = get_localsplus(code)
- co_localsplusnames = self.generate(name + "_localsplusnames", localsplusnames)
- co_localspluskinds = self.generate(name + "_localspluskinds", localspluskinds)
- # Derived values
- nlocals, nplaincellvars, ncellvars, nfreevars = \
- get_localsplus_counts(code, localsplusnames, localspluskinds)
- co_code_adaptive = make_string_literal(code.co_code)
- self.write("static")
- with self.indent():
- self.write(f"struct _PyCode_DEF({len(code.co_code)})")
- with self.block(f"{name} =", ";"):
- self.object_var_head("PyCode_Type", len(code.co_code) // 2)
- # But the ordering here must match that in cpython/code.h
- # (which is a pain because we tend to reorder those for perf)
- # otherwise MSVC doesn't like it.
- self.write(f".co_consts = {co_consts},")
- self.write(f".co_names = {co_names},")
- self.write(f".co_exceptiontable = {co_exceptiontable},")
- self.field(code, "co_flags")
- self.write(".co_warmup = QUICKENING_INITIAL_WARMUP_VALUE,")
- self.write("._co_linearray_entry_size = 0,")
- self.field(code, "co_argcount")
- self.field(code, "co_posonlyargcount")
- self.field(code, "co_kwonlyargcount")
- self.write(f".co_framesize = {code.co_stacksize + len(localsplusnames)} + FRAME_SPECIALS_SIZE,")
- self.field(code, "co_stacksize")
- self.field(code, "co_firstlineno")
- self.write(f".co_nlocalsplus = {len(localsplusnames)},")
- self.field(code, "co_nlocals")
- self.write(f".co_nplaincellvars = {nplaincellvars},")
- self.write(f".co_ncellvars = {ncellvars},")
- self.write(f".co_nfreevars = {nfreevars},")
- self.write(f".co_localsplusnames = {co_localsplusnames},")
- self.write(f".co_localspluskinds = {co_localspluskinds},")
- self.write(f".co_filename = {co_filename},")
- self.write(f".co_name = {co_name},")
- self.write(f".co_qualname = {co_qualname},")
- self.write(f".co_linetable = {co_linetable},")
- self.write(f"._co_cached = NULL,")
- self.write("._co_linearray = NULL,")
- self.write(f".co_code_adaptive = {co_code_adaptive},")
- for i, op in enumerate(code.co_code[::2]):
- if op == RESUME:
- self.write(f"._co_firsttraceable = {i},")
- break
- name_as_code = f"(PyCodeObject *)&{name}"
- self.deallocs.append(f"_PyStaticCode_Dealloc({name_as_code});")
- self.interns.append(f"_PyStaticCode_InternStrings({name_as_code})")
- return f"& {name}.ob_base.ob_base"
-
- def generate_tuple(self, name: str, t: Tuple[object, ...]) -> str:
- if len(t) == 0:
- return f"(PyObject *)& _Py_SINGLETON(tuple_empty)"
- items = [self.generate(f"{name}_{i}", it) for i, it in enumerate(t)]
- self.write("static")
- with self.indent():
- with self.block("struct"):
- self.write("PyGC_Head _gc_head;")
- with self.block("struct", "_object;"):
- self.write("PyObject_VAR_HEAD")
- if t:
- self.write(f"PyObject *ob_item[{len(t)}];")
- with self.block(f"{name} =", ";"):
- with self.block("._object =", ","):
- self.object_var_head("PyTuple_Type", len(t))
- if items:
- with self.block(f".ob_item =", ","):
- for item in items:
- self.write(item + ",")
- return f"& {name}._object.ob_base.ob_base"
-
- def _generate_int_for_bits(self, name: str, i: int, digit: int) -> None:
- sign = -1 if i < 0 else 0 if i == 0 else +1
- i = abs(i)
- digits: list[int] = []
- while i:
- i, rem = divmod(i, digit)
- digits.append(rem)
- self.write("static")
- with self.indent():
- with self.block("struct"):
- self.write("PyObject_VAR_HEAD")
- self.write(f"digit ob_digit[{max(1, len(digits))}];")
- with self.block(f"{name} =", ";"):
- self.object_var_head("PyLong_Type", sign*len(digits))
- if digits:
- ds = ", ".join(map(str, digits))
- self.write(f".ob_digit = {{ {ds} }},")
-
- def generate_int(self, name: str, i: int) -> str:
- if -5 <= i <= 256:
- return f"(PyObject *)&_PyLong_SMALL_INTS[_PY_NSMALLNEGINTS + {i}]"
- if i >= 0:
- name = f"const_int_{i}"
- else:
- name = f"const_int_negative_{abs(i)}"
- if abs(i) < 2**15:
- self._generate_int_for_bits(name, i, 2**15)
- else:
- connective = "if"
- for bits_in_digit in 15, 30:
- self.write(f"#{connective} PYLONG_BITS_IN_DIGIT == {bits_in_digit}")
- self._generate_int_for_bits(name, i, 2**bits_in_digit)
- connective = "elif"
- self.write("#else")
- self.write('#error "PYLONG_BITS_IN_DIGIT should be 15 or 30"')
- self.write("#endif")
- # If neither clause applies, it won't compile
- return f"& {name}.ob_base.ob_base"
-
- def generate_float(self, name: str, x: float) -> str:
- with self.block(f"static PyFloatObject {name} =", ";"):
- self.object_head("PyFloat_Type")
- self.write(f".ob_fval = {x},")
- return f"&{name}.ob_base"
-
- def generate_complex(self, name: str, z: complex) -> str:
- with self.block(f"static PyComplexObject {name} =", ";"):
- self.object_head("PyComplex_Type")
- self.write(f".cval = {{ {z.real}, {z.imag} }},")
- return f"&{name}.ob_base"
-
- def generate_frozenset(self, name: str, fs: FrozenSet[object]) -> str:
- try:
- fs = sorted(fs)
- except TypeError:
- # frozen set with incompatible types, fallback to repr()
- fs = sorted(fs, key=repr)
- ret = self.generate_tuple(name, tuple(fs))
- self.write("// TODO: The above tuple should be a frozenset")
- return ret
-
-    def generate_file(self, module: str, code: object) -> None:
- module = module.replace(".", "_")
- self.generate(f"{module}_toplevel", code)
- with self.block(f"static void {module}_do_patchups(void)"):
- for p in self.patchups:
- self.write(p)
- self.patchups.clear()
- self.write(EPILOGUE.replace("%%NAME%%", module))
-
- def generate(self, name: str, obj: object) -> str:
- # Use repr() in the key to distinguish -0.0 from +0.0
- key = (type(obj), obj, repr(obj))
- if key in self.cache:
- self.hits += 1
- # print(f"Cache hit {key!r:.40}: {self.cache[key]!r:.40}")
- return self.cache[key]
- self.misses += 1
-        if isinstance(obj, (types.CodeType, umarshal.Code)):
- val = self.generate_code(name, obj)
- elif isinstance(obj, tuple):
- val = self.generate_tuple(name, obj)
- elif isinstance(obj, str):
- val = self.generate_unicode(name, obj)
- elif isinstance(obj, bytes):
- val = self.generate_bytes(name, obj)
- elif obj is True:
- return "Py_True"
- elif obj is False:
- return "Py_False"
- elif isinstance(obj, int):
- val = self.generate_int(name, obj)
- elif isinstance(obj, float):
- val = self.generate_float(name, obj)
- elif isinstance(obj, complex):
- val = self.generate_complex(name, obj)
- elif isinstance(obj, frozenset):
- val = self.generate_frozenset(name, obj)
- elif obj is builtins.Ellipsis:
- return "Py_Ellipsis"
- elif obj is None:
- return "Py_None"
- else:
- raise TypeError(
- f"Cannot generate code for {type(obj).__name__} object")
- # print(f"Cache store {key!r:.40}: {val!r:.40}")
- self.cache[key] = val
- return val
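The repr() component of the cache key matters because equal values must not always share one frozen object:

    >>> -0.0 == 0.0
    True
    >>> repr(-0.0) == repr(0.0)
    False

With a key of just (type, value), -0.0 and +0.0 would collide in the cache.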
-
-
-EPILOGUE = """
-PyObject *
-_Py_get_%%NAME%%_toplevel(void)
-{
- %%NAME%%_do_patchups();
- return Py_NewRef((PyObject *) &%%NAME%%_toplevel);
-}
-"""
-
-FROZEN_COMMENT_C = "/* Auto-generated by Programs/_freeze_module.c */"
-FROZEN_COMMENT_PY = "/* Auto-generated by Programs/_freeze_module.py */"
-
-FROZEN_DATA_LINE = r"\s*(\d+,\s*)+\s*"
-
-
-def is_frozen_header(source: str) -> bool:
- return source.startswith((FROZEN_COMMENT_C, FROZEN_COMMENT_PY))
-
-
-def decode_frozen_data(source: str) -> types.CodeType:
- lines = source.splitlines()
- while lines and re.match(FROZEN_DATA_LINE, lines[0]) is None:
- del lines[0]
- while lines and re.match(FROZEN_DATA_LINE, lines[-1]) is None:
- del lines[-1]
- values: Tuple[int, ...] = ast.literal_eval("".join(lines).strip())
- data = bytes(values)
- return umarshal.loads(data)
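The data lines in a frozen header are comma-separated byte values (e.g. "99,205,13," — illustrative), so the joined text is a valid tuple literal:

    >>> ast.literal_eval("99,205,13,")
    (99, 205, 13)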
-
-
-def generate(args: list[str], output: TextIO) -> None:
- printer = Printer(output)
- for arg in args:
- file, modname = arg.rsplit(':', 1)
- with open(file, "r", encoding="utf8") as fd:
- source = fd.read()
- if is_frozen_header(source):
- code = decode_frozen_data(source)
- else:
-            code = compile(source, f"<frozen {modname}>", "exec")
- printer.generate_file(modname, code)
- with printer.block(f"void\n_Py_Deepfreeze_Fini(void)"):
- for p in printer.deallocs:
- printer.write(p)
- with printer.block(f"int\n_Py_Deepfreeze_Init(void)"):
- for p in printer.interns:
- with printer.block(f"if ({p} < 0)"):
- printer.write("return -1;")
- printer.write("return 0;")
- if verbose:
- print(f"Cache hits: {printer.hits}, misses: {printer.misses}")
-
-
-parser = argparse.ArgumentParser()
-parser.add_argument("-o", "--output", help="Defaults to deepfreeze.c", default="deepfreeze.c")
-parser.add_argument("-v", "--verbose", action="store_true", help="Print diagnostics")
-parser.add_argument('args', nargs="+", help="Input file and module name (required) in file:modname format")
-
-@contextlib.contextmanager
-def report_time(label: str):
- t0 = time.time()
- try:
- yield
- finally:
- t1 = time.time()
- if verbose:
- print(f"{label}: {t1-t0:.3f} sec")
-
-
-def main() -> None:
- global verbose
- args = parser.parse_args()
- verbose = args.verbose
- output = args.output
- with open(output, "w", encoding="utf-8") as file:
- with report_time("generate"):
- generate(args.args, file)
- if verbose:
- print(f"Wrote {os.path.getsize(output)} bytes to {output}")
-
-
-if __name__ == "__main__":
- main()
diff --git a/Tools/scripts/freeze_modules.py b/Tools/scripts/freeze_modules.py
deleted file mode 100644
index aa1e4fe..0000000
--- a/Tools/scripts/freeze_modules.py
+++ /dev/null
@@ -1,733 +0,0 @@
-"""Freeze modules and regen related files (e.g. Python/frozen.c).
-
-See the notes at the top of Python/frozen.c for more info.
-"""
-
-from collections import namedtuple
-import hashlib
-import os
-import ntpath
-import posixpath
-import argparse
-from update_file import updating_file_with_tmpfile
-
-
-ROOT_DIR = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
-ROOT_DIR = os.path.abspath(ROOT_DIR)
-FROZEN_ONLY = os.path.join(ROOT_DIR, 'Tools', 'freeze', 'flag.py')
-
-STDLIB_DIR = os.path.join(ROOT_DIR, 'Lib')
-# If FROZEN_MODULES_DIR or DEEPFROZEN_MODULES_DIR is changed then the
-# .gitattributes and .gitignore files need to be updated.
-FROZEN_MODULES_DIR = os.path.join(ROOT_DIR, 'Python', 'frozen_modules')
-DEEPFROZEN_MODULES_DIR = os.path.join(ROOT_DIR, 'Python', 'deepfreeze')
-
-FROZEN_FILE = os.path.join(ROOT_DIR, 'Python', 'frozen.c')
-MAKEFILE = os.path.join(ROOT_DIR, 'Makefile.pre.in')
-PCBUILD_PROJECT = os.path.join(ROOT_DIR, 'PCbuild', '_freeze_module.vcxproj')
-PCBUILD_FILTERS = os.path.join(ROOT_DIR, 'PCbuild', '_freeze_module.vcxproj.filters')
-PCBUILD_PYTHONCORE = os.path.join(ROOT_DIR, 'PCbuild', 'pythoncore.vcxproj')
-
-
-OS_PATH = 'ntpath' if os.name == 'nt' else 'posixpath'
-
-# These are modules that get frozen.
-TESTS_SECTION = 'Test module'
-FROZEN = [
- # See parse_frozen_spec() for the format.
- # In cases where the frozenid is duplicated, the first one is re-used.
- ('import system', [
- # These frozen modules are necessary for bootstrapping
- # the import system.
- 'importlib._bootstrap : _frozen_importlib',
- 'importlib._bootstrap_external : _frozen_importlib_external',
- # This module is important because some Python builds rely
- # on a builtin zip file instead of a filesystem.
- 'zipimport',
- ]),
- ('stdlib - startup, without site (python -S)', [
- 'abc',
- 'codecs',
-    # For now we do not freeze the encodings, due to the noise all
- # those extra modules add to the text printed during the build.
- # (See https://github.com/python/cpython/pull/28398#pullrequestreview-756856469.)
- #'<encodings.*>',
- 'io',
- ]),
- ('stdlib - startup, with site', [
- '_collections_abc',
- '_sitebuiltins',
- 'genericpath',
- 'ntpath',
- 'posixpath',
- # We must explicitly mark os.path as a frozen module
- # even though it will never be imported.
- f'{OS_PATH} : os.path',
- 'os',
- 'site',
- 'stat',
- ]),
- ('runpy - run module with -m', [
- "importlib.util",
- "importlib.machinery",
- "runpy",
- ]),
- (TESTS_SECTION, [
- '__hello__',
- '__hello__ : __hello_alias__',
- '__hello__ : <__phello_alias__>',
- '__hello__ : __phello_alias__.spam',
- '<__phello__.**.*>',
- f'frozen_only : __hello_only__ = {FROZEN_ONLY}',
- ]),
-]
-BOOTSTRAP = {
- 'importlib._bootstrap',
- 'importlib._bootstrap_external',
- 'zipimport',
-}
-
-
-#######################################
-# platform-specific helpers
-
-if os.path is posixpath:
- relpath_for_posix_display = os.path.relpath
-
- def relpath_for_windows_display(path, base):
- return ntpath.relpath(
- ntpath.join(*path.split(os.path.sep)),
- ntpath.join(*base.split(os.path.sep)),
- )
-
-else:
- relpath_for_windows_display = ntpath.relpath
-
- def relpath_for_posix_display(path, base):
- return posixpath.relpath(
- posixpath.join(*path.split(os.path.sep)),
- posixpath.join(*base.split(os.path.sep)),
- )
-
-
-#######################################
-# specs
-
-def parse_frozen_specs():
- seen = {}
- for section, specs in FROZEN:
- parsed = _parse_specs(specs, section, seen)
- for item in parsed:
- frozenid, pyfile, modname, ispkg, section = item
- try:
- source = seen[frozenid]
- except KeyError:
- source = FrozenSource.from_id(frozenid, pyfile)
- seen[frozenid] = source
- else:
- assert not pyfile or pyfile == source.pyfile, item
- yield FrozenModule(modname, ispkg, section, source)
-
-
-def _parse_specs(specs, section, seen):
- for spec in specs:
- info, subs = _parse_spec(spec, seen, section)
- yield info
- for info in subs or ():
- yield info
-
-
-def _parse_spec(spec, knownids=None, section=None):
- """Yield an info tuple for each module corresponding to the given spec.
-
- The info consists of: (frozenid, pyfile, modname, ispkg, section).
-
- Supported formats:
-
- frozenid
- frozenid : modname
- frozenid : modname = pyfile
-
- "frozenid" and "modname" must be valid module names (dot-separated
- identifiers). If "modname" is not provided then "frozenid" is used.
- If "pyfile" is not provided then the filename of the module
- corresponding to "frozenid" is used.
-
-    Angle brackets around a frozenid (e.g. '<encodings>') indicate
- it is a package. This also means it must be an actual module
- (i.e. "pyfile" cannot have been provided). Such values can have
- patterns to expand submodules:
-
- <encodings.*> - also freeze all direct submodules
- <encodings.**.*> - also freeze the full submodule tree
-
- As with "frozenid", angle brackets around "modname" indicate
- it is a package. However, in this case "pyfile" should not
- have been provided and patterns in "modname" are not supported.
- Also, if "modname" has brackets then "frozenid" should not,
- and "pyfile" should have been provided..
- """
- frozenid, _, remainder = spec.partition(':')
- modname, _, pyfile = remainder.partition('=')
- frozenid = frozenid.strip()
- modname = modname.strip()
- pyfile = pyfile.strip()
-
- submodules = None
- if modname.startswith('<') and modname.endswith('>'):
- assert check_modname(frozenid), spec
- modname = modname[1:-1]
- assert check_modname(modname), spec
- if frozenid in knownids:
- pass
- elif pyfile:
- assert not os.path.isdir(pyfile), spec
- else:
- pyfile = _resolve_module(frozenid, ispkg=False)
- ispkg = True
- elif pyfile:
- assert check_modname(frozenid), spec
- assert not knownids or frozenid not in knownids, spec
- assert check_modname(modname), spec
- assert not os.path.isdir(pyfile), spec
- ispkg = False
- elif knownids and frozenid in knownids:
- assert check_modname(frozenid), spec
- assert check_modname(modname), spec
- ispkg = False
- else:
- assert not modname or check_modname(modname), spec
- resolved = iter(resolve_modules(frozenid))
- frozenid, pyfile, ispkg = next(resolved)
- if not modname:
- modname = frozenid
- if ispkg:
- pkgid = frozenid
- pkgname = modname
- pkgfiles = {pyfile: pkgid}
- def iter_subs():
- for frozenid, pyfile, ispkg in resolved:
- if pkgname:
- modname = frozenid.replace(pkgid, pkgname, 1)
- else:
- modname = frozenid
- if pyfile:
- if pyfile in pkgfiles:
- frozenid = pkgfiles[pyfile]
- pyfile = None
- elif ispkg:
- pkgfiles[pyfile] = frozenid
- yield frozenid, pyfile, modname, ispkg, section
- submodules = iter_subs()
-
- info = (frozenid, pyfile or None, modname, ispkg, section)
- return info, submodules
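As a worked example, the spec 'importlib._bootstrap : _frozen_importlib' from the FROZEN table above parses to frozenid 'importlib._bootstrap' and modname '_frozen_importlib'; no pyfile is given and the id is not yet known, so resolve_modules() locates Lib/importlib/_bootstrap.py and ispkg is False.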
-
-
-#######################################
-# frozen source files
-
-class FrozenSource(namedtuple('FrozenSource', 'id pyfile frozenfile deepfreezefile')):
-
- @classmethod
- def from_id(cls, frozenid, pyfile=None):
- if not pyfile:
- pyfile = os.path.join(STDLIB_DIR, *frozenid.split('.')) + '.py'
- #assert os.path.exists(pyfile), (frozenid, pyfile)
- frozenfile = resolve_frozen_file(frozenid, FROZEN_MODULES_DIR)
- deepfreezefile = resolve_frozen_file(frozenid, DEEPFROZEN_MODULES_DIR)
- return cls(frozenid, pyfile, frozenfile, deepfreezefile)
-
- @property
- def frozenid(self):
- return self.id
-
- @property
- def modname(self):
- if self.pyfile.startswith(STDLIB_DIR):
- return self.id
- return None
-
- @property
- def symbol(self):
- # This matches what we do in Programs/_freeze_module.c:
- name = self.frozenid.replace('.', '_')
- return '_Py_M__' + name
-
- @property
- def ispkg(self):
- if not self.pyfile:
- return False
- elif self.frozenid.endswith('.__init__'):
- return False
- else:
- return os.path.basename(self.pyfile) == '__init__.py'
-
- @property
- def isbootstrap(self):
- return self.id in BOOTSTRAP
-
-
-def resolve_frozen_file(frozenid, destdir):
- """Return the filename corresponding to the given frozen ID.
-
- For stdlib modules the ID will always be the full name
- of the source module.
- """
- if not isinstance(frozenid, str):
- try:
- frozenid = frozenid.frozenid
- except AttributeError:
- raise ValueError(f'unsupported frozenid {frozenid!r}')
- # We use a consistent naming convention for all frozen modules.
- frozenfile = f'{frozenid}.h'
- if not destdir:
- return frozenfile
- return os.path.join(destdir, frozenfile)
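For example (POSIX-style paths):

    >>> resolve_frozen_file('importlib._bootstrap', 'Python/frozen_modules')
    'Python/frozen_modules/importlib._bootstrap.h'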
-
-
-#######################################
-# frozen modules
-
-class FrozenModule(namedtuple('FrozenModule', 'name ispkg section source')):
-
- def __getattr__(self, name):
- return getattr(self.source, name)
-
- @property
- def modname(self):
- return self.name
-
- @property
- def orig(self):
- return self.source.modname
-
- @property
- def isalias(self):
- orig = self.source.modname
- if not orig:
- return True
- return self.name != orig
-
- def summarize(self):
- source = self.source.modname
- if source:
- source = f'<{source}>'
- else:
- source = relpath_for_posix_display(self.pyfile, ROOT_DIR)
- return {
- 'module': self.name,
- 'ispkg': self.ispkg,
- 'source': source,
- 'frozen': os.path.basename(self.frozenfile),
- 'checksum': _get_checksum(self.frozenfile),
- }
-
-
-def _iter_sources(modules):
- seen = set()
- for mod in modules:
- if mod.source not in seen:
- yield mod.source
- seen.add(mod.source)
-
-
-#######################################
-# generic helpers
-
-def _get_checksum(filename):
- with open(filename, "rb") as infile:
- contents = infile.read()
- m = hashlib.sha256()
- m.update(contents)
- return m.hexdigest()
-
-
-def resolve_modules(modname, pyfile=None):
- if modname.startswith('<') and modname.endswith('>'):
- if pyfile:
- assert os.path.isdir(pyfile) or os.path.basename(pyfile) == '__init__.py', pyfile
- ispkg = True
- modname = modname[1:-1]
- rawname = modname
- # For now, we only expect match patterns at the end of the name.
- _modname, sep, match = modname.rpartition('.')
- if sep:
- if _modname.endswith('.**'):
- modname = _modname[:-3]
- match = f'**.{match}'
- elif match and not match.isidentifier():
- modname = _modname
- # Otherwise it's a plain name so we leave it alone.
- else:
- match = None
- else:
- ispkg = False
- rawname = modname
- match = None
-
- if not check_modname(modname):
- raise ValueError(f'not a valid module name ({rawname})')
-
- if not pyfile:
- pyfile = _resolve_module(modname, ispkg=ispkg)
- elif os.path.isdir(pyfile):
- pyfile = _resolve_module(modname, pyfile, ispkg)
- yield modname, pyfile, ispkg
-
- if match:
- pkgdir = os.path.dirname(pyfile)
- yield from iter_submodules(modname, pkgdir, match)
-
-
-def check_modname(modname):
- return all(n.isidentifier() for n in modname.split('.'))
-
-
-def iter_submodules(pkgname, pkgdir=None, match='*'):
- if not pkgdir:
- pkgdir = os.path.join(STDLIB_DIR, *pkgname.split('.'))
- if not match:
- match = '**.*'
- match_modname = _resolve_modname_matcher(match, pkgdir)
-
- def _iter_submodules(pkgname, pkgdir):
- for entry in sorted(os.scandir(pkgdir), key=lambda e: e.name):
- matched, recursive = match_modname(entry.name)
- if not matched:
- continue
- modname = f'{pkgname}.{entry.name}'
- if modname.endswith('.py'):
- yield modname[:-3], entry.path, False
- elif entry.is_dir():
- pyfile = os.path.join(entry.path, '__init__.py')
- # We ignore namespace packages.
- if os.path.exists(pyfile):
- yield modname, pyfile, True
- if recursive:
- yield from _iter_submodules(modname, entry.path)
-
- return _iter_submodules(pkgname, pkgdir)
-
-
-def _resolve_modname_matcher(match, rootdir=None):
- if isinstance(match, str):
- if match.startswith('**.'):
- recursive = True
- pat = match[3:]
- assert match
- else:
- recursive = False
- pat = match
-
- if pat == '*':
- def match_modname(modname):
- return True, recursive
- else:
- raise NotImplementedError(match)
- elif callable(match):
- match_modname = match(rootdir)
- else:
- raise ValueError(f'unsupported matcher {match!r}')
- return match_modname
-
-
-def _resolve_module(modname, pathentry=STDLIB_DIR, ispkg=False):
- assert pathentry, pathentry
- pathentry = os.path.normpath(pathentry)
- assert os.path.isabs(pathentry)
- if ispkg:
- return os.path.join(pathentry, *modname.split('.'), '__init__.py')
- return os.path.join(pathentry, *modname.split('.')) + '.py'
-
-
-#######################################
-# regenerating dependent files
-
-def find_marker(lines, marker, file):
- for pos, line in enumerate(lines):
- if marker in line:
- return pos
- raise Exception(f"Can't find {marker!r} in file {file}")
-
-
-def replace_block(lines, start_marker, end_marker, replacements, file):
- start_pos = find_marker(lines, start_marker, file)
- end_pos = find_marker(lines, end_marker, file)
- if end_pos <= start_pos:
- raise Exception(f"End marker {end_marker!r} "
- f"occurs before start marker {start_marker!r} "
- f"in file {file}")
- replacements = [line.rstrip() + '\n' for line in replacements]
- return lines[:start_pos + 1] + replacements + lines[end_pos:]
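A minimal doctest-style illustration (markers invented for the example):

    >>> lines = ["keep\n", "/* Start */\n", "old\n", "/* End */\n", "tail\n"]
    >>> replace_block(lines, "/* Start */", "/* End */", ["new"], "example.c")
    ['keep\n', '/* Start */\n', 'new\n', '/* End */\n', 'tail\n']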
-
-
-def regen_frozen(modules, frozen_modules: bool):
- headerlines = []
- parentdir = os.path.dirname(FROZEN_FILE)
- if frozen_modules:
- for src in _iter_sources(modules):
- # Adding a comment to separate sections here doesn't add much,
- # so we don't.
- header = relpath_for_posix_display(src.frozenfile, parentdir)
- headerlines.append(f'#include "{header}"')
-
- externlines = []
- bootstraplines = []
- stdliblines = []
- testlines = []
- aliaslines = []
- indent = ' '
- lastsection = None
- for mod in modules:
- if mod.isbootstrap:
- lines = bootstraplines
- elif mod.section == TESTS_SECTION:
- lines = testlines
- else:
- lines = stdliblines
- if mod.section != lastsection:
- if lastsection is not None:
- lines.append('')
- lines.append(f'/* {mod.section} */')
- lastsection = mod.section
-
-        # Also add an extern declaration for the corresponding
- # deepfreeze-generated function.
- orig_name = mod.source.id
- code_name = orig_name.replace(".", "_")
- get_code_name = "_Py_get_%s_toplevel" % code_name
- externlines.append("extern PyObject *%s(void);" % get_code_name)
-
- symbol = mod.symbol
- pkg = 'true' if mod.ispkg else 'false'
- if not frozen_modules:
- line = ('{"%s", NULL, 0, %s, GET_CODE(%s)},'
- ) % (mod.name, pkg, code_name)
- else:
- line = ('{"%s", %s, (int)sizeof(%s), %s, GET_CODE(%s)},'
- ) % (mod.name, symbol, symbol, pkg, code_name)
- lines.append(line)
-
- if mod.isalias:
- if not mod.orig:
- entry = '{"%s", NULL},' % (mod.name,)
- elif mod.source.ispkg:
- entry = '{"%s", "<%s"},' % (mod.name, mod.orig)
- else:
- entry = '{"%s", "%s"},' % (mod.name, mod.orig)
- aliaslines.append(indent + entry)
-
- for lines in (bootstraplines, stdliblines, testlines):
- # TODO: Is this necessary any more?
- if not lines[0]:
- del lines[0]
- for i, line in enumerate(lines):
- if line:
- lines[i] = indent + line
-
- print(f'# Updating {os.path.relpath(FROZEN_FILE)}')
- with updating_file_with_tmpfile(FROZEN_FILE) as (infile, outfile):
- lines = infile.readlines()
- # TODO: Use more obvious markers, e.g.
- # $START GENERATED FOOBAR$ / $END GENERATED FOOBAR$
- lines = replace_block(
- lines,
- "/* Includes for frozen modules: */",
- "/* End includes */",
- headerlines,
- FROZEN_FILE,
- )
- lines = replace_block(
- lines,
- "/* Start extern declarations */",
- "/* End extern declarations */",
- externlines,
- FROZEN_FILE,
- )
- lines = replace_block(
- lines,
- "static const struct _frozen bootstrap_modules[] =",
- "/* bootstrap sentinel */",
- bootstraplines,
- FROZEN_FILE,
- )
- lines = replace_block(
- lines,
- "static const struct _frozen stdlib_modules[] =",
- "/* stdlib sentinel */",
- stdliblines,
- FROZEN_FILE,
- )
- lines = replace_block(
- lines,
- "static const struct _frozen test_modules[] =",
- "/* test sentinel */",
- testlines,
- FROZEN_FILE,
- )
- lines = replace_block(
- lines,
- "const struct _module_alias aliases[] =",
- "/* aliases sentinel */",
- aliaslines,
- FROZEN_FILE,
- )
- outfile.writelines(lines)
-
-
-def regen_makefile(modules):
- pyfiles = []
- frozenfiles = []
- rules = ['']
- deepfreezerules = ["Python/deepfreeze/deepfreeze.c: $(DEEPFREEZE_DEPS)",
- "\t$(PYTHON_FOR_FREEZE) $(srcdir)/Tools/scripts/deepfreeze.py \\"]
- for src in _iter_sources(modules):
- frozen_header = relpath_for_posix_display(src.frozenfile, ROOT_DIR)
- frozenfiles.append(f'\t\t{frozen_header} \\')
-
- pyfile = relpath_for_posix_display(src.pyfile, ROOT_DIR)
- pyfiles.append(f'\t\t{pyfile} \\')
-
- if src.isbootstrap:
- freezecmd = '$(FREEZE_MODULE_BOOTSTRAP)'
- freezedep = '$(FREEZE_MODULE_BOOTSTRAP_DEPS)'
- else:
- freezecmd = '$(FREEZE_MODULE)'
- freezedep = '$(FREEZE_MODULE_DEPS)'
-
- freeze = (f'{freezecmd} {src.frozenid} '
- f'$(srcdir)/{pyfile} {frozen_header}')
- rules.extend([
- f'{frozen_header}: {pyfile} {freezedep}',
- f'\t{freeze}',
- '',
- ])
- deepfreezerules.append(f"\t{frozen_header}:{src.frozenid} \\")
- deepfreezerules.append('\t-o Python/deepfreeze/deepfreeze.c')
- pyfiles[-1] = pyfiles[-1].rstrip(" \\")
- frozenfiles[-1] = frozenfiles[-1].rstrip(" \\")
-
- print(f'# Updating {os.path.relpath(MAKEFILE)}')
- with updating_file_with_tmpfile(MAKEFILE) as (infile, outfile):
- lines = infile.readlines()
- lines = replace_block(
- lines,
- "FROZEN_FILES_IN =",
- "# End FROZEN_FILES_IN",
- pyfiles,
- MAKEFILE,
- )
- lines = replace_block(
- lines,
- "FROZEN_FILES_OUT =",
- "# End FROZEN_FILES_OUT",
- frozenfiles,
- MAKEFILE,
- )
- lines = replace_block(
- lines,
- "# BEGIN: freezing modules",
- "# END: freezing modules",
- rules,
- MAKEFILE,
- )
- lines = replace_block(
- lines,
- "# BEGIN: deepfreeze modules",
- "# END: deepfreeze modules",
- deepfreezerules,
- MAKEFILE,
- )
- outfile.writelines(lines)
-
-
-def regen_pcbuild(modules):
- projlines = []
- filterlines = []
- corelines = []
- deepfreezerules = ['\t<Exec Command=\'$(PythonForBuild) "$(PySourcePath)Tools\\scripts\\deepfreeze.py" ^']
- for src in _iter_sources(modules):
- pyfile = relpath_for_windows_display(src.pyfile, ROOT_DIR)
- header = relpath_for_windows_display(src.frozenfile, ROOT_DIR)
- intfile = ntpath.splitext(ntpath.basename(header))[0] + '.g.h'
- projlines.append(f' <None Include="..\\{pyfile}">')
- projlines.append(f' <ModName>{src.frozenid}</ModName>')
- projlines.append(f' <IntFile>$(IntDir){intfile}</IntFile>')
- projlines.append(f' <OutFile>$(PySourcePath){header}</OutFile>')
- projlines.append(f' </None>')
-
- filterlines.append(f' <None Include="..\\{pyfile}">')
- filterlines.append(' <Filter>Python Files</Filter>')
- filterlines.append(' </None>')
- deepfreezerules.append(f'\t\t "$(PySourcePath){header}:{src.frozenid}" ^')
- deepfreezerules.append('\t\t "-o" "$(PySourcePath)Python\\deepfreeze\\deepfreeze.c"\'/>' )
-
- corelines.append(f' <ClCompile Include="..\\Python\\deepfreeze\\deepfreeze.c" />')
-
- print(f'# Updating {os.path.relpath(PCBUILD_PROJECT)}')
- with updating_file_with_tmpfile(PCBUILD_PROJECT) as (infile, outfile):
- lines = infile.readlines()
- lines = replace_block(
- lines,
- '<!-- BEGIN frozen modules -->',
- '<!-- END frozen modules -->',
- projlines,
- PCBUILD_PROJECT,
- )
- outfile.writelines(lines)
- with updating_file_with_tmpfile(PCBUILD_PROJECT) as (infile, outfile):
- lines = infile.readlines()
- lines = replace_block(
- lines,
- '<!-- BEGIN deepfreeze rule -->',
- '<!-- END deepfreeze rule -->',
- deepfreezerules,
- PCBUILD_PROJECT,
- )
- outfile.writelines(lines)
- print(f'# Updating {os.path.relpath(PCBUILD_FILTERS)}')
- with updating_file_with_tmpfile(PCBUILD_FILTERS) as (infile, outfile):
- lines = infile.readlines()
- lines = replace_block(
- lines,
- '<!-- BEGIN frozen modules -->',
- '<!-- END frozen modules -->',
- filterlines,
- PCBUILD_FILTERS,
- )
- outfile.writelines(lines)
- print(f'# Updating {os.path.relpath(PCBUILD_PYTHONCORE)}')
- with updating_file_with_tmpfile(PCBUILD_PYTHONCORE) as (infile, outfile):
- lines = infile.readlines()
- lines = replace_block(
- lines,
- '<!-- BEGIN deepfreeze -->',
- '<!-- END deepfreeze -->',
- corelines,
-            PCBUILD_PYTHONCORE,
- )
- outfile.writelines(lines)
-
-
-#######################################
-# the script
-
-parser = argparse.ArgumentParser()
-parser.add_argument("--frozen-modules", action="store_true",
- help="Use both frozen and deepfrozen modules. (default: uses only deepfrozen modules)")
-
-def main():
- args = parser.parse_args()
- frozen_modules: bool = args.frozen_modules
- # Expand the raw specs, preserving order.
- modules = list(parse_frozen_specs())
-
- # Regen build-related files.
- regen_makefile(modules)
- regen_pcbuild(modules)
- regen_frozen(modules, frozen_modules)
-
-
-if __name__ == '__main__':
- main()
diff --git a/Tools/scripts/generate_global_objects.py b/Tools/scripts/generate_global_objects.py
deleted file mode 100644
index 0432bf5..0000000
--- a/Tools/scripts/generate_global_objects.py
+++ /dev/null
@@ -1,381 +0,0 @@
-import contextlib
-import io
-import os.path
-import re
-
-__file__ = os.path.abspath(__file__)
-ROOT = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
-INTERNAL = os.path.join(ROOT, 'Include', 'internal')
-
-
-IGNORED = {
- 'ACTION', # Python/_warnings.c
- 'ATTR', # Python/_warnings.c and Objects/funcobject.c
- 'DUNDER', # Objects/typeobject.c
- 'RDUNDER', # Objects/typeobject.c
- 'SPECIAL', # Objects/weakrefobject.c
- 'NAME', # Objects/typeobject.c
-}
-IDENTIFIERS = [
- # from ADD() Python/_warnings.c
- 'default',
- 'ignore',
-
- # from GET_WARNINGS_ATTR() in Python/_warnings.c
- 'WarningMessage',
- '_showwarnmsg',
- '_warn_unawaited_coroutine',
- 'defaultaction',
- 'filters',
- 'onceregistry',
-
- # from WRAP_METHOD() in Objects/weakrefobject.c
- '__bytes__',
- '__reversed__',
-
- # from COPY_ATTR() in Objects/funcobject.c
- '__module__',
- '__name__',
- '__qualname__',
- '__doc__',
- '__annotations__',
-
- # from SLOT* in Objects/typeobject.c
- '__abs__',
- '__add__',
- '__aiter__',
- '__and__',
- '__anext__',
- '__await__',
- '__bool__',
- '__call__',
- '__contains__',
- '__del__',
- '__delattr__',
- '__delete__',
- '__delitem__',
- '__eq__',
- '__float__',
- '__floordiv__',
- '__ge__',
- '__get__',
- '__getattr__',
- '__getattribute__',
- '__getitem__',
- '__gt__',
- '__hash__',
- '__iadd__',
- '__iand__',
- '__ifloordiv__',
- '__ilshift__',
- '__imatmul__',
- '__imod__',
- '__imul__',
- '__index__',
- '__init__',
- '__int__',
- '__invert__',
- '__ior__',
- '__ipow__',
- '__irshift__',
- '__isub__',
- '__iter__',
- '__itruediv__',
- '__ixor__',
- '__le__',
- '__len__',
- '__lshift__',
- '__lt__',
- '__matmul__',
- '__mod__',
- '__mul__',
- '__ne__',
- '__neg__',
- '__new__',
- '__next__',
- '__or__',
- '__pos__',
- '__pow__',
- '__radd__',
- '__rand__',
- '__repr__',
- '__rfloordiv__',
- '__rlshift__',
- '__rmatmul__',
- '__rmod__',
- '__rmul__',
- '__ror__',
- '__rpow__',
- '__rrshift__',
- '__rshift__',
- '__rsub__',
- '__rtruediv__',
- '__rxor__',
- '__set__',
- '__setattr__',
- '__setitem__',
- '__str__',
- '__sub__',
- '__truediv__',
- '__xor__',
- '__divmod__',
- '__rdivmod__',
-]
-
-
-#######################################
-# helpers
-
-def iter_files():
- for name in ('Modules', 'Objects', 'Parser', 'PC', 'Programs', 'Python'):
- root = os.path.join(ROOT, name)
- for dirname, _, files in os.walk(root):
- for name in files:
- if not name.endswith(('.c', '.h')):
- continue
- yield os.path.join(dirname, name)
-
-
-def iter_global_strings():
- id_regex = re.compile(r'\b_Py_ID\((\w+)\)')
- str_regex = re.compile(r'\b_Py_DECLARE_STR\((\w+), "(.*?)"\)')
- for filename in iter_files():
- try:
- infile = open(filename, encoding='utf-8')
- except FileNotFoundError:
- # The file must have been a temporary file.
- continue
- with infile:
- for lno, line in enumerate(infile, 1):
- for m in id_regex.finditer(line):
- identifier, = m.groups()
- yield identifier, None, filename, lno, line
- for m in str_regex.finditer(line):
- varname, string = m.groups()
- yield varname, string, filename, lno, line
-
-
-def iter_to_marker(lines, marker):
- for line in lines:
- if line.rstrip() == marker:
- break
- yield line
-
-
-class Printer:
-
- def __init__(self, file):
- self.level = 0
- self.file = file
- self.continuation = [False]
-
- @contextlib.contextmanager
- def indent(self):
- save_level = self.level
- try:
- self.level += 1
- yield
- finally:
- self.level = save_level
-
- def write(self, arg):
- eol = '\n'
- if self.continuation[-1]:
- eol = f' \\{eol}' if arg else f'\\{eol}'
- self.file.writelines((" "*self.level, arg, eol))
-
- @contextlib.contextmanager
- def block(self, prefix, suffix="", *, continuation=None):
- if continuation is None:
- continuation = self.continuation[-1]
- self.continuation.append(continuation)
-
- self.write(prefix + " {")
- with self.indent():
- yield
- self.continuation.pop()
- self.write("}" + suffix)
-
-
-@contextlib.contextmanager
-def open_for_changes(filename, orig):
- """Like open() but only write to the file if it changed."""
- outfile = io.StringIO()
- yield outfile
- text = outfile.getvalue()
- if text != orig:
- with open(filename, 'w', encoding='utf-8') as outfile:
- outfile.write(text)
- else:
- print(f'# not changed: {filename}')
-
-
-#######################################
-# the global objects
-
-START = '/* The following is auto-generated by Tools/scripts/generate_global_objects.py. */'
-END = '/* End auto-generated code */'
-
-
-def generate_global_strings(identifiers, strings):
- filename = os.path.join(INTERNAL, 'pycore_global_strings.h')
-
- # Read the non-generated part of the file.
- with open(filename) as infile:
- orig = infile.read()
- lines = iter(orig.rstrip().splitlines())
- before = '\n'.join(iter_to_marker(lines, START))
- for _ in iter_to_marker(lines, END):
- pass
- after = '\n'.join(lines)
-
- # Generate the file.
- with open_for_changes(filename, orig) as outfile:
- printer = Printer(outfile)
- printer.write(before)
- printer.write(START)
- with printer.block('struct _Py_global_strings', ';'):
- with printer.block('struct', ' literals;'):
- for literal, name in sorted(strings.items(), key=lambda x: x[1]):
- printer.write(f'STRUCT_FOR_STR({name}, "{literal}")')
- outfile.write('\n')
- with printer.block('struct', ' identifiers;'):
- for name in sorted(identifiers):
- assert name.isidentifier(), name
- printer.write(f'STRUCT_FOR_ID({name})')
- with printer.block('struct', ' ascii[128];'):
- printer.write("PyASCIIObject _ascii;")
- printer.write("uint8_t _data[2];")
- with printer.block('struct', ' latin1[128];'):
- printer.write("PyCompactUnicodeObject _latin1;")
- printer.write("uint8_t _data[2];")
- printer.write(END)
- printer.write(after)
-
-
-def generate_runtime_init(identifiers, strings):
- # First get some info from the declarations.
- nsmallposints = None
- nsmallnegints = None
- with open(os.path.join(INTERNAL, 'pycore_global_objects.h')) as infile:
- for line in infile:
- if line.startswith('#define _PY_NSMALLPOSINTS'):
- nsmallposints = int(line.split()[-1])
- elif line.startswith('#define _PY_NSMALLNEGINTS'):
- nsmallnegints = int(line.split()[-1])
- break
- else:
- raise NotImplementedError
- assert nsmallposints and nsmallnegints
-
- # Then target the runtime initializer.
- filename = os.path.join(INTERNAL, 'pycore_runtime_init_generated.h')
-
- # Read the non-generated part of the file.
- with open(filename) as infile:
- orig = infile.read()
- lines = iter(orig.rstrip().splitlines())
- before = '\n'.join(iter_to_marker(lines, START))
- for _ in iter_to_marker(lines, END):
- pass
- after = '\n'.join(lines)
-
- # Generate the file.
- with open_for_changes(filename, orig) as outfile:
- immortal_objects = []
- printer = Printer(outfile)
- printer.write(before)
- printer.write(START)
- with printer.block('#define _Py_global_objects_INIT', continuation=True):
- with printer.block('.singletons =', ','):
- # Global int objects.
- with printer.block('.small_ints =', ','):
- for i in range(-nsmallnegints, nsmallposints):
- printer.write(f'_PyLong_DIGIT_INIT({i}),')
- immortal_objects.append(f'(PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + {i}]')
- printer.write('')
- # Global bytes objects.
- printer.write('.bytes_empty = _PyBytes_SIMPLE_INIT(0, 0),')
- immortal_objects.append(f'(PyObject *)&_Py_SINGLETON(bytes_empty)')
- with printer.block('.bytes_characters =', ','):
- for i in range(256):
- printer.write(f'_PyBytes_CHAR_INIT({i}),')
- immortal_objects.append(f'(PyObject *)&_Py_SINGLETON(bytes_characters)[{i}]')
- printer.write('')
- # Global strings.
- with printer.block('.strings =', ','):
- with printer.block('.literals =', ','):
- for literal, name in sorted(strings.items(), key=lambda x: x[1]):
- printer.write(f'INIT_STR({name}, "{literal}"),')
- immortal_objects.append(f'(PyObject *)&_Py_STR({name})')
- with printer.block('.identifiers =', ','):
- for name in sorted(identifiers):
- assert name.isidentifier(), name
- printer.write(f'INIT_ID({name}),')
- immortal_objects.append(f'(PyObject *)&_Py_ID({name})')
- with printer.block('.ascii =', ','):
- for i in range(128):
- printer.write(f'_PyASCIIObject_INIT("\\x{i:02x}"),')
- immortal_objects.append(f'(PyObject *)&_Py_SINGLETON(strings).ascii[{i}]')
- with printer.block('.latin1 =', ','):
- for i in range(128, 256):
- utf8 = ['"']
- for c in chr(i).encode('utf-8'):
- utf8.append(f"\\x{c:02x}")
- utf8.append('"')
- printer.write(f'_PyUnicode_LATIN1_INIT("\\x{i:02x}", {"".join(utf8)}),')
- immortal_objects.append(f'(PyObject *)&_Py_SINGLETON(strings).latin1[{i} - 128]')
- printer.write('')
- with printer.block('.tuple_empty =', ','):
- printer.write('.ob_base = _PyVarObject_IMMORTAL_INIT(&PyTuple_Type, 0)')
- immortal_objects.append(f'(PyObject *)&_Py_SINGLETON(tuple_empty)')
- printer.write('')
- printer.write("static inline void")
- with printer.block("_PyUnicode_InitStaticStrings(void)"):
- printer.write(f'PyObject *string;')
- for i in sorted(identifiers):
- # This use of _Py_ID() is ignored by iter_global_strings()
- # since iter_files() ignores .h files.
- printer.write(f'string = &_Py_ID({i});')
- printer.write(f'PyUnicode_InternInPlace(&string);')
- printer.write('')
- printer.write('#ifdef Py_DEBUG')
- printer.write("static inline void")
- with printer.block("_PyStaticObjects_CheckRefcnt(void)"):
- for i in immortal_objects:
- with printer.block(f'if (Py_REFCNT({i}) < _PyObject_IMMORTAL_REFCNT)', ';'):
- printer.write(f'_PyObject_Dump({i});')
- printer.write(f'Py_FatalError("immortal object has less refcnt than '
- 'expected _PyObject_IMMORTAL_REFCNT");')
- printer.write('#endif')
- printer.write(END)
- printer.write(after)
-
-
-def get_identifiers_and_strings() -> 'tuple[set[str], dict[str, str]]':
- identifiers = set(IDENTIFIERS)
- strings = {}
- for name, string, *_ in iter_global_strings():
- if string is None:
- if name not in IGNORED:
- identifiers.add(name)
- else:
- if string not in strings:
- strings[string] = name
- elif name != strings[string]:
-                raise ValueError(f'name mismatch for {string!r} ({name!r} != {strings[string]!r})')
- return identifiers, strings
-
-
-#######################################
-# the script
-
-def main() -> None:
- identifiers, strings = get_identifiers_and_strings()
-
- generate_global_strings(identifiers, strings)
- generate_runtime_init(identifiers, strings)
-
-
-if __name__ == '__main__':
- main()
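
The Printer class in the script above does the heavy lifting for the C generation: block() pairs an opening brace with its closing suffix and keeps the indentation level in sync through a context manager, while continuation=True switches every write() to backslash-continued lines for #define bodies. A minimal sketch of how the pieces compose, assuming the Printer class above is in scope:

    import io

    buf = io.StringIO()
    printer = Printer(buf)
    with printer.block('struct _demo', ';'):
        with printer.block('struct', ' inner;'):
            printer.write('int x;')
    print(buf.getvalue())
    # struct _demo {
    #     struct {
    #         int x;
    #     } inner;
    # };
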
diff --git a/Tools/scripts/generate_levenshtein_examples.py b/Tools/scripts/generate_levenshtein_examples.py
deleted file mode 100644
index 5a8360f..0000000
--- a/Tools/scripts/generate_levenshtein_examples.py
+++ /dev/null
@@ -1,70 +0,0 @@
-"""Generate 10,000 unique examples for the Levenshtein short-circuit tests."""
-
-import argparse
-from functools import cache
-import json
-import os.path
-from random import choices, randrange
-
-
-# This should be in sync with Lib/traceback.py. It's not importing those values
-# because this script is being executed by PYTHON_FOR_REGEN and not by the in-tree
-# build of Python.
-_MOVE_COST = 2
-_CASE_COST = 1
-
-
-def _substitution_cost(ch_a, ch_b):
- if ch_a == ch_b:
- return 0
- if ch_a.lower() == ch_b.lower():
- return _CASE_COST
- return _MOVE_COST
-
-
-@cache
-def levenshtein(a, b):
- if not a or not b:
- return (len(a) + len(b)) * _MOVE_COST
- option1 = levenshtein(a[:-1], b[:-1]) + _substitution_cost(a[-1], b[-1])
- option2 = levenshtein(a[:-1], b) + _MOVE_COST
- option3 = levenshtein(a, b[:-1]) + _MOVE_COST
- return min(option1, option2, option3)
-
-
-def main():
- parser = argparse.ArgumentParser(description=__doc__)
- parser.add_argument('output_path', metavar='FILE', type=str)
- parser.add_argument('--overwrite', dest='overwrite', action='store_const',
- const=True, default=False,
- help='overwrite an existing test file')
-
- args = parser.parse_args()
- output_path = os.path.realpath(args.output_path)
- if not args.overwrite and os.path.isfile(output_path):
- print(f"{output_path} already exists, skipping regeneration.")
- print(
- "To force, add --overwrite to the invocation of this tool or"
- " delete the existing file."
- )
- return
-
- examples = set()
- # Create a lot of non-empty examples, which should end up with a Gauss-like
- # distribution for even costs (moves) and odd costs (case substitutions).
- while len(examples) < 9990:
- a = ''.join(choices("abcABC", k=randrange(1, 10)))
- b = ''.join(choices("abcABC", k=randrange(1, 10)))
- expected = levenshtein(a, b)
- examples.add((a, b, expected))
- # Create one empty case each for strings between 0 and 9 in length.
- for i in range(10):
- b = ''.join(choices("abcABC", k=i))
- expected = levenshtein("", b)
- examples.add(("", b, expected))
- with open(output_path, "w") as f:
- json.dump(sorted(examples), f, indent=2)
-
-
-if __name__ == "__main__":
- main()
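
The cost model above (move = 2, case-only substitution = 1) is what the generated JSON encodes: even totals come from insertions, deletions and full substitutions, while an odd total means at least one case-only change. A few hand-checkable values, assuming levenshtein() and the cost constants from the script above are in scope:

    assert levenshtein("", "abc") == 3 * _MOVE_COST  # three insertions -> 6
    assert levenshtein("Cat", "cat") == _CASE_COST   # case-only substitution -> 1
    assert levenshtein("abc", "abd") == _MOVE_COST   # one real substitution -> 2
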
diff --git a/Tools/scripts/generate_opcode_h.py b/Tools/scripts/generate_opcode_h.py
deleted file mode 100644
index 9ff264a..0000000
--- a/Tools/scripts/generate_opcode_h.py
+++ /dev/null
@@ -1,199 +0,0 @@
-# This script generates the opcode.h header file.
-
-import sys
-import tokenize
-
-SCRIPT_NAME = "Tools/scripts/generate_opcode_h.py"
-PYTHON_OPCODE = "Lib/opcode.py"
-
-header = f"""
-// Auto-generated by {SCRIPT_NAME} from {PYTHON_OPCODE}
-
-#ifndef Py_OPCODE_H
-#define Py_OPCODE_H
-#ifdef __cplusplus
-extern "C" {{
-#endif
-
-
-/* Instruction opcodes for compiled code */
-""".lstrip()
-
-footer = """
-
-#define IS_PSEUDO_OPCODE(op) (((op) >= MIN_PSEUDO_OPCODE) && ((op) <= MAX_PSEUDO_OPCODE))
-
-#ifdef __cplusplus
-}
-#endif
-#endif /* !Py_OPCODE_H */
-"""
-
-internal_header = f"""
-// Auto-generated by {SCRIPT_NAME} from {PYTHON_OPCODE}
-
-#ifndef Py_INTERNAL_OPCODE_H
-#define Py_INTERNAL_OPCODE_H
-#ifdef __cplusplus
-extern "C" {{
-#endif
-
-#ifndef Py_BUILD_CORE
-# error "this header requires Py_BUILD_CORE define"
-#endif
-
-#include "opcode.h"
-""".lstrip()
-
-internal_footer = """
-#ifdef __cplusplus
-}
-#endif
-#endif // !Py_INTERNAL_OPCODE_H
-"""
-
-DEFINE = "#define {:<38} {:>3}\n"
-
-UINT32_MASK = (1<<32)-1
-
-def write_int_array_from_ops(name, ops, out):
- bits = 0
- for op in ops:
- bits |= 1<<op
- out.write(f"static const uint32_t {name}[9] = {{\n")
- for i in range(9):
- out.write(f" {bits & UINT32_MASK}U,\n")
- bits >>= 32
- assert bits == 0
- out.write(f"}};\n")
-
-def main(opcode_py, outfile='Include/opcode.h', internaloutfile='Include/internal/pycore_opcode.h'):
- opcode = {}
- if hasattr(tokenize, 'open'):
- fp = tokenize.open(opcode_py) # Python 3.2+
- else:
- fp = open(opcode_py) # Python 2.7
- with fp:
- code = fp.read()
- exec(code, opcode)
- opmap = opcode['opmap']
- opname = opcode['opname']
- hasarg = opcode['hasarg']
- hasconst = opcode['hasconst']
- hasjrel = opcode['hasjrel']
- hasjabs = opcode['hasjabs']
- is_pseudo = opcode['is_pseudo']
- _pseudo_ops = opcode['_pseudo_ops']
-
- HAVE_ARGUMENT = opcode["HAVE_ARGUMENT"]
- MIN_PSEUDO_OPCODE = opcode["MIN_PSEUDO_OPCODE"]
- MAX_PSEUDO_OPCODE = opcode["MAX_PSEUDO_OPCODE"]
-
- NUM_OPCODES = len(opname)
- used = [ False ] * len(opname)
- next_op = 1
-
- for name, op in opmap.items():
- used[op] = True
-
- specialized_opmap = {}
- opname_including_specialized = opname.copy()
- for name in opcode['_specialized_instructions']:
- while used[next_op]:
- next_op += 1
- specialized_opmap[name] = next_op
- opname_including_specialized[next_op] = name
- used[next_op] = True
- specialized_opmap['DO_TRACING'] = 255
- opname_including_specialized[255] = 'DO_TRACING'
- used[255] = True
-
- with (open(outfile, 'w') as fobj, open(internaloutfile, 'w') as iobj):
- fobj.write(header)
- iobj.write(internal_header)
-
- for name in opname:
- if name in opmap:
- op = opmap[name]
- if op == HAVE_ARGUMENT:
- fobj.write(DEFINE.format("HAVE_ARGUMENT", HAVE_ARGUMENT))
- if op == MIN_PSEUDO_OPCODE:
- fobj.write(DEFINE.format("MIN_PSEUDO_OPCODE", MIN_PSEUDO_OPCODE))
-
- fobj.write(DEFINE.format(name, op))
-
- if op == MAX_PSEUDO_OPCODE:
- fobj.write(DEFINE.format("MAX_PSEUDO_OPCODE", MAX_PSEUDO_OPCODE))
-
-
- for name, op in specialized_opmap.items():
- fobj.write(DEFINE.format(name, op))
-
- iobj.write("\nextern const uint8_t _PyOpcode_Caches[256];\n")
- iobj.write("\nextern const uint8_t _PyOpcode_Deopt[256];\n")
- iobj.write("\n#ifdef NEED_OPCODE_TABLES\n")
- write_int_array_from_ops("_PyOpcode_RelativeJump", opcode['hasjrel'], iobj)
- write_int_array_from_ops("_PyOpcode_Jump", opcode['hasjrel'] + opcode['hasjabs'], iobj)
-
- iobj.write("\nconst uint8_t _PyOpcode_Caches[256] = {\n")
- for i, entries in enumerate(opcode["_inline_cache_entries"]):
- if entries:
- iobj.write(f" [{opname[i]}] = {entries},\n")
- iobj.write("};\n")
-
- deoptcodes = {}
- for basic, op in opmap.items():
- if not is_pseudo(op):
- deoptcodes[basic] = basic
- for basic, family in opcode["_specializations"].items():
- for specialized in family:
- deoptcodes[specialized] = basic
- iobj.write("\nconst uint8_t _PyOpcode_Deopt[256] = {\n")
- for opt, deopt in sorted(deoptcodes.items()):
- iobj.write(f" [{opt}] = {deopt},\n")
- iobj.write("};\n")
- iobj.write("#endif // NEED_OPCODE_TABLES\n")
-
- fobj.write("\n")
- fobj.write("#define HAS_ARG(op) ((((op) >= HAVE_ARGUMENT) && (!IS_PSEUDO_OPCODE(op)))\\")
- for op in _pseudo_ops:
- if opmap[op] in hasarg:
- fobj.write(f"\n || ((op) == {op}) \\")
- fobj.write("\n )\n")
-
- fobj.write("\n")
- fobj.write("#define HAS_CONST(op) (false\\")
- for op in hasconst:
- fobj.write(f"\n || ((op) == {opname[op]}) \\")
- fobj.write("\n )\n")
-
- fobj.write("\n")
- for i, (op, _) in enumerate(opcode["_nb_ops"]):
- fobj.write(DEFINE.format(op, i))
-
- iobj.write("\n")
- iobj.write("#ifdef Py_DEBUG\n")
- iobj.write(f"static const char *const _PyOpcode_OpName[{NUM_OPCODES}] = {{\n")
- for op, name in enumerate(opname_including_specialized):
- if name[0] != "<":
- op = name
- iobj.write(f''' [{op}] = "{name}",\n''')
- iobj.write("};\n")
- iobj.write("#endif\n")
-
- iobj.write("\n")
- iobj.write("#define EXTRA_CASES \\\n")
- for i, flag in enumerate(used):
- if not flag:
- iobj.write(f" case {i}: \\\n")
- iobj.write(" ;\n")
-
- fobj.write(footer)
- iobj.write(internal_footer)
-
-
- print(f"{outfile} regenerated from {opcode_py}")
-
-
-if __name__ == '__main__':
- main(sys.argv[1], sys.argv[2], sys.argv[3])
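
write_int_array_from_ops() above packs an opcode set into nine 32-bit words, least-significant word first (9 × 32 = 288 slots, presumably to leave room for pseudo opcodes numbered 256 and up), so a membership test on the C side reduces to a shift and an AND. The same packing and lookup, sketched back in Python:

    def pack(ops):
        bits = 0
        for op in ops:
            bits |= 1 << op
        return [(bits >> (32 * i)) & 0xFFFFFFFF for i in range(9)]

    def contains(words, op):
        return bool(words[op // 32] & (1 << (op % 32)))

    words = pack({2, 35, 250})
    assert contains(words, 35) and not contains(words, 36)
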
diff --git a/Tools/scripts/generate_re_casefix.py b/Tools/scripts/generate_re_casefix.py
deleted file mode 100755
index 625b065..0000000
--- a/Tools/scripts/generate_re_casefix.py
+++ /dev/null
@@ -1,94 +0,0 @@
-#! /usr/bin/env python3
-# This script generates Lib/re/_casefix.py.
-
-import collections
-import sys
-import unicodedata
-
-def update_file(file, content):
- try:
- with open(file, 'r', encoding='utf-8') as fobj:
- if fobj.read() == content:
- return False
- except (OSError, ValueError):
- pass
- with open(file, 'w', encoding='utf-8') as fobj:
- fobj.write(content)
- return True
-
-re_casefix_template = """\
-# Auto-generated by Tools/scripts/generate_re_casefix.py.
-
-# Maps the code of lowercased character to codes of different lowercased
-# characters which have the same uppercase.
-_EXTRA_CASES = {
-%s
-}
-"""
-
-def uname(i):
- return unicodedata.name(chr(i), r'U+%04X' % i)
-
-class hexint(int):
- def __repr__(self):
- return '%#06x' % self
-
-def alpha(i):
- c = chr(i)
- return c if c.isalpha() else ascii(c)[1:-1]
-
-
-def main(outfile='Lib/re/_casefix.py'):
- # Find sets of characters which have the same uppercase.
- equivalent_chars = collections.defaultdict(str)
- for c in map(chr, range(sys.maxunicode + 1)):
- equivalent_chars[c.upper()] += c
- equivalent_chars = [t for t in equivalent_chars.values() if len(t) > 1]
-
- # List of codes of lowercased characters which have the same uppercase.
- equivalent_lower_codes = [sorted(t)
- for s in equivalent_chars
- for t in [set(ord(c.lower()) for c in s)]
- if len(t) > 1]
-
- bad_codes = []
- for t in equivalent_lower_codes:
- for i in t:
- if i > 0xffff:
- bad_codes.extend(t)
- try:
- bad_codes.append(ord(chr(i).upper()))
- except (ValueError, TypeError):
- pass
- break
- if bad_codes:
-        print('Case-insensitive matching may not work correctly for these characters:',
- file=sys.stderr)
- for i in sorted(bad_codes):
- print(" '%s' (U+%04x, %s)" % (alpha(i), i, uname(i)),
- file=sys.stderr)
- sys.exit(1)
-
- mapping = {i: tuple(j for j in t if i != j)
- for t in equivalent_lower_codes
- for i in t}
-
- items = []
- for i, t in sorted(mapping.items()):
- items.append(' # %s: %s' % (
- uname(i),
- ', '.join(map(uname, t)),
- ))
- items.append(" %r: %r, # '%s': '%s'" % (
- hexint(i),
- tuple(map(hexint, t)),
- alpha(i),
- ''.join(map(alpha, t)),
- ))
-
- update_file(outfile, re_casefix_template % '\n'.join(items))
-
-
-if __name__ == '__main__':
- import sys
- main(*sys.argv[1:])
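
The key step in the script above is grouping every character by its uppercase form: any group with more than one distinct lowercased member is a case-folding hazard for the re module. Shown on a tiny hand-picked input ('ſ', U+017F LATIN SMALL LETTER LONG S, uppercases to 'S'):

    import collections

    groups = collections.defaultdict(str)
    for c in ['s', 'S', 'ſ', 'k', 'K']:
        groups[c.upper()] += c
    print(dict(groups))
    # {'S': 'sSſ', 'K': 'kK'}
    # Only the 'S' group has two distinct lowercase forms ({'s', 'ſ'}),
    # so those two code points become mutual entries in _EXTRA_CASES.
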
diff --git a/Tools/scripts/generate_sre_constants.py b/Tools/scripts/generate_sre_constants.py
deleted file mode 100755
index 7271507..0000000
--- a/Tools/scripts/generate_sre_constants.py
+++ /dev/null
@@ -1,78 +0,0 @@
-#! /usr/bin/env python3
-# This script generates Modules/_sre/sre_constants.h from Lib/re/_constants.py.
-
-
-def update_file(file, content):
- try:
- with open(file, 'r') as fobj:
- if fobj.read() == content:
- return False
- except (OSError, ValueError):
- pass
- with open(file, 'w') as fobj:
- fobj.write(content)
- return True
-
-sre_constants_header = """\
-/*
- * Secret Labs' Regular Expression Engine
- *
- * regular expression matching engine
- *
- * Auto-generated by Tools/scripts/generate_sre_constants.py from
- * Lib/re/_constants.py.
- *
- * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved.
- *
- * See the sre.c file for information on usage and redistribution.
- */
-
-"""
-
-def main(
- infile="Lib/re/_constants.py",
- outfile_constants="Modules/_sre/sre_constants.h",
- outfile_targets="Modules/_sre/sre_targets.h",
-):
- ns = {}
- with open(infile) as fp:
- code = fp.read()
- exec(code, ns)
-
- def dump(d, prefix):
- items = sorted(d)
- for item in items:
- yield "#define %s_%s %d\n" % (prefix, item, item)
-
- def dump2(d, prefix):
- items = [(value, name) for name, value in d.items()
- if name.startswith(prefix)]
- for value, name in sorted(items):
- yield "#define %s %d\n" % (name, value)
-
- def dump_gotos(d, prefix):
- for i, item in enumerate(sorted(d)):
- assert i == item
- yield f" &&{prefix}_{item},\n"
-
- content = [sre_constants_header]
- content.append("#define SRE_MAGIC %d\n" % ns["MAGIC"])
- content.extend(dump(ns["OPCODES"], "SRE_OP"))
- content.extend(dump(ns["ATCODES"], "SRE"))
- content.extend(dump(ns["CHCODES"], "SRE"))
- content.extend(dump2(ns, "SRE_FLAG_"))
- content.extend(dump2(ns, "SRE_INFO_"))
-
- update_file(outfile_constants, ''.join(content))
-
- content = [sre_constants_header]
- content.append(f"static void *sre_targets[{len(ns['OPCODES'])}] = {{\n")
- content.extend(dump_gotos(ns["OPCODES"], "TARGET_SRE_OP"))
- content.append("};\n")
-
- update_file(outfile_targets, ''.join(content))
-
-
-if __name__ == '__main__':
- import sys
- main(*sys.argv[1:])
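
dump2() above relies on a small trick: after exec()-ing Lib/re/_constants.py into a plain dict, module-level constants can be harvested by name prefix and emitted as #defines sorted by value. The same helper in isolation (the values here are illustrative, not checked against the real _constants.py):

    ns = {"SRE_FLAG_LOCALE": 4, "SRE_FLAG_IGNORECASE": 2, "MAGIC": 20221023}

    def dump2(d, prefix):
        items = [(value, name) for name, value in d.items()
                 if name.startswith(prefix)]
        for value, name in sorted(items):
            yield "#define %s %d\n" % (name, value)

    print(''.join(dump2(ns, "SRE_FLAG_")))
    # #define SRE_FLAG_IGNORECASE 2
    # #define SRE_FLAG_LOCALE 4
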
diff --git a/Tools/scripts/generate_stdlib_module_names.py b/Tools/scripts/generate_stdlib_module_names.py
deleted file mode 100644
index 92100bd..0000000
--- a/Tools/scripts/generate_stdlib_module_names.py
+++ /dev/null
@@ -1,137 +0,0 @@
-# This script lists the names of standard library modules
-# to update Python/stdlib_mod_names.h
-import _imp
-import os.path
-import re
-import subprocess
-import sys
-import sysconfig
-
-from check_extension_modules import ModuleChecker
-
-
-SRC_DIR = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
-STDLIB_PATH = os.path.join(SRC_DIR, 'Lib')
-
-IGNORE = {
- '__init__',
- '__pycache__',
- 'site-packages',
-
- # Test modules and packages
- '__hello__',
- '__phello__',
- '__hello_alias__',
- '__phello_alias__',
- '__hello_only__',
- '_ctypes_test',
- '_testbuffer',
- '_testcapi',
- '_testconsole',
- '_testimportmultiple',
- '_testinternalcapi',
- '_testmultiphase',
- '_xxsubinterpreters',
- '_xxtestfuzz',
- 'distutils.tests',
- 'idlelib.idle_test',
- 'test',
- 'xxlimited',
- 'xxlimited_35',
- 'xxsubtype',
-}
-
-# Pure Python modules (Lib/*.py)
-def list_python_modules(names):
- for filename in os.listdir(STDLIB_PATH):
- if not filename.endswith(".py"):
- continue
- name = filename.removesuffix(".py")
- names.add(name)
-
-
-# Packages in Lib/
-def list_packages(names):
- for name in os.listdir(STDLIB_PATH):
- if name in IGNORE:
- continue
- package_path = os.path.join(STDLIB_PATH, name)
- if not os.path.isdir(package_path):
- continue
- if any(package_file.endswith(".py")
- for package_file in os.listdir(package_path)):
- names.add(name)
-
-
-# Built-in and extension modules built by Modules/Setup*
-# includes Windows and macOS extensions.
-def list_modules_setup_extensions(names):
- checker = ModuleChecker()
- names.update(checker.list_module_names(all=True))
-
-
-# List frozen modules of the PyImport_FrozenModules list (Python/frozen.c).
-# Use the "./Programs/_testembed list_frozen" command.
-def list_frozen(names):
- submodules = set()
- for name in _imp._frozen_module_names():
-        # Skip __hello__, __hello_alias__, etc.
- if name.startswith('__'):
- continue
- if '.' in name:
- submodules.add(name)
- else:
- names.add(name)
- # Make sure all frozen submodules have a known parent.
- for name in list(submodules):
- if name.partition('.')[0] in names:
- submodules.remove(name)
- if submodules:
- raise Exception(f'unexpected frozen submodules: {sorted(submodules)}')
-
-
-def list_modules():
- names = set(sys.builtin_module_names)
- list_modules_setup_extensions(names)
- list_packages(names)
- list_python_modules(names)
- list_frozen(names)
-
- # Remove ignored packages and modules
- for name in list(names):
- package_name = name.split('.')[0]
- # package_name can be equal to name
- if package_name in IGNORE:
- names.discard(name)
-
- for name in names:
- if "." in name:
- raise Exception("sub-modules must not be listed")
-
- return names
-
-
-def write_modules(fp, names):
- print("// Auto-generated by Tools/scripts/generate_stdlib_module_names.py.",
- file=fp)
- print("// List used to create sys.stdlib_module_names.", file=fp)
- print(file=fp)
- print("static const char* _Py_stdlib_module_names[] = {", file=fp)
- for name in sorted(names):
- print(f'"{name}",', file=fp)
- print("};", file=fp)
-
-
-def main():
- if not sysconfig.is_python_build():
- print(f"ERROR: {sys.executable} is not a Python build",
- file=sys.stderr)
- sys.exit(1)
-
- fp = sys.stdout
- names = list_modules()
- write_modules(fp, names)
-
-
-if __name__ == "__main__":
- main()
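
The array this script prints ends up backing sys.stdlib_module_names (a frozenset of top-level names, available since Python 3.10), which is why submodules are rejected above. A typical consumer:

    import sys

    def is_stdlib(module_name: str) -> bool:
        # Only top-level names are listed, so strip any subpackage path.
        return module_name.partition('.')[0] in sys.stdlib_module_names

    assert is_stdlib('os.path')
    assert not is_stdlib('requests')
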
diff --git a/Tools/scripts/generate_token.py b/Tools/scripts/generate_token.py
deleted file mode 100755
index d8be8b9..0000000
--- a/Tools/scripts/generate_token.py
+++ /dev/null
@@ -1,275 +0,0 @@
-#! /usr/bin/env python3
-# This script generates token related files from Grammar/Tokens:
-#
-# Doc/library/token-list.inc
-# Include/token.h
-# Parser/token.c
-# Lib/token.py
-
-
-NT_OFFSET = 256
-
-def load_tokens(path):
- tok_names = []
- string_to_tok = {}
- ERRORTOKEN = None
- with open(path) as fp:
- for line in fp:
- line = line.strip()
- # strip comments
- i = line.find('#')
- if i >= 0:
- line = line[:i].strip()
- if not line:
- continue
- fields = line.split()
- name = fields[0]
- value = len(tok_names)
- if name == 'ERRORTOKEN':
- ERRORTOKEN = value
- string = fields[1] if len(fields) > 1 else None
- if string:
- string = eval(string)
- string_to_tok[string] = value
- tok_names.append(name)
- return tok_names, ERRORTOKEN, string_to_tok
-
-
-def update_file(file, content):
- try:
- with open(file, 'r') as fobj:
- if fobj.read() == content:
- return False
- except (OSError, ValueError):
- pass
- with open(file, 'w') as fobj:
- fobj.write(content)
- return True
-
-
-token_h_template = """\
-/* Auto-generated by Tools/scripts/generate_token.py */
-
-/* Token types */
-#ifndef Py_INTERNAL_TOKEN_H
-#define Py_INTERNAL_TOKEN_H
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#ifndef Py_BUILD_CORE
-# error "this header requires Py_BUILD_CORE define"
-#endif
-
-#undef TILDE   /* Prevent clash of our definition with a system macro (e.g. AIX ioctl.h) */
-
-%s\
-#define N_TOKENS %d
-#define NT_OFFSET %d
-
-/* Special definitions for cooperation with parser */
-
-#define ISTERMINAL(x) ((x) < NT_OFFSET)
-#define ISNONTERMINAL(x) ((x) >= NT_OFFSET)
-#define ISEOF(x) ((x) == ENDMARKER)
-#define ISWHITESPACE(x) ((x) == ENDMARKER || \\
- (x) == NEWLINE || \\
- (x) == INDENT || \\
- (x) == DEDENT)
-
-
-// Symbols exported for test_peg_generator
-PyAPI_DATA(const char * const) _PyParser_TokenNames[]; /* Token names */
-PyAPI_FUNC(int) _PyToken_OneChar(int);
-PyAPI_FUNC(int) _PyToken_TwoChars(int, int);
-PyAPI_FUNC(int) _PyToken_ThreeChars(int, int, int);
-
-#ifdef __cplusplus
-}
-#endif
-#endif // !Py_INTERNAL_TOKEN_H
-"""
-
-def make_h(infile, outfile='Include/internal/pycore_token.h'):
- tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)
-
- defines = []
- for value, name in enumerate(tok_names[:ERRORTOKEN + 1]):
- defines.append("#define %-15s %d\n" % (name, value))
-
- if update_file(outfile, token_h_template % (
- ''.join(defines),
- len(tok_names),
- NT_OFFSET
- )):
- print("%s regenerated from %s" % (outfile, infile))
-
-
-token_c_template = """\
-/* Auto-generated by Tools/scripts/generate_token.py */
-
-#include "Python.h"
-#include "pycore_token.h"
-
-/* Token names */
-
-const char * const _PyParser_TokenNames[] = {
-%s\
-};
-
-/* Return the token corresponding to a single character */
-
-int
-_PyToken_OneChar(int c1)
-{
-%s\
- return OP;
-}
-
-int
-_PyToken_TwoChars(int c1, int c2)
-{
-%s\
- return OP;
-}
-
-int
-_PyToken_ThreeChars(int c1, int c2, int c3)
-{
-%s\
- return OP;
-}
-"""
-
-def generate_chars_to_token(mapping, n=1):
- result = []
- write = result.append
- indent = ' ' * n
- write(indent)
- write('switch (c%d) {\n' % (n,))
- for c in sorted(mapping):
- write(indent)
- value = mapping[c]
- if isinstance(value, dict):
- write("case '%s':\n" % (c,))
- write(generate_chars_to_token(value, n + 1))
- write(indent)
- write(' break;\n')
- else:
- write("case '%s': return %s;\n" % (c, value))
- write(indent)
- write('}\n')
- return ''.join(result)
-
-def make_c(infile, outfile='Parser/token.c'):
- tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)
- string_to_tok['<>'] = string_to_tok['!=']
- chars_to_token = {}
- for string, value in string_to_tok.items():
- assert 1 <= len(string) <= 3
- name = tok_names[value]
- m = chars_to_token.setdefault(len(string), {})
- for c in string[:-1]:
- m = m.setdefault(c, {})
- m[string[-1]] = name
-
- names = []
- for value, name in enumerate(tok_names):
- if value >= ERRORTOKEN:
- name = '<%s>' % name
- names.append(' "%s",\n' % name)
- names.append(' "<N_TOKENS>",\n')
-
- if update_file(outfile, token_c_template % (
- ''.join(names),
- generate_chars_to_token(chars_to_token[1]),
- generate_chars_to_token(chars_to_token[2]),
- generate_chars_to_token(chars_to_token[3])
- )):
- print("%s regenerated from %s" % (outfile, infile))
-
-
-token_inc_template = """\
-.. Auto-generated by Tools/scripts/generate_token.py
-%s
-.. data:: N_TOKENS
-
-.. data:: NT_OFFSET
-"""
-
-def make_rst(infile, outfile='Doc/library/token-list.inc'):
- tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)
- tok_to_string = {value: s for s, value in string_to_tok.items()}
-
- names = []
- for value, name in enumerate(tok_names[:ERRORTOKEN + 1]):
- names.append('.. data:: %s' % (name,))
- if value in tok_to_string:
- names.append('')
- names.append(' Token value for ``"%s"``.' % tok_to_string[value])
- names.append('')
-
- if update_file(outfile, token_inc_template % '\n'.join(names)):
- print("%s regenerated from %s" % (outfile, infile))
-
-
-token_py_template = '''\
-"""Token constants."""
-# Auto-generated by Tools/scripts/generate_token.py
-
-__all__ = ['tok_name', 'ISTERMINAL', 'ISNONTERMINAL', 'ISEOF']
-
-%s
-N_TOKENS = %d
-# Special definitions for cooperation with parser
-NT_OFFSET = %d
-
-tok_name = {value: name
- for name, value in globals().items()
- if isinstance(value, int) and not name.startswith('_')}
-__all__.extend(tok_name.values())
-
-EXACT_TOKEN_TYPES = {
-%s
-}
-
-def ISTERMINAL(x):
- return x < NT_OFFSET
-
-def ISNONTERMINAL(x):
- return x >= NT_OFFSET
-
-def ISEOF(x):
- return x == ENDMARKER
-'''
-
-def make_py(infile, outfile='Lib/token.py'):
- tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)
-
- constants = []
- for value, name in enumerate(tok_names):
- constants.append('%s = %d' % (name, value))
- constants.insert(ERRORTOKEN,
- "# These aren't used by the C tokenizer but are needed for tokenize.py")
-
- token_types = []
- for s, value in sorted(string_to_tok.items()):
- token_types.append(' %r: %s,' % (s, tok_names[value]))
-
- if update_file(outfile, token_py_template % (
- '\n'.join(constants),
- len(tok_names),
- NT_OFFSET,
- '\n'.join(token_types),
- )):
- print("%s regenerated from %s" % (outfile, infile))
-
-
-def main(op, infile='Grammar/Tokens', *args):
- make = globals()['make_' + op]
- make(infile, *args)
-
-
-if __name__ == '__main__':
- import sys
- main(*sys.argv[1:])
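
make_c() above builds a nested dict keyed first by token length and then by each leading character, which generate_chars_to_token() renders as nested C switch statements. The trie construction on three sample operators:

    chars_to_token = {}
    for string, name in {'!=': 'NOTEQUAL', '**': 'DOUBLESTAR',
                         '*=': 'STAREQUAL'}.items():
        m = chars_to_token.setdefault(len(string), {})
        for c in string[:-1]:
            m = m.setdefault(c, {})
        m[string[-1]] = name
    print(chars_to_token)
    # {2: {'!': {'=': 'NOTEQUAL'}, '*': {'*': 'DOUBLESTAR', '=': 'STAREQUAL'}}}
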
diff --git a/Tools/scripts/parse_html5_entities.py b/Tools/scripts/parse_html5_entities.py
deleted file mode 100755
index 1e5bdad..0000000
--- a/Tools/scripts/parse_html5_entities.py
+++ /dev/null
@@ -1,114 +0,0 @@
-#!/usr/bin/env python3
-"""
-Utility for parsing HTML5 entity definitions available from:
-
- https://html.spec.whatwg.org/entities.json
- https://html.spec.whatwg.org/multipage/named-characters.html
-
-The page now contains the following note:
-
- "This list is static and will not be expanded or changed in the future."
-
-Written by Ezio Melotti and Iuliia Proskurnia.
-"""
-
-import os
-import sys
-import json
-from urllib.request import urlopen
-from html.entities import html5
-
-PAGE_URL = 'https://html.spec.whatwg.org/multipage/named-characters.html'
-ENTITIES_URL = 'https://html.spec.whatwg.org/entities.json'
-HTML5_SECTION_START = '# HTML5 named character references'
-
-def get_json(url):
- """Download the json file from the url and returns a decoded object."""
- with urlopen(url) as f:
- data = f.read().decode('utf-8')
- return json.loads(data)
-
-def create_dict(entities):
- """Create the html5 dict from the decoded json object."""
- new_html5 = {}
- for name, value in entities.items():
- new_html5[name.lstrip('&')] = value['characters']
- return new_html5
-
-def compare_dicts(old, new):
- """Compare the old and new dicts and print the differences."""
- added = new.keys() - old.keys()
- if added:
-        print('{} entity(ies) have been added:'.format(len(added)))
- for name in sorted(added):
- print(' {!r}: {!r}'.format(name, new[name]))
- removed = old.keys() - new.keys()
- if removed:
-        print('{} entity(ies) have been removed:'.format(len(removed)))
- for name in sorted(removed):
- print(' {!r}: {!r}'.format(name, old[name]))
- changed = set()
- for name in (old.keys() & new.keys()):
- if old[name] != new[name]:
- changed.add((name, old[name], new[name]))
- if changed:
-        print('{} entity(ies) have been modified:'.format(len(changed)))
- for item in sorted(changed):
- print(' {!r}: {!r} -> {!r}'.format(*item))
-
-def write_items(entities, file=sys.stdout):
- """Write the items of the dictionary in the specified file."""
- # The keys in the generated dictionary should be sorted
- # in a case-insensitive way, however, when two keys are equal,
- # the uppercase version should come first so that the result
- # looks like: ['Aacute', 'aacute', 'Aacute;', 'aacute;', ...]
- # To do this we first sort in a case-sensitive way (so all the
- # uppercase chars come first) and then sort with key=str.lower.
- # Since the sorting is stable the uppercase keys will eventually
- # be before their equivalent lowercase version.
- keys = sorted(entities.keys())
- keys = sorted(keys, key=str.lower)
- print(HTML5_SECTION_START, file=file)
- print(f'# Generated by {sys.argv[0]!r}\n'
- f'# from {ENTITIES_URL} and\n'
- f'# {PAGE_URL}.\n'
- f'# Map HTML5 named character references to the '
- f'equivalent Unicode character(s).', file=file)
- print('html5 = {', file=file)
- for name in keys:
- print(f' {name!r}: {entities[name]!a},', file=file)
- print('}', file=file)
-
-
-if __name__ == '__main__':
- # without args print a diff between html.entities.html5 and new_html5
- # with --create print the new html5 dict
- # with --patch patch the Lib/html/entities.py file
- new_html5 = create_dict(get_json(ENTITIES_URL))
- if '--create' in sys.argv:
- write_items(new_html5)
- elif '--patch' in sys.argv:
- fname = 'Lib/html/entities.py'
- temp_fname = fname + '.temp'
- with open(fname) as f1, open(temp_fname, 'w') as f2:
- skip = False
- for line in f1:
- if line.startswith(HTML5_SECTION_START):
- write_items(new_html5, file=f2)
- skip = True
- continue
- if skip:
- # skip the old items until the }
- if line.startswith('}'):
- skip = False
- continue
- f2.write(line)
- os.remove(fname)
- os.rename(temp_fname, fname)
- else:
- if html5 == new_html5:
-            print('The current dictionary is up to date.')
- else:
- compare_dicts(html5, new_html5)
- print('Run "./python {0} --patch" to update Lib/html/entities.html '
- 'or "./python {0} --create" to see the generated ' 'dictionary.'.format(__file__))
diff --git a/Tools/scripts/smelly.py b/Tools/scripts/smelly.py
deleted file mode 100755
index 276a5ab..0000000
--- a/Tools/scripts/smelly.py
+++ /dev/null
@@ -1,173 +0,0 @@
-#!/usr/bin/env python
-# Script checking that all symbols exported by libpython start with Py or _Py
-
-import os.path
-import subprocess
-import sys
-import sysconfig
-
-
-ALLOWED_PREFIXES = ('Py', '_Py')
-if sys.platform == 'darwin':
- ALLOWED_PREFIXES += ('__Py',)
-
-IGNORED_EXTENSION = "_ctypes_test"
-# Ignore constructor and destructor functions
-IGNORED_SYMBOLS = {'_init', '_fini'}
-
-
-def is_local_symbol_type(symtype):
- # Ignore local symbols.
-
- # If lowercase, the symbol is usually local; if uppercase, the symbol
- # is global (external). There are however a few lowercase symbols that
- # are shown for special global symbols ("u", "v" and "w").
- if symtype.islower() and symtype not in "uvw":
- return True
-
- # Ignore the initialized data section (d and D) and the BSS data
- # section. For example, ignore "__bss_start (type: B)"
- # and "_edata (type: D)".
- if symtype in "bBdD":
- return True
-
- return False
-
-
-def get_exported_symbols(library, dynamic=False):
- print(f"Check that {library} only exports symbols starting with Py or _Py")
-
- # Only look at dynamic symbols
- args = ['nm', '--no-sort']
- if dynamic:
- args.append('--dynamic')
- args.append(library)
- print("+ %s" % ' '.join(args))
- proc = subprocess.run(args, stdout=subprocess.PIPE, universal_newlines=True)
- if proc.returncode:
- sys.stdout.write(proc.stdout)
- sys.exit(proc.returncode)
-
- stdout = proc.stdout.rstrip()
- if not stdout:
- raise Exception("command output is empty")
- return stdout
-
-
-def get_smelly_symbols(stdout):
- smelly_symbols = []
- python_symbols = []
- local_symbols = []
-
- for line in stdout.splitlines():
- # Split line '0000000000001b80 D PyTextIOWrapper_Type'
- if not line:
- continue
-
- parts = line.split(maxsplit=2)
- if len(parts) < 3:
- continue
-
- symtype = parts[1].strip()
- symbol = parts[-1]
- result = '%s (type: %s)' % (symbol, symtype)
-
- if symbol.startswith(ALLOWED_PREFIXES):
- python_symbols.append(result)
- continue
-
- if is_local_symbol_type(symtype):
- local_symbols.append(result)
- elif symbol in IGNORED_SYMBOLS:
- local_symbols.append(result)
- else:
- smelly_symbols.append(result)
-
- if local_symbols:
- print(f"Ignore {len(local_symbols)} local symbols")
- return smelly_symbols, python_symbols
-
-
-def check_library(library, dynamic=False):
- nm_output = get_exported_symbols(library, dynamic)
- smelly_symbols, python_symbols = get_smelly_symbols(nm_output)
-
- if not smelly_symbols:
- print(f"OK: no smelly symbol found ({len(python_symbols)} Python symbols)")
- return 0
-
- print()
- smelly_symbols.sort()
- for symbol in smelly_symbols:
- print("Smelly symbol: %s" % symbol)
-
- print()
- print("ERROR: Found %s smelly symbols!" % len(smelly_symbols))
- return len(smelly_symbols)
-
-
-def check_extensions():
- print(__file__)
- # This assumes pybuilddir.txt is in same directory as pyconfig.h.
- # In the case of out-of-tree builds, we can't assume pybuilddir.txt is
- # in the source folder.
- config_dir = os.path.dirname(sysconfig.get_config_h_filename())
- filename = os.path.join(config_dir, "pybuilddir.txt")
- try:
- with open(filename, encoding="utf-8") as fp:
- pybuilddir = fp.readline()
- except FileNotFoundError:
- print(f"Cannot check extensions because {filename} does not exist")
-        return 0  # nothing checked; don't count this as a smelly symbol
-
- print(f"Check extension modules from {pybuilddir} directory")
- builddir = os.path.join(config_dir, pybuilddir)
- nsymbol = 0
- for name in os.listdir(builddir):
- if not name.endswith(".so"):
- continue
- if IGNORED_EXTENSION in name:
- print()
- print(f"Ignore extension: {name}")
- continue
-
- print()
- filename = os.path.join(builddir, name)
- nsymbol += check_library(filename, dynamic=True)
-
- return nsymbol
-
-
-def main():
- nsymbol = 0
-
- # static library
- LIBRARY = sysconfig.get_config_var('LIBRARY')
- if not LIBRARY:
- raise Exception("failed to get LIBRARY variable from sysconfig")
- if os.path.exists(LIBRARY):
- nsymbol += check_library(LIBRARY)
-
- # dynamic library
- LDLIBRARY = sysconfig.get_config_var('LDLIBRARY')
- if not LDLIBRARY:
- raise Exception("failed to get LDLIBRARY variable from sysconfig")
- if LDLIBRARY != LIBRARY:
- print()
- nsymbol += check_library(LDLIBRARY, dynamic=True)
-
- # Check extension modules like _ssl.cpython-310d-x86_64-linux-gnu.so
- nsymbol += check_extensions()
-
- if nsymbol:
- print()
- print(f"ERROR: Found {nsymbol} smelly symbols in total!")
- sys.exit(1)
-
- print()
- print(f"OK: all exported symbols of all libraries "
- f"are prefixed with {' or '.join(map(repr, ALLOWED_PREFIXES))}")
-
-
-if __name__ == "__main__":
- main()
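
The nm(1) parsing above is purely line oriented: address, one-letter symbol type, name. A quick way to exercise the classifier without a real build, assuming get_smelly_symbols() and its helpers from the script above are in scope:

    fake_nm_output = '\n'.join([
        '0000000000001b80 D PyTextIOWrapper_Type',   # Py prefix: fine
        '0000000000002000 T _PyObject_New',          # _Py prefix: fine
        '0000000000003000 t helper_function',        # lowercase type: local, ignored
        '0000000000004000 T totally_smelly_symbol',  # exported, bad prefix: smelly
    ])
    smelly, good = get_smelly_symbols(fake_nm_output)
    # (also prints "Ignore 1 local symbols")
    print(smelly)  # ['totally_smelly_symbol (type: T)']
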
diff --git a/Tools/scripts/stable_abi.py b/Tools/scripts/stable_abi.py
deleted file mode 100755
index d557e10..0000000
--- a/Tools/scripts/stable_abi.py
+++ /dev/null
@@ -1,754 +0,0 @@
-"""Check the stable ABI manifest or generate files from it
-
-By default, the tool only checks existing files/libraries.
-Pass --generate to recreate auto-generated files instead.
-
-For actions that take a FILENAME, the filename can be left out to use a default
-(relative to the manifest file, as they appear in the CPython codebase).
-"""
-
-from functools import partial
-from pathlib import Path
-import dataclasses
-import subprocess
-import sysconfig
-import argparse
-import textwrap
-import tomllib
-import difflib
-import pprint
-import sys
-import os
-import os.path
-import io
-import re
-import csv
-
-MISSING = object()
-
-EXCLUDED_HEADERS = {
- "bytes_methods.h",
- "cellobject.h",
- "classobject.h",
- "code.h",
- "compile.h",
- "datetime.h",
- "dtoa.h",
- "frameobject.h",
- "genobject.h",
- "longintrepr.h",
- "parsetok.h",
- "pyatomic.h",
- "pytime.h",
- "token.h",
- "ucnhash.h",
-}
-MACOS = (sys.platform == "darwin")
-UNIXY = MACOS or (sys.platform == "linux") # XXX should this be "not Windows"?
-
-
-# The stable ABI manifest (Misc/stable_abi.toml) exists only to fill the
-# following dataclasses.
-# Feel free to change its syntax (and the `parse_manifest` function)
-# to better serve that purpose (while keeping it human-readable).
-
-class Manifest:
- """Collection of `ABIItem`s forming the stable ABI/limited API."""
- def __init__(self):
- self.contents = dict()
-
- def add(self, item):
- if item.name in self.contents:
- # We assume that stable ABI items do not share names,
- # even if they're different kinds (e.g. function vs. macro).
- raise ValueError(f'duplicate ABI item {item.name}')
- self.contents[item.name] = item
-
- def select(self, kinds, *, include_abi_only=True, ifdef=None):
- """Yield selected items of the manifest
-
- kinds: set of requested kinds, e.g. {'function', 'macro'}
- include_abi_only: if True (default), include all items of the
- stable ABI.
- If False, include only items from the limited API
- (i.e. items people should use today)
- ifdef: set of feature macros (e.g. {'HAVE_FORK', 'MS_WINDOWS'}).
- If None (default), items are not filtered by this. (This is
- different from the empty set, which filters out all such
- conditional items.)
- """
- for name, item in sorted(self.contents.items()):
- if item.kind not in kinds:
- continue
- if item.abi_only and not include_abi_only:
- continue
- if (ifdef is not None
- and item.ifdef is not None
- and item.ifdef not in ifdef):
- continue
- yield item
-
- def dump(self):
- """Yield lines to recreate the manifest file (sans comments/newlines)"""
- for item in self.contents.values():
- fields = dataclasses.fields(item)
- yield f"[{item.kind}.{item.name}]"
- for field in fields:
- if field.name in {'name', 'value', 'kind'}:
- continue
- value = getattr(item, field.name)
- if value == field.default:
- pass
- elif value is True:
- yield f" {field.name} = true"
- elif value:
- yield f" {field.name} = {value!r}"
-
-
-itemclasses = {}
-def itemclass(kind):
- """Register the decorated class in `itemclasses`"""
- def decorator(cls):
- itemclasses[kind] = cls
- return cls
- return decorator
-
-@itemclass('function')
-@itemclass('macro')
-@itemclass('data')
-@itemclass('const')
-@itemclass('typedef')
-@dataclasses.dataclass
-class ABIItem:
- """Information on one item (function, macro, struct, etc.)"""
-
- name: str
- kind: str
- added: str = None
- abi_only: bool = False
- ifdef: str = None
-
-@itemclass('feature_macro')
-@dataclasses.dataclass(kw_only=True)
-class FeatureMacro(ABIItem):
- name: str
- doc: str
- windows: bool = False
- abi_only: bool = True
-
-@itemclass('struct')
-@dataclasses.dataclass(kw_only=True)
-class Struct(ABIItem):
- struct_abi_kind: str
- members: list = None
-
-
-def parse_manifest(file):
- """Parse the given file (iterable of lines) to a Manifest"""
-
- manifest = Manifest()
-
- data = tomllib.load(file)
-
- for kind, itemclass in itemclasses.items():
- for name, item_data in data[kind].items():
- try:
- item = itemclass(name=name, kind=kind, **item_data)
- manifest.add(item)
- except BaseException as exc:
- exc.add_note(f'in {kind} {name}')
- raise
-
- return manifest
-
-# The tool can run individual "actions".
-# Most actions are "generators", which generate a single file from the
-# manifest. (Checking works by generating a temp file & comparing.)
-# Other actions, like "--unixy-check", don't work on a single file.
-
-generators = []
-def generator(var_name, default_path):
- """Decorates a file generator: function that writes to a file"""
- def _decorator(func):
- func.var_name = var_name
- func.arg_name = '--' + var_name.replace('_', '-')
- func.default_path = default_path
- generators.append(func)
- return func
- return _decorator
-
-
-@generator("python3dll", 'PC/python3dll.c')
-def gen_python3dll(manifest, args, outfile):
- """Generate/check the source for the Windows stable ABI library"""
- write = partial(print, file=outfile)
- write(textwrap.dedent(r"""
- /* Re-export stable Python ABI */
-
- /* Generated by Tools/scripts/stable_abi.py */
-
- #ifdef _M_IX86
- #define DECORATE "_"
- #else
- #define DECORATE
- #endif
-
- #define EXPORT_FUNC(name) \
- __pragma(comment(linker, "/EXPORT:" DECORATE #name "=" PYTHON_DLL_NAME "." #name))
- #define EXPORT_DATA(name) \
- __pragma(comment(linker, "/EXPORT:" DECORATE #name "=" PYTHON_DLL_NAME "." #name ",DATA"))
- """))
-
- def sort_key(item):
- return item.name.lower()
-
- windows_feature_macros = {
- item.name for item in manifest.select({'feature_macro'}) if item.windows
- }
- for item in sorted(
- manifest.select(
- {'function'},
- include_abi_only=True,
- ifdef=windows_feature_macros),
- key=sort_key):
- write(f'EXPORT_FUNC({item.name})')
-
- write()
-
- for item in sorted(
- manifest.select(
- {'data'},
- include_abi_only=True,
- ifdef=windows_feature_macros),
- key=sort_key):
- write(f'EXPORT_DATA({item.name})')
-
-REST_ROLES = {
- 'function': 'function',
- 'data': 'var',
- 'struct': 'type',
- 'macro': 'macro',
- # 'const': 'const', # all undocumented
- 'typedef': 'type',
-}
-
-@generator("doc_list", 'Doc/data/stable_abi.dat')
-def gen_doc_annotations(manifest, args, outfile):
- """Generate/check the stable ABI list for documentation annotations"""
- writer = csv.DictWriter(
- outfile,
- ['role', 'name', 'added', 'ifdef_note', 'struct_abi_kind'],
- lineterminator='\n')
- writer.writeheader()
- for item in manifest.select(REST_ROLES.keys(), include_abi_only=False):
- if item.ifdef:
- ifdef_note = manifest.contents[item.ifdef].doc
- else:
- ifdef_note = None
- row = {
- 'role': REST_ROLES[item.kind],
- 'name': item.name,
- 'added': item.added,
- 'ifdef_note': ifdef_note}
- rows = [row]
- if item.kind == 'struct':
- row['struct_abi_kind'] = item.struct_abi_kind
- for member_name in item.members or ():
- rows.append({
- 'role': 'member',
- 'name': f'{item.name}.{member_name}',
- 'added': item.added})
- writer.writerows(rows)
-
-@generator("ctypes_test", 'Lib/test/test_stable_abi_ctypes.py')
-def gen_ctypes_test(manifest, args, outfile):
- """Generate/check the ctypes-based test for exported symbols"""
- write = partial(print, file=outfile)
- write(textwrap.dedent('''
- # Generated by Tools/scripts/stable_abi.py
-
- """Test that all symbols of the Stable ABI are accessible using ctypes
- """
-
- import sys
- import unittest
- from test.support.import_helper import import_module
- from _testcapi import get_feature_macros
-
- feature_macros = get_feature_macros()
- ctypes_test = import_module('ctypes')
-
- class TestStableABIAvailability(unittest.TestCase):
- def test_available_symbols(self):
-
- for symbol_name in SYMBOL_NAMES:
- with self.subTest(symbol_name):
- ctypes_test.pythonapi[symbol_name]
-
- def test_feature_macros(self):
- self.assertEqual(
- set(get_feature_macros()), EXPECTED_FEATURE_MACROS)
-
- # The feature macros for Windows are used in creating the DLL
- # definition, so they must be known on all platforms.
- # If we are on Windows, we check that the hardcoded data matches
- # the reality.
- @unittest.skipIf(sys.platform != "win32", "Windows specific test")
- def test_windows_feature_macros(self):
- for name, value in WINDOWS_FEATURE_MACROS.items():
- if value != 'maybe':
- with self.subTest(name):
- self.assertEqual(feature_macros[name], value)
-
- SYMBOL_NAMES = (
- '''))
- items = manifest.select(
- {'function', 'data'},
- include_abi_only=True,
- )
- optional_items = {}
- for item in items:
- if item.name in (
- # Some symbols aren't exported on all platforms.
- # This is a bug: https://bugs.python.org/issue44133
- 'PyModule_Create2', 'PyModule_FromDefAndSpec2',
- ):
- continue
- if item.ifdef:
- optional_items.setdefault(item.ifdef, []).append(item.name)
- else:
- write(f' "{item.name}",')
- write(")")
- for ifdef, names in optional_items.items():
- write(f"if feature_macros[{ifdef!r}]:")
- write(f" SYMBOL_NAMES += (")
- for name in names:
- write(f" {name!r},")
- write(" )")
- write("")
- feature_macros = list(manifest.select({'feature_macro'}))
- feature_names = sorted(m.name for m in feature_macros)
- write(f"EXPECTED_FEATURE_MACROS = set({pprint.pformat(feature_names)})")
-
- windows_feature_macros = {m.name: m.windows for m in feature_macros}
- write(f"WINDOWS_FEATURE_MACROS = {pprint.pformat(windows_feature_macros)}")
-
-
-@generator("testcapi_feature_macros", 'Modules/_testcapi_feature_macros.inc')
-def gen_testcapi_feature_macros(manifest, args, outfile):
- """Generate/check the stable ABI list for documentation annotations"""
- write = partial(print, file=outfile)
- write('// Generated by Tools/scripts/stable_abi.py')
- write()
- write('// Add an entry in dict `result` for each Stable ABI feature macro.')
- write()
- for macro in manifest.select({'feature_macro'}):
- name = macro.name
- write(f'#ifdef {name}')
- write(f' res = PyDict_SetItemString(result, "{name}", Py_True);')
- write('#else')
- write(f' res = PyDict_SetItemString(result, "{name}", Py_False);')
- write('#endif')
- write('if (res) {')
- write(' Py_DECREF(result); return NULL;')
- write('}')
- write()
-
-
-def generate_or_check(manifest, args, path, func):
- """Generate/check a file with a single generator
-
- Return True if successful; False if a comparison failed.
- """
-
- outfile = io.StringIO()
- func(manifest, args, outfile)
- generated = outfile.getvalue()
- existing = path.read_text()
-
- if generated != existing:
- if args.generate:
- path.write_text(generated)
- else:
- print(f'File {path} differs from expected!')
- diff = difflib.unified_diff(
- generated.splitlines(), existing.splitlines(),
- str(path), '<expected>',
- lineterm='',
- )
- for line in diff:
- print(line)
- return False
- return True
-
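
generate_or_check() above is the heart of the tool's check mode: every generator renders into a StringIO, and --generate decides whether a mismatch overwrites the target or prints a unified diff. A self-contained reduction of the pattern (the names here are illustrative, not the script's API):

    import difflib
    import io
    from pathlib import Path

    def render_or_diff(path: Path, render, generate: bool) -> bool:
        out = io.StringIO()
        render(out)
        new, old = out.getvalue(), path.read_text()
        if new == old:
            return True
        if generate:
            path.write_text(new)
            return True
        print('\n'.join(difflib.unified_diff(
            new.splitlines(), old.splitlines(),
            str(path), '<expected>', lineterm='')))
        return False
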
-
-def do_unixy_check(manifest, args):
- """Check headers & library using "Unixy" tools (GCC/clang, binutils)"""
- okay = True
-
- # Get all macros first: we'll need feature macros like HAVE_FORK and
- # MS_WINDOWS for everything else
- present_macros = gcc_get_limited_api_macros(['Include/Python.h'])
- feature_macros = set(m.name for m in manifest.select({'feature_macro'}))
- feature_macros &= present_macros
-
- # Check that we have all needed macros
- expected_macros = set(
- item.name for item in manifest.select({'macro'})
- )
- missing_macros = expected_macros - present_macros
- okay &= _report_unexpected_items(
- missing_macros,
-        'Some macros are not defined in "Include/Python.h" '
-        + 'with Py_LIMITED_API:')
-
- expected_symbols = set(item.name for item in manifest.select(
- {'function', 'data'}, include_abi_only=True, ifdef=feature_macros,
- ))
-
- # Check the static library (*.a)
- LIBRARY = sysconfig.get_config_var("LIBRARY")
- if not LIBRARY:
- raise Exception("failed to get LIBRARY variable from sysconfig")
- if os.path.exists(LIBRARY):
- okay &= binutils_check_library(
- manifest, LIBRARY, expected_symbols, dynamic=False)
-
- # Check the dynamic library (*.so)
- LDLIBRARY = sysconfig.get_config_var("LDLIBRARY")
- if not LDLIBRARY:
- raise Exception("failed to get LDLIBRARY variable from sysconfig")
- okay &= binutils_check_library(
-        manifest, LDLIBRARY, expected_symbols, dynamic=True)
-
- # Check definitions in the header files
- expected_defs = set(item.name for item in manifest.select(
- {'function', 'data'}, include_abi_only=False, ifdef=feature_macros,
- ))
- found_defs = gcc_get_limited_api_definitions(['Include/Python.h'])
- missing_defs = expected_defs - found_defs
- okay &= _report_unexpected_items(
- missing_defs,
- 'Some expected declarations were not declared in '
- + '"Include/Python.h" with Py_LIMITED_API:')
-
- # Some Limited API macros are defined in terms of private symbols.
-    # These are not part of the Limited API (even though they're defined with
- # Py_LIMITED_API). They must be part of the Stable ABI, though.
- private_symbols = {n for n in expected_symbols if n.startswith('_')}
- extra_defs = found_defs - expected_defs - private_symbols
- okay &= _report_unexpected_items(
- extra_defs,
- 'Some extra declarations were found in "Include/Python.h" '
- + 'with Py_LIMITED_API:')
-
- return okay
-
-
-def _report_unexpected_items(items, msg):
- """If there are any `items`, report them using "msg" and return false"""
- if items:
- print(msg, file=sys.stderr)
- for item in sorted(items):
- print(' -', item, file=sys.stderr)
- return False
- return True
-
-
-def binutils_get_exported_symbols(library, dynamic=False):
- """Retrieve exported symbols using the nm(1) tool from binutils"""
- # Only look at dynamic symbols
- args = ["nm", "--no-sort"]
- if dynamic:
- args.append("--dynamic")
- args.append(library)
- proc = subprocess.run(args, stdout=subprocess.PIPE, universal_newlines=True)
- if proc.returncode:
- sys.stdout.write(proc.stdout)
- sys.exit(proc.returncode)
-
- stdout = proc.stdout.rstrip()
- if not stdout:
- raise Exception("command output is empty")
-
- for line in stdout.splitlines():
- # Split line '0000000000001b80 D PyTextIOWrapper_Type'
- if not line:
- continue
-
- parts = line.split(maxsplit=2)
- if len(parts) < 3:
- continue
-
- symbol = parts[-1]
- if MACOS and symbol.startswith("_"):
- yield symbol[1:]
- else:
- yield symbol
-
-
-def binutils_check_library(manifest, library, expected_symbols, dynamic):
- """Check that library exports all expected_symbols"""
- available_symbols = set(binutils_get_exported_symbols(library, dynamic))
- missing_symbols = expected_symbols - available_symbols
- if missing_symbols:
- print(textwrap.dedent(f"""\
- Some symbols from the limited API are missing from {library}:
- {', '.join(missing_symbols)}
-
-            This means the library fails to export some symbols that the
-            limited API expects.
-            Typically, a symbol, function implementation or prototype
-            belonging to a symbol in the limited API has been deleted or
-            is missing.
- """), file=sys.stderr)
- return False
- return True
-
-
-def gcc_get_limited_api_macros(headers):
- """Get all limited API macros from headers.
-
- Runs the preprocessor over all the header files in "Include" setting
- "-DPy_LIMITED_API" to the correct value for the running version of the
- interpreter and extracting all macro definitions (via adding -dM to the
- compiler arguments).
-
- Requires Python built with a GCC-compatible compiler. (clang might work)
- """
-
- api_hexversion = sys.version_info.major << 24 | sys.version_info.minor << 16
-
-    preprocessor_output_with_macros = subprocess.check_output(
- sysconfig.get_config_var("CC").split()
- + [
- # Prevent the expansion of the exported macros so we can
- # capture them later
- "-DSIZEOF_WCHAR_T=4", # The actual value is not important
- f"-DPy_LIMITED_API={api_hexversion}",
- "-I.",
- "-I./Include",
- "-dM",
- "-E",
- ]
- + [str(file) for file in headers],
- text=True,
- )
-
- return {
- target
- for target in re.findall(
- r"#define (\w+)", preprocesor_output_with_macros
- )
- }
-
-
-def gcc_get_limited_api_definitions(headers):
- """Get all limited API definitions from headers.
-
- Run the preprocessor over all the header files in "Include" setting
- "-DPy_LIMITED_API" to the correct value for the running version of the
- interpreter.
-
- The limited API symbols will be extracted from the output of this command
- as it includes the prototypes and definitions of all the exported symbols
- that are in the limited api.
-
-    This function does *NOT* extract the macros defined in the limited API.
-
- Requires Python built with a GCC-compatible compiler. (clang might work)
- """
- api_hexversion = sys.version_info.major << 24 | sys.version_info.minor << 16
-    preprocessor_output = subprocess.check_output(
- sysconfig.get_config_var("CC").split()
- + [
- # Prevent the expansion of the exported macros so we can capture
- # them later
- "-DPyAPI_FUNC=__PyAPI_FUNC",
- "-DPyAPI_DATA=__PyAPI_DATA",
- "-DEXPORT_DATA=__EXPORT_DATA",
- "-D_Py_NO_RETURN=",
- "-DSIZEOF_WCHAR_T=4", # The actual value is not important
- f"-DPy_LIMITED_API={api_hexversion}",
- "-I.",
- "-I./Include",
- "-E",
- ]
- + [str(file) for file in headers],
- text=True,
- stderr=subprocess.DEVNULL,
- )
- stable_functions = set(
- re.findall(r"__PyAPI_FUNC\(.*?\)\s*(.*?)\s*\(", preprocesor_output)
- )
- stable_exported_data = set(
- re.findall(r"__EXPORT_DATA\((.*?)\)", preprocesor_output)
- )
- stable_data = set(
- re.findall(r"__PyAPI_DATA\(.*?\)[\s\*\(]*([^);]*)\)?.*;", preprocesor_output)
- )
- return stable_data | stable_exported_data | stable_functions
-
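
Both gcc_* helpers above work by running the C preprocessor and scraping its text output; -dM makes it dump every active #define, from which a plain regex recovers the macro names. The extraction step on a canned sample, so no compiler run is needed:

    import re

    sample_dM_output = '''\
    #define Py_LIMITED_API 0x030c0000
    #define PyAPI_FUNC(RTYPE) RTYPE
    #define HAVE_FORK 1
    '''
    print(re.findall(r"#define (\w+)", sample_dM_output))
    # ['Py_LIMITED_API', 'PyAPI_FUNC', 'HAVE_FORK']
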
-def check_private_names(manifest):
- """Ensure limited API doesn't contain private names
-
- Names prefixed by an underscore are private by definition.
- """
- for name, item in manifest.contents.items():
- if name.startswith('_') and not item.abi_only:
- raise ValueError(
- f'`{name}` is private (underscore-prefixed) and should be '
-                + 'removed from the stable ABI list or marked `abi_only`')
-
-
-def check_dump(manifest, filename):
- """Check that manifest.dump() corresponds to the data.
-
- Mainly useful when debugging this script.
- """
- dumped = tomllib.loads('\n'.join(manifest.dump()))
- with filename.open('rb') as file:
- from_file = tomllib.load(file)
- if dumped != from_file:
-        print('Dump differs from loaded data!', file=sys.stderr)
- diff = difflib.unified_diff(
- pprint.pformat(dumped).splitlines(),
- pprint.pformat(from_file).splitlines(),
- '<dumped>', str(filename),
- lineterm='',
- )
- for line in diff:
- print(line, file=sys.stderr)
- return False
- else:
- return True
-
-
-def main():
- parser = argparse.ArgumentParser(
- description=__doc__,
- formatter_class=argparse.RawDescriptionHelpFormatter,
- )
- parser.add_argument(
- "file", type=Path, metavar='FILE',
- help="file with the stable abi manifest",
- )
- parser.add_argument(
- "--generate", action='store_true',
- help="generate file(s), rather than just checking them",
- )
- parser.add_argument(
- "--generate-all", action='store_true',
- help="as --generate, but generate all file(s) using default filenames."
- + " (unlike --all, does not run any extra checks)",
- )
- parser.add_argument(
- "-a", "--all", action='store_true',
- help="run all available checks using default filenames",
- )
- parser.add_argument(
- "-l", "--list", action='store_true',
- help="list available generators and their default filenames; then exit",
- )
- parser.add_argument(
- "--dump", action='store_true',
- help="dump the manifest contents (used for debugging the parser)",
- )
-
- actions_group = parser.add_argument_group('actions')
- for gen in generators:
- actions_group.add_argument(
- gen.arg_name, dest=gen.var_name,
- type=str, nargs="?", default=MISSING,
- metavar='FILENAME',
- help=gen.__doc__,
- )
- actions_group.add_argument(
- '--unixy-check', action='store_true',
- help=do_unixy_check.__doc__,
- )
- args = parser.parse_args()
-
- base_path = args.file.parent.parent
-
- if args.list:
- for gen in generators:
- print(f'{gen.arg_name}: {base_path / gen.default_path}')
- sys.exit(0)
-
- run_all_generators = args.generate_all
-
- if args.generate_all:
- args.generate = True
-
- if args.all:
- run_all_generators = True
- args.unixy_check = True
-
- try:
- file = args.file.open('rb')
- except FileNotFoundError as err:
- if args.file.suffix == '.txt':
- # Provide a better error message
- suggestion = args.file.with_suffix('.toml')
- raise FileNotFoundError(
-                f'{args.file} not found. Did you mean {suggestion}?') from err
- raise
- with file:
- manifest = parse_manifest(file)
-
- check_private_names(manifest)
-
- # Remember results of all actions (as booleans).
- # At the end we'll check that at least one action was run,
- # and also fail if any are false.
- results = {}
-
- if args.dump:
- for line in manifest.dump():
- print(line)
- results['dump'] = check_dump(manifest, args.file)
-
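-    # A generator runs when its option was given; if the option had no
-    # value, its default path is used. With --generate-all/--all the
-    # remaining generators also run with their default paths; otherwise
-    # generators left as MISSING are skipped.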
- for gen in generators:
- filename = getattr(args, gen.var_name)
- if filename is None or (run_all_generators and filename is MISSING):
- filename = base_path / gen.default_path
- elif filename is MISSING:
- continue
-
- results[gen.var_name] = generate_or_check(manifest, args, filename, gen)
-
- if args.unixy_check:
- results['unixy_check'] = do_unixy_check(manifest, args)
-
- if not results:
- if args.generate:
- parser.error('No file specified. Use --help for usage.')
- parser.error('No check specified. Use --help for usage.')
-
- failed_results = [name for name, result in results.items() if not result]
-
- if failed_results:
- raise Exception(f"""
- These checks related to the stable ABI did not succeed:
- {', '.join(failed_results)}
-
- If you see diffs in the output, files derived from the stable
-        ABI manifest were not regenerated.
- Run `make regen-limited-abi` to fix this.
-
- Otherwise, see the error(s) above.
-
- The stable ABI manifest is at: {args.file}
- Note that there is a process to follow when modifying it.
-
- You can read more about the limited API and its contracts at:
-
- https://docs.python.org/3/c-api/stable.html
-
- And in PEP 384:
-
- https://peps.python.org/pep-0384/
- """)
-
-
-if __name__ == "__main__":
- main()
diff --git a/Tools/scripts/umarshal.py b/Tools/scripts/umarshal.py
deleted file mode 100644
index f61570c..0000000
--- a/Tools/scripts/umarshal.py
+++ /dev/null
@@ -1,325 +0,0 @@
-# Implementation of marshal.loads() in pure Python
-
-import ast
-
-from typing import Any, Tuple
-
-
-class Type:
- # Adapted from marshal.c
- NULL = ord('0')
- NONE = ord('N')
- FALSE = ord('F')
- TRUE = ord('T')
- STOPITER = ord('S')
- ELLIPSIS = ord('.')
- INT = ord('i')
- INT64 = ord('I')
- FLOAT = ord('f')
- BINARY_FLOAT = ord('g')
- COMPLEX = ord('x')
- BINARY_COMPLEX = ord('y')
- LONG = ord('l')
- STRING = ord('s')
- INTERNED = ord('t')
- REF = ord('r')
- TUPLE = ord('(')
- LIST = ord('[')
- DICT = ord('{')
- CODE = ord('c')
- UNICODE = ord('u')
- UNKNOWN = ord('?')
- SET = ord('<')
- FROZENSET = ord('>')
- ASCII = ord('a')
- ASCII_INTERNED = ord('A')
- SMALL_TUPLE = ord(')')
- SHORT_ASCII = ord('z')
- SHORT_ASCII_INTERNED = ord('Z')
-
-
-FLAG_REF = 0x80 # with a type, add obj to index
-
-NULL = object() # marker
-
-# Cell kinds
-CO_FAST_LOCAL = 0x20
-CO_FAST_CELL = 0x40
-CO_FAST_FREE = 0x80
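-# (bit flags tested with `kind & select_kind` in Code.get_localsplus_names;
-# a name may carry more than one kind, e.g. a local that is also
-# captured by a closure)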
-
-
-class Code:
- def __init__(self, **kwds: Any):
- self.__dict__.update(kwds)
-
- def __repr__(self) -> str:
- return f"Code(**{self.__dict__})"
-
-    co_localsplusnames: Tuple[str, ...]
-    co_localspluskinds: Tuple[int, ...]
-
- def get_localsplus_names(self, select_kind: int) -> Tuple[str, ...]:
- varnames: list[str] = []
- for name, kind in zip(self.co_localsplusnames,
- self.co_localspluskinds):
- if kind & select_kind:
- varnames.append(name)
- return tuple(varnames)
-
- @property
- def co_varnames(self) -> Tuple[str, ...]:
- return self.get_localsplus_names(CO_FAST_LOCAL)
-
- @property
- def co_cellvars(self) -> Tuple[str, ...]:
- return self.get_localsplus_names(CO_FAST_CELL)
-
- @property
- def co_freevars(self) -> Tuple[str, ...]:
- return self.get_localsplus_names(CO_FAST_FREE)
-
- @property
- def co_nlocals(self) -> int:
- return len(self.co_varnames)
-
-
-class Reader:
- # A fairly literal translation of the marshal reader.
-
- def __init__(self, data: bytes):
- self.data: bytes = data
- self.end: int = len(self.data)
- self.pos: int = 0
- self.refs: list[Any] = []
- self.level: int = 0
-
- def r_string(self, n: int) -> bytes:
- assert 0 <= n <= self.end - self.pos
- buf = self.data[self.pos : self.pos + n]
- self.pos += n
- return buf
-
- def r_byte(self) -> int:
- buf = self.r_string(1)
- return buf[0]
-
- def r_short(self) -> int:
- buf = self.r_string(2)
- x = buf[0]
- x |= buf[1] << 8
- x |= -(x & (1<<15)) # Sign-extend
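-        # e.g. b"\xff\xff" reads as 0xFFFF; OR-ing in -(1 << 15)
-        # yields -1, its value as a 16-bit two's-complement integer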
- return x
-
- def r_long(self) -> int:
- buf = self.r_string(4)
- x = buf[0]
- x |= buf[1] << 8
- x |= buf[2] << 16
- x |= buf[3] << 24
- x |= -(x & (1<<31)) # Sign-extend
- return x
-
- def r_long64(self) -> int:
- buf = self.r_string(8)
- x = buf[0]
- x |= buf[1] << 8
- x |= buf[2] << 16
- x |= buf[3] << 24
-        x |= buf[4] << 32
-        x |= buf[5] << 40
-        x |= buf[6] << 48
-        x |= buf[7] << 56
- x |= -(x & (1<<63)) # Sign-extend
- return x
-
- def r_PyLong(self) -> int:
- n = self.r_long()
- size = abs(n)
- x = 0
-        # marshal stores the magnitude as little-endian 15-bit digits;
-        # each digit fits in 15 bits, so r_short's sign-extension never fires
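-        # e.g. digits [1, 2] decode to 1 + (2 << 15) == 65537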
- for i in range(size):
- x |= self.r_short() << i*15
- if n < 0:
- x = -x
- return x
-
- def r_float_bin(self) -> float:
- buf = self.r_string(8)
- import struct # Lazy import to avoid breaking UNIX build
- return struct.unpack("d", buf)[0]
-
- def r_float_str(self) -> float:
- n = self.r_byte()
- buf = self.r_string(n)
- return ast.literal_eval(buf.decode("ascii"))
-
- def r_ref_reserve(self, flag: int) -> int:
- if flag:
- idx = len(self.refs)
- self.refs.append(None)
- return idx
- else:
- return 0
-
- def r_ref_insert(self, obj: Any, idx: int, flag: int) -> Any:
- if flag:
- self.refs[idx] = obj
- return obj
-
- def r_ref(self, obj: Any, flag: int) -> Any:
- assert flag & FLAG_REF
- self.refs.append(obj)
- return obj
-
- def r_object(self) -> Any:
- old_level = self.level
- try:
- return self._r_object()
- finally:
- self.level = old_level
-
- def _r_object(self) -> Any:
- code = self.r_byte()
- flag = code & FLAG_REF
- type = code & ~FLAG_REF
- # print(" "*self.level + f"{code} {flag} {type} {chr(type)!r}")
- self.level += 1
-
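-        # When FLAG_REF is set, the freshly built object is recorded in
-        # the ref table so later Type.REF entries can resolve to it.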
- def R_REF(obj: Any) -> Any:
- if flag:
- obj = self.r_ref(obj, flag)
- return obj
-
- if type == Type.NULL:
- return NULL
- elif type == Type.NONE:
- return None
- elif type == Type.ELLIPSIS:
- return Ellipsis
- elif type == Type.FALSE:
- return False
- elif type == Type.TRUE:
- return True
- elif type == Type.INT:
- return R_REF(self.r_long())
- elif type == Type.INT64:
- return R_REF(self.r_long64())
- elif type == Type.LONG:
- return R_REF(self.r_PyLong())
- elif type == Type.FLOAT:
- return R_REF(self.r_float_str())
- elif type == Type.BINARY_FLOAT:
- return R_REF(self.r_float_bin())
- elif type == Type.COMPLEX:
- return R_REF(complex(self.r_float_str(),
- self.r_float_str()))
- elif type == Type.BINARY_COMPLEX:
- return R_REF(complex(self.r_float_bin(),
- self.r_float_bin()))
- elif type == Type.STRING:
- n = self.r_long()
- return R_REF(self.r_string(n))
- elif type == Type.ASCII_INTERNED or type == Type.ASCII:
- n = self.r_long()
- return R_REF(self.r_string(n).decode("ascii"))
- elif type == Type.SHORT_ASCII_INTERNED or type == Type.SHORT_ASCII:
- n = self.r_byte()
- return R_REF(self.r_string(n).decode("ascii"))
- elif type == Type.INTERNED or type == Type.UNICODE:
- n = self.r_long()
- return R_REF(self.r_string(n).decode("utf8", "surrogatepass"))
- elif type == Type.SMALL_TUPLE:
- n = self.r_byte()
- idx = self.r_ref_reserve(flag)
- retval: Any = tuple(self.r_object() for _ in range(n))
- self.r_ref_insert(retval, idx, flag)
- return retval
- elif type == Type.TUPLE:
- n = self.r_long()
- idx = self.r_ref_reserve(flag)
- retval = tuple(self.r_object() for _ in range(n))
- self.r_ref_insert(retval, idx, flag)
- return retval
- elif type == Type.LIST:
- n = self.r_long()
- retval = R_REF([])
- for _ in range(n):
- retval.append(self.r_object())
- return retval
- elif type == Type.DICT:
- retval = R_REF({})
- while True:
- key = self.r_object()
-                if key is NULL:
- break
- val = self.r_object()
- retval[key] = val
- return retval
- elif type == Type.SET:
- n = self.r_long()
- retval = R_REF(set())
- for _ in range(n):
- v = self.r_object()
- retval.add(v)
- return retval
- elif type == Type.FROZENSET:
- n = self.r_long()
- s: set[Any] = set()
- idx = self.r_ref_reserve(flag)
- for _ in range(n):
- v = self.r_object()
- s.add(v)
- retval = frozenset(s)
- self.r_ref_insert(retval, idx, flag)
- return retval
- elif type == Type.CODE:
- retval = R_REF(Code())
- retval.co_argcount = self.r_long()
- retval.co_posonlyargcount = self.r_long()
- retval.co_kwonlyargcount = self.r_long()
- retval.co_stacksize = self.r_long()
- retval.co_flags = self.r_long()
- retval.co_code = self.r_object()
- retval.co_consts = self.r_object()
- retval.co_names = self.r_object()
- retval.co_localsplusnames = self.r_object()
- retval.co_localspluskinds = self.r_object()
- retval.co_filename = self.r_object()
- retval.co_name = self.r_object()
- retval.co_qualname = self.r_object()
- retval.co_firstlineno = self.r_long()
- retval.co_linetable = self.r_object()
- retval.co_exceptiontable = self.r_object()
- return retval
- elif type == Type.REF:
- n = self.r_long()
- retval = self.refs[n]
- assert retval is not None
- return retval
- else:
- breakpoint()
- raise AssertionError(f"Unknown type {type} {chr(type)!r}")
-
-
-def loads(data: bytes) -> Any:
- assert isinstance(data, bytes)
- r = Reader(data)
- return r.r_object()
-
-
-def main():
- # Test
- import marshal, pprint
- sample = {'foo': {(42, "bar", 3.14)}}
- data = marshal.dumps(sample)
- retval = loads(data)
- assert retval == sample, retval
- sample = main.__code__
- data = marshal.dumps(sample)
- retval = loads(data)
- assert isinstance(retval, Code), retval
- pprint.pprint(retval.__dict__)
-
-
-if __name__ == "__main__":
- main()
diff --git a/Tools/scripts/update_file.py b/Tools/scripts/update_file.py
deleted file mode 100644
index b4182c1..0000000
--- a/Tools/scripts/update_file.py
+++ /dev/null
@@ -1,92 +0,0 @@
-"""
-A script that replaces an old file with a new one, only if the contents
-actually changed. If not, the new file is simply deleted.
-
-This avoids wholesale rebuilds when a code (re)generation phase does not
-actually change the in-tree generated code.
-"""
-
-import contextlib
-import os
-import os.path
-import sys
-
-
-@contextlib.contextmanager
-def updating_file_with_tmpfile(filename, tmpfile=None):
- """A context manager for updating a file via a temp file.
-
- The context manager provides two open files: the source file open
- for reading, and the temp file, open for writing.
-
- Upon exiting: both files are closed, and the source file is replaced
- with the temp file.
- """
- # XXX Optionally use tempfile.TemporaryFile?
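-    # Typical use (hypothetical filenames; `rewrite` is illustrative):
-    #
-    #     with updating_file_with_tmpfile('generated.c') as (infile, outfile):
-    #         for line in infile:
-    #             outfile.write(rewrite(line))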
- if not tmpfile:
- tmpfile = filename + '.tmp'
- elif os.path.isdir(tmpfile):
- tmpfile = os.path.join(tmpfile, filename + '.tmp')
-
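-    # Detect the newline convention from the first line of the existing
-    # file so the replacement file keeps the same line endings.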
- with open(filename, 'rb') as infile:
- line = infile.readline()
-
- if line.endswith(b'\r\n'):
- newline = "\r\n"
- elif line.endswith(b'\r'):
- newline = "\r"
- elif line.endswith(b'\n'):
- newline = "\n"
- else:
- raise ValueError(f"unknown end of line: {filename}: {line!a}")
-
- with open(tmpfile, 'w', newline=newline) as outfile:
- with open(filename) as infile:
- yield infile, outfile
- update_file_with_tmpfile(filename, tmpfile)
-
-
-def update_file_with_tmpfile(filename, tmpfile, *, create=False):
- try:
- targetfile = open(filename, 'rb')
- except FileNotFoundError:
- if not create:
- raise # re-raise
- outcome = 'created'
- os.replace(tmpfile, filename)
- else:
- with targetfile:
- old_contents = targetfile.read()
- with open(tmpfile, 'rb') as f:
- new_contents = f.read()
- # Now compare!
- if old_contents != new_contents:
- outcome = 'updated'
- os.replace(tmpfile, filename)
- else:
- outcome = 'same'
- os.unlink(tmpfile)
- return outcome
-
-
-if __name__ == '__main__':
- import argparse
- parser = argparse.ArgumentParser()
- parser.add_argument('--create', action='store_true')
- parser.add_argument('--exitcode', action='store_true')
- parser.add_argument('filename', help='path to be updated')
- parser.add_argument('tmpfile', help='path with new contents')
- args = parser.parse_args()
- kwargs = vars(args)
- setexitcode = kwargs.pop('exitcode')
-
- outcome = update_file_with_tmpfile(**kwargs)
- if setexitcode:
- if outcome == 'same':
- sys.exit(0)
- elif outcome == 'updated':
- sys.exit(1)
- elif outcome == 'created':
- sys.exit(2)
- else:
- raise NotImplementedError
diff --git a/Tools/scripts/verify_ensurepip_wheels.py b/Tools/scripts/verify_ensurepip_wheels.py
deleted file mode 100755
index 044d1fd..0000000
--- a/Tools/scripts/verify_ensurepip_wheels.py
+++ /dev/null
@@ -1,98 +0,0 @@
-#! /usr/bin/env python3
-
-"""
-Compare checksums for wheels in :mod:`ensurepip` against the Cheeseshop.
-
-When the script runs under GitHub Actions, messages are emitted as workflow commands:
-https://docs.github.com/en/actions/using-workflows/workflow-commands-for-github-actions#setting-a-notice-message
-"""
-
-import hashlib
-import json
-import os
-import re
-from pathlib import Path
-from urllib.request import urlopen
-
-PACKAGE_NAMES = ("pip", "setuptools")
-ENSURE_PIP_ROOT = Path(__file__).parent.parent.parent / "Lib/ensurepip"
-WHEEL_DIR = ENSURE_PIP_ROOT / "_bundled"
-ENSURE_PIP_INIT_PY_TEXT = (ENSURE_PIP_ROOT / "__init__.py").read_text(encoding="utf-8")
-GITHUB_ACTIONS = os.getenv("GITHUB_ACTIONS") == "true"
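-# Workflow commands take the form `::notice file={path}::{message}` (or
-# `::error ...`); outside GitHub Actions the plain message is printed.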
-
-
-def print_notice(file_path: str, message: str) -> None:
- if GITHUB_ACTIONS:
- message = f"::notice file={file_path}::{message}"
- print(message, end="\n\n")
-
-
-def print_error(file_path: str, message: str) -> None:
- if GITHUB_ACTIONS:
- message = f"::error file={file_path}::{message}"
- print(message, end="\n\n")
-
-
-def verify_wheel(package_name: str) -> bool:
- # Find the package on disk
- package_path = next(WHEEL_DIR.glob(f"{package_name}*.whl"), None)
- if not package_path:
- print_error("", f"Could not find a {package_name} wheel on disk.")
- return False
-
- print(f"Verifying checksum for {package_path}.")
-
- # Find the version of the package used by ensurepip
- package_version_match = re.search(
- f'_{package_name.upper()}_VERSION = "([^"]+)', ENSURE_PIP_INIT_PY_TEXT
- )
- if not package_version_match:
- print_error(
- package_path,
- f"No {package_name} version found in Lib/ensurepip/__init__.py.",
- )
- return False
- package_version = package_version_match[1]
-
- # Get the SHA 256 digest from the Cheeseshop
- try:
- raw_text = urlopen(f"https://pypi.org/pypi/{package_name}/json").read()
- except (OSError, ValueError):
- print_error(package_path, f"Could not fetch JSON metadata for {package_name}.")
- return False
-
- release_files = json.loads(raw_text)["releases"][package_version]
- for release_info in release_files:
- if package_path.name != release_info["filename"]:
- continue
- expected_digest = release_info["digests"].get("sha256", "")
- break
- else:
- print_error(package_path, f"No digest for {package_name} found from PyPI.")
- return False
-
- # Compute the SHA 256 digest of the wheel on disk
- actual_digest = hashlib.sha256(package_path.read_bytes()).hexdigest()
-
- print(f"Expected digest: {expected_digest}")
- print(f"Actual digest: {actual_digest}")
-
- if actual_digest != expected_digest:
- print_error(
- package_path, f"Failed to verify the checksum of the {package_name} wheel."
- )
- return False
-
- print_notice(
- package_path,
- f"Successfully verified the checksum of the {package_name} wheel.",
- )
- return True
-
-
-if __name__ == "__main__":
- exit_status = 0
- for package_name in PACKAGE_NAMES:
- if not verify_wheel(package_name):
- exit_status = 1
- raise SystemExit(exit_status)