author     Victor Stinner <vstinner@python.org>    2022-10-17 10:01:00 (GMT)
committer  GitHub <noreply@github.com>             2022-10-17 10:01:00 (GMT)
commit     1863302d61a7a5dd8b8d345a00f0ee242c7c10bf (patch)
tree       a1e41af02147e2a14155d5b19d7b68bbb31c3f6f /Tools/scripts
parent     eae7dad40255bad42e4abce53ff8143dcbc66af5 (diff)
gh-97669: Create Tools/build/ directory (#97963)
Create Tools/build/ directory. Move the following scripts from
Tools/scripts/ to Tools/build/:
* check_extension_modules.py
* deepfreeze.py
* freeze_modules.py
* generate_global_objects.py
* generate_levenshtein_examples.py
* generate_opcode_h.py
* generate_re_casefix.py
* generate_sre_constants.py
* generate_stdlib_module_names.py
* generate_token.py
* parse_html5_entities.py
* smelly.py
* stable_abi.py
* umarshal.py
* update_file.py
* verify_ensurepip_wheels.py
Update references to these scripts.
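Mechanically, a move like this has two halves: relocating the files and sweeping the tree for stale path references. Below is a minimal illustrative sketch of such a sweep; it is not part of the commit, and the scanned file extensions and repository-root layout are assumptions:

```python
# Hypothetical helper: flag lingering "Tools/scripts/<name>" references
# for scripts that now live in Tools/build/. Illustrative only.
import pathlib
import re

MOVED = [
    "check_extension_modules.py", "deepfreeze.py", "freeze_modules.py",
    "generate_global_objects.py", "generate_levenshtein_examples.py",
    "generate_opcode_h.py", "generate_re_casefix.py",
    "generate_sre_constants.py", "generate_stdlib_module_names.py",
    "generate_token.py", "parse_html5_entities.py", "smelly.py",
    "stable_abi.py", "umarshal.py", "update_file.py",
    "verify_ensurepip_wheels.py",
]
OLD = re.compile("Tools/scripts/(%s)" % "|".join(map(re.escape, MOVED)))

def stale_references(root: pathlib.Path):
    # Scan build files and docs for references that should now say Tools/build/.
    for path in root.rglob("*"):
        if path.suffix not in {".in", ".mk", ".rst", ".vcxproj", ".py"}:
            continue
        try:
            text = path.read_text(encoding="utf-8")
        except (UnicodeDecodeError, OSError):
            continue
        for m in OLD.finditer(text):
            yield path, m.group(0)

for path, ref in stale_references(pathlib.Path(".")):
    print(f"{path}: still references {ref}")
```

The real updates land in the build files and documentation that invoke these scripts; a sketch like this only helps locate such call sites.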
Diffstat (limited to 'Tools/scripts')
-rw-r--r--  Tools/scripts/check_extension_modules.py       484
-rw-r--r--  Tools/scripts/deepfreeze.py                     504
-rw-r--r--  Tools/scripts/freeze_modules.py                 733
-rw-r--r--  Tools/scripts/generate_global_objects.py        381
-rw-r--r--  Tools/scripts/generate_levenshtein_examples.py   70
-rw-r--r--  Tools/scripts/generate_opcode_h.py              199
-rwxr-xr-x  Tools/scripts/generate_re_casefix.py             94
-rwxr-xr-x  Tools/scripts/generate_sre_constants.py          78
-rw-r--r--  Tools/scripts/generate_stdlib_module_names.py   137
-rwxr-xr-x  Tools/scripts/generate_token.py                 275
-rwxr-xr-x  Tools/scripts/parse_html5_entities.py           114
-rwxr-xr-x  Tools/scripts/smelly.py                         173
-rwxr-xr-x  Tools/scripts/stable_abi.py                     754
-rw-r--r--  Tools/scripts/umarshal.py                       325
-rw-r--r--  Tools/scripts/update_file.py                     92
-rwxr-xr-x  Tools/scripts/verify_ensurepip_wheels.py         98
16 files changed, 0 insertions, 4511 deletions
diff --git a/Tools/scripts/check_extension_modules.py b/Tools/scripts/check_extension_modules.py
deleted file mode 100644
index 59239c6..0000000
--- a/Tools/scripts/check_extension_modules.py
+++ /dev/null
@@ -1,484 +0,0 @@
-"""Check extension modules
-
-The script checks shared and built-in extension modules. It verifies that the
-modules have been built and that they can be imported successfully. Missing
-modules and failed imports are reported to the user. Shared extension
-files are renamed on failed import.
-
-Module information is parsed from several sources:
-
-- core modules hard-coded in Modules/config.c.in
-- Windows-specific modules that are hard-coded in PC/config.c
-- MODULE_{name}_STATE entries in Makefile (provided through sysconfig)
-- Various makesetup files:
-  - $(srcdir)/Modules/Setup
-  - Modules/Setup.[local|bootstrap|stdlib] files, which are generated
-    from $(srcdir)/Modules/Setup.*.in files
-
-See --help for more information
-"""
-import argparse
-import collections
-import enum
-import logging
-import os
-import pathlib
-import re
-import sys
-import sysconfig
-import warnings
-
-from importlib._bootstrap import _load as bootstrap_load
-from importlib.machinery import BuiltinImporter, ExtensionFileLoader, ModuleSpec
-from importlib.util import spec_from_file_location, spec_from_loader
-from typing import Iterable
-
-SRC_DIR = pathlib.Path(__file__).parent.parent.parent
-
-# core modules, hard-coded in Modules/config.h.in
-CORE_MODULES = {
-    "_ast",
-    "_imp",
-    "_string",
-    "_tokenize",
-    "_warnings",
-    "builtins",
-    "gc",
-    "marshal",
-    "sys",
-}
-
-# Windows-only modules
-WINDOWS_MODULES = {
-    "_msi",
-    "_overlapped",
-    "_testconsole",
-    "_winapi",
-    "msvcrt",
-    "nt",
-    "winreg",
-    "winsound",
-}
-
-
-logger = logging.getLogger(__name__)
-
-parser = argparse.ArgumentParser(
-    prog="check_extension_modules",
-    description=__doc__,
-    formatter_class=argparse.RawDescriptionHelpFormatter,
-)
-
-parser.add_argument(
-    "--verbose",
-    action="store_true",
-    help="Verbose, report builtin, shared, and unavailable modules",
-)
-
-parser.add_argument(
-    "--debug",
-    action="store_true",
-    help="Enable debug logging",
-)
-
-parser.add_argument(
-    "--strict",
-    action=argparse.BooleanOptionalAction,
-    help=(
-        "Strict check, fail when a module is missing or fails to import"
-        "(default: no, unless env var PYTHONSTRICTEXTENSIONBUILD is set)"
-    ),
-    default=bool(os.environ.get("PYTHONSTRICTEXTENSIONBUILD")),
-)
-
-parser.add_argument(
-    "--cross-compiling",
-    action=argparse.BooleanOptionalAction,
-    help=(
-        "Use cross-compiling checks "
-        "(default: no, unless env var _PYTHON_HOST_PLATFORM is set)."
-    ),
-    default="_PYTHON_HOST_PLATFORM" in os.environ,
-)
-
-parser.add_argument(
-    "--list-module-names",
-    action="store_true",
-    help="Print a list of module names to stdout and exit",
-)
-
-
-class ModuleState(enum.Enum):
-    # Makefile state "yes"
-    BUILTIN = "builtin"
-    SHARED = "shared"
-
-    DISABLED = "disabled"
-    MISSING = "missing"
-    NA = "n/a"
-    # disabled by Setup / makesetup rule
-    DISABLED_SETUP = "disabled_setup"
-
-    def __bool__(self):
-        return self.value in {"builtin", "shared"}
-
-
-ModuleInfo = collections.namedtuple("ModuleInfo", "name state")
-
-
-class ModuleChecker:
-    pybuilddir_txt = "pybuilddir.txt"
-
-    setup_files = (
-        # see end of configure.ac
-        "Modules/Setup.local",
-        "Modules/Setup.stdlib",
-        "Modules/Setup.bootstrap",
-        SRC_DIR / "Modules/Setup",
-    )
-
-    def __init__(self, cross_compiling: bool = False, strict: bool = False):
-        self.cross_compiling = cross_compiling
-        self.strict_extensions_build = strict
-        self.ext_suffix = sysconfig.get_config_var("EXT_SUFFIX")
-        self.platform = sysconfig.get_platform()
-        self.builddir = self.get_builddir()
-        self.modules = self.get_modules()
-
-        self.builtin_ok = []
-        self.shared_ok = []
-        self.failed_on_import = []
-        self.missing = []
-        self.disabled_configure = []
-        self.disabled_setup = []
-        self.notavailable = []
-
-    def check(self):
-        for modinfo in self.modules:
-            logger.debug("Checking '%s' (%s)", modinfo.name, self.get_location(modinfo))
-            if modinfo.state == ModuleState.DISABLED:
-                self.disabled_configure.append(modinfo)
-            elif modinfo.state == ModuleState.DISABLED_SETUP:
-                self.disabled_setup.append(modinfo)
-            elif modinfo.state == ModuleState.MISSING:
-                self.missing.append(modinfo)
-            elif modinfo.state == ModuleState.NA:
-                self.notavailable.append(modinfo)
-            else:
-                try:
-                    if self.cross_compiling:
-                        self.check_module_cross(modinfo)
-                    else:
-                        self.check_module_import(modinfo)
-                except (ImportError, FileNotFoundError):
-                    self.rename_module(modinfo)
-                    self.failed_on_import.append(modinfo)
-                else:
-                    if modinfo.state == ModuleState.BUILTIN:
-                        self.builtin_ok.append(modinfo)
-                    else:
-                        assert modinfo.state == ModuleState.SHARED
-                        self.shared_ok.append(modinfo)
-
-    def summary(self, *, verbose: bool = False):
-        longest = max([len(e.name) for e in self.modules], default=0)
-
-        def print_three_column(modinfos: list[ModuleInfo]):
-            names = [modinfo.name for modinfo in modinfos]
-            names.sort(key=str.lower)
-            # guarantee zip() doesn't drop anything
-            while len(names) % 3:
-                names.append("")
-            for l, m, r in zip(names[::3], names[1::3], names[2::3]):
-                print("%-*s %-*s %-*s" % (longest, l, longest, m, longest, r))
-
-        if verbose and self.builtin_ok:
-            print("The following *built-in* modules have been successfully built:")
-            print_three_column(self.builtin_ok)
-            print()
-
-        if verbose and self.shared_ok:
-            print("The following *shared* modules have been successfully built:")
-            print_three_column(self.shared_ok)
-            print()
-
-        if self.disabled_configure:
-            print("The following modules are *disabled* in configure script:")
-            print_three_column(self.disabled_configure)
-            print()
-
-        if self.disabled_setup:
-            print("The following modules are *disabled* in Modules/Setup files:")
-            print_three_column(self.disabled_setup)
-            print()
-
-        if verbose and self.notavailable:
-            print(
-                f"The following modules are not available on platform '{self.platform}':"
-            )
-            print_three_column(self.notavailable)
-            print()
-
-        if self.missing:
-            print("The necessary bits to build these optional modules were not found:")
-            print_three_column(self.missing)
-            print("To find the necessary bits, look in configure.ac and config.log.")
-            print()
-
-        if self.failed_on_import:
-            print(
-                "Following modules built successfully "
-                "but were removed because they could not be imported:"
-            )
-            print_three_column(self.failed_on_import)
-            print()
-
-        if any(
-            modinfo.name == "_ssl" for modinfo in self.missing + self.failed_on_import
-        ):
-            print("Could not build the ssl module!")
-            print("Python requires a OpenSSL 1.1.1 or newer")
-            if sysconfig.get_config_var("OPENSSL_LDFLAGS"):
-                print("Custom linker flags may require --with-openssl-rpath=auto")
-            print()
-
-        disabled = len(self.disabled_configure) + len(self.disabled_setup)
-        print(
-            f"Checked {len(self.modules)} modules ("
-            f"{len(self.builtin_ok)} built-in, "
-            f"{len(self.shared_ok)} shared, "
-            f"{len(self.notavailable)} n/a on {self.platform}, "
-            f"{disabled} disabled, "
-            f"{len(self.missing)} missing, "
-            f"{len(self.failed_on_import)} failed on import)"
-        )
-
-    def check_strict_build(self):
-        """Fail if modules are missing and it's a strict build"""
-        if self.strict_extensions_build and (self.failed_on_import or self.missing):
-            raise RuntimeError("Failed to build some stdlib modules")
-
-    def list_module_names(self, *, all: bool = False) -> set:
-        names = {modinfo.name for modinfo in self.modules}
-        if all:
-            names.update(WINDOWS_MODULES)
-        return names
-
-    def get_builddir(self) -> pathlib.Path:
-        try:
-            with open(self.pybuilddir_txt, encoding="utf-8") as f:
-                builddir = f.read()
-        except FileNotFoundError:
-            logger.error("%s must be run from the top build directory", __file__)
-            raise
-        builddir = pathlib.Path(builddir)
-        logger.debug("%s: %s", self.pybuilddir_txt, builddir)
-        return builddir
-
-    def get_modules(self) -> list[ModuleInfo]:
-        """Get module info from sysconfig and Modules/Setup* files"""
-        seen = set()
-        modules = []
-        # parsing order is important, first entry wins
-        for modinfo in self.get_core_modules():
-            modules.append(modinfo)
-            seen.add(modinfo.name)
-        for setup_file in self.setup_files:
-            for modinfo in self.parse_setup_file(setup_file):
-                if modinfo.name not in seen:
-                    modules.append(modinfo)
-                    seen.add(modinfo.name)
-        for modinfo in self.get_sysconfig_modules():
-            if modinfo.name not in seen:
-                modules.append(modinfo)
-                seen.add(modinfo.name)
-        logger.debug("Found %i modules in total", len(modules))
-        modules.sort()
-        return modules
-
-    def get_core_modules(self) -> Iterable[ModuleInfo]:
-        """Get hard-coded core modules"""
-        for name in CORE_MODULES:
-            modinfo = ModuleInfo(name, ModuleState.BUILTIN)
-            logger.debug("Found core module %s", modinfo)
-            yield modinfo
-
-    def get_sysconfig_modules(self) -> Iterable[ModuleInfo]:
-        """Get modules defined in Makefile through sysconfig
-
-        MODBUILT_NAMES: modules in *static* block
-        MODSHARED_NAMES: modules in *shared* block
-        MODDISABLED_NAMES: modules in *disabled* block
-        """
-        moddisabled = set(sysconfig.get_config_var("MODDISABLED_NAMES").split())
-        if self.cross_compiling:
-            modbuiltin = set(sysconfig.get_config_var("MODBUILT_NAMES").split())
-        else:
-            modbuiltin = set(sys.builtin_module_names)
-
-        for key, value in sysconfig.get_config_vars().items():
-            if not key.startswith("MODULE_") or not key.endswith("_STATE"):
-                continue
-            if value not in {"yes", "disabled", "missing", "n/a"}:
-                raise ValueError(f"Unsupported value '{value}' for {key}")
-
-            modname = key[7:-6].lower()
-            if modname in moddisabled:
-                # Setup "*disabled*" rule
-                state = ModuleState.DISABLED_SETUP
-            elif value in {"disabled", "missing", "n/a"}:
-                state = ModuleState(value)
-            elif modname in modbuiltin:
-                assert value == "yes"
-                state = ModuleState.BUILTIN
-            else:
-                assert value == "yes"
-                state = ModuleState.SHARED
-
-            modinfo = ModuleInfo(modname, state)
-            logger.debug("Found %s in Makefile", modinfo)
-            yield modinfo
-
-    def parse_setup_file(self, setup_file: pathlib.Path) -> Iterable[ModuleInfo]:
-        """Parse a Modules/Setup file"""
-        assign_var = re.compile(r"^\w+=")  # EGG_SPAM=foo
-        # default to static module
-        state = ModuleState.BUILTIN
-        logger.debug("Parsing Setup file %s", setup_file)
-        with open(setup_file, encoding="utf-8") as f:
-            for line in f:
-                line = line.strip()
-                if not line or line.startswith("#") or assign_var.match(line):
-                    continue
-                match line.split():
-                    case ["*shared*"]:
-                        state = ModuleState.SHARED
-                    case ["*static*"]:
-                        state = ModuleState.BUILTIN
-                    case ["*disabled*"]:
-                        state = ModuleState.DISABLED
-                    case ["*noconfig*"]:
-                        state = None
-                    case [*items]:
-                        if state == ModuleState.DISABLED:
-                            # *disabled* can disable multiple modules per line
-                            for item in items:
-                                modinfo = ModuleInfo(item, state)
-                                logger.debug("Found %s in %s", modinfo, setup_file)
-                                yield modinfo
-                        elif state in {ModuleState.SHARED, ModuleState.BUILTIN}:
-                            # *shared* and *static*, first item is the name of the module.
-                            modinfo = ModuleInfo(items[0], state)
-                            logger.debug("Found %s in %s", modinfo, setup_file)
-                            yield modinfo
-
-    def get_spec(self, modinfo: ModuleInfo) -> ModuleSpec:
-        """Get ModuleSpec for builtin or extension module"""
-        if modinfo.state == ModuleState.SHARED:
-            location = os.fspath(self.get_location(modinfo))
-            loader = ExtensionFileLoader(modinfo.name, location)
-            return spec_from_file_location(modinfo.name, location, loader=loader)
-        elif modinfo.state == ModuleState.BUILTIN:
-            return spec_from_loader(modinfo.name, loader=BuiltinImporter)
-        else:
-            raise ValueError(modinfo)
-
-    def get_location(self, modinfo: ModuleInfo) -> pathlib.Path:
-        """Get shared library location in build directory"""
-        if modinfo.state == ModuleState.SHARED:
-            return self.builddir / f"{modinfo.name}{self.ext_suffix}"
-        else:
-            return None
-
-    def _check_file(self, modinfo: ModuleInfo, spec: ModuleSpec):
-        """Check that the module file is present and not empty"""
-        if spec.loader is BuiltinImporter:
-            return
-        try:
-            st = os.stat(spec.origin)
-        except FileNotFoundError:
-            logger.error("%s (%s) is missing", modinfo.name, spec.origin)
-            raise
-        if not st.st_size:
-            raise ImportError(f"{spec.origin} is an empty file")
-
-    def check_module_import(self, modinfo: ModuleInfo):
-        """Attempt to import module and report errors"""
-        spec = self.get_spec(modinfo)
-        self._check_file(modinfo, spec)
-        try:
-            with warnings.catch_warnings():
-                # ignore deprecation warning from deprecated modules
-                warnings.simplefilter("ignore", DeprecationWarning)
-                bootstrap_load(spec)
-        except ImportError as e:
-            logger.error("%s failed to import: %s", modinfo.name, e)
-            raise
-        except Exception as e:
-            logger.exception("Importing extension '%s' failed!", modinfo.name)
-            raise
-
-    def check_module_cross(self, modinfo: ModuleInfo):
-        """Sanity check for cross compiling"""
-        spec = self.get_spec(modinfo)
-        self._check_file(modinfo, spec)
-
-    def rename_module(self, modinfo: ModuleInfo) -> None:
-        """Rename module file"""
-        if modinfo.state == ModuleState.BUILTIN:
-            logger.error("Cannot mark builtin module '%s' as failed!", modinfo.name)
-            return
-
-        failed_name = f"{modinfo.name}_failed{self.ext_suffix}"
-        builddir_path = self.get_location(modinfo)
-        if builddir_path.is_symlink():
-            symlink = builddir_path
-            module_path = builddir_path.resolve().relative_to(os.getcwd())
-            failed_path = module_path.parent / failed_name
-        else:
-            symlink = None
-            module_path = builddir_path
-            failed_path = self.builddir / failed_name
-
-        # remove old failed file
-        failed_path.unlink(missing_ok=True)
-        # remove symlink
-        if symlink is not None:
-            symlink.unlink(missing_ok=True)
-        # rename shared extension file
-        try:
-            module_path.rename(failed_path)
-        except FileNotFoundError:
-            logger.debug("Shared extension file '%s' does not exist.", module_path)
-        else:
-            logger.debug("Rename '%s' -> '%s'", module_path, failed_path)
-
-
-def main():
-    args = parser.parse_args()
-    if args.debug:
-        args.verbose = True
-    logging.basicConfig(
-        level=logging.DEBUG if args.debug else logging.INFO,
-        format="[%(levelname)s] %(message)s",
-    )
-
-    checker = ModuleChecker(
-        cross_compiling=args.cross_compiling,
-        strict=args.strict,
-    )
-    if args.list_module_names:
-        names = checker.list_module_names(all=True)
-        for name in sorted(names):
-            print(name)
-    else:
-        checker.check()
-        checker.summary(verbose=args.verbose)
-        try:
-            checker.check_strict_build()
-        except RuntimeError as e:
-            parser.exit(1, f"\nError: {e}\n")
-
-
-if __name__ == "__main__":
-    main()
diff --git a/Tools/scripts/deepfreeze.py b/Tools/scripts/deepfreeze.py
deleted file mode 100644
index 28ac2b1..0000000
--- a/Tools/scripts/deepfreeze.py
+++ /dev/null
@@ -1,504 +0,0 @@
-"""Deep freeze
-
-The script may be executed by _bootstrap_python interpreter.
-Shared library extension modules are not available in that case.
-On Windows, and in cross-compilation cases, it is executed
-by Python 3.10, and 3.11 features are not available.
-"""
-import argparse
-import ast
-import builtins
-import collections
-import contextlib
-import os
-import re
-import time
-import types
-from typing import Dict, FrozenSet, TextIO, Tuple
-
-import umarshal
-from generate_global_objects import get_identifiers_and_strings
-
-verbose = False
-identifiers, strings = get_identifiers_and_strings()
-
-# This must be kept in sync with opcode.py
-RESUME = 151
-
-def isprintable(b: bytes) -> bool:
-    return all(0x20 <= c < 0x7f for c in b)
-
-
-def make_string_literal(b: bytes) -> str:
-    res = ['"']
-    if isprintable(b):
-        res.append(b.decode("ascii").replace("\\", "\\\\").replace("\"", "\\\""))
-    else:
-        for i in b:
-            res.append(f"\\x{i:02x}")
-    res.append('"')
-    return "".join(res)
-
-
-CO_FAST_LOCAL = 0x20
-CO_FAST_CELL = 0x40
-CO_FAST_FREE = 0x80
-
-
-def get_localsplus(code: types.CodeType):
-    a = collections.defaultdict(int)
-    for name in code.co_varnames:
-        a[name] |= CO_FAST_LOCAL
-    for name in code.co_cellvars:
-        a[name] |= CO_FAST_CELL
-    for name in code.co_freevars:
-        a[name] |= CO_FAST_FREE
-    return tuple(a.keys()), bytes(a.values())
-
-
-def get_localsplus_counts(code: types.CodeType,
-                          names: Tuple[str, ...],
-                          kinds: bytes) -> Tuple[int, int, int, int]:
-    nlocals = 0
-    nplaincellvars = 0
-    ncellvars = 0
-    nfreevars = 0
-    assert len(names) == len(kinds)
-    for name, kind in zip(names, kinds):
-        if kind & CO_FAST_LOCAL:
-            nlocals += 1
-            if kind & CO_FAST_CELL:
-                ncellvars += 1
-        elif kind & CO_FAST_CELL:
-            ncellvars += 1
-            nplaincellvars += 1
-        elif kind & CO_FAST_FREE:
-            nfreevars += 1
-    assert nlocals == len(code.co_varnames) == code.co_nlocals, \
-        (nlocals, len(code.co_varnames), code.co_nlocals)
-    assert ncellvars == len(code.co_cellvars)
-    assert nfreevars == len(code.co_freevars)
-    assert len(names) == nlocals + nplaincellvars +
nfreevars - return nlocals, nplaincellvars, ncellvars, nfreevars - - -PyUnicode_1BYTE_KIND = 1 -PyUnicode_2BYTE_KIND = 2 -PyUnicode_4BYTE_KIND = 4 - - -def analyze_character_width(s: str) -> Tuple[int, bool]: - maxchar = ' ' - for c in s: - maxchar = max(maxchar, c) - ascii = False - if maxchar <= '\xFF': - kind = PyUnicode_1BYTE_KIND - ascii = maxchar <= '\x7F' - elif maxchar <= '\uFFFF': - kind = PyUnicode_2BYTE_KIND - else: - kind = PyUnicode_4BYTE_KIND - return kind, ascii - - -def removesuffix(base: str, suffix: str) -> str: - if base.endswith(suffix): - return base[:len(base) - len(suffix)] - return base - -class Printer: - - def __init__(self, file: TextIO) -> None: - self.level = 0 - self.file = file - self.cache: Dict[tuple[type, object, str], str] = {} - self.hits, self.misses = 0, 0 - self.patchups: list[str] = [] - self.deallocs: list[str] = [] - self.interns: list[str] = [] - self.write('#include "Python.h"') - self.write('#include "internal/pycore_gc.h"') - self.write('#include "internal/pycore_code.h"') - self.write('#include "internal/pycore_frame.h"') - self.write('#include "internal/pycore_long.h"') - self.write("") - - @contextlib.contextmanager - def indent(self) -> None: - save_level = self.level - try: - self.level += 1 - yield - finally: - self.level = save_level - - def write(self, arg: str) -> None: - self.file.writelines((" "*self.level, arg, "\n")) - - @contextlib.contextmanager - def block(self, prefix: str, suffix: str = "") -> None: - self.write(prefix + " {") - with self.indent(): - yield - self.write("}" + suffix) - - def object_head(self, typename: str) -> None: - with self.block(".ob_base =", ","): - self.write(f".ob_refcnt = 999999999,") - self.write(f".ob_type = &{typename},") - - def object_var_head(self, typename: str, size: int) -> None: - with self.block(".ob_base =", ","): - self.object_head(typename) - self.write(f".ob_size = {size},") - - def field(self, obj: object, name: str) -> None: - self.write(f".{name} = {getattr(obj, name)},") - - def generate_bytes(self, name: str, b: bytes) -> str: - if b == b"": - return "(PyObject *)&_Py_SINGLETON(bytes_empty)" - if len(b) == 1: - return f"(PyObject *)&_Py_SINGLETON(bytes_characters[{b[0]}])" - self.write("static") - with self.indent(): - with self.block("struct"): - self.write("PyObject_VAR_HEAD") - self.write("Py_hash_t ob_shash;") - self.write(f"char ob_sval[{len(b) + 1}];") - with self.block(f"{name} =", ";"): - self.object_var_head("PyBytes_Type", len(b)) - self.write(".ob_shash = -1,") - self.write(f".ob_sval = {make_string_literal(b)},") - return f"& {name}.ob_base.ob_base" - - def generate_unicode(self, name: str, s: str) -> str: - if s in strings: - return f"&_Py_STR({strings[s]})" - if s in identifiers: - return f"&_Py_ID({s})" - if re.match(r'\A[A-Za-z0-9_]+\Z', s): - name = f"const_str_{s}" - kind, ascii = analyze_character_width(s) - if kind == PyUnicode_1BYTE_KIND: - datatype = "uint8_t" - elif kind == PyUnicode_2BYTE_KIND: - datatype = "uint16_t" - else: - datatype = "uint32_t" - self.write("static") - with self.indent(): - with self.block("struct"): - if ascii: - self.write("PyASCIIObject _ascii;") - else: - self.write("PyCompactUnicodeObject _compact;") - self.write(f"{datatype} _data[{len(s)+1}];") - with self.block(f"{name} =", ";"): - if ascii: - with self.block("._ascii =", ","): - self.object_head("PyUnicode_Type") - self.write(f".length = {len(s)},") - self.write(".hash = -1,") - with self.block(".state =", ","): - self.write(".kind = 1,") - self.write(".compact = 1,") - 
self.write(".ascii = 1,") - self.write(f"._data = {make_string_literal(s.encode('ascii'))},") - return f"& {name}._ascii.ob_base" - else: - with self.block("._compact =", ","): - with self.block("._base =", ","): - self.object_head("PyUnicode_Type") - self.write(f".length = {len(s)},") - self.write(".hash = -1,") - with self.block(".state =", ","): - self.write(f".kind = {kind},") - self.write(".compact = 1,") - self.write(".ascii = 0,") - utf8 = s.encode('utf-8') - self.write(f'.utf8 = {make_string_literal(utf8)},') - self.write(f'.utf8_length = {len(utf8)},') - with self.block(f"._data =", ","): - for i in range(0, len(s), 16): - data = s[i:i+16] - self.write(", ".join(map(str, map(ord, data))) + ",") - return f"& {name}._compact._base.ob_base" - - - def generate_code(self, name: str, code: types.CodeType) -> str: - # The ordering here matches PyCode_NewWithPosOnlyArgs() - # (but see below). - co_consts = self.generate(name + "_consts", code.co_consts) - co_names = self.generate(name + "_names", code.co_names) - co_filename = self.generate(name + "_filename", code.co_filename) - co_name = self.generate(name + "_name", code.co_name) - co_qualname = self.generate(name + "_qualname", code.co_qualname) - co_linetable = self.generate(name + "_linetable", code.co_linetable) - co_exceptiontable = self.generate(name + "_exceptiontable", code.co_exceptiontable) - # These fields are not directly accessible - localsplusnames, localspluskinds = get_localsplus(code) - co_localsplusnames = self.generate(name + "_localsplusnames", localsplusnames) - co_localspluskinds = self.generate(name + "_localspluskinds", localspluskinds) - # Derived values - nlocals, nplaincellvars, ncellvars, nfreevars = \ - get_localsplus_counts(code, localsplusnames, localspluskinds) - co_code_adaptive = make_string_literal(code.co_code) - self.write("static") - with self.indent(): - self.write(f"struct _PyCode_DEF({len(code.co_code)})") - with self.block(f"{name} =", ";"): - self.object_var_head("PyCode_Type", len(code.co_code) // 2) - # But the ordering here must match that in cpython/code.h - # (which is a pain because we tend to reorder those for perf) - # otherwise MSVC doesn't like it. 
- self.write(f".co_consts = {co_consts},") - self.write(f".co_names = {co_names},") - self.write(f".co_exceptiontable = {co_exceptiontable},") - self.field(code, "co_flags") - self.write(".co_warmup = QUICKENING_INITIAL_WARMUP_VALUE,") - self.write("._co_linearray_entry_size = 0,") - self.field(code, "co_argcount") - self.field(code, "co_posonlyargcount") - self.field(code, "co_kwonlyargcount") - self.write(f".co_framesize = {code.co_stacksize + len(localsplusnames)} + FRAME_SPECIALS_SIZE,") - self.field(code, "co_stacksize") - self.field(code, "co_firstlineno") - self.write(f".co_nlocalsplus = {len(localsplusnames)},") - self.field(code, "co_nlocals") - self.write(f".co_nplaincellvars = {nplaincellvars},") - self.write(f".co_ncellvars = {ncellvars},") - self.write(f".co_nfreevars = {nfreevars},") - self.write(f".co_localsplusnames = {co_localsplusnames},") - self.write(f".co_localspluskinds = {co_localspluskinds},") - self.write(f".co_filename = {co_filename},") - self.write(f".co_name = {co_name},") - self.write(f".co_qualname = {co_qualname},") - self.write(f".co_linetable = {co_linetable},") - self.write(f"._co_cached = NULL,") - self.write("._co_linearray = NULL,") - self.write(f".co_code_adaptive = {co_code_adaptive},") - for i, op in enumerate(code.co_code[::2]): - if op == RESUME: - self.write(f"._co_firsttraceable = {i},") - break - name_as_code = f"(PyCodeObject *)&{name}" - self.deallocs.append(f"_PyStaticCode_Dealloc({name_as_code});") - self.interns.append(f"_PyStaticCode_InternStrings({name_as_code})") - return f"& {name}.ob_base.ob_base" - - def generate_tuple(self, name: str, t: Tuple[object, ...]) -> str: - if len(t) == 0: - return f"(PyObject *)& _Py_SINGLETON(tuple_empty)" - items = [self.generate(f"{name}_{i}", it) for i, it in enumerate(t)] - self.write("static") - with self.indent(): - with self.block("struct"): - self.write("PyGC_Head _gc_head;") - with self.block("struct", "_object;"): - self.write("PyObject_VAR_HEAD") - if t: - self.write(f"PyObject *ob_item[{len(t)}];") - with self.block(f"{name} =", ";"): - with self.block("._object =", ","): - self.object_var_head("PyTuple_Type", len(t)) - if items: - with self.block(f".ob_item =", ","): - for item in items: - self.write(item + ",") - return f"& {name}._object.ob_base.ob_base" - - def _generate_int_for_bits(self, name: str, i: int, digit: int) -> None: - sign = -1 if i < 0 else 0 if i == 0 else +1 - i = abs(i) - digits: list[int] = [] - while i: - i, rem = divmod(i, digit) - digits.append(rem) - self.write("static") - with self.indent(): - with self.block("struct"): - self.write("PyObject_VAR_HEAD") - self.write(f"digit ob_digit[{max(1, len(digits))}];") - with self.block(f"{name} =", ";"): - self.object_var_head("PyLong_Type", sign*len(digits)) - if digits: - ds = ", ".join(map(str, digits)) - self.write(f".ob_digit = {{ {ds} }},") - - def generate_int(self, name: str, i: int) -> str: - if -5 <= i <= 256: - return f"(PyObject *)&_PyLong_SMALL_INTS[_PY_NSMALLNEGINTS + {i}]" - if i >= 0: - name = f"const_int_{i}" - else: - name = f"const_int_negative_{abs(i)}" - if abs(i) < 2**15: - self._generate_int_for_bits(name, i, 2**15) - else: - connective = "if" - for bits_in_digit in 15, 30: - self.write(f"#{connective} PYLONG_BITS_IN_DIGIT == {bits_in_digit}") - self._generate_int_for_bits(name, i, 2**bits_in_digit) - connective = "elif" - self.write("#else") - self.write('#error "PYLONG_BITS_IN_DIGIT should be 15 or 30"') - self.write("#endif") - # If neither clause applies, it won't compile - return f"& 
{name}.ob_base.ob_base" - - def generate_float(self, name: str, x: float) -> str: - with self.block(f"static PyFloatObject {name} =", ";"): - self.object_head("PyFloat_Type") - self.write(f".ob_fval = {x},") - return f"&{name}.ob_base" - - def generate_complex(self, name: str, z: complex) -> str: - with self.block(f"static PyComplexObject {name} =", ";"): - self.object_head("PyComplex_Type") - self.write(f".cval = {{ {z.real}, {z.imag} }},") - return f"&{name}.ob_base" - - def generate_frozenset(self, name: str, fs: FrozenSet[object]) -> str: - try: - fs = sorted(fs) - except TypeError: - # frozen set with incompatible types, fallback to repr() - fs = sorted(fs, key=repr) - ret = self.generate_tuple(name, tuple(fs)) - self.write("// TODO: The above tuple should be a frozenset") - return ret - - def generate_file(self, module: str, code: object)-> None: - module = module.replace(".", "_") - self.generate(f"{module}_toplevel", code) - with self.block(f"static void {module}_do_patchups(void)"): - for p in self.patchups: - self.write(p) - self.patchups.clear() - self.write(EPILOGUE.replace("%%NAME%%", module)) - - def generate(self, name: str, obj: object) -> str: - # Use repr() in the key to distinguish -0.0 from +0.0 - key = (type(obj), obj, repr(obj)) - if key in self.cache: - self.hits += 1 - # print(f"Cache hit {key!r:.40}: {self.cache[key]!r:.40}") - return self.cache[key] - self.misses += 1 - if isinstance(obj, (types.CodeType, umarshal.Code)) : - val = self.generate_code(name, obj) - elif isinstance(obj, tuple): - val = self.generate_tuple(name, obj) - elif isinstance(obj, str): - val = self.generate_unicode(name, obj) - elif isinstance(obj, bytes): - val = self.generate_bytes(name, obj) - elif obj is True: - return "Py_True" - elif obj is False: - return "Py_False" - elif isinstance(obj, int): - val = self.generate_int(name, obj) - elif isinstance(obj, float): - val = self.generate_float(name, obj) - elif isinstance(obj, complex): - val = self.generate_complex(name, obj) - elif isinstance(obj, frozenset): - val = self.generate_frozenset(name, obj) - elif obj is builtins.Ellipsis: - return "Py_Ellipsis" - elif obj is None: - return "Py_None" - else: - raise TypeError( - f"Cannot generate code for {type(obj).__name__} object") - # print(f"Cache store {key!r:.40}: {val!r:.40}") - self.cache[key] = val - return val - - -EPILOGUE = """ -PyObject * -_Py_get_%%NAME%%_toplevel(void) -{ - %%NAME%%_do_patchups(); - return Py_NewRef((PyObject *) &%%NAME%%_toplevel); -} -""" - -FROZEN_COMMENT_C = "/* Auto-generated by Programs/_freeze_module.c */" -FROZEN_COMMENT_PY = "/* Auto-generated by Programs/_freeze_module.py */" - -FROZEN_DATA_LINE = r"\s*(\d+,\s*)+\s*" - - -def is_frozen_header(source: str) -> bool: - return source.startswith((FROZEN_COMMENT_C, FROZEN_COMMENT_PY)) - - -def decode_frozen_data(source: str) -> types.CodeType: - lines = source.splitlines() - while lines and re.match(FROZEN_DATA_LINE, lines[0]) is None: - del lines[0] - while lines and re.match(FROZEN_DATA_LINE, lines[-1]) is None: - del lines[-1] - values: Tuple[int, ...] 
= ast.literal_eval("".join(lines).strip()) - data = bytes(values) - return umarshal.loads(data) - - -def generate(args: list[str], output: TextIO) -> None: - printer = Printer(output) - for arg in args: - file, modname = arg.rsplit(':', 1) - with open(file, "r", encoding="utf8") as fd: - source = fd.read() - if is_frozen_header(source): - code = decode_frozen_data(source) - else: - code = compile(fd.read(), f"<frozen {modname}>", "exec") - printer.generate_file(modname, code) - with printer.block(f"void\n_Py_Deepfreeze_Fini(void)"): - for p in printer.deallocs: - printer.write(p) - with printer.block(f"int\n_Py_Deepfreeze_Init(void)"): - for p in printer.interns: - with printer.block(f"if ({p} < 0)"): - printer.write("return -1;") - printer.write("return 0;") - if verbose: - print(f"Cache hits: {printer.hits}, misses: {printer.misses}") - - -parser = argparse.ArgumentParser() -parser.add_argument("-o", "--output", help="Defaults to deepfreeze.c", default="deepfreeze.c") -parser.add_argument("-v", "--verbose", action="store_true", help="Print diagnostics") -parser.add_argument('args', nargs="+", help="Input file and module name (required) in file:modname format") - -@contextlib.contextmanager -def report_time(label: str): - t0 = time.time() - try: - yield - finally: - t1 = time.time() - if verbose: - print(f"{label}: {t1-t0:.3f} sec") - - -def main() -> None: - global verbose - args = parser.parse_args() - verbose = args.verbose - output = args.output - with open(output, "w", encoding="utf-8") as file: - with report_time("generate"): - generate(args.args, file) - if verbose: - print(f"Wrote {os.path.getsize(output)} bytes to {output}") - - -if __name__ == "__main__": - main() diff --git a/Tools/scripts/freeze_modules.py b/Tools/scripts/freeze_modules.py deleted file mode 100644 index aa1e4fe..0000000 --- a/Tools/scripts/freeze_modules.py +++ /dev/null @@ -1,733 +0,0 @@ -"""Freeze modules and regen related files (e.g. Python/frozen.c). - -See the notes at the top of Python/frozen.c for more info. -""" - -from collections import namedtuple -import hashlib -import os -import ntpath -import posixpath -import argparse -from update_file import updating_file_with_tmpfile - - -ROOT_DIR = os.path.dirname(os.path.dirname(os.path.dirname(__file__))) -ROOT_DIR = os.path.abspath(ROOT_DIR) -FROZEN_ONLY = os.path.join(ROOT_DIR, 'Tools', 'freeze', 'flag.py') - -STDLIB_DIR = os.path.join(ROOT_DIR, 'Lib') -# If FROZEN_MODULES_DIR or DEEPFROZEN_MODULES_DIR is changed then the -# .gitattributes and .gitignore files needs to be updated. -FROZEN_MODULES_DIR = os.path.join(ROOT_DIR, 'Python', 'frozen_modules') -DEEPFROZEN_MODULES_DIR = os.path.join(ROOT_DIR, 'Python', 'deepfreeze') - -FROZEN_FILE = os.path.join(ROOT_DIR, 'Python', 'frozen.c') -MAKEFILE = os.path.join(ROOT_DIR, 'Makefile.pre.in') -PCBUILD_PROJECT = os.path.join(ROOT_DIR, 'PCbuild', '_freeze_module.vcxproj') -PCBUILD_FILTERS = os.path.join(ROOT_DIR, 'PCbuild', '_freeze_module.vcxproj.filters') -PCBUILD_PYTHONCORE = os.path.join(ROOT_DIR, 'PCbuild', 'pythoncore.vcxproj') - - -OS_PATH = 'ntpath' if os.name == 'nt' else 'posixpath' - -# These are modules that get frozen. -TESTS_SECTION = 'Test module' -FROZEN = [ - # See parse_frozen_spec() for the format. - # In cases where the frozenid is duplicated, the first one is re-used. - ('import system', [ - # These frozen modules are necessary for bootstrapping - # the import system. 
- 'importlib._bootstrap : _frozen_importlib', - 'importlib._bootstrap_external : _frozen_importlib_external', - # This module is important because some Python builds rely - # on a builtin zip file instead of a filesystem. - 'zipimport', - ]), - ('stdlib - startup, without site (python -S)', [ - 'abc', - 'codecs', - # For now we do not freeze the encodings, due # to the noise all - # those extra modules add to the text printed during the build. - # (See https://github.com/python/cpython/pull/28398#pullrequestreview-756856469.) - #'<encodings.*>', - 'io', - ]), - ('stdlib - startup, with site', [ - '_collections_abc', - '_sitebuiltins', - 'genericpath', - 'ntpath', - 'posixpath', - # We must explicitly mark os.path as a frozen module - # even though it will never be imported. - f'{OS_PATH} : os.path', - 'os', - 'site', - 'stat', - ]), - ('runpy - run module with -m', [ - "importlib.util", - "importlib.machinery", - "runpy", - ]), - (TESTS_SECTION, [ - '__hello__', - '__hello__ : __hello_alias__', - '__hello__ : <__phello_alias__>', - '__hello__ : __phello_alias__.spam', - '<__phello__.**.*>', - f'frozen_only : __hello_only__ = {FROZEN_ONLY}', - ]), -] -BOOTSTRAP = { - 'importlib._bootstrap', - 'importlib._bootstrap_external', - 'zipimport', -} - - -####################################### -# platform-specific helpers - -if os.path is posixpath: - relpath_for_posix_display = os.path.relpath - - def relpath_for_windows_display(path, base): - return ntpath.relpath( - ntpath.join(*path.split(os.path.sep)), - ntpath.join(*base.split(os.path.sep)), - ) - -else: - relpath_for_windows_display = ntpath.relpath - - def relpath_for_posix_display(path, base): - return posixpath.relpath( - posixpath.join(*path.split(os.path.sep)), - posixpath.join(*base.split(os.path.sep)), - ) - - -####################################### -# specs - -def parse_frozen_specs(): - seen = {} - for section, specs in FROZEN: - parsed = _parse_specs(specs, section, seen) - for item in parsed: - frozenid, pyfile, modname, ispkg, section = item - try: - source = seen[frozenid] - except KeyError: - source = FrozenSource.from_id(frozenid, pyfile) - seen[frozenid] = source - else: - assert not pyfile or pyfile == source.pyfile, item - yield FrozenModule(modname, ispkg, section, source) - - -def _parse_specs(specs, section, seen): - for spec in specs: - info, subs = _parse_spec(spec, seen, section) - yield info - for info in subs or (): - yield info - - -def _parse_spec(spec, knownids=None, section=None): - """Yield an info tuple for each module corresponding to the given spec. - - The info consists of: (frozenid, pyfile, modname, ispkg, section). - - Supported formats: - - frozenid - frozenid : modname - frozenid : modname = pyfile - - "frozenid" and "modname" must be valid module names (dot-separated - identifiers). If "modname" is not provided then "frozenid" is used. - If "pyfile" is not provided then the filename of the module - corresponding to "frozenid" is used. - - Angle brackets around a frozenid (e.g. '<encodings>") indicate - it is a package. This also means it must be an actual module - (i.e. "pyfile" cannot have been provided). Such values can have - patterns to expand submodules: - - <encodings.*> - also freeze all direct submodules - <encodings.**.*> - also freeze the full submodule tree - - As with "frozenid", angle brackets around "modname" indicate - it is a package. However, in this case "pyfile" should not - have been provided and patterns in "modname" are not supported. 
- Also, if "modname" has brackets then "frozenid" should not, - and "pyfile" should have been provided.. - """ - frozenid, _, remainder = spec.partition(':') - modname, _, pyfile = remainder.partition('=') - frozenid = frozenid.strip() - modname = modname.strip() - pyfile = pyfile.strip() - - submodules = None - if modname.startswith('<') and modname.endswith('>'): - assert check_modname(frozenid), spec - modname = modname[1:-1] - assert check_modname(modname), spec - if frozenid in knownids: - pass - elif pyfile: - assert not os.path.isdir(pyfile), spec - else: - pyfile = _resolve_module(frozenid, ispkg=False) - ispkg = True - elif pyfile: - assert check_modname(frozenid), spec - assert not knownids or frozenid not in knownids, spec - assert check_modname(modname), spec - assert not os.path.isdir(pyfile), spec - ispkg = False - elif knownids and frozenid in knownids: - assert check_modname(frozenid), spec - assert check_modname(modname), spec - ispkg = False - else: - assert not modname or check_modname(modname), spec - resolved = iter(resolve_modules(frozenid)) - frozenid, pyfile, ispkg = next(resolved) - if not modname: - modname = frozenid - if ispkg: - pkgid = frozenid - pkgname = modname - pkgfiles = {pyfile: pkgid} - def iter_subs(): - for frozenid, pyfile, ispkg in resolved: - if pkgname: - modname = frozenid.replace(pkgid, pkgname, 1) - else: - modname = frozenid - if pyfile: - if pyfile in pkgfiles: - frozenid = pkgfiles[pyfile] - pyfile = None - elif ispkg: - pkgfiles[pyfile] = frozenid - yield frozenid, pyfile, modname, ispkg, section - submodules = iter_subs() - - info = (frozenid, pyfile or None, modname, ispkg, section) - return info, submodules - - -####################################### -# frozen source files - -class FrozenSource(namedtuple('FrozenSource', 'id pyfile frozenfile deepfreezefile')): - - @classmethod - def from_id(cls, frozenid, pyfile=None): - if not pyfile: - pyfile = os.path.join(STDLIB_DIR, *frozenid.split('.')) + '.py' - #assert os.path.exists(pyfile), (frozenid, pyfile) - frozenfile = resolve_frozen_file(frozenid, FROZEN_MODULES_DIR) - deepfreezefile = resolve_frozen_file(frozenid, DEEPFROZEN_MODULES_DIR) - return cls(frozenid, pyfile, frozenfile, deepfreezefile) - - @property - def frozenid(self): - return self.id - - @property - def modname(self): - if self.pyfile.startswith(STDLIB_DIR): - return self.id - return None - - @property - def symbol(self): - # This matches what we do in Programs/_freeze_module.c: - name = self.frozenid.replace('.', '_') - return '_Py_M__' + name - - @property - def ispkg(self): - if not self.pyfile: - return False - elif self.frozenid.endswith('.__init__'): - return False - else: - return os.path.basename(self.pyfile) == '__init__.py' - - @property - def isbootstrap(self): - return self.id in BOOTSTRAP - - -def resolve_frozen_file(frozenid, destdir): - """Return the filename corresponding to the given frozen ID. - - For stdlib modules the ID will always be the full name - of the source module. - """ - if not isinstance(frozenid, str): - try: - frozenid = frozenid.frozenid - except AttributeError: - raise ValueError(f'unsupported frozenid {frozenid!r}') - # We use a consistent naming convention for all frozen modules. 
- frozenfile = f'{frozenid}.h' - if not destdir: - return frozenfile - return os.path.join(destdir, frozenfile) - - -####################################### -# frozen modules - -class FrozenModule(namedtuple('FrozenModule', 'name ispkg section source')): - - def __getattr__(self, name): - return getattr(self.source, name) - - @property - def modname(self): - return self.name - - @property - def orig(self): - return self.source.modname - - @property - def isalias(self): - orig = self.source.modname - if not orig: - return True - return self.name != orig - - def summarize(self): - source = self.source.modname - if source: - source = f'<{source}>' - else: - source = relpath_for_posix_display(self.pyfile, ROOT_DIR) - return { - 'module': self.name, - 'ispkg': self.ispkg, - 'source': source, - 'frozen': os.path.basename(self.frozenfile), - 'checksum': _get_checksum(self.frozenfile), - } - - -def _iter_sources(modules): - seen = set() - for mod in modules: - if mod.source not in seen: - yield mod.source - seen.add(mod.source) - - -####################################### -# generic helpers - -def _get_checksum(filename): - with open(filename, "rb") as infile: - contents = infile.read() - m = hashlib.sha256() - m.update(contents) - return m.hexdigest() - - -def resolve_modules(modname, pyfile=None): - if modname.startswith('<') and modname.endswith('>'): - if pyfile: - assert os.path.isdir(pyfile) or os.path.basename(pyfile) == '__init__.py', pyfile - ispkg = True - modname = modname[1:-1] - rawname = modname - # For now, we only expect match patterns at the end of the name. - _modname, sep, match = modname.rpartition('.') - if sep: - if _modname.endswith('.**'): - modname = _modname[:-3] - match = f'**.{match}' - elif match and not match.isidentifier(): - modname = _modname - # Otherwise it's a plain name so we leave it alone. - else: - match = None - else: - ispkg = False - rawname = modname - match = None - - if not check_modname(modname): - raise ValueError(f'not a valid module name ({rawname})') - - if not pyfile: - pyfile = _resolve_module(modname, ispkg=ispkg) - elif os.path.isdir(pyfile): - pyfile = _resolve_module(modname, pyfile, ispkg) - yield modname, pyfile, ispkg - - if match: - pkgdir = os.path.dirname(pyfile) - yield from iter_submodules(modname, pkgdir, match) - - -def check_modname(modname): - return all(n.isidentifier() for n in modname.split('.')) - - -def iter_submodules(pkgname, pkgdir=None, match='*'): - if not pkgdir: - pkgdir = os.path.join(STDLIB_DIR, *pkgname.split('.')) - if not match: - match = '**.*' - match_modname = _resolve_modname_matcher(match, pkgdir) - - def _iter_submodules(pkgname, pkgdir): - for entry in sorted(os.scandir(pkgdir), key=lambda e: e.name): - matched, recursive = match_modname(entry.name) - if not matched: - continue - modname = f'{pkgname}.{entry.name}' - if modname.endswith('.py'): - yield modname[:-3], entry.path, False - elif entry.is_dir(): - pyfile = os.path.join(entry.path, '__init__.py') - # We ignore namespace packages. 
- if os.path.exists(pyfile): - yield modname, pyfile, True - if recursive: - yield from _iter_submodules(modname, entry.path) - - return _iter_submodules(pkgname, pkgdir) - - -def _resolve_modname_matcher(match, rootdir=None): - if isinstance(match, str): - if match.startswith('**.'): - recursive = True - pat = match[3:] - assert match - else: - recursive = False - pat = match - - if pat == '*': - def match_modname(modname): - return True, recursive - else: - raise NotImplementedError(match) - elif callable(match): - match_modname = match(rootdir) - else: - raise ValueError(f'unsupported matcher {match!r}') - return match_modname - - -def _resolve_module(modname, pathentry=STDLIB_DIR, ispkg=False): - assert pathentry, pathentry - pathentry = os.path.normpath(pathentry) - assert os.path.isabs(pathentry) - if ispkg: - return os.path.join(pathentry, *modname.split('.'), '__init__.py') - return os.path.join(pathentry, *modname.split('.')) + '.py' - - -####################################### -# regenerating dependent files - -def find_marker(lines, marker, file): - for pos, line in enumerate(lines): - if marker in line: - return pos - raise Exception(f"Can't find {marker!r} in file {file}") - - -def replace_block(lines, start_marker, end_marker, replacements, file): - start_pos = find_marker(lines, start_marker, file) - end_pos = find_marker(lines, end_marker, file) - if end_pos <= start_pos: - raise Exception(f"End marker {end_marker!r} " - f"occurs before start marker {start_marker!r} " - f"in file {file}") - replacements = [line.rstrip() + '\n' for line in replacements] - return lines[:start_pos + 1] + replacements + lines[end_pos:] - - -def regen_frozen(modules, frozen_modules: bool): - headerlines = [] - parentdir = os.path.dirname(FROZEN_FILE) - if frozen_modules: - for src in _iter_sources(modules): - # Adding a comment to separate sections here doesn't add much, - # so we don't. - header = relpath_for_posix_display(src.frozenfile, parentdir) - headerlines.append(f'#include "{header}"') - - externlines = [] - bootstraplines = [] - stdliblines = [] - testlines = [] - aliaslines = [] - indent = ' ' - lastsection = None - for mod in modules: - if mod.isbootstrap: - lines = bootstraplines - elif mod.section == TESTS_SECTION: - lines = testlines - else: - lines = stdliblines - if mod.section != lastsection: - if lastsection is not None: - lines.append('') - lines.append(f'/* {mod.section} */') - lastsection = mod.section - - # Also add a extern declaration for the corresponding - # deepfreeze-generated function. - orig_name = mod.source.id - code_name = orig_name.replace(".", "_") - get_code_name = "_Py_get_%s_toplevel" % code_name - externlines.append("extern PyObject *%s(void);" % get_code_name) - - symbol = mod.symbol - pkg = 'true' if mod.ispkg else 'false' - if not frozen_modules: - line = ('{"%s", NULL, 0, %s, GET_CODE(%s)},' - ) % (mod.name, pkg, code_name) - else: - line = ('{"%s", %s, (int)sizeof(%s), %s, GET_CODE(%s)},' - ) % (mod.name, symbol, symbol, pkg, code_name) - lines.append(line) - - if mod.isalias: - if not mod.orig: - entry = '{"%s", NULL},' % (mod.name,) - elif mod.source.ispkg: - entry = '{"%s", "<%s"},' % (mod.name, mod.orig) - else: - entry = '{"%s", "%s"},' % (mod.name, mod.orig) - aliaslines.append(indent + entry) - - for lines in (bootstraplines, stdliblines, testlines): - # TODO: Is this necessary any more? 
- if not lines[0]: - del lines[0] - for i, line in enumerate(lines): - if line: - lines[i] = indent + line - - print(f'# Updating {os.path.relpath(FROZEN_FILE)}') - with updating_file_with_tmpfile(FROZEN_FILE) as (infile, outfile): - lines = infile.readlines() - # TODO: Use more obvious markers, e.g. - # $START GENERATED FOOBAR$ / $END GENERATED FOOBAR$ - lines = replace_block( - lines, - "/* Includes for frozen modules: */", - "/* End includes */", - headerlines, - FROZEN_FILE, - ) - lines = replace_block( - lines, - "/* Start extern declarations */", - "/* End extern declarations */", - externlines, - FROZEN_FILE, - ) - lines = replace_block( - lines, - "static const struct _frozen bootstrap_modules[] =", - "/* bootstrap sentinel */", - bootstraplines, - FROZEN_FILE, - ) - lines = replace_block( - lines, - "static const struct _frozen stdlib_modules[] =", - "/* stdlib sentinel */", - stdliblines, - FROZEN_FILE, - ) - lines = replace_block( - lines, - "static const struct _frozen test_modules[] =", - "/* test sentinel */", - testlines, - FROZEN_FILE, - ) - lines = replace_block( - lines, - "const struct _module_alias aliases[] =", - "/* aliases sentinel */", - aliaslines, - FROZEN_FILE, - ) - outfile.writelines(lines) - - -def regen_makefile(modules): - pyfiles = [] - frozenfiles = [] - rules = [''] - deepfreezerules = ["Python/deepfreeze/deepfreeze.c: $(DEEPFREEZE_DEPS)", - "\t$(PYTHON_FOR_FREEZE) $(srcdir)/Tools/scripts/deepfreeze.py \\"] - for src in _iter_sources(modules): - frozen_header = relpath_for_posix_display(src.frozenfile, ROOT_DIR) - frozenfiles.append(f'\t\t{frozen_header} \\') - - pyfile = relpath_for_posix_display(src.pyfile, ROOT_DIR) - pyfiles.append(f'\t\t{pyfile} \\') - - if src.isbootstrap: - freezecmd = '$(FREEZE_MODULE_BOOTSTRAP)' - freezedep = '$(FREEZE_MODULE_BOOTSTRAP_DEPS)' - else: - freezecmd = '$(FREEZE_MODULE)' - freezedep = '$(FREEZE_MODULE_DEPS)' - - freeze = (f'{freezecmd} {src.frozenid} ' - f'$(srcdir)/{pyfile} {frozen_header}') - rules.extend([ - f'{frozen_header}: {pyfile} {freezedep}', - f'\t{freeze}', - '', - ]) - deepfreezerules.append(f"\t{frozen_header}:{src.frozenid} \\") - deepfreezerules.append('\t-o Python/deepfreeze/deepfreeze.c') - pyfiles[-1] = pyfiles[-1].rstrip(" \\") - frozenfiles[-1] = frozenfiles[-1].rstrip(" \\") - - print(f'# Updating {os.path.relpath(MAKEFILE)}') - with updating_file_with_tmpfile(MAKEFILE) as (infile, outfile): - lines = infile.readlines() - lines = replace_block( - lines, - "FROZEN_FILES_IN =", - "# End FROZEN_FILES_IN", - pyfiles, - MAKEFILE, - ) - lines = replace_block( - lines, - "FROZEN_FILES_OUT =", - "# End FROZEN_FILES_OUT", - frozenfiles, - MAKEFILE, - ) - lines = replace_block( - lines, - "# BEGIN: freezing modules", - "# END: freezing modules", - rules, - MAKEFILE, - ) - lines = replace_block( - lines, - "# BEGIN: deepfreeze modules", - "# END: deepfreeze modules", - deepfreezerules, - MAKEFILE, - ) - outfile.writelines(lines) - - -def regen_pcbuild(modules): - projlines = [] - filterlines = [] - corelines = [] - deepfreezerules = ['\t<Exec Command=\'$(PythonForBuild) "$(PySourcePath)Tools\\scripts\\deepfreeze.py" ^'] - for src in _iter_sources(modules): - pyfile = relpath_for_windows_display(src.pyfile, ROOT_DIR) - header = relpath_for_windows_display(src.frozenfile, ROOT_DIR) - intfile = ntpath.splitext(ntpath.basename(header))[0] + '.g.h' - projlines.append(f' <None Include="..\\{pyfile}">') - projlines.append(f' <ModName>{src.frozenid}</ModName>') - projlines.append(f' 
<IntFile>$(IntDir){intfile}</IntFile>') - projlines.append(f' <OutFile>$(PySourcePath){header}</OutFile>') - projlines.append(f' </None>') - - filterlines.append(f' <None Include="..\\{pyfile}">') - filterlines.append(' <Filter>Python Files</Filter>') - filterlines.append(' </None>') - deepfreezerules.append(f'\t\t "$(PySourcePath){header}:{src.frozenid}" ^') - deepfreezerules.append('\t\t "-o" "$(PySourcePath)Python\\deepfreeze\\deepfreeze.c"\'/>' ) - - corelines.append(f' <ClCompile Include="..\\Python\\deepfreeze\\deepfreeze.c" />') - - print(f'# Updating {os.path.relpath(PCBUILD_PROJECT)}') - with updating_file_with_tmpfile(PCBUILD_PROJECT) as (infile, outfile): - lines = infile.readlines() - lines = replace_block( - lines, - '<!-- BEGIN frozen modules -->', - '<!-- END frozen modules -->', - projlines, - PCBUILD_PROJECT, - ) - outfile.writelines(lines) - with updating_file_with_tmpfile(PCBUILD_PROJECT) as (infile, outfile): - lines = infile.readlines() - lines = replace_block( - lines, - '<!-- BEGIN deepfreeze rule -->', - '<!-- END deepfreeze rule -->', - deepfreezerules, - PCBUILD_PROJECT, - ) - outfile.writelines(lines) - print(f'# Updating {os.path.relpath(PCBUILD_FILTERS)}') - with updating_file_with_tmpfile(PCBUILD_FILTERS) as (infile, outfile): - lines = infile.readlines() - lines = replace_block( - lines, - '<!-- BEGIN frozen modules -->', - '<!-- END frozen modules -->', - filterlines, - PCBUILD_FILTERS, - ) - outfile.writelines(lines) - print(f'# Updating {os.path.relpath(PCBUILD_PYTHONCORE)}') - with updating_file_with_tmpfile(PCBUILD_PYTHONCORE) as (infile, outfile): - lines = infile.readlines() - lines = replace_block( - lines, - '<!-- BEGIN deepfreeze -->', - '<!-- END deepfreeze -->', - corelines, - PCBUILD_FILTERS, - ) - outfile.writelines(lines) - - -####################################### -# the script - -parser = argparse.ArgumentParser() -parser.add_argument("--frozen-modules", action="store_true", - help="Use both frozen and deepfrozen modules. (default: uses only deepfrozen modules)") - -def main(): - args = parser.parse_args() - frozen_modules: bool = args.frozen_modules - # Expand the raw specs, preserving order. - modules = list(parse_frozen_specs()) - - # Regen build-related files. 
- regen_makefile(modules) - regen_pcbuild(modules) - regen_frozen(modules, frozen_modules) - - -if __name__ == '__main__': - main() diff --git a/Tools/scripts/generate_global_objects.py b/Tools/scripts/generate_global_objects.py deleted file mode 100644 index 0432bf5..0000000 --- a/Tools/scripts/generate_global_objects.py +++ /dev/null @@ -1,381 +0,0 @@ -import contextlib -import io -import os.path -import re - -__file__ = os.path.abspath(__file__) -ROOT = os.path.dirname(os.path.dirname(os.path.dirname(__file__))) -INTERNAL = os.path.join(ROOT, 'Include', 'internal') - - -IGNORED = { - 'ACTION', # Python/_warnings.c - 'ATTR', # Python/_warnings.c and Objects/funcobject.c - 'DUNDER', # Objects/typeobject.c - 'RDUNDER', # Objects/typeobject.c - 'SPECIAL', # Objects/weakrefobject.c - 'NAME', # Objects/typeobject.c -} -IDENTIFIERS = [ - # from ADD() Python/_warnings.c - 'default', - 'ignore', - - # from GET_WARNINGS_ATTR() in Python/_warnings.c - 'WarningMessage', - '_showwarnmsg', - '_warn_unawaited_coroutine', - 'defaultaction', - 'filters', - 'onceregistry', - - # from WRAP_METHOD() in Objects/weakrefobject.c - '__bytes__', - '__reversed__', - - # from COPY_ATTR() in Objects/funcobject.c - '__module__', - '__name__', - '__qualname__', - '__doc__', - '__annotations__', - - # from SLOT* in Objects/typeobject.c - '__abs__', - '__add__', - '__aiter__', - '__and__', - '__anext__', - '__await__', - '__bool__', - '__call__', - '__contains__', - '__del__', - '__delattr__', - '__delete__', - '__delitem__', - '__eq__', - '__float__', - '__floordiv__', - '__ge__', - '__get__', - '__getattr__', - '__getattribute__', - '__getitem__', - '__gt__', - '__hash__', - '__iadd__', - '__iand__', - '__ifloordiv__', - '__ilshift__', - '__imatmul__', - '__imod__', - '__imul__', - '__index__', - '__init__', - '__int__', - '__invert__', - '__ior__', - '__ipow__', - '__irshift__', - '__isub__', - '__iter__', - '__itruediv__', - '__ixor__', - '__le__', - '__len__', - '__lshift__', - '__lt__', - '__matmul__', - '__mod__', - '__mul__', - '__ne__', - '__neg__', - '__new__', - '__next__', - '__or__', - '__pos__', - '__pow__', - '__radd__', - '__rand__', - '__repr__', - '__rfloordiv__', - '__rlshift__', - '__rmatmul__', - '__rmod__', - '__rmul__', - '__ror__', - '__rpow__', - '__rrshift__', - '__rshift__', - '__rsub__', - '__rtruediv__', - '__rxor__', - '__set__', - '__setattr__', - '__setitem__', - '__str__', - '__sub__', - '__truediv__', - '__xor__', - '__divmod__', - '__rdivmod__', -] - - -####################################### -# helpers - -def iter_files(): - for name in ('Modules', 'Objects', 'Parser', 'PC', 'Programs', 'Python'): - root = os.path.join(ROOT, name) - for dirname, _, files in os.walk(root): - for name in files: - if not name.endswith(('.c', '.h')): - continue - yield os.path.join(dirname, name) - - -def iter_global_strings(): - id_regex = re.compile(r'\b_Py_ID\((\w+)\)') - str_regex = re.compile(r'\b_Py_DECLARE_STR\((\w+), "(.*?)"\)') - for filename in iter_files(): - try: - infile = open(filename, encoding='utf-8') - except FileNotFoundError: - # The file must have been a temporary file. 
- continue - with infile: - for lno, line in enumerate(infile, 1): - for m in id_regex.finditer(line): - identifier, = m.groups() - yield identifier, None, filename, lno, line - for m in str_regex.finditer(line): - varname, string = m.groups() - yield varname, string, filename, lno, line - - -def iter_to_marker(lines, marker): - for line in lines: - if line.rstrip() == marker: - break - yield line - - -class Printer: - - def __init__(self, file): - self.level = 0 - self.file = file - self.continuation = [False] - - @contextlib.contextmanager - def indent(self): - save_level = self.level - try: - self.level += 1 - yield - finally: - self.level = save_level - - def write(self, arg): - eol = '\n' - if self.continuation[-1]: - eol = f' \\{eol}' if arg else f'\\{eol}' - self.file.writelines((" "*self.level, arg, eol)) - - @contextlib.contextmanager - def block(self, prefix, suffix="", *, continuation=None): - if continuation is None: - continuation = self.continuation[-1] - self.continuation.append(continuation) - - self.write(prefix + " {") - with self.indent(): - yield - self.continuation.pop() - self.write("}" + suffix) - - -@contextlib.contextmanager -def open_for_changes(filename, orig): - """Like open() but only write to the file if it changed.""" - outfile = io.StringIO() - yield outfile - text = outfile.getvalue() - if text != orig: - with open(filename, 'w', encoding='utf-8') as outfile: - outfile.write(text) - else: - print(f'# not changed: {filename}') - - -####################################### -# the global objects - -START = '/* The following is auto-generated by Tools/scripts/generate_global_objects.py. */' -END = '/* End auto-generated code */' - - -def generate_global_strings(identifiers, strings): - filename = os.path.join(INTERNAL, 'pycore_global_strings.h') - - # Read the non-generated part of the file. - with open(filename) as infile: - orig = infile.read() - lines = iter(orig.rstrip().splitlines()) - before = '\n'.join(iter_to_marker(lines, START)) - for _ in iter_to_marker(lines, END): - pass - after = '\n'.join(lines) - - # Generate the file. - with open_for_changes(filename, orig) as outfile: - printer = Printer(outfile) - printer.write(before) - printer.write(START) - with printer.block('struct _Py_global_strings', ';'): - with printer.block('struct', ' literals;'): - for literal, name in sorted(strings.items(), key=lambda x: x[1]): - printer.write(f'STRUCT_FOR_STR({name}, "{literal}")') - outfile.write('\n') - with printer.block('struct', ' identifiers;'): - for name in sorted(identifiers): - assert name.isidentifier(), name - printer.write(f'STRUCT_FOR_ID({name})') - with printer.block('struct', ' ascii[128];'): - printer.write("PyASCIIObject _ascii;") - printer.write("uint8_t _data[2];") - with printer.block('struct', ' latin1[128];'): - printer.write("PyCompactUnicodeObject _latin1;") - printer.write("uint8_t _data[2];") - printer.write(END) - printer.write(after) - - -def generate_runtime_init(identifiers, strings): - # First get some info from the declarations. - nsmallposints = None - nsmallnegints = None - with open(os.path.join(INTERNAL, 'pycore_global_objects.h')) as infile: - for line in infile: - if line.startswith('#define _PY_NSMALLPOSINTS'): - nsmallposints = int(line.split()[-1]) - elif line.startswith('#define _PY_NSMALLNEGINTS'): - nsmallnegints = int(line.split()[-1]) - break - else: - raise NotImplementedError - assert nsmallposints and nsmallnegints - - # Then target the runtime initializer. 
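A minimal usage sketch of the Printer helper defined above (assuming the class is in scope): block() emits a brace-delimited section and indent() controls the nesting level of each write().

    import io

    out = io.StringIO()
    p = Printer(out)
    with p.block('struct demo', ';'):
        p.write('int x;')
    # out now holds "struct demo {", an indented "int x;", then "};"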
- filename = os.path.join(INTERNAL, 'pycore_runtime_init_generated.h') - - # Read the non-generated part of the file. - with open(filename) as infile: - orig = infile.read() - lines = iter(orig.rstrip().splitlines()) - before = '\n'.join(iter_to_marker(lines, START)) - for _ in iter_to_marker(lines, END): - pass - after = '\n'.join(lines) - - # Generate the file. - with open_for_changes(filename, orig) as outfile: - immortal_objects = [] - printer = Printer(outfile) - printer.write(before) - printer.write(START) - with printer.block('#define _Py_global_objects_INIT', continuation=True): - with printer.block('.singletons =', ','): - # Global int objects. - with printer.block('.small_ints =', ','): - for i in range(-nsmallnegints, nsmallposints): - printer.write(f'_PyLong_DIGIT_INIT({i}),') - immortal_objects.append(f'(PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + {i}]') - printer.write('') - # Global bytes objects. - printer.write('.bytes_empty = _PyBytes_SIMPLE_INIT(0, 0),') - immortal_objects.append(f'(PyObject *)&_Py_SINGLETON(bytes_empty)') - with printer.block('.bytes_characters =', ','): - for i in range(256): - printer.write(f'_PyBytes_CHAR_INIT({i}),') - immortal_objects.append(f'(PyObject *)&_Py_SINGLETON(bytes_characters)[{i}]') - printer.write('') - # Global strings. - with printer.block('.strings =', ','): - with printer.block('.literals =', ','): - for literal, name in sorted(strings.items(), key=lambda x: x[1]): - printer.write(f'INIT_STR({name}, "{literal}"),') - immortal_objects.append(f'(PyObject *)&_Py_STR({name})') - with printer.block('.identifiers =', ','): - for name in sorted(identifiers): - assert name.isidentifier(), name - printer.write(f'INIT_ID({name}),') - immortal_objects.append(f'(PyObject *)&_Py_ID({name})') - with printer.block('.ascii =', ','): - for i in range(128): - printer.write(f'_PyASCIIObject_INIT("\\x{i:02x}"),') - immortal_objects.append(f'(PyObject *)&_Py_SINGLETON(strings).ascii[{i}]') - with printer.block('.latin1 =', ','): - for i in range(128, 256): - utf8 = ['"'] - for c in chr(i).encode('utf-8'): - utf8.append(f"\\x{c:02x}") - utf8.append('"') - printer.write(f'_PyUnicode_LATIN1_INIT("\\x{i:02x}", {"".join(utf8)}),') - immortal_objects.append(f'(PyObject *)&_Py_SINGLETON(strings).latin1[{i} - 128]') - printer.write('') - with printer.block('.tuple_empty =', ','): - printer.write('.ob_base = _PyVarObject_IMMORTAL_INIT(&PyTuple_Type, 0)') - immortal_objects.append(f'(PyObject *)&_Py_SINGLETON(tuple_empty)') - printer.write('') - printer.write("static inline void") - with printer.block("_PyUnicode_InitStaticStrings(void)"): - printer.write(f'PyObject *string;') - for i in sorted(identifiers): - # This use of _Py_ID() is ignored by iter_global_strings() - # since iter_files() ignores .h files. 
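The .latin1 initializers above embed each character's UTF-8 encoding as C hex escapes; for a single code point the transformation looks like this:

    i = 0xE9  # 'é', a Latin-1 code point
    utf8 = ''.join(f'\\x{c:02x}' for c in chr(i).encode('utf-8'))
    print(utf8)  # \xc3\xa9 -- the two UTF-8 bytes of 'é'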
- printer.write(f'string = &_Py_ID({i});') - printer.write(f'PyUnicode_InternInPlace(&string);') - printer.write('') - printer.write('#ifdef Py_DEBUG') - printer.write("static inline void") - with printer.block("_PyStaticObjects_CheckRefcnt(void)"): - for i in immortal_objects: - with printer.block(f'if (Py_REFCNT({i}) < _PyObject_IMMORTAL_REFCNT)', ';'): - printer.write(f'_PyObject_Dump({i});') - printer.write(f'Py_FatalError("immortal object has less refcnt than ' - 'expected _PyObject_IMMORTAL_REFCNT");') - printer.write('#endif') - printer.write(END) - printer.write(after) - - -def get_identifiers_and_strings() -> 'tuple[set[str], dict[str, str]]': - identifiers = set(IDENTIFIERS) - strings = {} - for name, string, *_ in iter_global_strings(): - if string is None: - if name not in IGNORED: - identifiers.add(name) - else: - if string not in strings: - strings[string] = name - elif name != strings[string]: - raise ValueError(f'string mismatch for {name!r} ({string!r} != {strings[name]!r}') - return identifiers, strings - - -####################################### -# the script - -def main() -> None: - identifiers, strings = get_identifiers_and_strings() - - generate_global_strings(identifiers, strings) - generate_runtime_init(identifiers, strings) - - -if __name__ == '__main__': - main() diff --git a/Tools/scripts/generate_levenshtein_examples.py b/Tools/scripts/generate_levenshtein_examples.py deleted file mode 100644 index 5a8360f..0000000 --- a/Tools/scripts/generate_levenshtein_examples.py +++ /dev/null @@ -1,70 +0,0 @@ -"""Generate 10,000 unique examples for the Levenshtein short-circuit tests.""" - -import argparse -from functools import cache -import json -import os.path -from random import choices, randrange - - -# This should be in sync with Lib/traceback.py. It's not importing those values -# because this script is being executed by PYTHON_FOR_REGEN and not by the in-tree -# build of Python. -_MOVE_COST = 2 -_CASE_COST = 1 - - -def _substitution_cost(ch_a, ch_b): - if ch_a == ch_b: - return 0 - if ch_a.lower() == ch_b.lower(): - return _CASE_COST - return _MOVE_COST - - -@cache -def levenshtein(a, b): - if not a or not b: - return (len(a) + len(b)) * _MOVE_COST - option1 = levenshtein(a[:-1], b[:-1]) + _substitution_cost(a[-1], b[-1]) - option2 = levenshtein(a[:-1], b) + _MOVE_COST - option3 = levenshtein(a, b[:-1]) + _MOVE_COST - return min(option1, option2, option3) - - -def main(): - parser = argparse.ArgumentParser(description=__doc__) - parser.add_argument('output_path', metavar='FILE', type=str) - parser.add_argument('--overwrite', dest='overwrite', action='store_const', - const=True, default=False, - help='overwrite an existing test file') - - args = parser.parse_args() - output_path = os.path.realpath(args.output_path) - if not args.overwrite and os.path.isfile(output_path): - print(f"{output_path} already exists, skipping regeneration.") - print( - "To force, add --overwrite to the invocation of this tool or" - " delete the existing file." - ) - return - - examples = set() - # Create a lot of non-empty examples, which should end up with a Gauss-like - # distribution for even costs (moves) and odd costs (case substitutions). - while len(examples) < 9990: - a = ''.join(choices("abcABC", k=randrange(1, 10))) - b = ''.join(choices("abcABC", k=randrange(1, 10))) - expected = levenshtein(a, b) - examples.add((a, b, expected)) - # Create one empty case each for strings between 0 and 9 in length. 
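A few sanity checks of the cost model, assuming the levenshtein(), _MOVE_COST and _CASE_COST defined above (the empty-string cases follow in the loop below):

    assert levenshtein("", "abc") == 3 * _MOVE_COST            # three insertions
    assert levenshtein("Python", "python") == _CASE_COST       # case-only substitution
    assert levenshtein("kitten", "sitting") == 3 * _MOVE_COST  # classic distance 3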
- for i in range(10): - b = ''.join(choices("abcABC", k=i)) - expected = levenshtein("", b) - examples.add(("", b, expected)) - with open(output_path, "w") as f: - json.dump(sorted(examples), f, indent=2) - - -if __name__ == "__main__": - main() diff --git a/Tools/scripts/generate_opcode_h.py b/Tools/scripts/generate_opcode_h.py deleted file mode 100644 index 9ff264a..0000000 --- a/Tools/scripts/generate_opcode_h.py +++ /dev/null @@ -1,199 +0,0 @@ -# This script generates the opcode.h header file. - -import sys -import tokenize - -SCRIPT_NAME = "Tools/scripts/generate_opcode_h.py" -PYTHON_OPCODE = "Lib/opcode.py" - -header = f""" -// Auto-generated by {SCRIPT_NAME} from {PYTHON_OPCODE} - -#ifndef Py_OPCODE_H -#define Py_OPCODE_H -#ifdef __cplusplus -extern "C" {{ -#endif - - -/* Instruction opcodes for compiled code */ -""".lstrip() - -footer = """ - -#define IS_PSEUDO_OPCODE(op) (((op) >= MIN_PSEUDO_OPCODE) && ((op) <= MAX_PSEUDO_OPCODE)) - -#ifdef __cplusplus -} -#endif -#endif /* !Py_OPCODE_H */ -""" - -internal_header = f""" -// Auto-generated by {SCRIPT_NAME} from {PYTHON_OPCODE} - -#ifndef Py_INTERNAL_OPCODE_H -#define Py_INTERNAL_OPCODE_H -#ifdef __cplusplus -extern "C" {{ -#endif - -#ifndef Py_BUILD_CORE -# error "this header requires Py_BUILD_CORE define" -#endif - -#include "opcode.h" -""".lstrip() - -internal_footer = """ -#ifdef __cplusplus -} -#endif -#endif // !Py_INTERNAL_OPCODE_H -""" - -DEFINE = "#define {:<38} {:>3}\n" - -UINT32_MASK = (1<<32)-1 - -def write_int_array_from_ops(name, ops, out): - bits = 0 - for op in ops: - bits |= 1<<op - out.write(f"static const uint32_t {name}[9] = {{\n") - for i in range(9): - out.write(f" {bits & UINT32_MASK}U,\n") - bits >>= 32 - assert bits == 0 - out.write(f"}};\n") - -def main(opcode_py, outfile='Include/opcode.h', internaloutfile='Include/internal/pycore_opcode.h'): - opcode = {} - if hasattr(tokenize, 'open'): - fp = tokenize.open(opcode_py) # Python 3.2+ - else: - fp = open(opcode_py) # Python 2.7 - with fp: - code = fp.read() - exec(code, opcode) - opmap = opcode['opmap'] - opname = opcode['opname'] - hasarg = opcode['hasarg'] - hasconst = opcode['hasconst'] - hasjrel = opcode['hasjrel'] - hasjabs = opcode['hasjabs'] - is_pseudo = opcode['is_pseudo'] - _pseudo_ops = opcode['_pseudo_ops'] - - HAVE_ARGUMENT = opcode["HAVE_ARGUMENT"] - MIN_PSEUDO_OPCODE = opcode["MIN_PSEUDO_OPCODE"] - MAX_PSEUDO_OPCODE = opcode["MAX_PSEUDO_OPCODE"] - - NUM_OPCODES = len(opname) - used = [ False ] * len(opname) - next_op = 1 - - for name, op in opmap.items(): - used[op] = True - - specialized_opmap = {} - opname_including_specialized = opname.copy() - for name in opcode['_specialized_instructions']: - while used[next_op]: - next_op += 1 - specialized_opmap[name] = next_op - opname_including_specialized[next_op] = name - used[next_op] = True - specialized_opmap['DO_TRACING'] = 255 - opname_including_specialized[255] = 'DO_TRACING' - used[255] = True - - with (open(outfile, 'w') as fobj, open(internaloutfile, 'w') as iobj): - fobj.write(header) - iobj.write(internal_header) - - for name in opname: - if name in opmap: - op = opmap[name] - if op == HAVE_ARGUMENT: - fobj.write(DEFINE.format("HAVE_ARGUMENT", HAVE_ARGUMENT)) - if op == MIN_PSEUDO_OPCODE: - fobj.write(DEFINE.format("MIN_PSEUDO_OPCODE", MIN_PSEUDO_OPCODE)) - - fobj.write(DEFINE.format(name, op)) - - if op == MAX_PSEUDO_OPCODE: - fobj.write(DEFINE.format("MAX_PSEUDO_OPCODE", MAX_PSEUDO_OPCODE)) - - - for name, op in specialized_opmap.items(): - fobj.write(DEFINE.format(name, op)) - - 
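write_int_array_from_ops() above packs a set of opcodes into nine 32-bit words (9 * 32 = 288 bits covers opcodes 0-255); membership is bit op % 32 of word op // 32. The same packing in miniature, with hypothetical opcode values:

    ops = {9, 100, 255}
    bits = 0
    for op in ops:
        bits |= 1 << op
    words = [(bits >> (32 * i)) & 0xFFFFFFFF for i in range(9)]
    # the C side tests membership as: words[op / 32] & (1 << (op % 32))
    assert all(words[op // 32] & (1 << (op % 32)) for op in ops)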
iobj.write("\nextern const uint8_t _PyOpcode_Caches[256];\n") - iobj.write("\nextern const uint8_t _PyOpcode_Deopt[256];\n") - iobj.write("\n#ifdef NEED_OPCODE_TABLES\n") - write_int_array_from_ops("_PyOpcode_RelativeJump", opcode['hasjrel'], iobj) - write_int_array_from_ops("_PyOpcode_Jump", opcode['hasjrel'] + opcode['hasjabs'], iobj) - - iobj.write("\nconst uint8_t _PyOpcode_Caches[256] = {\n") - for i, entries in enumerate(opcode["_inline_cache_entries"]): - if entries: - iobj.write(f" [{opname[i]}] = {entries},\n") - iobj.write("};\n") - - deoptcodes = {} - for basic, op in opmap.items(): - if not is_pseudo(op): - deoptcodes[basic] = basic - for basic, family in opcode["_specializations"].items(): - for specialized in family: - deoptcodes[specialized] = basic - iobj.write("\nconst uint8_t _PyOpcode_Deopt[256] = {\n") - for opt, deopt in sorted(deoptcodes.items()): - iobj.write(f" [{opt}] = {deopt},\n") - iobj.write("};\n") - iobj.write("#endif // NEED_OPCODE_TABLES\n") - - fobj.write("\n") - fobj.write("#define HAS_ARG(op) ((((op) >= HAVE_ARGUMENT) && (!IS_PSEUDO_OPCODE(op)))\\") - for op in _pseudo_ops: - if opmap[op] in hasarg: - fobj.write(f"\n || ((op) == {op}) \\") - fobj.write("\n )\n") - - fobj.write("\n") - fobj.write("#define HAS_CONST(op) (false\\") - for op in hasconst: - fobj.write(f"\n || ((op) == {opname[op]}) \\") - fobj.write("\n )\n") - - fobj.write("\n") - for i, (op, _) in enumerate(opcode["_nb_ops"]): - fobj.write(DEFINE.format(op, i)) - - iobj.write("\n") - iobj.write("#ifdef Py_DEBUG\n") - iobj.write(f"static const char *const _PyOpcode_OpName[{NUM_OPCODES}] = {{\n") - for op, name in enumerate(opname_including_specialized): - if name[0] != "<": - op = name - iobj.write(f''' [{op}] = "{name}",\n''') - iobj.write("};\n") - iobj.write("#endif\n") - - iobj.write("\n") - iobj.write("#define EXTRA_CASES \\\n") - for i, flag in enumerate(used): - if not flag: - iobj.write(f" case {i}: \\\n") - iobj.write(" ;\n") - - fobj.write(footer) - iobj.write(internal_footer) - - - print(f"{outfile} regenerated from {opcode_py}") - - -if __name__ == '__main__': - main(sys.argv[1], sys.argv[2], sys.argv[3]) diff --git a/Tools/scripts/generate_re_casefix.py b/Tools/scripts/generate_re_casefix.py deleted file mode 100755 index 625b065..0000000 --- a/Tools/scripts/generate_re_casefix.py +++ /dev/null @@ -1,94 +0,0 @@ -#! /usr/bin/env python3 -# This script generates Lib/re/_casefix.py. - -import collections -import sys -import unicodedata - -def update_file(file, content): - try: - with open(file, 'r', encoding='utf-8') as fobj: - if fobj.read() == content: - return False - except (OSError, ValueError): - pass - with open(file, 'w', encoding='utf-8') as fobj: - fobj.write(content) - return True - -re_casefix_template = """\ -# Auto-generated by Tools/scripts/generate_re_casefix.py. - -# Maps the code of lowercased character to codes of different lowercased -# characters which have the same uppercase. -_EXTRA_CASES = { -%s -} -""" - -def uname(i): - return unicodedata.name(chr(i), r'U+%04X' % i) - -class hexint(int): - def __repr__(self): - return '%#06x' % self - -def alpha(i): - c = chr(i) - return c if c.isalpha() else ascii(c)[1:-1] - - -def main(outfile='Lib/re/_casefix.py'): - # Find sets of characters which have the same uppercase. 
- equivalent_chars = collections.defaultdict(str) - for c in map(chr, range(sys.maxunicode + 1)): - equivalent_chars[c.upper()] += c - equivalent_chars = [t for t in equivalent_chars.values() if len(t) > 1] - - # List of codes of lowercased characters which have the same uppercase. - equivalent_lower_codes = [sorted(t) - for s in equivalent_chars - for t in [set(ord(c.lower()) for c in s)] - if len(t) > 1] - - bad_codes = [] - for t in equivalent_lower_codes: - for i in t: - if i > 0xffff: - bad_codes.extend(t) - try: - bad_codes.append(ord(chr(i).upper())) - except (ValueError, TypeError): - pass - break - if bad_codes: - print('Case-insensitive matching may not work correctly for character:', - file=sys.stderr) - for i in sorted(bad_codes): - print(" '%s' (U+%04x, %s)" % (alpha(i), i, uname(i)), - file=sys.stderr) - sys.exit(1) - - mapping = {i: tuple(j for j in t if i != j) - for t in equivalent_lower_codes - for i in t} - - items = [] - for i, t in sorted(mapping.items()): - items.append(' # %s: %s' % ( - uname(i), - ', '.join(map(uname, t)), - )) - items.append(" %r: %r, # '%s': '%s'" % ( - hexint(i), - tuple(map(hexint, t)), - alpha(i), - ''.join(map(alpha, t)), - )) - - update_file(outfile, re_casefix_template % '\n'.join(items)) - - -if __name__ == '__main__': - import sys - main(*sys.argv[1:]) diff --git a/Tools/scripts/generate_sre_constants.py b/Tools/scripts/generate_sre_constants.py deleted file mode 100755 index 7271507..0000000 --- a/Tools/scripts/generate_sre_constants.py +++ /dev/null @@ -1,78 +0,0 @@ -#! /usr/bin/env python3 -# This script generates Modules/_sre/sre_constants.h from Lib/re/_constants.py. - - -def update_file(file, content): - try: - with open(file, 'r') as fobj: - if fobj.read() == content: - return False - except (OSError, ValueError): - pass - with open(file, 'w') as fobj: - fobj.write(content) - return True - -sre_constants_header = """\ -/* - * Secret Labs' Regular Expression Engine - * - * regular expression matching engine - * - * Auto-generated by Tools/scripts/generate_sre_constants.py from - * Lib/re/_constants.py. - * - * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved. - * - * See the sre.c file for information on usage and redistribution. 
- */ - -""" - -def main( - infile="Lib/re/_constants.py", - outfile_constants="Modules/_sre/sre_constants.h", - outfile_targets="Modules/_sre/sre_targets.h", -): - ns = {} - with open(infile) as fp: - code = fp.read() - exec(code, ns) - - def dump(d, prefix): - items = sorted(d) - for item in items: - yield "#define %s_%s %d\n" % (prefix, item, item) - - def dump2(d, prefix): - items = [(value, name) for name, value in d.items() - if name.startswith(prefix)] - for value, name in sorted(items): - yield "#define %s %d\n" % (name, value) - - def dump_gotos(d, prefix): - for i, item in enumerate(sorted(d)): - assert i == item - yield f" &&{prefix}_{item},\n" - - content = [sre_constants_header] - content.append("#define SRE_MAGIC %d\n" % ns["MAGIC"]) - content.extend(dump(ns["OPCODES"], "SRE_OP")) - content.extend(dump(ns["ATCODES"], "SRE")) - content.extend(dump(ns["CHCODES"], "SRE")) - content.extend(dump2(ns, "SRE_FLAG_")) - content.extend(dump2(ns, "SRE_INFO_")) - - update_file(outfile_constants, ''.join(content)) - - content = [sre_constants_header] - content.append(f"static void *sre_targets[{len(ns['OPCODES'])}] = {{\n") - content.extend(dump_gotos(ns["OPCODES"], "TARGET_SRE_OP")) - content.append("};\n") - - update_file(outfile_targets, ''.join(content)) - - -if __name__ == '__main__': - import sys - main(*sys.argv[1:]) diff --git a/Tools/scripts/generate_stdlib_module_names.py b/Tools/scripts/generate_stdlib_module_names.py deleted file mode 100644 index 92100bd..0000000 --- a/Tools/scripts/generate_stdlib_module_names.py +++ /dev/null @@ -1,137 +0,0 @@ -# This script lists the names of standard library modules -# to update Python/stdlib_mod_names.h -import _imp -import os.path -import re -import subprocess -import sys -import sysconfig - -from check_extension_modules import ModuleChecker - - -SRC_DIR = os.path.dirname(os.path.dirname(os.path.dirname(__file__))) -STDLIB_PATH = os.path.join(SRC_DIR, 'Lib') - -IGNORE = { - '__init__', - '__pycache__', - 'site-packages', - - # Test modules and packages - '__hello__', - '__phello__', - '__hello_alias__', - '__phello_alias__', - '__hello_only__', - '_ctypes_test', - '_testbuffer', - '_testcapi', - '_testconsole', - '_testimportmultiple', - '_testinternalcapi', - '_testmultiphase', - '_xxsubinterpreters', - '_xxtestfuzz', - 'distutils.tests', - 'idlelib.idle_test', - 'test', - 'xxlimited', - 'xxlimited_35', - 'xxsubtype', -} - -# Pure Python modules (Lib/*.py) -def list_python_modules(names): - for filename in os.listdir(STDLIB_PATH): - if not filename.endswith(".py"): - continue - name = filename.removesuffix(".py") - names.add(name) - - -# Packages in Lib/ -def list_packages(names): - for name in os.listdir(STDLIB_PATH): - if name in IGNORE: - continue - package_path = os.path.join(STDLIB_PATH, name) - if not os.path.isdir(package_path): - continue - if any(package_file.endswith(".py") - for package_file in os.listdir(package_path)): - names.add(name) - - -# Built-in and extension modules built by Modules/Setup* -# includes Windows and macOS extensions. -def list_modules_setup_extensions(names): - checker = ModuleChecker() - names.update(checker.list_module_names(all=True)) - - -# List frozen modules of the PyImport_FrozenModules list (Python/frozen.c). -# Use the "./Programs/_testembed list_frozen" command. -def list_frozen(names): - submodules = set() - for name in _imp._frozen_module_names(): - # To skip __hello__, __hello_alias__ and etc. - if name.startswith('__'): - continue - if '.' 
in name: - submodules.add(name) - else: - names.add(name) - # Make sure all frozen submodules have a known parent. - for name in list(submodules): - if name.partition('.')[0] in names: - submodules.remove(name) - if submodules: - raise Exception(f'unexpected frozen submodules: {sorted(submodules)}') - - -def list_modules(): - names = set(sys.builtin_module_names) - list_modules_setup_extensions(names) - list_packages(names) - list_python_modules(names) - list_frozen(names) - - # Remove ignored packages and modules - for name in list(names): - package_name = name.split('.')[0] - # package_name can be equal to name - if package_name in IGNORE: - names.discard(name) - - for name in names: - if "." in name: - raise Exception("sub-modules must not be listed") - - return names - - -def write_modules(fp, names): - print("// Auto-generated by Tools/scripts/generate_stdlib_module_names.py.", - file=fp) - print("// List used to create sys.stdlib_module_names.", file=fp) - print(file=fp) - print("static const char* _Py_stdlib_module_names[] = {", file=fp) - for name in sorted(names): - print(f'"{name}",', file=fp) - print("};", file=fp) - - -def main(): - if not sysconfig.is_python_build(): - print(f"ERROR: {sys.executable} is not a Python build", - file=sys.stderr) - sys.exit(1) - - fp = sys.stdout - names = list_modules() - write_modules(fp, names) - - -if __name__ == "__main__": - main() diff --git a/Tools/scripts/generate_token.py b/Tools/scripts/generate_token.py deleted file mode 100755 index d8be8b9..0000000 --- a/Tools/scripts/generate_token.py +++ /dev/null @@ -1,275 +0,0 @@ -#! /usr/bin/env python3 -# This script generates token related files from Grammar/Tokens: -# -# Doc/library/token-list.inc -# Include/token.h -# Parser/token.c -# Lib/token.py - - -NT_OFFSET = 256 - -def load_tokens(path): - tok_names = [] - string_to_tok = {} - ERRORTOKEN = None - with open(path) as fp: - for line in fp: - line = line.strip() - # strip comments - i = line.find('#') - if i >= 0: - line = line[:i].strip() - if not line: - continue - fields = line.split() - name = fields[0] - value = len(tok_names) - if name == 'ERRORTOKEN': - ERRORTOKEN = value - string = fields[1] if len(fields) > 1 else None - if string: - string = eval(string) - string_to_tok[string] = value - tok_names.append(name) - return tok_names, ERRORTOKEN, string_to_tok - - -def update_file(file, content): - try: - with open(file, 'r') as fobj: - if fobj.read() == content: - return False - except (OSError, ValueError): - pass - with open(file, 'w') as fobj: - fobj.write(content) - return True - - -token_h_template = """\ -/* Auto-generated by Tools/scripts/generate_token.py */ - -/* Token types */ -#ifndef Py_INTERNAL_TOKEN_H -#define Py_INTERNAL_TOKEN_H -#ifdef __cplusplus -extern "C" { -#endif - -#ifndef Py_BUILD_CORE -# error "this header requires Py_BUILD_CORE define" -#endif - -#undef TILDE /* Prevent clash of our definition with system macro. 
Ex AIX, ioctl.h */ - -%s\ -#define N_TOKENS %d -#define NT_OFFSET %d - -/* Special definitions for cooperation with parser */ - -#define ISTERMINAL(x) ((x) < NT_OFFSET) -#define ISNONTERMINAL(x) ((x) >= NT_OFFSET) -#define ISEOF(x) ((x) == ENDMARKER) -#define ISWHITESPACE(x) ((x) == ENDMARKER || \\ - (x) == NEWLINE || \\ - (x) == INDENT || \\ - (x) == DEDENT) - - -// Symbols exported for test_peg_generator -PyAPI_DATA(const char * const) _PyParser_TokenNames[]; /* Token names */ -PyAPI_FUNC(int) _PyToken_OneChar(int); -PyAPI_FUNC(int) _PyToken_TwoChars(int, int); -PyAPI_FUNC(int) _PyToken_ThreeChars(int, int, int); - -#ifdef __cplusplus -} -#endif -#endif // !Py_INTERNAL_TOKEN_H -""" - -def make_h(infile, outfile='Include/internal/pycore_token.h'): - tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile) - - defines = [] - for value, name in enumerate(tok_names[:ERRORTOKEN + 1]): - defines.append("#define %-15s %d\n" % (name, value)) - - if update_file(outfile, token_h_template % ( - ''.join(defines), - len(tok_names), - NT_OFFSET - )): - print("%s regenerated from %s" % (outfile, infile)) - - -token_c_template = """\ -/* Auto-generated by Tools/scripts/generate_token.py */ - -#include "Python.h" -#include "pycore_token.h" - -/* Token names */ - -const char * const _PyParser_TokenNames[] = { -%s\ -}; - -/* Return the token corresponding to a single character */ - -int -_PyToken_OneChar(int c1) -{ -%s\ - return OP; -} - -int -_PyToken_TwoChars(int c1, int c2) -{ -%s\ - return OP; -} - -int -_PyToken_ThreeChars(int c1, int c2, int c3) -{ -%s\ - return OP; -} -""" - -def generate_chars_to_token(mapping, n=1): - result = [] - write = result.append - indent = ' ' * n - write(indent) - write('switch (c%d) {\n' % (n,)) - for c in sorted(mapping): - write(indent) - value = mapping[c] - if isinstance(value, dict): - write("case '%s':\n" % (c,)) - write(generate_chars_to_token(value, n + 1)) - write(indent) - write(' break;\n') - else: - write("case '%s': return %s;\n" % (c, value)) - write(indent) - write('}\n') - return ''.join(result) - -def make_c(infile, outfile='Parser/token.c'): - tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile) - string_to_tok['<>'] = string_to_tok['!='] - chars_to_token = {} - for string, value in string_to_tok.items(): - assert 1 <= len(string) <= 3 - name = tok_names[value] - m = chars_to_token.setdefault(len(string), {}) - for c in string[:-1]: - m = m.setdefault(c, {}) - m[string[-1]] = name - - names = [] - for value, name in enumerate(tok_names): - if value >= ERRORTOKEN: - name = '<%s>' % name - names.append(' "%s",\n' % name) - names.append(' "<N_TOKENS>",\n') - - if update_file(outfile, token_c_template % ( - ''.join(names), - generate_chars_to_token(chars_to_token[1]), - generate_chars_to_token(chars_to_token[2]), - generate_chars_to_token(chars_to_token[3]) - )): - print("%s regenerated from %s" % (outfile, infile)) - - -token_inc_template = """\ -.. Auto-generated by Tools/scripts/generate_token.py -%s -.. data:: N_TOKENS - -.. data:: NT_OFFSET -""" - -def make_rst(infile, outfile='Doc/library/token-list.inc'): - tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile) - tok_to_string = {value: s for s, value in string_to_tok.items()} - - names = [] - for value, name in enumerate(tok_names[:ERRORTOKEN + 1]): - names.append('.. data:: %s' % (name,)) - if value in tok_to_string: - names.append('') - names.append(' Token value for ``"%s"``.' 
% tok_to_string[value]) - names.append('') - - if update_file(outfile, token_inc_template % '\n'.join(names)): - print("%s regenerated from %s" % (outfile, infile)) - - -token_py_template = '''\ -"""Token constants.""" -# Auto-generated by Tools/scripts/generate_token.py - -__all__ = ['tok_name', 'ISTERMINAL', 'ISNONTERMINAL', 'ISEOF'] - -%s -N_TOKENS = %d -# Special definitions for cooperation with parser -NT_OFFSET = %d - -tok_name = {value: name - for name, value in globals().items() - if isinstance(value, int) and not name.startswith('_')} -__all__.extend(tok_name.values()) - -EXACT_TOKEN_TYPES = { -%s -} - -def ISTERMINAL(x): - return x < NT_OFFSET - -def ISNONTERMINAL(x): - return x >= NT_OFFSET - -def ISEOF(x): - return x == ENDMARKER -''' - -def make_py(infile, outfile='Lib/token.py'): - tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile) - - constants = [] - for value, name in enumerate(tok_names): - constants.append('%s = %d' % (name, value)) - constants.insert(ERRORTOKEN, - "# These aren't used by the C tokenizer but are needed for tokenize.py") - - token_types = [] - for s, value in sorted(string_to_tok.items()): - token_types.append(' %r: %s,' % (s, tok_names[value])) - - if update_file(outfile, token_py_template % ( - '\n'.join(constants), - len(tok_names), - NT_OFFSET, - '\n'.join(token_types), - )): - print("%s regenerated from %s" % (outfile, infile)) - - -def main(op, infile='Grammar/Tokens', *args): - make = globals()['make_' + op] - make(infile, *args) - - -if __name__ == '__main__': - import sys - main(*sys.argv[1:]) diff --git a/Tools/scripts/parse_html5_entities.py b/Tools/scripts/parse_html5_entities.py deleted file mode 100755 index 1e5bdad..0000000 --- a/Tools/scripts/parse_html5_entities.py +++ /dev/null @@ -1,114 +0,0 @@ -#!/usr/bin/env python3 -""" -Utility for parsing HTML5 entity definitions available from: - - https://html.spec.whatwg.org/entities.json - https://html.spec.whatwg.org/multipage/named-characters.html - -The page now contains the following note: - - "This list is static and will not be expanded or changed in the future." - -Written by Ezio Melotti and Iuliia Proskurnia. 
-""" - -import os -import sys -import json -from urllib.request import urlopen -from html.entities import html5 - -PAGE_URL = 'https://html.spec.whatwg.org/multipage/named-characters.html' -ENTITIES_URL = 'https://html.spec.whatwg.org/entities.json' -HTML5_SECTION_START = '# HTML5 named character references' - -def get_json(url): - """Download the json file from the url and returns a decoded object.""" - with urlopen(url) as f: - data = f.read().decode('utf-8') - return json.loads(data) - -def create_dict(entities): - """Create the html5 dict from the decoded json object.""" - new_html5 = {} - for name, value in entities.items(): - new_html5[name.lstrip('&')] = value['characters'] - return new_html5 - -def compare_dicts(old, new): - """Compare the old and new dicts and print the differences.""" - added = new.keys() - old.keys() - if added: - print('{} entitie(s) have been added:'.format(len(added))) - for name in sorted(added): - print(' {!r}: {!r}'.format(name, new[name])) - removed = old.keys() - new.keys() - if removed: - print('{} entitie(s) have been removed:'.format(len(removed))) - for name in sorted(removed): - print(' {!r}: {!r}'.format(name, old[name])) - changed = set() - for name in (old.keys() & new.keys()): - if old[name] != new[name]: - changed.add((name, old[name], new[name])) - if changed: - print('{} entitie(s) have been modified:'.format(len(changed))) - for item in sorted(changed): - print(' {!r}: {!r} -> {!r}'.format(*item)) - -def write_items(entities, file=sys.stdout): - """Write the items of the dictionary in the specified file.""" - # The keys in the generated dictionary should be sorted - # in a case-insensitive way, however, when two keys are equal, - # the uppercase version should come first so that the result - # looks like: ['Aacute', 'aacute', 'Aacute;', 'aacute;', ...] - # To do this we first sort in a case-sensitive way (so all the - # uppercase chars come first) and then sort with key=str.lower. - # Since the sorting is stable the uppercase keys will eventually - # be before their equivalent lowercase version. 
- keys = sorted(entities.keys()) - keys = sorted(keys, key=str.lower) - print(HTML5_SECTION_START, file=file) - print(f'# Generated by {sys.argv[0]!r}\n' - f'# from {ENTITIES_URL} and\n' - f'# {PAGE_URL}.\n' - f'# Map HTML5 named character references to the ' - f'equivalent Unicode character(s).', file=file) - print('html5 = {', file=file) - for name in keys: - print(f' {name!r}: {entities[name]!a},', file=file) - print('}', file=file) - - -if __name__ == '__main__': - # without args print a diff between html.entities.html5 and new_html5 - # with --create print the new html5 dict - # with --patch patch the Lib/html/entities.py file - new_html5 = create_dict(get_json(ENTITIES_URL)) - if '--create' in sys.argv: - write_items(new_html5) - elif '--patch' in sys.argv: - fname = 'Lib/html/entities.py' - temp_fname = fname + '.temp' - with open(fname) as f1, open(temp_fname, 'w') as f2: - skip = False - for line in f1: - if line.startswith(HTML5_SECTION_START): - write_items(new_html5, file=f2) - skip = True - continue - if skip: - # skip the old items until the } - if line.startswith('}'): - skip = False - continue - f2.write(line) - os.remove(fname) - os.rename(temp_fname, fname) - else: - if html5 == new_html5: - print('The current dictionary is updated.') - else: - compare_dicts(html5, new_html5) - print('Run "./python {0} --patch" to update Lib/html/entities.html ' - 'or "./python {0} --create" to see the generated ' 'dictionary.'.format(__file__)) diff --git a/Tools/scripts/smelly.py b/Tools/scripts/smelly.py deleted file mode 100755 index 276a5ab..0000000 --- a/Tools/scripts/smelly.py +++ /dev/null @@ -1,173 +0,0 @@ -#!/usr/bin/env python -# Script checking that all symbols exported by libpython start with Py or _Py - -import os.path -import subprocess -import sys -import sysconfig - - -ALLOWED_PREFIXES = ('Py', '_Py') -if sys.platform == 'darwin': - ALLOWED_PREFIXES += ('__Py',) - -IGNORED_EXTENSION = "_ctypes_test" -# Ignore constructor and destructor functions -IGNORED_SYMBOLS = {'_init', '_fini'} - - -def is_local_symbol_type(symtype): - # Ignore local symbols. - - # If lowercase, the symbol is usually local; if uppercase, the symbol - # is global (external). There are however a few lowercase symbols that - # are shown for special global symbols ("u", "v" and "w"). - if symtype.islower() and symtype not in "uvw": - return True - - # Ignore the initialized data section (d and D) and the BSS data - # section. For example, ignore "__bss_start (type: B)" - # and "_edata (type: D)". 
- if symtype in "bBdD": - return True - - return False - - -def get_exported_symbols(library, dynamic=False): - print(f"Check that {library} only exports symbols starting with Py or _Py") - - # Only look at dynamic symbols - args = ['nm', '--no-sort'] - if dynamic: - args.append('--dynamic') - args.append(library) - print("+ %s" % ' '.join(args)) - proc = subprocess.run(args, stdout=subprocess.PIPE, universal_newlines=True) - if proc.returncode: - sys.stdout.write(proc.stdout) - sys.exit(proc.returncode) - - stdout = proc.stdout.rstrip() - if not stdout: - raise Exception("command output is empty") - return stdout - - -def get_smelly_symbols(stdout): - smelly_symbols = [] - python_symbols = [] - local_symbols = [] - - for line in stdout.splitlines(): - # Split line '0000000000001b80 D PyTextIOWrapper_Type' - if not line: - continue - - parts = line.split(maxsplit=2) - if len(parts) < 3: - continue - - symtype = parts[1].strip() - symbol = parts[-1] - result = '%s (type: %s)' % (symbol, symtype) - - if symbol.startswith(ALLOWED_PREFIXES): - python_symbols.append(result) - continue - - if is_local_symbol_type(symtype): - local_symbols.append(result) - elif symbol in IGNORED_SYMBOLS: - local_symbols.append(result) - else: - smelly_symbols.append(result) - - if local_symbols: - print(f"Ignore {len(local_symbols)} local symbols") - return smelly_symbols, python_symbols - - -def check_library(library, dynamic=False): - nm_output = get_exported_symbols(library, dynamic) - smelly_symbols, python_symbols = get_smelly_symbols(nm_output) - - if not smelly_symbols: - print(f"OK: no smelly symbol found ({len(python_symbols)} Python symbols)") - return 0 - - print() - smelly_symbols.sort() - for symbol in smelly_symbols: - print("Smelly symbol: %s" % symbol) - - print() - print("ERROR: Found %s smelly symbols!" % len(smelly_symbols)) - return len(smelly_symbols) - - -def check_extensions(): - print(__file__) - # This assumes pybuilddir.txt is in same directory as pyconfig.h. - # In the case of out-of-tree builds, we can't assume pybuilddir.txt is - # in the source folder. 
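For reference, a typical nm(1) output line and the split performed by get_smelly_symbols() above (the address and symbol name are illustrative):

    line = '0000000000001b80 D PyTextIOWrapper_Type'
    parts = line.split(maxsplit=2)  # address, type, symbol
    symtype, symbol = parts[1], parts[-1]
    assert symtype == 'D' and symbol.startswith(('Py', '_Py'))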
- config_dir = os.path.dirname(sysconfig.get_config_h_filename()) - filename = os.path.join(config_dir, "pybuilddir.txt") - try: - with open(filename, encoding="utf-8") as fp: - pybuilddir = fp.readline() - except FileNotFoundError: - print(f"Cannot check extensions because {filename} does not exist") - return True - - print(f"Check extension modules from {pybuilddir} directory") - builddir = os.path.join(config_dir, pybuilddir) - nsymbol = 0 - for name in os.listdir(builddir): - if not name.endswith(".so"): - continue - if IGNORED_EXTENSION in name: - print() - print(f"Ignore extension: {name}") - continue - - print() - filename = os.path.join(builddir, name) - nsymbol += check_library(filename, dynamic=True) - - return nsymbol - - -def main(): - nsymbol = 0 - - # static library - LIBRARY = sysconfig.get_config_var('LIBRARY') - if not LIBRARY: - raise Exception("failed to get LIBRARY variable from sysconfig") - if os.path.exists(LIBRARY): - nsymbol += check_library(LIBRARY) - - # dynamic library - LDLIBRARY = sysconfig.get_config_var('LDLIBRARY') - if not LDLIBRARY: - raise Exception("failed to get LDLIBRARY variable from sysconfig") - if LDLIBRARY != LIBRARY: - print() - nsymbol += check_library(LDLIBRARY, dynamic=True) - - # Check extension modules like _ssl.cpython-310d-x86_64-linux-gnu.so - nsymbol += check_extensions() - - if nsymbol: - print() - print(f"ERROR: Found {nsymbol} smelly symbols in total!") - sys.exit(1) - - print() - print(f"OK: all exported symbols of all libraries " - f"are prefixed with {' or '.join(map(repr, ALLOWED_PREFIXES))}") - - -if __name__ == "__main__": - main() diff --git a/Tools/scripts/stable_abi.py b/Tools/scripts/stable_abi.py deleted file mode 100755 index d557e10..0000000 --- a/Tools/scripts/stable_abi.py +++ /dev/null @@ -1,754 +0,0 @@ -"""Check the stable ABI manifest or generate files from it - -By default, the tool only checks existing files/libraries. -Pass --generate to recreate auto-generated files instead. - -For actions that take a FILENAME, the filename can be left out to use a default -(relative to the manifest file, as they appear in the CPython codebase). -""" - -from functools import partial -from pathlib import Path -import dataclasses -import subprocess -import sysconfig -import argparse -import textwrap -import tomllib -import difflib -import pprint -import sys -import os -import os.path -import io -import re -import csv - -MISSING = object() - -EXCLUDED_HEADERS = { - "bytes_methods.h", - "cellobject.h", - "classobject.h", - "code.h", - "compile.h", - "datetime.h", - "dtoa.h", - "frameobject.h", - "genobject.h", - "longintrepr.h", - "parsetok.h", - "pyatomic.h", - "pytime.h", - "token.h", - "ucnhash.h", -} -MACOS = (sys.platform == "darwin") -UNIXY = MACOS or (sys.platform == "linux") # XXX should this be "not Windows"? - - -# The stable ABI manifest (Misc/stable_abi.toml) exists only to fill the -# following dataclasses. -# Feel free to change its syntax (and the `parse_manifest` function) -# to better serve that purpose (while keeping it human-readable). - -class Manifest: - """Collection of `ABIItem`s forming the stable ABI/limited API.""" - def __init__(self): - self.contents = dict() - - def add(self, item): - if item.name in self.contents: - # We assume that stable ABI items do not share names, - # even if they're different kinds (e.g. function vs. macro). 
- raise ValueError(f'duplicate ABI item {item.name}') - self.contents[item.name] = item - - def select(self, kinds, *, include_abi_only=True, ifdef=None): - """Yield selected items of the manifest - - kinds: set of requested kinds, e.g. {'function', 'macro'} - include_abi_only: if True (default), include all items of the - stable ABI. - If False, include only items from the limited API - (i.e. items people should use today) - ifdef: set of feature macros (e.g. {'HAVE_FORK', 'MS_WINDOWS'}). - If None (default), items are not filtered by this. (This is - different from the empty set, which filters out all such - conditional items.) - """ - for name, item in sorted(self.contents.items()): - if item.kind not in kinds: - continue - if item.abi_only and not include_abi_only: - continue - if (ifdef is not None - and item.ifdef is not None - and item.ifdef not in ifdef): - continue - yield item - - def dump(self): - """Yield lines to recreate the manifest file (sans comments/newlines)""" - for item in self.contents.values(): - fields = dataclasses.fields(item) - yield f"[{item.kind}.{item.name}]" - for field in fields: - if field.name in {'name', 'value', 'kind'}: - continue - value = getattr(item, field.name) - if value == field.default: - pass - elif value is True: - yield f" {field.name} = true" - elif value: - yield f" {field.name} = {value!r}" - - -itemclasses = {} -def itemclass(kind): - """Register the decorated class in `itemclasses`""" - def decorator(cls): - itemclasses[kind] = cls - return cls - return decorator - -@itemclass('function') -@itemclass('macro') -@itemclass('data') -@itemclass('const') -@itemclass('typedef') -@dataclasses.dataclass -class ABIItem: - """Information on one item (function, macro, struct, etc.)""" - - name: str - kind: str - added: str = None - abi_only: bool = False - ifdef: str = None - -@itemclass('feature_macro') -@dataclasses.dataclass(kw_only=True) -class FeatureMacro(ABIItem): - name: str - doc: str - windows: bool = False - abi_only: bool = True - -@itemclass('struct') -@dataclasses.dataclass(kw_only=True) -class Struct(ABIItem): - struct_abi_kind: str - members: list = None - - -def parse_manifest(file): - """Parse the given file (iterable of lines) to a Manifest""" - - manifest = Manifest() - - data = tomllib.load(file) - - for kind, itemclass in itemclasses.items(): - for name, item_data in data[kind].items(): - try: - item = itemclass(name=name, kind=kind, **item_data) - manifest.add(item) - except BaseException as exc: - exc.add_note(f'in {kind} {name}') - raise - - return manifest - -# The tool can run individual "actions". -# Most actions are "generators", which generate a single file from the -# manifest. (Checking works by generating a temp file & comparing.) -# Other actions, like "--unixy-check", don't work on a single file. 
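A hypothetical excerpt showing the manifest's TOML shape, decoded the same way parse_manifest() does (the entries are illustrative, not taken from the real manifest):

    import tomllib

    sample = tomllib.loads("""
    [function.PyObject_Repr]
    added = '3.2'

    [data.PyExc_ValueError]
    added = '3.2'
    """)
    assert sample['function']['PyObject_Repr']['added'] == '3.2'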
- -generators = [] -def generator(var_name, default_path): - """Decorates a file generator: function that writes to a file""" - def _decorator(func): - func.var_name = var_name - func.arg_name = '--' + var_name.replace('_', '-') - func.default_path = default_path - generators.append(func) - return func - return _decorator - - -@generator("python3dll", 'PC/python3dll.c') -def gen_python3dll(manifest, args, outfile): - """Generate/check the source for the Windows stable ABI library""" - write = partial(print, file=outfile) - write(textwrap.dedent(r""" - /* Re-export stable Python ABI */ - - /* Generated by Tools/scripts/stable_abi.py */ - - #ifdef _M_IX86 - #define DECORATE "_" - #else - #define DECORATE - #endif - - #define EXPORT_FUNC(name) \ - __pragma(comment(linker, "/EXPORT:" DECORATE #name "=" PYTHON_DLL_NAME "." #name)) - #define EXPORT_DATA(name) \ - __pragma(comment(linker, "/EXPORT:" DECORATE #name "=" PYTHON_DLL_NAME "." #name ",DATA")) - """)) - - def sort_key(item): - return item.name.lower() - - windows_feature_macros = { - item.name for item in manifest.select({'feature_macro'}) if item.windows - } - for item in sorted( - manifest.select( - {'function'}, - include_abi_only=True, - ifdef=windows_feature_macros), - key=sort_key): - write(f'EXPORT_FUNC({item.name})') - - write() - - for item in sorted( - manifest.select( - {'data'}, - include_abi_only=True, - ifdef=windows_feature_macros), - key=sort_key): - write(f'EXPORT_DATA({item.name})') - -REST_ROLES = { - 'function': 'function', - 'data': 'var', - 'struct': 'type', - 'macro': 'macro', - # 'const': 'const', # all undocumented - 'typedef': 'type', -} - -@generator("doc_list", 'Doc/data/stable_abi.dat') -def gen_doc_annotations(manifest, args, outfile): - """Generate/check the stable ABI list for documentation annotations""" - writer = csv.DictWriter( - outfile, - ['role', 'name', 'added', 'ifdef_note', 'struct_abi_kind'], - lineterminator='\n') - writer.writeheader() - for item in manifest.select(REST_ROLES.keys(), include_abi_only=False): - if item.ifdef: - ifdef_note = manifest.contents[item.ifdef].doc - else: - ifdef_note = None - row = { - 'role': REST_ROLES[item.kind], - 'name': item.name, - 'added': item.added, - 'ifdef_note': ifdef_note} - rows = [row] - if item.kind == 'struct': - row['struct_abi_kind'] = item.struct_abi_kind - for member_name in item.members or (): - rows.append({ - 'role': 'member', - 'name': f'{item.name}.{member_name}', - 'added': item.added}) - writer.writerows(rows) - -@generator("ctypes_test", 'Lib/test/test_stable_abi_ctypes.py') -def gen_ctypes_test(manifest, args, outfile): - """Generate/check the ctypes-based test for exported symbols""" - write = partial(print, file=outfile) - write(textwrap.dedent(''' - # Generated by Tools/scripts/stable_abi.py - - """Test that all symbols of the Stable ABI are accessible using ctypes - """ - - import sys - import unittest - from test.support.import_helper import import_module - from _testcapi import get_feature_macros - - feature_macros = get_feature_macros() - ctypes_test = import_module('ctypes') - - class TestStableABIAvailability(unittest.TestCase): - def test_available_symbols(self): - - for symbol_name in SYMBOL_NAMES: - with self.subTest(symbol_name): - ctypes_test.pythonapi[symbol_name] - - def test_feature_macros(self): - self.assertEqual( - set(get_feature_macros()), EXPECTED_FEATURE_MACROS) - - # The feature macros for Windows are used in creating the DLL - # definition, so they must be known on all platforms. 
- # If we are on Windows, we check that the hardcoded data matches - # the reality. - @unittest.skipIf(sys.platform != "win32", "Windows specific test") - def test_windows_feature_macros(self): - for name, value in WINDOWS_FEATURE_MACROS.items(): - if value != 'maybe': - with self.subTest(name): - self.assertEqual(feature_macros[name], value) - - SYMBOL_NAMES = ( - ''')) - items = manifest.select( - {'function', 'data'}, - include_abi_only=True, - ) - optional_items = {} - for item in items: - if item.name in ( - # Some symbols aren't exported on all platforms. - # This is a bug: https://bugs.python.org/issue44133 - 'PyModule_Create2', 'PyModule_FromDefAndSpec2', - ): - continue - if item.ifdef: - optional_items.setdefault(item.ifdef, []).append(item.name) - else: - write(f' "{item.name}",') - write(")") - for ifdef, names in optional_items.items(): - write(f"if feature_macros[{ifdef!r}]:") - write(f" SYMBOL_NAMES += (") - for name in names: - write(f" {name!r},") - write(" )") - write("") - feature_macros = list(manifest.select({'feature_macro'})) - feature_names = sorted(m.name for m in feature_macros) - write(f"EXPECTED_FEATURE_MACROS = set({pprint.pformat(feature_names)})") - - windows_feature_macros = {m.name: m.windows for m in feature_macros} - write(f"WINDOWS_FEATURE_MACROS = {pprint.pformat(windows_feature_macros)}") - - -@generator("testcapi_feature_macros", 'Modules/_testcapi_feature_macros.inc') -def gen_testcapi_feature_macros(manifest, args, outfile): - """Generate/check the stable ABI list for documentation annotations""" - write = partial(print, file=outfile) - write('// Generated by Tools/scripts/stable_abi.py') - write() - write('// Add an entry in dict `result` for each Stable ABI feature macro.') - write() - for macro in manifest.select({'feature_macro'}): - name = macro.name - write(f'#ifdef {name}') - write(f' res = PyDict_SetItemString(result, "{name}", Py_True);') - write('#else') - write(f' res = PyDict_SetItemString(result, "{name}", Py_False);') - write('#endif') - write('if (res) {') - write(' Py_DECREF(result); return NULL;') - write('}') - write() - - -def generate_or_check(manifest, args, path, func): - """Generate/check a file with a single generator - - Return True if successful; False if a comparison failed. 
- """ - - outfile = io.StringIO() - func(manifest, args, outfile) - generated = outfile.getvalue() - existing = path.read_text() - - if generated != existing: - if args.generate: - path.write_text(generated) - else: - print(f'File {path} differs from expected!') - diff = difflib.unified_diff( - generated.splitlines(), existing.splitlines(), - str(path), '<expected>', - lineterm='', - ) - for line in diff: - print(line) - return False - return True - - -def do_unixy_check(manifest, args): - """Check headers & library using "Unixy" tools (GCC/clang, binutils)""" - okay = True - - # Get all macros first: we'll need feature macros like HAVE_FORK and - # MS_WINDOWS for everything else - present_macros = gcc_get_limited_api_macros(['Include/Python.h']) - feature_macros = set(m.name for m in manifest.select({'feature_macro'})) - feature_macros &= present_macros - - # Check that we have all needed macros - expected_macros = set( - item.name for item in manifest.select({'macro'}) - ) - missing_macros = expected_macros - present_macros - okay &= _report_unexpected_items( - missing_macros, - 'Some macros from are not defined from "Include/Python.h"' - + 'with Py_LIMITED_API:') - - expected_symbols = set(item.name for item in manifest.select( - {'function', 'data'}, include_abi_only=True, ifdef=feature_macros, - )) - - # Check the static library (*.a) - LIBRARY = sysconfig.get_config_var("LIBRARY") - if not LIBRARY: - raise Exception("failed to get LIBRARY variable from sysconfig") - if os.path.exists(LIBRARY): - okay &= binutils_check_library( - manifest, LIBRARY, expected_symbols, dynamic=False) - - # Check the dynamic library (*.so) - LDLIBRARY = sysconfig.get_config_var("LDLIBRARY") - if not LDLIBRARY: - raise Exception("failed to get LDLIBRARY variable from sysconfig") - okay &= binutils_check_library( - manifest, LDLIBRARY, expected_symbols, dynamic=False) - - # Check definitions in the header files - expected_defs = set(item.name for item in manifest.select( - {'function', 'data'}, include_abi_only=False, ifdef=feature_macros, - )) - found_defs = gcc_get_limited_api_definitions(['Include/Python.h']) - missing_defs = expected_defs - found_defs - okay &= _report_unexpected_items( - missing_defs, - 'Some expected declarations were not declared in ' - + '"Include/Python.h" with Py_LIMITED_API:') - - # Some Limited API macros are defined in terms of private symbols. - # These are not part of Limited API (even though they're defined with - # Py_LIMITED_API). They must be part of the Stable ABI, though. 
- private_symbols = {n for n in expected_symbols if n.startswith('_')} - extra_defs = found_defs - expected_defs - private_symbols - okay &= _report_unexpected_items( - extra_defs, - 'Some extra declarations were found in "Include/Python.h" ' - + 'with Py_LIMITED_API:') - - return okay - - -def _report_unexpected_items(items, msg): - """If there are any `items`, report them using "msg" and return false""" - if items: - print(msg, file=sys.stderr) - for item in sorted(items): - print(' -', item, file=sys.stderr) - return False - return True - - -def binutils_get_exported_symbols(library, dynamic=False): - """Retrieve exported symbols using the nm(1) tool from binutils""" - # Only look at dynamic symbols - args = ["nm", "--no-sort"] - if dynamic: - args.append("--dynamic") - args.append(library) - proc = subprocess.run(args, stdout=subprocess.PIPE, universal_newlines=True) - if proc.returncode: - sys.stdout.write(proc.stdout) - sys.exit(proc.returncode) - - stdout = proc.stdout.rstrip() - if not stdout: - raise Exception("command output is empty") - - for line in stdout.splitlines(): - # Split line '0000000000001b80 D PyTextIOWrapper_Type' - if not line: - continue - - parts = line.split(maxsplit=2) - if len(parts) < 3: - continue - - symbol = parts[-1] - if MACOS and symbol.startswith("_"): - yield symbol[1:] - else: - yield symbol - - -def binutils_check_library(manifest, library, expected_symbols, dynamic): - """Check that library exports all expected_symbols""" - available_symbols = set(binutils_get_exported_symbols(library, dynamic)) - missing_symbols = expected_symbols - available_symbols - if missing_symbols: - print(textwrap.dedent(f"""\ - Some symbols from the limited API are missing from {library}: - {', '.join(missing_symbols)} - - This error means that there are some missing symbols among the - ones exported in the library. - This normally means that some symbol, function implementation or - a prototype belonging to a symbol in the limited API has been - deleted or is missing. - """), file=sys.stderr) - return False - return True - - -def gcc_get_limited_api_macros(headers): - """Get all limited API macros from headers. - - Runs the preprocessor over all the header files in "Include" setting - "-DPy_LIMITED_API" to the correct value for the running version of the - interpreter and extracting all macro definitions (via adding -dM to the - compiler arguments). - - Requires Python built with a GCC-compatible compiler. (clang might work) - """ - - api_hexversion = sys.version_info.major << 24 | sys.version_info.minor << 16 - - preprocesor_output_with_macros = subprocess.check_output( - sysconfig.get_config_var("CC").split() - + [ - # Prevent the expansion of the exported macros so we can - # capture them later - "-DSIZEOF_WCHAR_T=4", # The actual value is not important - f"-DPy_LIMITED_API={api_hexversion}", - "-I.", - "-I./Include", - "-dM", - "-E", - ] - + [str(file) for file in headers], - text=True, - ) - - return { - target - for target in re.findall( - r"#define (\w+)", preprocesor_output_with_macros - ) - } - - -def gcc_get_limited_api_definitions(headers): - """Get all limited API definitions from headers. - - Run the preprocessor over all the header files in "Include" setting - "-DPy_LIMITED_API" to the correct value for the running version of the - interpreter. - - The limited API symbols will be extracted from the output of this command - as it includes the prototypes and definitions of all the exported symbols - that are in the limited api. 
- - This function does *NOT* extract the macros defined on the limited API - - Requires Python built with a GCC-compatible compiler. (clang might work) - """ - api_hexversion = sys.version_info.major << 24 | sys.version_info.minor << 16 - preprocesor_output = subprocess.check_output( - sysconfig.get_config_var("CC").split() - + [ - # Prevent the expansion of the exported macros so we can capture - # them later - "-DPyAPI_FUNC=__PyAPI_FUNC", - "-DPyAPI_DATA=__PyAPI_DATA", - "-DEXPORT_DATA=__EXPORT_DATA", - "-D_Py_NO_RETURN=", - "-DSIZEOF_WCHAR_T=4", # The actual value is not important - f"-DPy_LIMITED_API={api_hexversion}", - "-I.", - "-I./Include", - "-E", - ] - + [str(file) for file in headers], - text=True, - stderr=subprocess.DEVNULL, - ) - stable_functions = set( - re.findall(r"__PyAPI_FUNC\(.*?\)\s*(.*?)\s*\(", preprocesor_output) - ) - stable_exported_data = set( - re.findall(r"__EXPORT_DATA\((.*?)\)", preprocesor_output) - ) - stable_data = set( - re.findall(r"__PyAPI_DATA\(.*?\)[\s\*\(]*([^);]*)\)?.*;", preprocesor_output) - ) - return stable_data | stable_exported_data | stable_functions - -def check_private_names(manifest): - """Ensure limited API doesn't contain private names - - Names prefixed by an underscore are private by definition. - """ - for name, item in manifest.contents.items(): - if name.startswith('_') and not item.abi_only: - raise ValueError( - f'`{name}` is private (underscore-prefixed) and should be ' - + 'removed from the stable ABI list or or marked `abi_only`') - -def check_dump(manifest, filename): - """Check that manifest.dump() corresponds to the data. - - Mainly useful when debugging this script. - """ - dumped = tomllib.loads('\n'.join(manifest.dump())) - with filename.open('rb') as file: - from_file = tomllib.load(file) - if dumped != from_file: - print(f'Dump differs from loaded data!', file=sys.stderr) - diff = difflib.unified_diff( - pprint.pformat(dumped).splitlines(), - pprint.pformat(from_file).splitlines(), - '<dumped>', str(filename), - lineterm='', - ) - for line in diff: - print(line, file=sys.stderr) - return False - else: - return True - -def main(): - parser = argparse.ArgumentParser( - description=__doc__, - formatter_class=argparse.RawDescriptionHelpFormatter, - ) - parser.add_argument( - "file", type=Path, metavar='FILE', - help="file with the stable abi manifest", - ) - parser.add_argument( - "--generate", action='store_true', - help="generate file(s), rather than just checking them", - ) - parser.add_argument( - "--generate-all", action='store_true', - help="as --generate, but generate all file(s) using default filenames." 
- + " (unlike --all, does not run any extra checks)", - ) - parser.add_argument( - "-a", "--all", action='store_true', - help="run all available checks using default filenames", - ) - parser.add_argument( - "-l", "--list", action='store_true', - help="list available generators and their default filenames; then exit", - ) - parser.add_argument( - "--dump", action='store_true', - help="dump the manifest contents (used for debugging the parser)", - ) - - actions_group = parser.add_argument_group('actions') - for gen in generators: - actions_group.add_argument( - gen.arg_name, dest=gen.var_name, - type=str, nargs="?", default=MISSING, - metavar='FILENAME', - help=gen.__doc__, - ) - actions_group.add_argument( - '--unixy-check', action='store_true', - help=do_unixy_check.__doc__, - ) - args = parser.parse_args() - - base_path = args.file.parent.parent - - if args.list: - for gen in generators: - print(f'{gen.arg_name}: {base_path / gen.default_path}') - sys.exit(0) - - run_all_generators = args.generate_all - - if args.generate_all: - args.generate = True - - if args.all: - run_all_generators = True - args.unixy_check = True - - try: - file = args.file.open('rb') - except FileNotFoundError as err: - if args.file.suffix == '.txt': - # Provide a better error message - suggestion = args.file.with_suffix('.toml') - raise FileNotFoundError( - f'{args.file} not found. Did you mean {suggestion} ?') from err - raise - with file: - manifest = parse_manifest(file) - - check_private_names(manifest) - - # Remember results of all actions (as booleans). - # At the end we'll check that at least one action was run, - # and also fail if any are false. - results = {} - - if args.dump: - for line in manifest.dump(): - print(line) - results['dump'] = check_dump(manifest, args.file) - - for gen in generators: - filename = getattr(args, gen.var_name) - if filename is None or (run_all_generators and filename is MISSING): - filename = base_path / gen.default_path - elif filename is MISSING: - continue - - results[gen.var_name] = generate_or_check(manifest, args, filename, gen) - - if args.unixy_check: - results['unixy_check'] = do_unixy_check(manifest, args) - - if not results: - if args.generate: - parser.error('No file specified. Use --help for usage.') - parser.error('No check specified. Use --help for usage.') - - failed_results = [name for name, result in results.items() if not result] - - if failed_results: - raise Exception(f""" - These checks related to the stable ABI did not succeed: - {', '.join(failed_results)} - - If you see diffs in the output, files derived from the stable - ABI manifest the were not regenerated. - Run `make regen-limited-abi` to fix this. - - Otherwise, see the error(s) above. - - The stable ABI manifest is at: {args.file} - Note that there is a process to follow when modifying it. 
-
-        You can read more about the limited API and its contracts at:
-
-        https://docs.python.org/3/c-api/stable.html
-
-        And in PEP 384:
-
-        https://peps.python.org/pep-0384/
-        """)
-
-
-if __name__ == "__main__":
-    main()
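For reference, a manual run of the stable_abi.py script shown above might look like the sketch below. The manifest is normally regenerated through `make regen-limited-abi` (as the error message above says); the manifest path used here, Misc/stable_abi.toml, is the usual location in a CPython checkout but is an assumption on my part:

    python Tools/scripts/stable_abi.py Misc/stable_abi.toml --all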
diff --git a/Tools/scripts/umarshal.py b/Tools/scripts/umarshal.py
deleted file mode 100644
index f61570c..0000000
--- a/Tools/scripts/umarshal.py
+++ /dev/null
@@ -1,325 +0,0 @@
-# Implementation of marshal.loads() in pure Python
-
-import ast
-
-from typing import Any, Tuple
-
-
-class Type:
-    # Adapted from marshal.c
-    NULL = ord('0')
-    NONE = ord('N')
-    FALSE = ord('F')
-    TRUE = ord('T')
-    STOPITER = ord('S')
-    ELLIPSIS = ord('.')
-    INT = ord('i')
-    INT64 = ord('I')
-    FLOAT = ord('f')
-    BINARY_FLOAT = ord('g')
-    COMPLEX = ord('x')
-    BINARY_COMPLEX = ord('y')
-    LONG = ord('l')
-    STRING = ord('s')
-    INTERNED = ord('t')
-    REF = ord('r')
-    TUPLE = ord('(')
-    LIST = ord('[')
-    DICT = ord('{')
-    CODE = ord('c')
-    UNICODE = ord('u')
-    UNKNOWN = ord('?')
-    SET = ord('<')
-    FROZENSET = ord('>')
-    ASCII = ord('a')
-    ASCII_INTERNED = ord('A')
-    SMALL_TUPLE = ord(')')
-    SHORT_ASCII = ord('z')
-    SHORT_ASCII_INTERNED = ord('Z')
-
-
-FLAG_REF = 0x80  # with a type, add obj to index
-
-NULL = object()  # marker
-
-# Cell kinds
-CO_FAST_LOCAL = 0x20
-CO_FAST_CELL = 0x40
-CO_FAST_FREE = 0x80
-
-
-class Code:
-    def __init__(self, **kwds: Any):
-        self.__dict__.update(kwds)
-
-    def __repr__(self) -> str:
-        return f"Code(**{self.__dict__})"
-
-    co_localsplusnames: Tuple[str, ...]
-    co_localspluskinds: Tuple[int, ...]
-
-    def get_localsplus_names(self, select_kind: int) -> Tuple[str, ...]:
-        varnames: list[str] = []
-        for name, kind in zip(self.co_localsplusnames,
-                              self.co_localspluskinds):
-            if kind & select_kind:
-                varnames.append(name)
-        return tuple(varnames)
-
-    @property
-    def co_varnames(self) -> Tuple[str, ...]:
-        return self.get_localsplus_names(CO_FAST_LOCAL)
-
-    @property
-    def co_cellvars(self) -> Tuple[str, ...]:
-        return self.get_localsplus_names(CO_FAST_CELL)
-
-    @property
-    def co_freevars(self) -> Tuple[str, ...]:
-        return self.get_localsplus_names(CO_FAST_FREE)
-
-    @property
-    def co_nlocals(self) -> int:
-        return len(self.co_varnames)
-
-
-class Reader:
-    # A fairly literal translation of the marshal reader.
-
-    def __init__(self, data: bytes):
-        self.data: bytes = data
-        self.end: int = len(self.data)
-        self.pos: int = 0
-        self.refs: list[Any] = []
-        self.level: int = 0
-
-    def r_string(self, n: int) -> bytes:
-        assert 0 <= n <= self.end - self.pos
-        buf = self.data[self.pos : self.pos + n]
-        self.pos += n
-        return buf
-
-    def r_byte(self) -> int:
-        buf = self.r_string(1)
-        return buf[0]
-
-    def r_short(self) -> int:
-        buf = self.r_string(2)
-        x = buf[0]
-        x |= buf[1] << 8
-        x |= -(x & (1<<15))  # Sign-extend
-        return x
-
-    def r_long(self) -> int:
-        buf = self.r_string(4)
-        x = buf[0]
-        x |= buf[1] << 8
-        x |= buf[2] << 16
-        x |= buf[3] << 24
-        x |= -(x & (1<<31))  # Sign-extend
-        return x
-
-    def r_long64(self) -> int:
-        buf = self.r_string(8)
-        x = buf[0]
-        x |= buf[1] << 8
-        x |= buf[2] << 16
-        x |= buf[3] << 24
-        x |= buf[4] << 32
-        x |= buf[5] << 40
-        x |= buf[6] << 48
-        x |= buf[7] << 56
-        x |= -(x & (1<<63))  # Sign-extend
-        return x
-
-    def r_PyLong(self) -> int:
-        n = self.r_long()
-        size = abs(n)
-        x = 0
-        # Pray this is right
-        for i in range(size):
-            x |= self.r_short() << i*15
-        if n < 0:
-            x = -x
-        return x
-
-    def r_float_bin(self) -> float:
-        buf = self.r_string(8)
-        import struct  # Lazy import to avoid breaking UNIX build
-        return struct.unpack("d", buf)[0]
-
-    def r_float_str(self) -> float:
-        n = self.r_byte()
-        buf = self.r_string(n)
-        return ast.literal_eval(buf.decode("ascii"))
-
-    def r_ref_reserve(self, flag: int) -> int:
-        if flag:
-            idx = len(self.refs)
-            self.refs.append(None)
-            return idx
-        else:
-            return 0
-
-    def r_ref_insert(self, obj: Any, idx: int, flag: int) -> Any:
-        if flag:
-            self.refs[idx] = obj
-        return obj
-
-    def r_ref(self, obj: Any, flag: int) -> Any:
-        assert flag & FLAG_REF
-        self.refs.append(obj)
-        return obj
-
-    def r_object(self) -> Any:
-        old_level = self.level
-        try:
-            return self._r_object()
-        finally:
-            self.level = old_level
-
-    def _r_object(self) -> Any:
-        code = self.r_byte()
-        flag = code & FLAG_REF
-        type = code & ~FLAG_REF
-        # print(" "*self.level + f"{code} {flag} {type} {chr(type)!r}")
-        self.level += 1
-
-        def R_REF(obj: Any) -> Any:
-            if flag:
-                obj = self.r_ref(obj, flag)
-            return obj
-
-        if type == Type.NULL:
-            return NULL
-        elif type == Type.NONE:
-            return None
-        elif type == Type.ELLIPSIS:
-            return Ellipsis
-        elif type == Type.FALSE:
-            return False
-        elif type == Type.TRUE:
-            return True
-        elif type == Type.INT:
-            return R_REF(self.r_long())
-        elif type == Type.INT64:
-            return R_REF(self.r_long64())
-        elif type == Type.LONG:
-            return R_REF(self.r_PyLong())
-        elif type == Type.FLOAT:
-            return R_REF(self.r_float_str())
-        elif type == Type.BINARY_FLOAT:
-            return R_REF(self.r_float_bin())
-        elif type == Type.COMPLEX:
-            return R_REF(complex(self.r_float_str(),
-                                 self.r_float_str()))
-        elif type == Type.BINARY_COMPLEX:
-            return R_REF(complex(self.r_float_bin(),
-                                 self.r_float_bin()))
-        elif type == Type.STRING:
-            n = self.r_long()
-            return R_REF(self.r_string(n))
-        elif type == Type.ASCII_INTERNED or type == Type.ASCII:
-            n = self.r_long()
-            return R_REF(self.r_string(n).decode("ascii"))
-        elif type == Type.SHORT_ASCII_INTERNED or type == Type.SHORT_ASCII:
-            n = self.r_byte()
-            return R_REF(self.r_string(n).decode("ascii"))
-        elif type == Type.INTERNED or type == Type.UNICODE:
-            n = self.r_long()
-            return R_REF(self.r_string(n).decode("utf8", "surrogatepass"))
-        elif type == Type.SMALL_TUPLE:
-            n = self.r_byte()
-            idx = self.r_ref_reserve(flag)
-            retval: Any = tuple(self.r_object() for _ in range(n))
-            self.r_ref_insert(retval, idx, flag)
-            return retval
-        elif type == Type.TUPLE:
-            n = self.r_long()
-            idx = self.r_ref_reserve(flag)
-            retval = tuple(self.r_object() for _ in range(n))
-            self.r_ref_insert(retval, idx, flag)
-            return retval
-        elif type == Type.LIST:
-            n = self.r_long()
-            retval = R_REF([])
-            for _ in range(n):
-                retval.append(self.r_object())
-            return retval
-        elif type == Type.DICT:
-            retval = R_REF({})
-            while True:
-                key = self.r_object()
-                if key == NULL:
-                    break
-                val = self.r_object()
-                retval[key] = val
-            return retval
-        elif type == Type.SET:
-            n = self.r_long()
-            retval = R_REF(set())
-            for _ in range(n):
-                v = self.r_object()
-                retval.add(v)
-            return retval
-        elif type == Type.FROZENSET:
-            n = self.r_long()
-            s: set[Any] = set()
-            idx = self.r_ref_reserve(flag)
-            for _ in range(n):
-                v = self.r_object()
-                s.add(v)
-            retval = frozenset(s)
-            self.r_ref_insert(retval, idx, flag)
-            return retval
-        elif type == Type.CODE:
-            retval = R_REF(Code())
-            retval.co_argcount = self.r_long()
-            retval.co_posonlyargcount = self.r_long()
-            retval.co_kwonlyargcount = self.r_long()
-            retval.co_stacksize = self.r_long()
-            retval.co_flags = self.r_long()
-            retval.co_code = self.r_object()
-            retval.co_consts = self.r_object()
-            retval.co_names = self.r_object()
-            retval.co_localsplusnames = self.r_object()
-            retval.co_localspluskinds = self.r_object()
-            retval.co_filename = self.r_object()
-            retval.co_name = self.r_object()
-            retval.co_qualname = self.r_object()
-            retval.co_firstlineno = self.r_long()
-            retval.co_linetable = self.r_object()
-            retval.co_exceptiontable = self.r_object()
-            return retval
-        elif type == Type.REF:
-            n = self.r_long()
-            retval = self.refs[n]
-            assert retval is not None
-            return retval
-        else:
-            breakpoint()
-            raise AssertionError(f"Unknown type {type} {chr(type)!r}")
-
-
-def loads(data: bytes) -> Any:
-    assert isinstance(data, bytes)
-    r = Reader(data)
-    return r.r_object()
-
-
-def main():
-    # Test
-    import marshal, pprint
-    sample = {'foo': {(42, "bar", 3.14)}}
-    data = marshal.dumps(sample)
-    retval = loads(data)
-    assert retval == sample, retval
-    sample = main.__code__
-    data = marshal.dumps(sample)
-    retval = loads(data)
-    assert isinstance(retval, Code), retval
-    pprint.pprint(retval.__dict__)
-
-
-if __name__ == "__main__":
-    main()
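Because umarshal.py reimplements marshal.loads() in pure Python, it can inspect compiled bytecode without importing or executing it. A minimal usage sketch, assuming the CPython 3.7+ .pyc layout (a 16-byte header of magic number, flags, and source metadata before the marshalled code object); the file path is hypothetical:

    import umarshal

    with open("__pycache__/example.cpython-311.pyc", "rb") as f:  # hypothetical path
        data = f.read()

    code = umarshal.loads(data[16:])  # skip the 16-byte .pyc header
    print(code.co_name, code.co_varnames)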
diff --git a/Tools/scripts/update_file.py b/Tools/scripts/update_file.py
deleted file mode 100644
index b4182c1..0000000
--- a/Tools/scripts/update_file.py
+++ /dev/null
@@ -1,92 +0,0 @@
-"""
-A script that replaces an old file with a new one, only if the contents
-actually changed. If not, the new file is simply deleted.
-
-This avoids wholesale rebuilds when a code (re)generation phase does not
-actually change the in-tree generated code.
-"""
-
-import contextlib
-import os
-import os.path
-import sys
-
-
-@contextlib.contextmanager
-def updating_file_with_tmpfile(filename, tmpfile=None):
-    """A context manager for updating a file via a temp file.
-
-    The context manager provides two open files: the source file open
-    for reading, and the temp file, open for writing.
-
-    Upon exiting: both files are closed, and the source file is replaced
-    with the temp file.
-    """
-    # XXX Optionally use tempfile.TemporaryFile?
-    if not tmpfile:
-        tmpfile = filename + '.tmp'
-    elif os.path.isdir(tmpfile):
-        tmpfile = os.path.join(tmpfile, filename + '.tmp')
-
-    with open(filename, 'rb') as infile:
-        line = infile.readline()
-
-    if line.endswith(b'\r\n'):
-        newline = "\r\n"
-    elif line.endswith(b'\r'):
-        newline = "\r"
-    elif line.endswith(b'\n'):
-        newline = "\n"
-    else:
-        raise ValueError(f"unknown end of line: {filename}: {line!a}")
-
-    with open(tmpfile, 'w', newline=newline) as outfile:
-        with open(filename) as infile:
-            yield infile, outfile
-    update_file_with_tmpfile(filename, tmpfile)
-
-
-def update_file_with_tmpfile(filename, tmpfile, *, create=False):
-    try:
-        targetfile = open(filename, 'rb')
-    except FileNotFoundError:
-        if not create:
-            raise  # re-raise
-        outcome = 'created'
-        os.replace(tmpfile, filename)
-    else:
-        with targetfile:
-            old_contents = targetfile.read()
-        with open(tmpfile, 'rb') as f:
-            new_contents = f.read()
-        # Now compare!
-        if old_contents != new_contents:
-            outcome = 'updated'
-            os.replace(tmpfile, filename)
-        else:
-            outcome = 'same'
-            os.unlink(tmpfile)
-    return outcome
-
-
-if __name__ == '__main__':
-    import argparse
-    parser = argparse.ArgumentParser()
-    parser.add_argument('--create', action='store_true')
-    parser.add_argument('--exitcode', action='store_true')
-    parser.add_argument('filename', help='path to be updated')
-    parser.add_argument('tmpfile', help='path with new contents')
-    args = parser.parse_args()
-    kwargs = vars(args)
-    setexitcode = kwargs.pop('exitcode')
-
-    outcome = update_file_with_tmpfile(**kwargs)
-    if setexitcode:
-        if outcome == 'same':
-            sys.exit(0)
-        elif outcome == 'updated':
-            sys.exit(1)
-        elif outcome == 'created':
-            sys.exit(2)
-        else:
-            raise NotImplementedError
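A minimal usage sketch for the context manager above (the filename and the rewrite rule are hypothetical): the output goes through a temp file, and the file on disk is only replaced, and therefore only gets a fresh timestamp, when the regenerated contents actually differ:

    from update_file import updating_file_with_tmpfile

    with updating_file_with_tmpfile("generated.c") as (infile, outfile):
        for line in infile:
            outfile.write(line.replace("OLD_NAME", "NEW_NAME"))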
diff --git a/Tools/scripts/verify_ensurepip_wheels.py b/Tools/scripts/verify_ensurepip_wheels.py
deleted file mode 100755
index 044d1fd..0000000
--- a/Tools/scripts/verify_ensurepip_wheels.py
+++ /dev/null
@@ -1,98 +0,0 @@
-#! /usr/bin/env python3
-
-"""
-Compare checksums for wheels in :mod:`ensurepip` against the Cheeseshop.
-
-When GitHub Actions executes the script, output is formatted accordingly.
-https://docs.github.com/en/actions/using-workflows/workflow-commands-for-github-actions#setting-a-notice-message
-"""
-
-import hashlib
-import json
-import os
-import re
-from pathlib import Path
-from urllib.request import urlopen
-
-PACKAGE_NAMES = ("pip", "setuptools")
-ENSURE_PIP_ROOT = Path(__file__).parent.parent.parent / "Lib/ensurepip"
-WHEEL_DIR = ENSURE_PIP_ROOT / "_bundled"
-ENSURE_PIP_INIT_PY_TEXT = (ENSURE_PIP_ROOT / "__init__.py").read_text(encoding="utf-8")
-GITHUB_ACTIONS = os.getenv("GITHUB_ACTIONS") == "true"
-
-
-def print_notice(file_path: str, message: str) -> None:
-    if GITHUB_ACTIONS:
-        message = f"::notice file={file_path}::{message}"
-    print(message, end="\n\n")
-
-
-def print_error(file_path: str, message: str) -> None:
-    if GITHUB_ACTIONS:
-        message = f"::error file={file_path}::{message}"
-    print(message, end="\n\n")
-
-
-def verify_wheel(package_name: str) -> bool:
-    # Find the package on disk
-    package_path = next(WHEEL_DIR.glob(f"{package_name}*.whl"), None)
-    if not package_path:
-        print_error("", f"Could not find a {package_name} wheel on disk.")
-        return False
-
-    print(f"Verifying checksum for {package_path}.")
-
-    # Find the version of the package used by ensurepip
-    package_version_match = re.search(
-        f'_{package_name.upper()}_VERSION = "([^"]+)', ENSURE_PIP_INIT_PY_TEXT
-    )
-    if not package_version_match:
-        print_error(
-            package_path,
-            f"No {package_name} version found in Lib/ensurepip/__init__.py.",
-        )
-        return False
-    package_version = package_version_match[1]
-
-    # Get the SHA 256 digest from the Cheeseshop
-    try:
-        raw_text = urlopen(f"https://pypi.org/pypi/{package_name}/json").read()
-    except (OSError, ValueError):
-        print_error(package_path, f"Could not fetch JSON metadata for {package_name}.")
-        return False
-
-    release_files = json.loads(raw_text)["releases"][package_version]
-    for release_info in release_files:
-        if package_path.name != release_info["filename"]:
-            continue
-        expected_digest = release_info["digests"].get("sha256", "")
-        break
-    else:
-        print_error(package_path, f"No digest for {package_name} found on PyPI.")
-        return False
-
-    # Compute the SHA 256 digest of the wheel on disk
-    actual_digest = hashlib.sha256(package_path.read_bytes()).hexdigest()
-
-    print(f"Expected digest: {expected_digest}")
-    print(f"Actual digest: {actual_digest}")
-
-    if actual_digest != expected_digest:
-        print_error(
-            package_path, f"Failed to verify the checksum of the {package_name} wheel."
-        )
-        return False
-
-    print_notice(
-        package_path,
-        f"Successfully verified the checksum of the {package_name} wheel.",
-    )
-    return True
-
-
-if __name__ == "__main__":
-    exit_status = 0
-    for package_name in PACKAGE_NAMES:
-        if not verify_wheel(package_name):
-            exit_status = 1
-    raise SystemExit(exit_status)
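The script above takes no arguments; run from a CPython checkout it checks each bundled wheel against PyPI (network access to pypi.org is required), printing one notice or error per package and exiting non-zero on any mismatch. A plain invocation would presumably look like:

    python Tools/scripts/verify_ensurepip_wheels.py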