author     Brandt Bucher <brandtbucher@microsoft.com>    2024-01-29 02:48:48 (GMT)
committer  GitHub <noreply@github.com>                   2024-01-29 02:48:48 (GMT)
commit     f6d9e5926b6138994eaa60d1c36462e36105733d (patch)
tree       53362fa9918ab65519ccf9a343cfcdfcfa9c4f6f /Tools/jit
parent     f7c05d7ad3075a1dbeed86b6b12903032e4afba6 (diff)
GH-113464: Add a JIT backend for tier 2 (GH-113465)
Add an option (--enable-experimental-jit for configure-based builds or --experimental-jit for PCbuild-based ones) to build an *experimental* just-in-time compiler, based on copy-and-patch (https://fredrikbk.com/publications/copy-and-patch.pdf). See Tools/jit/README.md for more information on how to install the required build-time tooling.
Diffstat (limited to 'Tools/jit')
-rw-r--r--  Tools/jit/README.md    |  46
-rw-r--r--  Tools/jit/_llvm.py     |  99
-rw-r--r--  Tools/jit/_schema.py   |  99
-rw-r--r--  Tools/jit/_stencils.py | 220
-rw-r--r--  Tools/jit/_targets.py  | 394
-rw-r--r--  Tools/jit/_writer.py   |  95
-rw-r--r--  Tools/jit/build.py     |  28
-rw-r--r--  Tools/jit/mypy.ini     |   5
-rw-r--r--  Tools/jit/template.c   |  98
9 files changed, 1084 insertions, 0 deletions
diff --git a/Tools/jit/README.md b/Tools/jit/README.md
new file mode 100644
index 0000000..04a6c07
--- /dev/null
+++ b/Tools/jit/README.md
@@ -0,0 +1,46 @@
+The JIT Compiler
+================
+
+This version of CPython can be built with an experimental just-in-time compiler. While most everything you already know about building and using CPython is unchanged, you will probably need to install a compatible version of LLVM first.
+
+## Installing LLVM
+
+The JIT compiler does not require end users to install any third-party dependencies, but part of it must be *built* using LLVM[^why-llvm]. You are *not* required to build the rest of CPython using LLVM, or even the same version of LLVM (in fact, this is uncommon).
+
+LLVM version 16 is required. Both `clang` and `llvm-readobj` need to be installed and discoverable (version suffixes, like `clang-16`, are okay). It's highly recommended that you also have `llvm-objdump` available, since this allows the build script to dump human-readable assembly for the generated code.
+
+It's easy to install all of the required tools:
+
+### Linux
+
+Install LLVM 16 on Ubuntu/Debian:
+
+```sh
+wget https://apt.llvm.org/llvm.sh
+chmod +x llvm.sh
+sudo ./llvm.sh 16
+```
+
+### macOS
+
+Install LLVM 16 with [Homebrew](https://brew.sh):
+
+```sh
+brew install llvm@16
+```
+
+Homebrew won't add any of the tools to your `$PATH`. That's okay; the build script knows how to find them.
+
+### Windows
+
+Install LLVM 16 [by searching for it on LLVM's GitHub releases page](https://github.com/llvm/llvm-project/releases?q=16), clicking on "Assets", downloading the appropriate Windows installer for your platform (likely the file ending with `-win64.exe`), and running it. **When installing, be sure to select the option labeled "Add LLVM to the system PATH".**
+
+## Building
+
+For `PCbuild`-based builds, pass the new `--experimental-jit` option to `build.bat`.
+
+For all other builds, pass the new `--enable-experimental-jit` option to `configure`.
+
+Otherwise, just configure and build as you normally would. Cross-compiling "just works", since the JIT is built for the host platform.
+
+[^why-llvm]: Clang is specifically needed because it's the only C compiler with support for guaranteed tail calls (`musttail`), which are required by CPython's continuation-passing-style approach to JIT compilation. Since LLVM also includes other functionalities we need (namely, object file parsing and disassembly), it's convenient to only support one toolchain at this time.
diff --git a/Tools/jit/_llvm.py b/Tools/jit/_llvm.py
new file mode 100644
index 0000000..603bbef
--- /dev/null
+++ b/Tools/jit/_llvm.py
@@ -0,0 +1,99 @@
+"""Utilities for invoking LLVM tools."""
+import asyncio
+import functools
+import os
+import re
+import shlex
+import subprocess
+import typing
+
+_LLVM_VERSION = 16
+_LLVM_VERSION_PATTERN = re.compile(rf"version\s+{_LLVM_VERSION}\.\d+\.\d+\s+")
+
+_P = typing.ParamSpec("_P")
+_R = typing.TypeVar("_R")
+_C = typing.Callable[_P, typing.Awaitable[_R]]
+
+
+def _async_cache(f: _C[_P, _R]) -> _C[_P, _R]:
+ cache: dict[tuple[typing.Any, ...], _R] = {}
+ lock = asyncio.Lock()
+
+ @functools.wraps(f)
+ async def wrapper(
+ *args: _P.args, **kwargs: _P.kwargs # pylint: disable = no-member
+ ) -> _R:
+ async with lock:
+ if args not in cache:
+ cache[args] = await f(*args, **kwargs)
+ return cache[args]
+
+ return wrapper
+
+
+_CORES = asyncio.BoundedSemaphore(os.cpu_count() or 1)
+
+
+async def _run(tool: str, args: typing.Iterable[str], echo: bool = False) -> str | None:
+ command = [tool, *args]
+ async with _CORES:
+ if echo:
+ print(shlex.join(command))
+ try:
+ process = await asyncio.create_subprocess_exec(
+ *command, stdout=subprocess.PIPE
+ )
+ except FileNotFoundError:
+ return None
+ out, _ = await process.communicate()
+ if process.returncode:
+ raise RuntimeError(f"{tool} exited with return code {process.returncode}")
+ return out.decode()
+
+
+@_async_cache
+async def _check_tool_version(name: str, *, echo: bool = False) -> bool:
+ output = await _run(name, ["--version"], echo=echo)
+ return bool(output and _LLVM_VERSION_PATTERN.search(output))
+
+
+@_async_cache
+async def _get_brew_llvm_prefix(*, echo: bool = False) -> str | None:
+ output = await _run("brew", ["--prefix", f"llvm@{_LLVM_VERSION}"], echo=echo)
+ return output and output.removesuffix("\n")
+
+
+@_async_cache
+async def _find_tool(tool: str, *, echo: bool = False) -> str | None:
+ # Unversioned executables:
+ path = tool
+ if await _check_tool_version(path, echo=echo):
+ return path
+ # Versioned executables:
+ path = f"{tool}-{_LLVM_VERSION}"
+ if await _check_tool_version(path, echo=echo):
+ return path
+ # Homebrew-installed executables:
+ prefix = await _get_brew_llvm_prefix(echo=echo)
+ if prefix is not None:
+ path = os.path.join(prefix, "bin", tool)
+ if await _check_tool_version(path, echo=echo):
+ return path
+ # Nothing found:
+ return None
+
+
+async def maybe_run(
+ tool: str, args: typing.Iterable[str], echo: bool = False
+) -> str | None:
+ """Run an LLVM tool if it can be found. Otherwise, return None."""
+ path = await _find_tool(tool, echo=echo)
+ return path and await _run(path, args, echo=echo)
+
+
+async def run(tool: str, args: typing.Iterable[str], echo: bool = False) -> str:
+ """Run an LLVM tool if it can be found. Otherwise, raise RuntimeError."""
+ output = await maybe_run(tool, args, echo=echo)
+ if output is None:
+ raise RuntimeError(f"Can't find {tool}-{_LLVM_VERSION}!")
+ return output
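(Aside, not part of the diff: a minimal sketch of how the helpers above are meant to be driven. It assumes LLVM 16 is installed and that the snippet runs from `Tools/jit` so `_llvm` is importable.)

```py
# Hypothetical usage sketch of Tools/jit/_llvm.py (not part of this commit).
import asyncio

import _llvm

async def check_tools() -> None:
    # maybe_run() returns None when a suitable LLVM 16 tool can't be found:
    objdump = await _llvm.maybe_run("llvm-objdump", ["--version"])
    print("disassembly available" if objdump else "llvm-objdump missing (optional)")
    # run() raises RuntimeError instead, since llvm-readobj (and clang) are required:
    version = await _llvm.run("llvm-readobj", ["--version"])
    print(version.splitlines()[0])

asyncio.run(check_tools())
```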
diff --git a/Tools/jit/_schema.py b/Tools/jit/_schema.py
new file mode 100644
index 0000000..8eeb78e
--- /dev/null
+++ b/Tools/jit/_schema.py
@@ -0,0 +1,99 @@
+"""Schema for the JSON produced by llvm-readobj --elf-output-style=JSON."""
+import typing
+
+HoleKind: typing.TypeAlias = typing.Literal[
+ "ARM64_RELOC_GOT_LOAD_PAGE21",
+ "ARM64_RELOC_GOT_LOAD_PAGEOFF12",
+ "ARM64_RELOC_UNSIGNED",
+ "IMAGE_REL_AMD64_ADDR64",
+ "IMAGE_REL_I386_DIR32",
+ "R_AARCH64_ABS64",
+ "R_AARCH64_CALL26",
+ "R_AARCH64_JUMP26",
+ "R_AARCH64_MOVW_UABS_G0_NC",
+ "R_AARCH64_MOVW_UABS_G1_NC",
+ "R_AARCH64_MOVW_UABS_G2_NC",
+ "R_AARCH64_MOVW_UABS_G3",
+ "R_X86_64_64",
+ "X86_64_RELOC_UNSIGNED",
+]
+
+
+class COFFRelocation(typing.TypedDict):
+ """A COFF object file relocation record."""
+
+ Type: dict[typing.Literal["Value"], HoleKind]
+ Symbol: str
+ Offset: int
+
+
+class ELFRelocation(typing.TypedDict):
+ """An ELF object file relocation record."""
+
+ Addend: int
+ Offset: int
+ Symbol: dict[typing.Literal["Value"], str]
+ Type: dict[typing.Literal["Value"], HoleKind]
+
+
+class MachORelocation(typing.TypedDict):
+ """A Mach-O object file relocation record."""
+
+ Offset: int
+ Section: typing.NotRequired[dict[typing.Literal["Value"], str]]
+ Symbol: typing.NotRequired[dict[typing.Literal["Value"], str]]
+ Type: dict[typing.Literal["Value"], HoleKind]
+
+
+class _COFFSymbol(typing.TypedDict):
+ Name: str
+ Value: int
+
+
+class _ELFSymbol(typing.TypedDict):
+ Name: dict[typing.Literal["Value"], str]
+ Value: int
+
+
+class _MachOSymbol(typing.TypedDict):
+ Name: dict[typing.Literal["Value"], str]
+ Value: int
+
+
+class COFFSection(typing.TypedDict):
+ """A COFF object file section."""
+
+ Characteristics: dict[
+ typing.Literal["Flags"], list[dict[typing.Literal["Name"], str]]
+ ]
+ Number: int
+ RawDataSize: int
+ Relocations: list[dict[typing.Literal["Relocation"], COFFRelocation]]
+ SectionData: typing.NotRequired[dict[typing.Literal["Bytes"], list[int]]]
+ Symbols: list[dict[typing.Literal["Symbol"], _COFFSymbol]]
+
+
+class ELFSection(typing.TypedDict):
+ """An ELF object file section."""
+
+ Flags: dict[typing.Literal["Flags"], list[dict[typing.Literal["Name"], str]]]
+ Index: int
+ Info: int
+ Relocations: list[dict[typing.Literal["Relocation"], ELFRelocation]]
+ SectionData: dict[typing.Literal["Bytes"], list[int]]
+ Symbols: list[dict[typing.Literal["Symbol"], _ELFSymbol]]
+ Type: dict[typing.Literal["Value"], str]
+
+
+class MachOSection(typing.TypedDict):
+ """A Mach-O object file section."""
+
+ Address: int
+ Attributes: dict[typing.Literal["Flags"], list[dict[typing.Literal["Name"], str]]]
+ Index: int
+ Name: dict[typing.Literal["Value"], str]
+ Relocations: typing.NotRequired[
+ list[dict[typing.Literal["Relocation"], MachORelocation]]
+ ]
+ SectionData: typing.NotRequired[dict[typing.Literal["Bytes"], list[int]]]
+ Symbols: typing.NotRequired[list[dict[typing.Literal["Symbol"], _MachOSymbol]]]
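(Aside, not part of the diff: an illustration of the JSON shape these TypedDicts describe. The field values below are invented; only the keys and types come from the schema above.)

```py
# Hypothetical record (values invented) that type-checks as _schema.ELFRelocation:
import _schema

relocation: _schema.ELFRelocation = {
    "Addend": 0,
    "Offset": 8,
    "Symbol": {"Value": "_JIT_CONTINUE"},
    "Type": {"Value": "R_X86_64_64"},
}
```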
diff --git a/Tools/jit/_stencils.py b/Tools/jit/_stencils.py
new file mode 100644
index 0000000..71c678e
--- /dev/null
+++ b/Tools/jit/_stencils.py
@@ -0,0 +1,220 @@
+"""Core data structures for compiled code templates."""
+import dataclasses
+import enum
+import sys
+
+import _schema
+
+
+@enum.unique
+class HoleValue(enum.Enum):
+ """
+ Different "base" values that can be patched into holes (usually combined with the
+ address of a symbol and/or an addend).
+ """
+
+ # The base address of the machine code for the current uop (exposed as _JIT_ENTRY):
+ CODE = enum.auto()
+ # The base address of the machine code for the next uop (exposed as _JIT_CONTINUE):
+ CONTINUE = enum.auto()
+ # The base address of the read-only data for this uop:
+ DATA = enum.auto()
+ # The address of the current executor (exposed as _JIT_EXECUTOR):
+ EXECUTOR = enum.auto()
+ # The base address of the "global" offset table located in the read-only data.
+ # Shouldn't be present in the final stencils, since these are all replaced with
+ # equivalent DATA values:
+ GOT = enum.auto()
+ # The current uop's oparg (exposed as _JIT_OPARG):
+ OPARG = enum.auto()
+ # The current uop's operand (exposed as _JIT_OPERAND):
+ OPERAND = enum.auto()
+ # The current uop's target (exposed as _JIT_TARGET):
+ TARGET = enum.auto()
+ # The base address of the machine code for the first uop (exposed as _JIT_TOP):
+ TOP = enum.auto()
+ # A hardcoded value of zero (used for symbol lookups):
+ ZERO = enum.auto()
+
+
+@dataclasses.dataclass
+class Hole:
+ """
+ A "hole" in the stencil to be patched with a computed runtime value.
+
+ Analogous to relocation records in an object file.
+ """
+
+ offset: int
+ kind: _schema.HoleKind
+ # Patch with this base value:
+ value: HoleValue
+ # ...plus the address of this symbol:
+ symbol: str | None
+ # ...plus this addend:
+ addend: int
+ # Convenience method:
+ replace = dataclasses.replace
+
+ def as_c(self) -> str:
+ """Dump this hole as an initialization of a C Hole struct."""
+ parts = [
+ f"{self.offset:#x}",
+ f"HoleKind_{self.kind}",
+ f"HoleValue_{self.value.name}",
+ f"&{self.symbol}" if self.symbol else "NULL",
+ _format_addend(self.addend),
+ ]
+ return f"{{{', '.join(parts)}}}"
+
+
+@dataclasses.dataclass
+class Stencil:
+ """
+ A contiguous block of machine code or data to be copied-and-patched.
+
+ Analogous to a section or segment in an object file.
+ """
+
+ body: bytearray = dataclasses.field(default_factory=bytearray, init=False)
+ holes: list[Hole] = dataclasses.field(default_factory=list, init=False)
+ disassembly: list[str] = dataclasses.field(default_factory=list, init=False)
+
+ def pad(self, alignment: int) -> None:
+ """Pad the stencil to the given alignment."""
+ offset = len(self.body)
+ padding = -offset % alignment
+ self.disassembly.append(f"{offset:x}: {' '.join(['00'] * padding)}")
+ self.body.extend([0] * padding)
+
+ def emit_aarch64_trampoline(self, hole: Hole) -> None:
+ """Even with the large code model, AArch64 Linux insists on 28-bit jumps."""
+ base = len(self.body)
+ where = slice(hole.offset, hole.offset + 4)
+ instruction = int.from_bytes(self.body[where], sys.byteorder)
+ instruction &= 0xFC000000
+ instruction |= ((base - hole.offset) >> 2) & 0x03FFFFFF
+ self.body[where] = instruction.to_bytes(4, sys.byteorder)
+ self.disassembly += [
+ f"{base + 4 * 0: x}: d2800008 mov x8, #0x0",
+ f"{base + 4 * 0:016x}: R_AARCH64_MOVW_UABS_G0_NC {hole.symbol}",
+ f"{base + 4 * 1:x}: f2a00008 movk x8, #0x0, lsl #16",
+ f"{base + 4 * 1:016x}: R_AARCH64_MOVW_UABS_G1_NC {hole.symbol}",
+ f"{base + 4 * 2:x}: f2c00008 movk x8, #0x0, lsl #32",
+ f"{base + 4 * 2:016x}: R_AARCH64_MOVW_UABS_G2_NC {hole.symbol}",
+ f"{base + 4 * 3:x}: f2e00008 movk x8, #0x0, lsl #48",
+ f"{base + 4 * 3:016x}: R_AARCH64_MOVW_UABS_G3 {hole.symbol}",
+ f"{base + 4 * 4:x}: d61f0100 br x8",
+ ]
+ for code in [
+ 0xD2800008.to_bytes(4, sys.byteorder),
+ 0xF2A00008.to_bytes(4, sys.byteorder),
+ 0xF2C00008.to_bytes(4, sys.byteorder),
+ 0xF2E00008.to_bytes(4, sys.byteorder),
+ 0xD61F0100.to_bytes(4, sys.byteorder),
+ ]:
+ self.body.extend(code)
+ for i, kind in enumerate(
+ [
+ "R_AARCH64_MOVW_UABS_G0_NC",
+ "R_AARCH64_MOVW_UABS_G1_NC",
+ "R_AARCH64_MOVW_UABS_G2_NC",
+ "R_AARCH64_MOVW_UABS_G3",
+ ]
+ ):
+ self.holes.append(hole.replace(offset=base + 4 * i, kind=kind))
+
+
+@dataclasses.dataclass
+class StencilGroup:
+ """
+ Code and data corresponding to a given micro-opcode.
+
+ Analogous to an entire object file.
+ """
+
+ code: Stencil = dataclasses.field(default_factory=Stencil, init=False)
+ data: Stencil = dataclasses.field(default_factory=Stencil, init=False)
+ symbols: dict[int | str, tuple[HoleValue, int]] = dataclasses.field(
+ default_factory=dict, init=False
+ )
+ _got: dict[str, int] = dataclasses.field(default_factory=dict, init=False)
+
+ def process_relocations(self, *, alignment: int = 1) -> None:
+ """Fix up all GOT and internal relocations for this stencil group."""
+ self.code.pad(alignment)
+ self.data.pad(8)
+ for stencil in [self.code, self.data]:
+ holes = []
+ for hole in stencil.holes:
+ if hole.value is HoleValue.GOT:
+ assert hole.symbol is not None
+ hole.value = HoleValue.DATA
+ hole.addend += self._global_offset_table_lookup(hole.symbol)
+ hole.symbol = None
+ elif hole.symbol in self.symbols:
+ hole.value, addend = self.symbols[hole.symbol]
+ hole.addend += addend
+ hole.symbol = None
+ elif (
+ hole.kind in {"R_AARCH64_CALL26", "R_AARCH64_JUMP26"}
+ and hole.value is HoleValue.ZERO
+ ):
+ self.code.emit_aarch64_trampoline(hole)
+ continue
+ holes.append(hole)
+ stencil.holes[:] = holes
+ self.code.pad(alignment)
+ self._emit_global_offset_table()
+ self.code.holes.sort(key=lambda hole: hole.offset)
+ self.data.holes.sort(key=lambda hole: hole.offset)
+
+ def _global_offset_table_lookup(self, symbol: str) -> int:
+ return len(self.data.body) + self._got.setdefault(symbol, 8 * len(self._got))
+
+ def _emit_global_offset_table(self) -> None:
+ got = len(self.data.body)
+ for s, offset in self._got.items():
+ if s in self.symbols:
+ value, addend = self.symbols[s]
+ symbol = None
+ else:
+ value, symbol = symbol_to_value(s)
+ addend = 0
+ self.data.holes.append(
+ Hole(got + offset, "R_X86_64_64", value, symbol, addend)
+ )
+ value_part = value.name if value is not HoleValue.ZERO else ""
+ if value_part and not symbol and not addend:
+ addend_part = ""
+ else:
+ addend_part = f"&{symbol}" if symbol else ""
+ addend_part += _format_addend(addend, signed=symbol is not None)
+ if value_part:
+ value_part += "+"
+ self.data.disassembly.append(
+ f"{len(self.data.body):x}: {value_part}{addend_part}"
+ )
+ self.data.body.extend([0] * 8)
+
+
+def symbol_to_value(symbol: str) -> tuple[HoleValue, str | None]:
+ """
+ Convert a symbol name to a HoleValue and a symbol name.
+
+ Some symbols (starting with "_JIT_") are special and are converted to their
+ own HoleValues.
+ """
+ if symbol.startswith("_JIT_"):
+ try:
+ return HoleValue[symbol.removeprefix("_JIT_")], None
+ except KeyError:
+ pass
+ return HoleValue.ZERO, symbol
+
+
+def _format_addend(addend: int, signed: bool = False) -> str:
+ addend %= 1 << 64
+ if addend & (1 << 63):
+ addend -= 1 << 64
+ return f"{addend:{'+#x' if signed else '#x'}}"
diff --git a/Tools/jit/_targets.py b/Tools/jit/_targets.py
new file mode 100644
index 0000000..51b091e
--- /dev/null
+++ b/Tools/jit/_targets.py
@@ -0,0 +1,394 @@
+"""Target-specific code generation, parsing, and processing."""
+import asyncio
+import dataclasses
+import hashlib
+import json
+import os
+import pathlib
+import re
+import sys
+import tempfile
+import typing
+
+import _llvm
+import _schema
+import _stencils
+import _writer
+
+if sys.version_info < (3, 11):
+ raise RuntimeError("Building the JIT compiler requires Python 3.11 or newer!")
+
+TOOLS_JIT_BUILD = pathlib.Path(__file__).resolve()
+TOOLS_JIT = TOOLS_JIT_BUILD.parent
+TOOLS = TOOLS_JIT.parent
+CPYTHON = TOOLS.parent
+PYTHON_EXECUTOR_CASES_C_H = CPYTHON / "Python" / "executor_cases.c.h"
+TOOLS_JIT_TEMPLATE_C = TOOLS_JIT / "template.c"
+
+
+_S = typing.TypeVar("_S", _schema.COFFSection, _schema.ELFSection, _schema.MachOSection)
+_R = typing.TypeVar(
+ "_R", _schema.COFFRelocation, _schema.ELFRelocation, _schema.MachORelocation
+)
+
+
+@dataclasses.dataclass
+class _Target(typing.Generic[_S, _R]):
+ triple: str
+ _: dataclasses.KW_ONLY
+ alignment: int = 1
+ prefix: str = ""
+ debug: bool = False
+ force: bool = False
+ verbose: bool = False
+
+ def _compute_digest(self, out: pathlib.Path) -> str:
+ hasher = hashlib.sha256()
+ hasher.update(self.triple.encode())
+ hasher.update(self.alignment.to_bytes())
+ hasher.update(self.prefix.encode())
+ # These dependencies are also reflected in _JITSources in regen.targets:
+ hasher.update(PYTHON_EXECUTOR_CASES_C_H.read_bytes())
+ hasher.update((out / "pyconfig.h").read_bytes())
+ for dirpath, _, filenames in sorted(os.walk(TOOLS_JIT)):
+ for filename in filenames:
+ hasher.update(pathlib.Path(dirpath, filename).read_bytes())
+ return hasher.hexdigest()
+
+ async def _parse(self, path: pathlib.Path) -> _stencils.StencilGroup:
+ group = _stencils.StencilGroup()
+ args = ["--disassemble", "--reloc", f"{path}"]
+ output = await _llvm.maybe_run("llvm-objdump", args, echo=self.verbose)
+ if output is not None:
+ group.code.disassembly.extend(
+ line.expandtabs().strip()
+ for line in output.splitlines()
+ if not line.isspace()
+ )
+ args = [
+ "--elf-output-style=JSON",
+ "--expand-relocs",
+ # "--pretty-print",
+ "--section-data",
+ "--section-relocations",
+ "--section-symbols",
+ "--sections",
+ f"{path}",
+ ]
+ output = await _llvm.run("llvm-readobj", args, echo=self.verbose)
+ # --elf-output-style=JSON is only *slightly* broken on Mach-O...
+ output = output.replace("PrivateExtern\n", "\n")
+ output = output.replace("Extern\n", "\n")
+ # ...and also COFF:
+ output = output[output.index("[", 1, None) :]
+ output = output[: output.rindex("]", None, -1) + 1]
+ sections: list[dict[typing.Literal["Section"], _S]] = json.loads(output)
+ for wrapped_section in sections:
+ self._handle_section(wrapped_section["Section"], group)
+ assert group.symbols["_JIT_ENTRY"] == (_stencils.HoleValue.CODE, 0)
+ if group.data.body:
+ line = f"0: {str(bytes(group.data.body)).removeprefix('b')}"
+ group.data.disassembly.append(line)
+ group.process_relocations()
+ return group
+
+ def _handle_section(self, section: _S, group: _stencils.StencilGroup) -> None:
+ raise NotImplementedError(type(self))
+
+ def _handle_relocation(
+ self, base: int, relocation: _R, raw: bytes
+ ) -> _stencils.Hole:
+ raise NotImplementedError(type(self))
+
+ async def _compile(
+ self, opname: str, c: pathlib.Path, tempdir: pathlib.Path
+ ) -> _stencils.StencilGroup:
+ o = tempdir / f"{opname}.o"
+ args = [
+ f"--target={self.triple}",
+ "-DPy_BUILD_CORE",
+ "-D_DEBUG" if self.debug else "-DNDEBUG",
+ f"-D_JIT_OPCODE={opname}",
+ "-D_PyJIT_ACTIVE",
+ "-D_Py_JIT",
+ "-I.",
+ f"-I{CPYTHON / 'Include'}",
+ f"-I{CPYTHON / 'Include' / 'internal'}",
+ f"-I{CPYTHON / 'Include' / 'internal' / 'mimalloc'}",
+ f"-I{CPYTHON / 'Python'}",
+ "-O3",
+ "-c",
+ "-fno-asynchronous-unwind-tables",
+ # SET_FUNCTION_ATTRIBUTE on 32-bit Windows debug builds:
+ "-fno-jump-tables",
+ # Position-independent code adds indirection to every load and jump:
+ "-fno-pic",
+ # Don't make calls to weird stack-smashing canaries:
+ "-fno-stack-protector",
+ # We have three options for code model:
+ # - "small": the default, assumes that code and data reside in the
+ # lowest 2GB of memory (128MB on aarch64)
+ # - "medium": assumes that code resides in the lowest 2GB of memory,
+ # and makes no assumptions about data (not available on aarch64)
+ # - "large": makes no assumptions about either code or data
+ "-mcmodel=large",
+ "-o",
+ f"{o}",
+ "-std=c11",
+ f"{c}",
+ ]
+ await _llvm.run("clang", args, echo=self.verbose)
+ return await self._parse(o)
+
+ async def _build_stencils(self) -> dict[str, _stencils.StencilGroup]:
+ generated_cases = PYTHON_EXECUTOR_CASES_C_H.read_text()
+ opnames = sorted(re.findall(r"\n {8}case (\w+): \{\n", generated_cases))
+ tasks = []
+ with tempfile.TemporaryDirectory() as tempdir:
+ work = pathlib.Path(tempdir).resolve()
+ async with asyncio.TaskGroup() as group:
+ for opname in opnames:
+ coro = self._compile(opname, TOOLS_JIT_TEMPLATE_C, work)
+ tasks.append(group.create_task(coro, name=opname))
+ return {task.get_name(): task.result() for task in tasks}
+
+ def build(self, out: pathlib.Path, *, comment: str = "") -> None:
+ """Build jit_stencils.h in the given directory."""
+ digest = f"// {self._compute_digest(out)}\n"
+ jit_stencils = out / "jit_stencils.h"
+ if (
+ not self.force
+ and jit_stencils.exists()
+ and jit_stencils.read_text().startswith(digest)
+ ):
+ return
+ stencil_groups = asyncio.run(self._build_stencils())
+ with jit_stencils.open("w") as file:
+ file.write(digest)
+ if comment:
+ file.write(f"// {comment}\n")
+ file.write("")
+ for line in _writer.dump(stencil_groups):
+ file.write(f"{line}\n")
+
+
+class _COFF(
+ _Target[_schema.COFFSection, _schema.COFFRelocation]
+): # pylint: disable = too-few-public-methods
+ def _handle_section(
+ self, section: _schema.COFFSection, group: _stencils.StencilGroup
+ ) -> None:
+ flags = {flag["Name"] for flag in section["Characteristics"]["Flags"]}
+ if "SectionData" in section:
+ section_data_bytes = section["SectionData"]["Bytes"]
+ else:
+ # Zeroed BSS data, seen with printf debugging calls:
+ section_data_bytes = [0] * section["RawDataSize"]
+ if "IMAGE_SCN_MEM_EXECUTE" in flags:
+ value = _stencils.HoleValue.CODE
+ stencil = group.code
+ elif "IMAGE_SCN_MEM_READ" in flags:
+ value = _stencils.HoleValue.DATA
+ stencil = group.data
+ else:
+ return
+ base = len(stencil.body)
+ group.symbols[section["Number"]] = value, base
+ stencil.body.extend(section_data_bytes)
+ for wrapped_symbol in section["Symbols"]:
+ symbol = wrapped_symbol["Symbol"]
+ offset = base + symbol["Value"]
+ name = symbol["Name"]
+ name = name.removeprefix(self.prefix)
+ group.symbols[name] = value, offset
+ for wrapped_relocation in section["Relocations"]:
+ relocation = wrapped_relocation["Relocation"]
+ hole = self._handle_relocation(base, relocation, stencil.body)
+ stencil.holes.append(hole)
+
+ def _handle_relocation(
+ self, base: int, relocation: _schema.COFFRelocation, raw: bytes
+ ) -> _stencils.Hole:
+ match relocation:
+ case {
+ "Offset": offset,
+ "Symbol": s,
+ "Type": {"Value": "IMAGE_REL_AMD64_ADDR64" as kind},
+ }:
+ offset += base
+ s = s.removeprefix(self.prefix)
+ value, symbol = _stencils.symbol_to_value(s)
+ addend = int.from_bytes(raw[offset : offset + 8], "little")
+ case {
+ "Offset": offset,
+ "Symbol": s,
+ "Type": {"Value": "IMAGE_REL_I386_DIR32" as kind},
+ }:
+ offset += base
+ s = s.removeprefix(self.prefix)
+ value, symbol = _stencils.symbol_to_value(s)
+ addend = int.from_bytes(raw[offset : offset + 4], "little")
+ case _:
+ raise NotImplementedError(relocation)
+ return _stencils.Hole(offset, kind, value, symbol, addend)
+
+
+class _ELF(
+ _Target[_schema.ELFSection, _schema.ELFRelocation]
+): # pylint: disable = too-few-public-methods
+ def _handle_section(
+ self, section: _schema.ELFSection, group: _stencils.StencilGroup
+ ) -> None:
+ section_type = section["Type"]["Value"]
+ flags = {flag["Name"] for flag in section["Flags"]["Flags"]}
+ if section_type == "SHT_RELA":
+ assert "SHF_INFO_LINK" in flags, flags
+ assert not section["Symbols"]
+ value, base = group.symbols[section["Info"]]
+ if value is _stencils.HoleValue.CODE:
+ stencil = group.code
+ else:
+ assert value is _stencils.HoleValue.DATA
+ stencil = group.data
+ for wrapped_relocation in section["Relocations"]:
+ relocation = wrapped_relocation["Relocation"]
+ hole = self._handle_relocation(base, relocation, stencil.body)
+ stencil.holes.append(hole)
+ elif section_type == "SHT_PROGBITS":
+ if "SHF_ALLOC" not in flags:
+ return
+ if "SHF_EXECINSTR" in flags:
+ value = _stencils.HoleValue.CODE
+ stencil = group.code
+ else:
+ value = _stencils.HoleValue.DATA
+ stencil = group.data
+ group.symbols[section["Index"]] = value, len(stencil.body)
+ for wrapped_symbol in section["Symbols"]:
+ symbol = wrapped_symbol["Symbol"]
+ offset = len(stencil.body) + symbol["Value"]
+ name = symbol["Name"]["Value"]
+ name = name.removeprefix(self.prefix)
+ group.symbols[name] = value, offset
+ stencil.body.extend(section["SectionData"]["Bytes"])
+ assert not section["Relocations"]
+ else:
+ assert section_type in {
+ "SHT_GROUP",
+ "SHT_LLVM_ADDRSIG",
+ "SHT_NULL",
+ "SHT_STRTAB",
+ "SHT_SYMTAB",
+ }, section_type
+
+ def _handle_relocation(
+ self, base: int, relocation: _schema.ELFRelocation, raw: bytes
+ ) -> _stencils.Hole:
+ match relocation:
+ case {
+ "Addend": addend,
+ "Offset": offset,
+ "Symbol": {"Value": s},
+ "Type": {"Value": kind},
+ }:
+ offset += base
+ s = s.removeprefix(self.prefix)
+ value, symbol = _stencils.symbol_to_value(s)
+ case _:
+ raise NotImplementedError(relocation)
+ return _stencils.Hole(offset, kind, value, symbol, addend)
+
+
+class _MachO(
+ _Target[_schema.MachOSection, _schema.MachORelocation]
+): # pylint: disable = too-few-public-methods
+ def _handle_section(
+ self, section: _schema.MachOSection, group: _stencils.StencilGroup
+ ) -> None:
+ assert section["Address"] >= len(group.code.body)
+ assert "SectionData" in section
+ flags = {flag["Name"] for flag in section["Attributes"]["Flags"]}
+ name = section["Name"]["Value"]
+ name = name.removeprefix(self.prefix)
+ if "SomeInstructions" in flags:
+ value = _stencils.HoleValue.CODE
+ stencil = group.code
+ start_address = 0
+ group.symbols[name] = value, section["Address"] - start_address
+ else:
+ value = _stencils.HoleValue.DATA
+ stencil = group.data
+ start_address = len(group.code.body)
+ group.symbols[name] = value, len(group.code.body)
+ base = section["Address"] - start_address
+ group.symbols[section["Index"]] = value, base
+ stencil.body.extend(
+ [0] * (section["Address"] - len(group.code.body) - len(group.data.body))
+ )
+ stencil.body.extend(section["SectionData"]["Bytes"])
+ assert "Symbols" in section
+ for wrapped_symbol in section["Symbols"]:
+ symbol = wrapped_symbol["Symbol"]
+ offset = symbol["Value"] - start_address
+ name = symbol["Name"]["Value"]
+ name = name.removeprefix(self.prefix)
+ group.symbols[name] = value, offset
+ assert "Relocations" in section
+ for wrapped_relocation in section["Relocations"]:
+ relocation = wrapped_relocation["Relocation"]
+ hole = self._handle_relocation(base, relocation, stencil.body)
+ stencil.holes.append(hole)
+
+ def _handle_relocation(
+ self, base: int, relocation: _schema.MachORelocation, raw: bytes
+ ) -> _stencils.Hole:
+ symbol: str | None
+ match relocation:
+ case {
+ "Offset": offset,
+ "Symbol": {"Value": s},
+ "Type": {
+ "Value": "ARM64_RELOC_GOT_LOAD_PAGE21"
+ | "ARM64_RELOC_GOT_LOAD_PAGEOFF12" as kind
+ },
+ }:
+ offset += base
+ s = s.removeprefix(self.prefix)
+ value, symbol = _stencils.HoleValue.GOT, s
+ addend = 0
+ case {
+ "Offset": offset,
+ "Section": {"Value": s},
+ "Type": {"Value": kind},
+ } | {
+ "Offset": offset,
+ "Symbol": {"Value": s},
+ "Type": {"Value": kind},
+ }:
+ offset += base
+ s = s.removeprefix(self.prefix)
+ value, symbol = _stencils.symbol_to_value(s)
+ addend = 0
+ case _:
+ raise NotImplementedError(relocation)
+ # Turn Clang's weird __bzero calls into normal bzero calls:
+ if symbol == "__bzero":
+ symbol = "bzero"
+ return _stencils.Hole(offset, kind, value, symbol, addend)
+
+
+def get_target(host: str) -> _COFF | _ELF | _MachO:
+ """Build a _Target for the given host "triple" and options."""
+ if re.fullmatch(r"aarch64-apple-darwin.*", host):
+ return _MachO(host, alignment=8, prefix="_")
+ if re.fullmatch(r"aarch64-.*-linux-gnu", host):
+ return _ELF(host, alignment=8)
+ if re.fullmatch(r"i686-pc-windows-msvc", host):
+ return _COFF(host, prefix="_")
+ if re.fullmatch(r"x86_64-apple-darwin.*", host):
+ return _MachO(host, prefix="_")
+ if re.fullmatch(r"x86_64-pc-windows-msvc", host):
+ return _COFF(host)
+ if re.fullmatch(r"x86_64-.*-linux-gnu", host):
+ return _ELF(host)
+ raise ValueError(host)
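(Aside, not part of the diff: the module's entry points are `get_target()` plus `build()`. A hedged sketch follows, assuming an example x86_64 Linux triple and a working directory that already contains a configured build tree with `pyconfig.h`, which `build()` hashes and reads.)

```py
# Hedged sketch (not part of this commit); the triple below is just an example.
import pathlib

import _targets

target = _targets.get_target("x86_64-unknown-linux-gnu")  # selects the _ELF backend
target.verbose = True                                     # echo clang/llvm commands
target.build(pathlib.Path.cwd(), comment="example")       # writes ./jit_stencils.h
```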
diff --git a/Tools/jit/_writer.py b/Tools/jit/_writer.py
new file mode 100644
index 0000000..8a2a42e
--- /dev/null
+++ b/Tools/jit/_writer.py
@@ -0,0 +1,95 @@
+"""Utilities for writing StencilGroups out to a C header file."""
+import typing
+
+import _schema
+import _stencils
+
+
+def _dump_header() -> typing.Iterator[str]:
+ yield "typedef enum {"
+ for kind in typing.get_args(_schema.HoleKind):
+ yield f" HoleKind_{kind},"
+ yield "} HoleKind;"
+ yield ""
+ yield "typedef enum {"
+ for value in _stencils.HoleValue:
+ yield f" HoleValue_{value.name},"
+ yield "} HoleValue;"
+ yield ""
+ yield "typedef struct {"
+ yield " const uint64_t offset;"
+ yield " const HoleKind kind;"
+ yield " const HoleValue value;"
+ yield " const void *symbol;"
+ yield " const uint64_t addend;"
+ yield "} Hole;"
+ yield ""
+ yield "typedef struct {"
+ yield " const size_t body_size;"
+ yield " const unsigned char * const body;"
+ yield " const size_t holes_size;"
+ yield " const Hole * const holes;"
+ yield "} Stencil;"
+ yield ""
+ yield "typedef struct {"
+ yield " const Stencil code;"
+ yield " const Stencil data;"
+ yield "} StencilGroup;"
+ yield ""
+
+
+def _dump_footer(opnames: typing.Iterable[str]) -> typing.Iterator[str]:
+ yield "#define INIT_STENCIL(STENCIL) { \\"
+ yield " .body_size = Py_ARRAY_LENGTH(STENCIL##_body) - 1, \\"
+ yield " .body = STENCIL##_body, \\"
+ yield " .holes_size = Py_ARRAY_LENGTH(STENCIL##_holes) - 1, \\"
+ yield " .holes = STENCIL##_holes, \\"
+ yield "}"
+ yield ""
+ yield "#define INIT_STENCIL_GROUP(OP) { \\"
+ yield " .code = INIT_STENCIL(OP##_code), \\"
+ yield " .data = INIT_STENCIL(OP##_data), \\"
+ yield "}"
+ yield ""
+ yield "static const StencilGroup stencil_groups[512] = {"
+ for opname in opnames:
+ yield f" [{opname}] = INIT_STENCIL_GROUP({opname}),"
+ yield "};"
+ yield ""
+ yield "#define GET_PATCHES() { \\"
+ for value in _stencils.HoleValue:
+ yield f" [HoleValue_{value.name}] = (uint64_t)0xBADBADBADBADBADB, \\"
+ yield "}"
+
+
+def _dump_stencil(opname: str, group: _stencils.StencilGroup) -> typing.Iterator[str]:
+ yield f"// {opname}"
+ for part, stencil in [("code", group.code), ("data", group.data)]:
+ for line in stencil.disassembly:
+ yield f"// {line}"
+ if stencil.body:
+ size = len(stencil.body) + 1
+ yield f"static const unsigned char {opname}_{part}_body[{size}] = {{"
+ for i in range(0, len(stencil.body), 8):
+ row = " ".join(f"{byte:#04x}," for byte in stencil.body[i : i + 8])
+ yield f" {row}"
+ yield "};"
+ else:
+ yield f"static const unsigned char {opname}_{part}_body[1];"
+ if stencil.holes:
+ size = len(stencil.holes) + 1
+ yield f"static const Hole {opname}_{part}_holes[{size}] = {{"
+ for hole in stencil.holes:
+ yield f" {hole.as_c()},"
+ yield "};"
+ else:
+ yield f"static const Hole {opname}_{part}_holes[1];"
+ yield ""
+
+
+def dump(groups: dict[str, _stencils.StencilGroup]) -> typing.Iterator[str]:
+ """Yield a JIT compiler line-by-line as a C header file."""
+ yield from _dump_header()
+ for opname, group in groups.items():
+ yield from _dump_stencil(opname, group)
+ yield from _dump_footer(groups)
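(Aside, not part of the diff: to see the shape of the emitted header, `dump()` can be fed a single empty `StencilGroup`. The opcode name `_NOP` below is hypothetical; a real build gets its groups from `_targets._Target._build_stencils()`.)

```py
# Hypothetical driver (not part of this commit).
import _stencils
import _writer

group = _stencils.StencilGroup()            # empty code and data stencils
for line in _writer.dump({"_NOP": group}):  # "_NOP" is a made-up opcode name
    print(line)                             # typedefs, empty bodies, INIT_* macros
```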
diff --git a/Tools/jit/build.py b/Tools/jit/build.py
new file mode 100644
index 0000000..4d4ace1
--- /dev/null
+++ b/Tools/jit/build.py
@@ -0,0 +1,28 @@
+"""Build an experimental just-in-time compiler for CPython."""
+import argparse
+import pathlib
+import shlex
+import sys
+
+import _targets
+
+if __name__ == "__main__":
+ comment = f"$ {shlex.join([sys.executable] + sys.argv)}"
+ parser = argparse.ArgumentParser(description=__doc__)
+ parser.add_argument(
+ "target", type=_targets.get_target, help="a PEP 11 target triple to compile for"
+ )
+ parser.add_argument(
+ "-d", "--debug", action="store_true", help="compile for a debug build of Python"
+ )
+ parser.add_argument(
+ "-f", "--force", action="store_true", help="force the entire JIT to be rebuilt"
+ )
+ parser.add_argument(
+ "-v", "--verbose", action="store_true", help="echo commands as they are run"
+ )
+ args = parser.parse_args()
+ args.target.debug = args.debug
+ args.target.force = args.force
+ args.target.verbose = args.verbose
+ args.target.build(pathlib.Path.cwd(), comment=comment)
diff --git a/Tools/jit/mypy.ini b/Tools/jit/mypy.ini
new file mode 100644
index 0000000..768d002
--- /dev/null
+++ b/Tools/jit/mypy.ini
@@ -0,0 +1,5 @@
+[mypy]
+files = Tools/jit
+pretty = True
+python_version = 3.11
+strict = True
diff --git a/Tools/jit/template.c b/Tools/jit/template.c
new file mode 100644
index 0000000..12303a5
--- /dev/null
+++ b/Tools/jit/template.c
@@ -0,0 +1,98 @@
+#include "Python.h"
+
+#include "pycore_call.h"
+#include "pycore_ceval.h"
+#include "pycore_dict.h"
+#include "pycore_emscripten_signal.h"
+#include "pycore_intrinsics.h"
+#include "pycore_jit.h"
+#include "pycore_long.h"
+#include "pycore_opcode_metadata.h"
+#include "pycore_opcode_utils.h"
+#include "pycore_range.h"
+#include "pycore_setobject.h"
+#include "pycore_sliceobject.h"
+
+#include "ceval_macros.h"
+
+#undef CURRENT_OPARG
+#define CURRENT_OPARG() (_oparg)
+
+#undef CURRENT_OPERAND
+#define CURRENT_OPERAND() (_operand)
+
+#undef DEOPT_IF
+#define DEOPT_IF(COND, INSTNAME) \
+ do { \
+ if ((COND)) { \
+ goto deoptimize; \
+ } \
+ } while (0)
+
+#undef ENABLE_SPECIALIZATION
+#define ENABLE_SPECIALIZATION (0)
+
+#undef GOTO_ERROR
+#define GOTO_ERROR(LABEL) \
+ do { \
+ goto LABEL ## _tier_two; \
+ } while (0)
+
+#undef LOAD_IP
+#define LOAD_IP(UNUSED) \
+ do { \
+ } while (0)
+
+#define PATCH_VALUE(TYPE, NAME, ALIAS) \
+ extern void ALIAS; \
+ TYPE NAME = (TYPE)(uint64_t)&ALIAS;
+
+#define PATCH_JUMP(ALIAS) \
+ extern void ALIAS; \
+ __attribute__((musttail)) \
+ return ((jit_func)&ALIAS)(frame, stack_pointer, tstate);
+
+_Py_CODEUNIT *
+_JIT_ENTRY(_PyInterpreterFrame *frame, PyObject **stack_pointer, PyThreadState *tstate)
+{
+ // Locals that the instruction implementations expect to exist:
+ PATCH_VALUE(_PyExecutorObject *, current_executor, _JIT_EXECUTOR)
+ int oparg;
+ int opcode = _JIT_OPCODE;
+ _PyUOpInstruction *next_uop;
+ // Other stuff we need handy:
+ PATCH_VALUE(uint16_t, _oparg, _JIT_OPARG)
+ PATCH_VALUE(uint64_t, _operand, _JIT_OPERAND)
+ PATCH_VALUE(uint32_t, _target, _JIT_TARGET)
+ // The actual instruction definitions (only one will be used):
+ if (opcode == _JUMP_TO_TOP) {
+ CHECK_EVAL_BREAKER();
+ PATCH_JUMP(_JIT_TOP);
+ }
+ switch (opcode) {
+#include "executor_cases.c.h"
+ default:
+ Py_UNREACHABLE();
+ }
+ PATCH_JUMP(_JIT_CONTINUE);
+ // Labels that the instruction implementations expect to exist:
+unbound_local_error_tier_two:
+ _PyEval_FormatExcCheckArg(
+ tstate, PyExc_UnboundLocalError, UNBOUNDLOCAL_ERROR_MSG,
+ PyTuple_GetItem(_PyFrame_GetCode(frame)->co_localsplusnames, oparg));
+ goto error_tier_two;
+pop_4_error_tier_two:
+ STACK_SHRINK(1);
+pop_3_error_tier_two:
+ STACK_SHRINK(1);
+pop_2_error_tier_two:
+ STACK_SHRINK(1);
+pop_1_error_tier_two:
+ STACK_SHRINK(1);
+error_tier_two:
+ _PyFrame_SetStackPointer(frame, stack_pointer);
+ return NULL;
+deoptimize:
+ _PyFrame_SetStackPointer(frame, stack_pointer);
+ return _PyCode_CODE(_PyFrame_GetCode(frame)) + _target;
+}