diff options
author | Brandt Bucher <brandtbucher@microsoft.com> | 2024-02-29 16:11:28 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-02-29 16:11:28 (GMT) |
commit | f0df35eeca2ccdfd58cfb9801f06ffa23537270b (patch) | |
tree | 9c7c6a5de052e09462b0fda44ac7f55a4032540a /Tools | |
parent | 45d8871dc4da33fcef92991031707c5bf88a40cf (diff) | |
download | cpython-f0df35eeca2ccdfd58cfb9801f06ffa23537270b.zip cpython-f0df35eeca2ccdfd58cfb9801f06ffa23537270b.tar.gz cpython-f0df35eeca2ccdfd58cfb9801f06ffa23537270b.tar.bz2 |
GH-115802: JIT "small" code for Windows (GH-115964)
Diffstat (limited to 'Tools')
-rw-r--r-- | Tools/jit/_schema.py | 5 | ||||
-rw-r--r-- | Tools/jit/_stencils.py | 9 | ||||
-rw-r--r-- | Tools/jit/_targets.py | 46 | ||||
-rw-r--r-- | Tools/jit/template.c | 5 |
4 files changed, 46 insertions, 19 deletions
diff --git a/Tools/jit/_schema.py b/Tools/jit/_schema.py index 975ca65..14e5fc2 100644 --- a/Tools/jit/_schema.py +++ b/Tools/jit/_schema.py @@ -4,9 +4,12 @@ import typing HoleKind: typing.TypeAlias = typing.Literal[ "ARM64_RELOC_GOT_LOAD_PAGE21", "ARM64_RELOC_GOT_LOAD_PAGEOFF12", + "ARM64_RELOC_PAGE21", + "ARM64_RELOC_PAGEOFF12", "ARM64_RELOC_UNSIGNED", - "IMAGE_REL_AMD64_ADDR64", + "IMAGE_REL_AMD64_REL32", "IMAGE_REL_I386_DIR32", + "IMAGE_REL_I386_REL32", "R_AARCH64_ABS64", "R_AARCH64_ADR_GOT_PAGE", "R_AARCH64_CALL26", diff --git a/Tools/jit/_stencils.py b/Tools/jit/_stencils.py index 71c678e..eddec73 100644 --- a/Tools/jit/_stencils.py +++ b/Tools/jit/_stencils.py @@ -96,7 +96,7 @@ class Stencil: instruction |= ((base - hole.offset) >> 2) & 0x03FFFFFF self.body[where] = instruction.to_bytes(4, sys.byteorder) self.disassembly += [ - f"{base + 4 * 0: x}: d2800008 mov x8, #0x0", + f"{base + 4 * 0:x}: d2800008 mov x8, #0x0", f"{base + 4 * 0:016x}: R_AARCH64_MOVW_UABS_G0_NC {hole.symbol}", f"{base + 4 * 1:x}: f2a00008 movk x8, #0x0, lsl #16", f"{base + 4 * 1:016x}: R_AARCH64_MOVW_UABS_G1_NC {hole.symbol}", @@ -162,6 +162,13 @@ class StencilGroup: ): self.code.emit_aarch64_trampoline(hole) continue + elif ( + hole.kind in {"IMAGE_REL_AMD64_REL32"} + and hole.value is HoleValue.ZERO + ): + raise ValueError( + f"Add PyAPI_FUNC(...) or PyAPI_DATA(...) to declaration of {hole.symbol}!" 
+ ) holes.append(hole) stencil.holes[:] = holes self.code.pad(alignment) diff --git a/Tools/jit/_targets.py b/Tools/jit/_targets.py index 06dc4e7..07959b1 100644 --- a/Tools/jit/_targets.py +++ b/Tools/jit/_targets.py @@ -106,7 +106,7 @@ class _Target(typing.Generic[_S, _R]): o = tempdir / f"{opname}.o" args = [ f"--target={self.triple}", - "-DPy_BUILD_CORE", + "-DPy_BUILD_CORE_MODULE", "-D_DEBUG" if self.debug else "-DNDEBUG", f"-D_JIT_OPCODE={opname}", "-D_PyJIT_ACTIVE", @@ -118,12 +118,17 @@ class _Target(typing.Generic[_S, _R]): f"-I{CPYTHON / 'Python'}", "-O3", "-c", + # This debug info isn't necessary, and bloats out the JIT'ed code. + # We *may* be able to re-enable this, process it, and JIT it for a + # nicer debugging experience... but that needs a lot more research: "-fno-asynchronous-unwind-tables", + # Don't call built-in functions that we can't find or patch: "-fno-builtin", - # SET_FUNCTION_ATTRIBUTE on 32-bit Windows debug builds: - "-fno-jump-tables", + # Emit relaxable 64-bit calls/jumps, so we don't have to worry about + # emitting in-range trampolines for out-of-range targets. 
+ # We can probably remove this and emit trampolines in the future: "-fno-plt", - # Don't make calls to weird stack-smashing canaries: + # Don't call stack-smashing canaries that we can't find or patch: "-fno-stack-protector", "-o", f"{o}", @@ -194,12 +199,21 @@ class _COFF( offset = base + symbol["Value"] name = symbol["Name"] name = name.removeprefix(self.prefix) - group.symbols[name] = value, offset + if name not in group.symbols: + group.symbols[name] = value, offset for wrapped_relocation in section["Relocations"]: relocation = wrapped_relocation["Relocation"] hole = self._handle_relocation(base, relocation, stencil.body) stencil.holes.append(hole) + def _unwrap_dllimport(self, name: str) -> tuple[_stencils.HoleValue, str | None]: + if name.startswith("__imp_"): + name = name.removeprefix("__imp_") + name = name.removeprefix(self.prefix) + return _stencils.HoleValue.GOT, name + name = name.removeprefix(self.prefix) + return _stencils.symbol_to_value(name) + def _handle_relocation( self, base: int, relocation: _schema.COFFRelocation, raw: bytes ) -> _stencils.Hole: @@ -207,21 +221,23 @@ class _COFF( case { "Offset": offset, "Symbol": s, - "Type": {"Value": "IMAGE_REL_AMD64_ADDR64" as kind}, + "Type": {"Value": "IMAGE_REL_I386_DIR32" as kind}, }: offset += base - s = s.removeprefix(self.prefix) - value, symbol = _stencils.symbol_to_value(s) - addend = int.from_bytes(raw[offset : offset + 8], "little") + value, symbol = self._unwrap_dllimport(s) + addend = int.from_bytes(raw[offset : offset + 4], "little") case { "Offset": offset, "Symbol": s, - "Type": {"Value": "IMAGE_REL_I386_DIR32" as kind}, + "Type": { + "Value": "IMAGE_REL_AMD64_REL32" | "IMAGE_REL_I386_REL32" as kind + }, }: offset += base - s = s.removeprefix(self.prefix) - value, symbol = _stencils.symbol_to_value(s) - addend = int.from_bytes(raw[offset : offset + 4], "little") + value, symbol = self._unwrap_dllimport(s) + addend = ( + int.from_bytes(raw[offset : offset + 4], "little", signed=True) - 4 + 
) case _: raise NotImplementedError(relocation) return _stencils.Hole(offset, kind, value, symbol, addend) @@ -423,12 +439,12 @@ def get_target(host: str) -> _COFF | _ELF | _MachO: args = ["-mcmodel=large"] return _ELF(host, alignment=8, args=args) if re.fullmatch(r"i686-pc-windows-msvc", host): - args = ["-mcmodel=large"] + args = ["-DPy_NO_ENABLE_SHARED"] return _COFF(host, args=args, prefix="_") if re.fullmatch(r"x86_64-apple-darwin.*", host): return _MachO(host, prefix="_") if re.fullmatch(r"x86_64-pc-windows-msvc", host): - args = ["-mcmodel=large"] + args = ["-fms-runtime-lib=dll"] return _COFF(host, args=args) if re.fullmatch(r"x86_64-.*-linux-gnu", host): return _ELF(host) diff --git a/Tools/jit/template.c b/Tools/jit/template.c index d79c6ef..8aaf458 100644 --- a/Tools/jit/template.c +++ b/Tools/jit/template.c @@ -9,6 +9,7 @@ #include "pycore_long.h" #include "pycore_opcode_metadata.h" #include "pycore_opcode_utils.h" +#include "pycore_optimizer.h" #include "pycore_range.h" #include "pycore_setobject.h" #include "pycore_sliceobject.h" @@ -58,11 +59,11 @@ do { \ } while (0) #define PATCH_VALUE(TYPE, NAME, ALIAS) \ - extern void ALIAS; \ + PyAPI_DATA(void) ALIAS; \ TYPE NAME = (TYPE)(uint64_t)&ALIAS; #define PATCH_JUMP(ALIAS) \ - extern void ALIAS; \ + PyAPI_DATA(void) ALIAS; \ __attribute__((musttail)) \ return ((jit_func)&ALIAS)(frame, stack_pointer, tstate); |