summary | refs | log | tree | commit | diff | stats
path: root/Tools
diff options
context:
space:
mode:
author: Brandt Bucher <brandtbucher@microsoft.com> 2024-05-01 15:05:53 (GMT)
committer: GitHub <noreply@github.com> 2024-05-01 15:05:53 (GMT)
commit: 49baa656cb994122869bc807a88ea2f3f0d7751b (patch)
tree: 6e8dca68a9b31ded4bcb2d3133e8edbb5022f526 /Tools
parent: beb653cc24275025708758d444835db2ddbb74e4 (diff)
download: cpython-49baa656cb994122869bc807a88ea2f3f0d7751b.zip
cpython-49baa656cb994122869bc807a88ea2f3f0d7751b.tar.gz
cpython-49baa656cb994122869bc807a88ea2f3f0d7751b.tar.bz2
GH-115802: Use the GHC calling convention in JIT code (GH-118287)
Diffstat (limited to 'Tools')
-rw-r--r-- Tools/jit/_targets.py 54
-rw-r--r-- Tools/jit/_writer.py 4
-rw-r--r-- Tools/jit/template.c 4
-rw-r--r-- Tools/jit/trampoline.c 25
4 files changed, 76 insertions, 11 deletions
diff --git a/Tools/jit/_targets.py b/Tools/jit/_targets.py
index 91734b3..274d17b 100644
--- a/Tools/jit/_targets.py
+++ b/Tools/jit/_targets.py
@@ -38,6 +38,7 @@ class _Target(typing.Generic[_S, _R]):
_: dataclasses.KW_ONLY
alignment: int = 1
args: typing.Sequence[str] = ()
+ ghccc: bool = False
prefix: str = ""
debug: bool = False
force: bool = False
@@ -85,7 +86,11 @@ class _Target(typing.Generic[_S, _R]):
sections: list[dict[typing.Literal["Section"], _S]] = json.loads(output)
for wrapped_section in sections:
self._handle_section(wrapped_section["Section"], group)
- assert group.symbols["_JIT_ENTRY"] == (_stencils.HoleValue.CODE, 0)
+ # The trampoline's entry point is just named "_ENTRY", since on some
+ # platforms we later assume that any function starting with "_JIT_" uses
+ # the GHC calling convention:
+ entry_symbol = "_JIT_ENTRY" if "_JIT_ENTRY" in group.symbols else "_ENTRY"
+ assert group.symbols[entry_symbol] == (_stencils.HoleValue.CODE, 0)
if group.data.body:
line = f"0: {str(bytes(group.data.body)).removeprefix('b')}"
group.data.disassembly.append(line)
@@ -103,6 +108,9 @@ class _Target(typing.Generic[_S, _R]):
async def _compile(
self, opname: str, c: pathlib.Path, tempdir: pathlib.Path
) -> _stencils.StencilGroup:
+ # "Compile" the trampoline to an empty stencil group if it's not needed:
+ if opname == "trampoline" and not self.ghccc:
+ return _stencils.StencilGroup()
o = tempdir / f"{opname}.o"
args = [
f"--target={self.triple}",
@@ -130,13 +138,38 @@ class _Target(typing.Generic[_S, _R]):
"-fno-plt",
# Don't call stack-smashing canaries that we can't find or patch:
"-fno-stack-protector",
- "-o",
- f"{o}",
"-std=c11",
- f"{c}",
*self.args,
]
- await _llvm.run("clang", args, echo=self.verbose)
+ if self.ghccc:
+ # This is a bit of an ugly workaround, but it makes the code much
+ # smaller and faster, so it's worth it. We want to use the GHC
+ # calling convention, but Clang doesn't support it. So, we *first*
+ # compile the code to LLVM IR, perform some text replacements on the
+ # IR to change the calling convention(!), and then compile *that*.
+ # Once we have access to Clang 19, we can get rid of this and use
+ # __attribute__((preserve_none)) directly in the C code instead:
+ ll = tempdir / f"{opname}.ll"
+ args_ll = args + [
+ # -fomit-frame-pointer is necessary because the GHC calling
+ # convention uses RBP to pass arguments:
+ "-S", "-emit-llvm", "-fomit-frame-pointer", "-o", f"{ll}", f"{c}"
+ ]
+ await _llvm.run("clang", args_ll, echo=self.verbose)
+ ir = ll.read_text()
+ # This handles declarations, definitions, and calls to named symbols
+ # starting with "_JIT_":
+ ir = re.sub(r"(((noalias|nonnull|noundef) )*ptr @_JIT_\w+\()", r"ghccc \1", ir)
+ # This handles calls to anonymous callees, since anything with
+ # "musttail" needs to use the same calling convention:
+ ir = ir.replace("musttail call", "musttail call ghccc")
+ # Sometimes *both* replacements happen at the same site, so fix it:
+ ir = ir.replace("ghccc ghccc", "ghccc")
+ ll.write_text(ir)
+ args_o = args + ["-Wno-unused-command-line-argument", "-o", f"{o}", f"{ll}"]
+ else:
+ args_o = args + ["-o", f"{o}", f"{c}"]
+ await _llvm.run("clang", args_o, echo=self.verbose)
return await self._parse(o)
async def _build_stencils(self) -> dict[str, _stencils.StencilGroup]:
@@ -146,6 +179,8 @@ class _Target(typing.Generic[_S, _R]):
with tempfile.TemporaryDirectory() as tempdir:
work = pathlib.Path(tempdir).resolve()
async with asyncio.TaskGroup() as group:
+ coro = self._compile("trampoline", TOOLS_JIT / "trampoline.c", work)
+ tasks.append(group.create_task(coro, name="trampoline"))
for opname in opnames:
coro = self._compile(opname, TOOLS_JIT_TEMPLATE_C, work)
tasks.append(group.create_task(coro, name=opname))
@@ -445,6 +480,7 @@ class _MachO(
def get_target(host: str) -> _COFF | _ELF | _MachO:
"""Build a _Target for the given host "triple" and options."""
+ # ghccc currently crashes Clang when combined with musttail on aarch64. :(
if re.fullmatch(r"aarch64-apple-darwin.*", host):
return _MachO(host, alignment=8, prefix="_")
if re.fullmatch(r"aarch64-pc-windows-msvc", host):
@@ -455,13 +491,13 @@ def get_target(host: str) -> _COFF | _ELF | _MachO:
return _ELF(host, alignment=8, args=args)
if re.fullmatch(r"i686-pc-windows-msvc", host):
args = ["-DPy_NO_ENABLE_SHARED"]
- return _COFF(host, args=args, prefix="_")
+ return _COFF(host, args=args, ghccc=True, prefix="_")
if re.fullmatch(r"x86_64-apple-darwin.*", host):
- return _MachO(host, prefix="_")
+ return _MachO(host, ghccc=True, prefix="_")
if re.fullmatch(r"x86_64-pc-windows-msvc", host):
args = ["-fms-runtime-lib=dll"]
- return _COFF(host, args=args)
+ return _COFF(host, args=args, ghccc=True)
if re.fullmatch(r"x86_64-.*-linux-gnu", host):
args = ["-fpic"]
- return _ELF(host, args=args)
+ return _ELF(host, args=args, ghccc=True)
raise ValueError(host)
diff --git a/Tools/jit/_writer.py b/Tools/jit/_writer.py
index cbc1ed2..6b36d8a 100644
--- a/Tools/jit/_writer.py
+++ b/Tools/jit/_writer.py
@@ -53,9 +53,13 @@ def _dump_footer(opnames: typing.Iterable[str]) -> typing.Iterator[str]:
yield ""
yield "static const StencilGroup stencil_groups[512] = {"
for opname in opnames:
+ if opname == "trampoline":
+ continue
yield f" [{opname}] = INIT_STENCIL_GROUP({opname}),"
yield "};"
yield ""
+ yield "static const StencilGroup trampoline = INIT_STENCIL_GROUP(trampoline);"
+ yield ""
yield "#define GET_PATCHES() { \\"
for value in _stencils.HoleValue:
yield f" [HoleValue_{value.name}] = (uintptr_t)0xBADBADBADBADBADB, \\"
diff --git a/Tools/jit/template.c b/Tools/jit/template.c
index 3e81fd1..0dd0744 100644
--- a/Tools/jit/template.c
+++ b/Tools/jit/template.c
@@ -48,7 +48,7 @@
do { \
OPT_STAT_INC(traces_executed); \
__attribute__((musttail)) \
- return ((jit_func)((EXECUTOR)->jit_code))(frame, stack_pointer, tstate); \
+ return ((jit_func)((EXECUTOR)->jit_side_entry))(frame, stack_pointer, tstate); \
} while (0)
#undef GOTO_TIER_ONE
@@ -65,7 +65,7 @@ do { \
#define PATCH_VALUE(TYPE, NAME, ALIAS) \
PyAPI_DATA(void) ALIAS; \
- TYPE NAME = (TYPE)(uint64_t)&ALIAS;
+ TYPE NAME = (TYPE)(uintptr_t)&ALIAS;
#define PATCH_JUMP(ALIAS) \
do { \
diff --git a/Tools/jit/trampoline.c b/Tools/jit/trampoline.c
new file mode 100644
index 0000000..01b3d63
--- /dev/null
+++ b/Tools/jit/trampoline.c
@@ -0,0 +1,25 @@
+#include "Python.h"
+
+#include "pycore_ceval.h"
+#include "pycore_frame.h"
+#include "pycore_jit.h"
+
+// This is where the calling convention changes, on platforms that require it.
+// The actual change is patched in while the JIT compiler is being built, in
+// Tools/jit/_targets.py. On other platforms, this function compiles to nothing.
+_Py_CODEUNIT *
+_ENTRY(_PyInterpreterFrame *frame, PyObject **stack_pointer, PyThreadState *tstate)
+{
+ // This is subtle. The actual trace will return to us once it exits, so we
+ // need to make sure that we stay alive until then. If our trace side-exits
+ // into another trace, and this trace is then invalidated, the code for
+ // *this function* will be freed and we'll crash upon return:
+ PyAPI_DATA(void) _JIT_EXECUTOR;
+ PyObject *executor = (PyObject *)(uintptr_t)&_JIT_EXECUTOR;
+ Py_INCREF(executor);
+ // Note that this is *not* a tail call:
+ PyAPI_DATA(void) _JIT_CONTINUE;
+ _Py_CODEUNIT *target = ((jit_func)&_JIT_CONTINUE)(frame, stack_pointer, tstate);
+ Py_SETREF(tstate->previous_executor, executor);
+ return target;
+}