summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Include/cpython/sysmodule.h3
-rw-r--r--Include/internal/pycore_ceval_state.h2
-rw-r--r--Include/sysmodule.h3
-rw-r--r--Lib/test/test_perf_profiler.py76
-rw-r--r--Misc/NEWS.d/next/C API/2023-10-02-23-08-53.gh-issue-109587.UqqnDY.rst2
-rw-r--r--Modules/_testinternalcapi.c32
-rw-r--r--Python/perf_trampoline.c50
-rw-r--r--Python/sysmodule.c41
8 files changed, 199 insertions, 10 deletions
diff --git a/Include/cpython/sysmodule.h b/Include/cpython/sysmodule.h
index df12ae4..9fd7cc0 100644
--- a/Include/cpython/sysmodule.h
+++ b/Include/cpython/sysmodule.h
@@ -21,3 +21,6 @@ PyAPI_FUNC(int) PyUnstable_WritePerfMapEntry(
unsigned int code_size,
const char *entry_name);
PyAPI_FUNC(void) PyUnstable_PerfMapState_Fini(void);
+/* Append the contents of the perf map file named by parent_filename to the
+   current process's perf map file.  Returns 0 on success and a non-zero
+   value on failure; always returns 0 when MS_WINDOWS is defined. */
+PyAPI_FUNC(int) PyUnstable_CopyPerfMapFile(const char* parent_filename);
+/* Compile a perf trampoline for the given code object when it does not
+   already carry one. */
+PyAPI_FUNC(int) PyUnstable_PerfTrampoline_CompileCode(PyCodeObject *);
+/* Store the persist-after-fork flag and return the stored value.  While the
+   flag is non-zero, a forked child copies the parent's perf map file instead
+   of restarting the trampoline. */
+PyAPI_FUNC(int) PyUnstable_PerfTrampoline_SetPersistAfterFork(int enable);
diff --git a/Include/internal/pycore_ceval_state.h b/Include/internal/pycore_ceval_state.h
index 1717ec4..072bbcd 100644
--- a/Include/internal/pycore_ceval_state.h
+++ b/Include/internal/pycore_ceval_state.h
@@ -55,6 +55,7 @@ struct _ceval_runtime_state {
struct code_arena_st *code_arena;
struct trampoline_api_st trampoline_api;
FILE *map_file;
+ Py_ssize_t persist_after_fork;
#else
int _not_used;
#endif
@@ -68,6 +69,7 @@ struct _ceval_runtime_state {
{ \
.status = PERF_STATUS_NO_INIT, \
.extra_code_index = -1, \
+ .persist_after_fork = 0, \
}
#else
# define _PyEval_RUNTIME_PERF_INIT {0}
diff --git a/Include/sysmodule.h b/Include/sysmodule.h
index 7406513..7b14f72 100644
--- a/Include/sysmodule.h
+++ b/Include/sysmodule.h
@@ -1,6 +1,3 @@
-
-/* System module interface */
-
#ifndef Py_SYSMODULE_H
#define Py_SYSMODULE_H
#ifdef __cplusplus
diff --git a/Lib/test/test_perf_profiler.py b/Lib/test/test_perf_profiler.py
index fe8707a..040be63 100644
--- a/Lib/test/test_perf_profiler.py
+++ b/Lib/test/test_perf_profiler.py
@@ -353,6 +353,82 @@ class TestPerfProfiler(unittest.TestCase):
self.assertNotIn(f"py::bar:{script}", stdout)
self.assertNotIn(f"py::baz:{script}", stdout)
+ def test_pre_fork_compile(self):
+ # Check that trampolines compiled before os.fork() with
+ # persist-after-fork enabled show up in both the parent's and the
+ # child's perf map files.
+ code = """if 1:
+ import sys
+ import os
+ import sysconfig
+ from _testinternalcapi import (
+ compile_perf_trampoline_entry,
+ perf_trampoline_set_persist_after_fork,
+ )
+
+ def foo_fork():
+ pass
+
+ def bar_fork():
+ foo_fork()
+
+ def foo():
+ pass
+
+ def bar():
+ foo()
+
+ def compile_trampolines_for_all_functions():
+ perf_trampoline_set_persist_after_fork(1)
+ for _, obj in globals().items():
+ if callable(obj) and hasattr(obj, '__code__'):
+ compile_perf_trampoline_entry(obj.__code__)
+
+ if __name__ == "__main__":
+ compile_trampolines_for_all_functions()
+ pid = os.fork()
+ if pid == 0:
+ print(os.getpid())
+ bar_fork()
+ else:
+ bar()
+ """
+
+ # Run the script in a fresh interpreter started with -Xperf and
+ # capture everything it writes.
+ with temp_dir() as script_dir:
+ script = make_script(script_dir, "perftest", code)
+ with subprocess.Popen(
+ [sys.executable, "-Xperf", script],
+ universal_newlines=True,
+ stderr=subprocess.PIPE,
+ stdout=subprocess.PIPE,
+ ) as process:
+ stdout, stderr = process.communicate()
+
+ self.assertEqual(process.returncode, 0)
+ self.assertNotIn("Error:", stderr)
+ # The forked child prints its own pid; use it to locate the child's
+ # perf map file next to the parent's.
+ child_pid = int(stdout.strip())
+ perf_file = pathlib.Path(f"/tmp/perf-{process.pid}.map")
+ perf_child_file = pathlib.Path(f"/tmp/perf-{child_pid}.map")
+ self.assertTrue(perf_file.exists())
+ self.assertTrue(perf_child_file.exists())
+
+ # The parent's map must list every function compiled before the fork.
+ perf_file_contents = perf_file.read_text()
+ self.assertIn(f"py::foo:{script}", perf_file_contents)
+ self.assertIn(f"py::bar:{script}", perf_file_contents)
+ self.assertIn(f"py::foo_fork:{script}", perf_file_contents)
+ self.assertIn(f"py::bar_fork:{script}", perf_file_contents)
+
+ # The child's map must list the functions the child actually runs.
+ child_perf_file_contents = perf_child_file.read_text()
+ self.assertIn(f"py::foo_fork:{script}", child_perf_file_contents)
+ self.assertIn(f"py::bar_fork:{script}", child_perf_file_contents)
+
+ # Pre-compiled perf-map entries of a forked process must be
+ # identical in both the parent and child perf-map files.
+ perf_file_lines = perf_file_contents.split("\n")
+ for line in perf_file_lines:
+ if (
+ f"py::foo_fork:{script}" in line
+ or f"py::bar_fork:{script}" in line
+ ):
+ self.assertIn(line, child_perf_file_contents)
+
if __name__ == "__main__":
unittest.main()
diff --git a/Misc/NEWS.d/next/C API/2023-10-02-23-08-53.gh-issue-109587.UqqnDY.rst b/Misc/NEWS.d/next/C API/2023-10-02-23-08-53.gh-issue-109587.UqqnDY.rst
new file mode 100644
index 0000000..c6fa24f
--- /dev/null
+++ b/Misc/NEWS.d/next/C API/2023-10-02-23-08-53.gh-issue-109587.UqqnDY.rst
@@ -0,0 +1,2 @@
+Introduced :c:func:`PyUnstable_PerfTrampoline_CompileCode`, :c:func:`PyUnstable_PerfTrampoline_SetPersistAfterFork` and
+:c:func:`PyUnstable_CopyPerfMapFile`. These functions allow extension modules to initialize trampolines eagerly, after the application is "warmed up". This makes it possible to have perf-trampolines running in an always-enabled fashion.
diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c
index 4ead1b6..1869f48 100644
--- a/Modules/_testinternalcapi.c
+++ b/Modules/_testinternalcapi.c
@@ -1556,6 +1556,36 @@ _testinternalcapi_test_long_numbits_impl(PyObject *module)
Py_RETURN_NONE;
}
+/* Test helper: eagerly compile a perf trampoline for a code object.
+ *
+ * Expects exactly one argument, which must be a code object.  Raises
+ * AssertionError when PyUnstable_PerfTrampoline_CompileCode() reports a
+ * non-zero status; otherwise returns that status as a Python int.
+ */
+static PyObject *
+compile_perf_trampoline_entry(PyObject *self, PyObject *args)
+{
+    PyObject *code_obj;
+    if (!PyArg_ParseTuple(args, "O!", &PyCode_Type, &code_obj)) {
+        return NULL;
+    }
+
+    int status = PyUnstable_PerfTrampoline_CompileCode((PyCodeObject *)code_obj);
+    if (status == 0) {
+        return PyLong_FromLong(status);
+    }
+
+    PyErr_SetString(PyExc_AssertionError, "Failed to compile trampoline");
+    return NULL;
+}
+
+/* Test helper: forward an int flag to
+ * PyUnstable_PerfTrampoline_SetPersistAfterFork().
+ *
+ * Expects exactly one int argument.  Raises AssertionError when the C API
+ * returns 0; otherwise returns the C API's result as a Python int.
+ */
+static PyObject *
+perf_trampoline_set_persist_after_fork(PyObject *self, PyObject *args)
+{
+    int flag;
+    if (!PyArg_ParseTuple(args, "i", &flag)) {
+        return NULL;
+    }
+
+    int stored = PyUnstable_PerfTrampoline_SetPersistAfterFork(flag);
+    if (stored != 0) {
+        return PyLong_FromLong(stored);
+    }
+
+    PyErr_SetString(PyExc_AssertionError, "Failed to set persist_after_fork");
+    return NULL;
+}
+
static PyMethodDef module_functions[] = {
{"get_configs", get_configs, METH_NOARGS},
@@ -1613,6 +1643,8 @@ static PyMethodDef module_functions[] = {
{"run_in_subinterp_with_config",
_PyCFunction_CAST(run_in_subinterp_with_config),
METH_VARARGS | METH_KEYWORDS},
+ {"compile_perf_trampoline_entry", compile_perf_trampoline_entry, METH_VARARGS},
+ {"perf_trampoline_set_persist_after_fork", perf_trampoline_set_persist_after_fork, METH_VARARGS},
_TESTINTERNALCAPI_WRITE_UNRAISABLE_EXC_METHODDEF
_TESTINTERNALCAPI_TEST_LONG_NUMBITS_METHODDEF
{NULL, NULL} /* sentinel */
diff --git a/Python/perf_trampoline.c b/Python/perf_trampoline.c
index 209a23b..4912239 100644
--- a/Python/perf_trampoline.c
+++ b/Python/perf_trampoline.c
@@ -193,7 +193,7 @@ typedef struct trampoline_api_st trampoline_api_t;
#define perf_code_arena _PyRuntime.ceval.perf.code_arena
#define trampoline_api _PyRuntime.ceval.perf.trampoline_api
#define perf_map_file _PyRuntime.ceval.perf.map_file
-
+#define persist_after_fork _PyRuntime.ceval.perf.persist_after_fork
static void
perf_map_write_entry(void *state, const void *code_addr,
@@ -361,6 +361,26 @@ default_eval:
}
#endif // PY_HAVE_PERF_TRAMPOLINE
+/* Compile a perf trampoline for `co` unless one is already attached.
+ *
+ * Returns 0 when the code object already has a trampoline, when a new
+ * trampoline could not be compiled, or when built without
+ * PY_HAVE_PERF_TRAMPOLINE.  Otherwise writes the new trampoline through
+ * trampoline_api.write_state() and returns the result of storing it on the
+ * code object with _PyCode_SetExtra().
+ */
+int PyUnstable_PerfTrampoline_CompileCode(PyCodeObject *co)
+{
+#ifdef PY_HAVE_PERF_TRAMPOLINE
+    assert(extra_code_index != -1);
+
+    py_trampoline existing = NULL;
+    int lookup = _PyCode_GetExtra((PyObject *)co, extra_code_index,
+                                  (void **)&existing);
+    if (lookup == 0 && existing != NULL) {
+        /* A trampoline is already attached: nothing to do. */
+        return 0;
+    }
+
+    py_trampoline fresh = compile_trampoline();
+    if (fresh == NULL) {
+        return 0;
+    }
+
+    trampoline_api.write_state(trampoline_api.state, fresh,
+                               perf_code_arena->code_size, co);
+    return _PyCode_SetExtra((PyObject *)co, extra_code_index, (void *)fresh);
+#else
+    return 0;
+#endif // PY_HAVE_PERF_TRAMPOLINE
+}
+
int
_PyIsPerfTrampolineActive(void)
{
@@ -448,16 +468,34 @@ _PyPerfTrampoline_Fini(void)
return 0;
}
+/* Store `enable` in the runtime's persist_after_fork flag.
+ *
+ * Returns the value just stored (so passing 0 returns 0), or 0 when built
+ * without PY_HAVE_PERF_TRAMPOLINE.
+ */
+int
+PyUnstable_PerfTrampoline_SetPersistAfterFork(int enable)
+{
+#ifdef PY_HAVE_PERF_TRAMPOLINE
+    persist_after_fork = enable;
+    return persist_after_fork;
+#else
+    return 0;
+#endif
+}
+
PyStatus
_PyPerfTrampoline_AfterFork_Child(void)
{
#ifdef PY_HAVE_PERF_TRAMPOLINE
- // Restart trampoline in file in child.
- int was_active = _PyIsPerfTrampolineActive();
- _PyPerfTrampoline_Fini();
PyUnstable_PerfMapState_Fini();
- if (was_active) {
- _PyPerfTrampoline_Init(1);
+ if (persist_after_fork) {
+ char filename[256];
+ pid_t parent_pid = getppid();
+ snprintf(filename, sizeof(filename), "/tmp/perf-%d.map", parent_pid);
+ if (PyUnstable_CopyPerfMapFile(filename) != 0) {
+ return PyStatus_Error("Failed to copy perf map file.");
+ }
+ } else {
+ // Restart trampoline in file in child.
+ int was_active = _PyIsPerfTrampolineActive();
+ _PyPerfTrampoline_Fini();
+ if (was_active) {
+ _PyPerfTrampoline_Init(1);
+ }
}
#endif
return PyStatus_Ok();
diff --git a/Python/sysmodule.c b/Python/sysmodule.c
index 3debe7f..4008a28 100644
--- a/Python/sysmodule.c
+++ b/Python/sysmodule.c
@@ -2361,7 +2361,7 @@ PyAPI_FUNC(int) PyUnstable_WritePerfMapEntry(
#ifndef MS_WINDOWS
if (perf_map_state.perf_map == NULL) {
int ret = PyUnstable_PerfMapState_Init();
- if(ret != 0){
+ if (ret != 0){
return ret;
}
}
@@ -2388,6 +2388,45 @@ PyAPI_FUNC(void) PyUnstable_PerfMapState_Fini(void) {
#endif
}
+/* Append the contents of the perf map file `parent_filename` to the current
+ * process's perf map file, initializing the perf map state first if needed.
+ *
+ * Returns 0 on success.  Returns -1 when `parent_filename` is NULL or cannot
+ * be opened, or when reading, writing or flushing fails; returns the error
+ * code of PyUnstable_PerfMapState_Init() when initialization fails.
+ * An empty parent file is a successful no-op copy.
+ * Always returns 0 when MS_WINDOWS is defined.
+ */
+PyAPI_FUNC(int) PyUnstable_CopyPerfMapFile(const char* parent_filename) {
+#ifndef MS_WINDOWS
+    if (parent_filename == NULL) {
+        return -1;
+    }
+    FILE *from = fopen(parent_filename, "r");
+    if (!from) {
+        return -1;
+    }
+    if (perf_map_state.perf_map == NULL) {
+        int ret = PyUnstable_PerfMapState_Init();
+        if (ret != 0) {
+            /* Do not leak the parent file handle on this error path. */
+            fclose(from);
+            return ret;
+        }
+    }
+
+    char buf[4096];
+    int result = 0;
+    PyThread_acquire_lock(perf_map_state.map_lock, 1);
+    for (;;) {
+        size_t bytes_read = fread(buf, 1, sizeof(buf), from);
+        if (bytes_read == 0) {
+            /* A zero-length read is a clean end of file unless the stream
+               reports an error.  This also covers empty files and files
+               whose size is an exact multiple of the buffer size. */
+            if (ferror(from)) {
+                result = -1;
+            }
+            break;
+        }
+        size_t bytes_written = fwrite(buf, 1, bytes_read,
+                                      perf_map_state.perf_map);
+        if (bytes_written < bytes_read) {
+            result = -1;
+            break;
+        }
+    }
+    /* Flush once after the copy so whatever was written reaches the file. */
+    if (fflush(perf_map_state.perf_map) != 0) {
+        result = -1;
+    }
+    fclose(from);
+    PyThread_release_lock(perf_map_state.map_lock);
+    return result;
+#else
+    (void)parent_filename;
+    return 0;
+#endif
+}
+
+#ifdef __cplusplus
+}
+#endif
+
static PyMethodDef sys_methods[] = {
/* Might as well keep this in alphabetic order */