summaryrefslogtreecommitdiffstats
path: root/Python/perf_trampoline.c
diff options
context:
space:
mode:
authorPablo Galindo Salgado <Pablogsal@gmail.com>2024-05-05 01:07:29 (GMT)
committerGitHub <noreply@github.com>2024-05-05 01:07:29 (GMT)
commit1b22d801b86ed314c4804b19a1fc4b13484e3cea (patch)
tree9cb5cea113afe83bb1ce106c565396a7bb344691 /Python/perf_trampoline.c
parent999f0c512281995fb61a0d9eda075fd846e8c505 (diff)
downloadcpython-1b22d801b86ed314c4804b19a1fc4b13484e3cea.zip
cpython-1b22d801b86ed314c4804b19a1fc4b13484e3cea.tar.gz
cpython-1b22d801b86ed314c4804b19a1fc4b13484e3cea.tar.bz2
gh-118518: Allow perf to work without frame pointers (#112254)
Diffstat (limited to 'Python/perf_trampoline.c')
-rw-r--r--Python/perf_trampoline.c52
1 files changed, 47 insertions, 5 deletions
diff --git a/Python/perf_trampoline.c b/Python/perf_trampoline.c
index 750ba18..f144f7d 100644
--- a/Python/perf_trampoline.c
+++ b/Python/perf_trampoline.c
@@ -143,6 +143,8 @@ any DWARF information available for them).
#include <sys/mman.h> // mmap()
#include <sys/types.h>
#include <unistd.h> // sysconf()
+#include <sys/time.h> // gettimeofday()
+
#if defined(__arm__) || defined(__arm64__) || defined(__aarch64__)
#define PY_HAVE_INVALIDATE_ICACHE
@@ -187,12 +189,19 @@ struct code_arena_st {
typedef struct code_arena_st code_arena_t;
typedef struct trampoline_api_st trampoline_api_t;
+enum perf_trampoline_type {
+ PERF_TRAMPOLINE_UNSET = 0,
+ PERF_TRAMPOLINE_TYPE_MAP = 1,
+ PERF_TRAMPOLINE_TYPE_JITDUMP = 2,
+};
+
#define perf_status _PyRuntime.ceval.perf.status
#define extra_code_index _PyRuntime.ceval.perf.extra_code_index
#define perf_code_arena _PyRuntime.ceval.perf.code_arena
#define trampoline_api _PyRuntime.ceval.perf.trampoline_api
#define perf_map_file _PyRuntime.ceval.perf.map_file
#define persist_after_fork _PyRuntime.ceval.perf.persist_after_fork
+#define perf_trampoline_type _PyRuntime.ceval.perf.perf_trampoline_type
static void
perf_map_write_entry(void *state, const void *code_addr,
@@ -220,6 +229,8 @@ static void*
perf_map_init_state(void)
{
PyUnstable_PerfMapState_Init();
+ trampoline_api.code_padding = 0;
+ perf_trampoline_type = PERF_TRAMPOLINE_TYPE_MAP;
return NULL;
}
@@ -236,6 +247,30 @@ _PyPerf_Callbacks _Py_perfmap_callbacks = {
&perf_map_free_state,
};
+
+static size_t round_up(int64_t value, int64_t multiple) {
+ if (multiple == 0) {
+ // Avoid division by zero
+ return value;
+ }
+
+ int64_t remainder = value % multiple;
+ if (remainder == 0) {
+ // Value is already a multiple of 'multiple'
+ return value;
+ }
+
+ // Calculate the difference to the next multiple
+ int64_t difference = multiple - remainder;
+
+ // Add the difference to the value
+ int64_t rounded_up_value = value + difference;
+
+ return rounded_up_value;
+}
+
+// TRAMPOLINE MANAGEMENT API
+
static int
new_code_arena(void)
{
@@ -256,6 +291,7 @@ new_code_arena(void)
void *start = &_Py_trampoline_func_start;
void *end = &_Py_trampoline_func_end;
size_t code_size = end - start;
+ size_t chunk_size = round_up(code_size + trampoline_api.code_padding, 16);
// TODO: Check the effect of alignment of the code chunks. Initial investigation
// showed that this has no effect on performance in x86-64 or aarch64 and the current
// version has the advantage that the unwinder in GDB can unwind across JIT-ed code.
@@ -264,9 +300,9 @@ new_code_arena(void)
// measurable performance improvement by rounding trampolines up to 32-bit
// or 64-bit alignment.
- size_t n_copies = mem_size / code_size;
+ size_t n_copies = mem_size / chunk_size;
for (size_t i = 0; i < n_copies; i++) {
- memcpy(memory + i * code_size, start, code_size * sizeof(char));
+ memcpy(memory + i * chunk_size, start, code_size * sizeof(char));
}
// Some systems may prevent us from creating executable code on the fly.
int res = mprotect(memory, mem_size, PROT_READ | PROT_EXEC);
@@ -320,16 +356,18 @@ static inline py_trampoline
code_arena_new_code(code_arena_t *code_arena)
{
py_trampoline trampoline = (py_trampoline)code_arena->current_addr;
- code_arena->size_left -= code_arena->code_size;
- code_arena->current_addr += code_arena->code_size;
+ size_t total_code_size = round_up(code_arena->code_size + trampoline_api.code_padding, 16);
+ code_arena->size_left -= total_code_size;
+ code_arena->current_addr += total_code_size;
return trampoline;
}
static inline py_trampoline
compile_trampoline(void)
{
+ size_t total_code_size = round_up(perf_code_arena->code_size + trampoline_api.code_padding, 16);
if ((perf_code_arena == NULL) ||
- (perf_code_arena->size_left <= perf_code_arena->code_size)) {
+ (perf_code_arena->size_left <= total_code_size)) {
if (new_code_arena() < 0) {
return NULL;
}
@@ -480,6 +518,7 @@ _PyPerfTrampoline_Fini(void)
}
if (perf_status == PERF_STATUS_OK) {
trampoline_api.free_state(trampoline_api.state);
+ perf_trampoline_type = PERF_TRAMPOLINE_UNSET;
}
extra_code_index = -1;
perf_status = PERF_STATUS_NO_INIT;
@@ -508,6 +547,9 @@ _PyPerfTrampoline_AfterFork_Child(void)
{
#ifdef PY_HAVE_PERF_TRAMPOLINE
if (persist_after_fork) {
+ if (perf_trampoline_type != PERF_TRAMPOLINE_TYPE_MAP) {
+ return PyStatus_Error("Failed to copy perf map file as perf trampoline type is not type map.");
+ }
_PyPerfTrampoline_Fini();
char filename[256];
pid_t parent_pid = getppid();