diff options
author | Pablo Galindo Salgado <Pablogsal@gmail.com> | 2024-05-05 01:07:29 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-05-05 01:07:29 (GMT) |
commit | 1b22d801b86ed314c4804b19a1fc4b13484e3cea (patch) | |
tree | 9cb5cea113afe83bb1ce106c565396a7bb344691 /Python/perf_trampoline.c | |
parent | 999f0c512281995fb61a0d9eda075fd846e8c505 (diff) | |
download | cpython-1b22d801b86ed314c4804b19a1fc4b13484e3cea.zip cpython-1b22d801b86ed314c4804b19a1fc4b13484e3cea.tar.gz cpython-1b22d801b86ed314c4804b19a1fc4b13484e3cea.tar.bz2 |
gh-118518: Allow perf to work without frame pointers (#112254)
Diffstat (limited to 'Python/perf_trampoline.c')
-rw-r--r-- | Python/perf_trampoline.c | 52 |
1 files changed, 47 insertions, 5 deletions
diff --git a/Python/perf_trampoline.c b/Python/perf_trampoline.c
index 750ba18..f144f7d 100644
--- a/Python/perf_trampoline.c
+++ b/Python/perf_trampoline.c
@@ -143,6 +143,8 @@ any DWARF information available for them).
 #include <sys/mman.h> // mmap()
 #include <sys/types.h>
 #include <unistd.h> // sysconf()
+#include <sys/time.h> // gettimeofday()
+
 
 #if defined(__arm__) || defined(__arm64__) || defined(__aarch64__)
 #define PY_HAVE_INVALIDATE_ICACHE
@@ -187,12 +189,19 @@ struct code_arena_st {
 typedef struct code_arena_st code_arena_t;
 typedef struct trampoline_api_st trampoline_api_t;
 
+enum perf_trampoline_type {
+    PERF_TRAMPOLINE_UNSET = 0,
+    PERF_TRAMPOLINE_TYPE_MAP = 1,
+    PERF_TRAMPOLINE_TYPE_JITDUMP = 2,
+};
+
 #define perf_status _PyRuntime.ceval.perf.status
 #define extra_code_index _PyRuntime.ceval.perf.extra_code_index
 #define perf_code_arena _PyRuntime.ceval.perf.code_arena
 #define trampoline_api _PyRuntime.ceval.perf.trampoline_api
 #define perf_map_file _PyRuntime.ceval.perf.map_file
 #define persist_after_fork _PyRuntime.ceval.perf.persist_after_fork
+#define perf_trampoline_type _PyRuntime.ceval.perf.perf_trampoline_type
 
 static void
 perf_map_write_entry(void *state, const void *code_addr,
@@ -220,6 +229,8 @@ static void*
 perf_map_init_state(void)
 {
     PyUnstable_PerfMapState_Init();
+    trampoline_api.code_padding = 0;
+    perf_trampoline_type = PERF_TRAMPOLINE_TYPE_MAP;
     return NULL;
 }
 
@@ -236,6 +247,30 @@ _PyPerf_Callbacks _Py_perfmap_callbacks = {
     &perf_map_free_state,
 };
 
+
+static size_t round_up(int64_t value, int64_t multiple) {
+    if (multiple == 0) {
+        // Avoid division by zero
+        return value;
+    }
+
+    int64_t remainder = value % multiple;
+    if (remainder == 0) {
+        // Value is already a multiple of 'multiple'
+        return value;
+    }
+
+    // Calculate the difference to the next multiple
+    int64_t difference = multiple - remainder;
+
+    // Add the difference to the value
+    int64_t rounded_up_value = value + difference;
+
+    return rounded_up_value;
+}
+
+// TRAMPOLINE MANAGEMENT API
+
 static int
 new_code_arena(void)
 {
@@ -256,6 +291,7 @@ new_code_arena(void)
     void *start = &_Py_trampoline_func_start;
     void *end = &_Py_trampoline_func_end;
     size_t code_size = end - start;
+    size_t chunk_size = round_up(code_size + trampoline_api.code_padding, 16);
     // TODO: Check the effect of alignment of the code chunks. Initial investigation
     // showed that this has no effect on performance in x86-64 or aarch64 and the current
     // version has the advantage that the unwinder in GDB can unwind across JIT-ed code.
@@ -264,9 +300,9 @@ new_code_arena(void)
     // measurable performance improvement by rounding trampolines up to 32-bit
     // or 64-bit alignment.
 
-    size_t n_copies = mem_size / code_size;
+    size_t n_copies = mem_size / chunk_size;
     for (size_t i = 0; i < n_copies; i++) {
-        memcpy(memory + i * code_size, start, code_size * sizeof(char));
+        memcpy(memory + i * chunk_size, start, code_size * sizeof(char));
     }
     // Some systems may prevent us from creating executable code on the fly.
     int res = mprotect(memory, mem_size, PROT_READ | PROT_EXEC);
@@ -320,16 +356,18 @@ static inline py_trampoline
 code_arena_new_code(code_arena_t *code_arena)
 {
     py_trampoline trampoline = (py_trampoline)code_arena->current_addr;
-    code_arena->size_left -= code_arena->code_size;
-    code_arena->current_addr += code_arena->code_size;
+    size_t total_code_size = round_up(code_arena->code_size + trampoline_api.code_padding, 16);
+    code_arena->size_left -= total_code_size;
+    code_arena->current_addr += total_code_size;
     return trampoline;
 }
 
 static inline py_trampoline
 compile_trampoline(void)
 {
+    size_t total_code_size = round_up(perf_code_arena->code_size + trampoline_api.code_padding, 16);
     if ((perf_code_arena == NULL) ||
-        (perf_code_arena->size_left <= perf_code_arena->code_size)) {
+        (perf_code_arena->size_left <= total_code_size)) {
         if (new_code_arena() < 0) {
             return NULL;
         }
@@ -480,6 +518,7 @@ _PyPerfTrampoline_Fini(void)
     }
     if (perf_status == PERF_STATUS_OK) {
         trampoline_api.free_state(trampoline_api.state);
+        perf_trampoline_type = PERF_TRAMPOLINE_UNSET;
     }
     extra_code_index = -1;
     perf_status = PERF_STATUS_NO_INIT;
@@ -508,6 +547,9 @@ _PyPerfTrampoline_AfterFork_Child(void)
 {
 #ifdef PY_HAVE_PERF_TRAMPOLINE
     if (persist_after_fork) {
+        if (perf_trampoline_type != PERF_TRAMPOLINE_TYPE_MAP) {
+            return PyStatus_Error("Failed to copy perf map file as perf trampoline type is not type map.");
+        }
         _PyPerfTrampoline_Fini();
         char filename[256];
         pid_t parent_pid = getppid();