diff options
author | Michael Droettboom <mdboom@gmail.com> | 2023-10-04 21:52:28 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-10-04 21:52:28 (GMT) |
commit | e561e9805854980a61967d07869b4ec4205b32c8 (patch) | |
tree | 4cf341fc0bf0091ad1b3e7eb034699b23a4ae851 /Python | |
parent | f7860295b16a402621e209871c8eaeeea16f464e (diff) | |
download | cpython-e561e9805854980a61967d07869b4ec4205b32c8.zip cpython-e561e9805854980a61967d07869b4ec4205b32c8.tar.gz cpython-e561e9805854980a61967d07869b4ec4205b32c8.tar.bz2 |
GH-109329: Add tier 2 stats (GH-109913)
Diffstat (limited to 'Python')
-rw-r--r-- | Python/bytecodes.c | 3 | ||||
-rw-r--r-- | Python/executor.c | 25 | ||||
-rw-r--r-- | Python/executor_cases.c.h | 1 | ||||
-rw-r--r-- | Python/generated_cases.c.h | 2 | ||||
-rw-r--r-- | Python/optimizer.c | 13 | ||||
-rw-r--r-- | Python/specialize.c | 54 |
6 files changed, 88 insertions, 10 deletions
diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 6482252..9b733ce 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -2278,7 +2278,7 @@ dummy_func( // Double-check that the opcode isn't instrumented or something: here->op.code == JUMP_BACKWARD) { - OBJECT_STAT_INC(optimization_attempts); + OPT_STAT_INC(attempts); int optimized = _PyOptimizer_BackEdge(frame, here, next_instr, stack_pointer); ERROR_IF(optimized < 0, error); if (optimized) { @@ -3965,6 +3965,7 @@ dummy_func( frame->prev_instr--; // Back up to just before destination _PyFrame_SetStackPointer(frame, stack_pointer); Py_DECREF(self); + OPT_HIST(trace_uop_execution_counter, trace_run_length_hist); return frame; } diff --git a/Python/executor.c b/Python/executor.c index ac91042..6da3af0 100644 --- a/Python/executor.c +++ b/Python/executor.c @@ -2,6 +2,7 @@ #include "opcode.h" +#include "pycore_bitutils.h" #include "pycore_call.h" #include "pycore_ceval.h" #include "pycore_dict.h" @@ -30,6 +31,16 @@ goto deoptimize; \ } +#ifdef Py_STATS +// Disable these macros that apply to Tier 1 stats when we are in Tier 2 +#undef STAT_INC +#define STAT_INC(opname, name) ((void)0) +#undef STAT_DEC +#define STAT_DEC(opname, name) ((void)0) +#undef CALL_STAT_INC +#define CALL_STAT_INC(name) ((void)0) +#endif + #undef ENABLE_SPECIALIZATION #define ENABLE_SPECIALIZATION 0 @@ -62,12 +73,15 @@ _PyUopExecute(_PyExecutorObject *executor, _PyInterpreterFrame *frame, PyObject CHECK_EVAL_BREAKER(); - OBJECT_STAT_INC(optimization_traces_executed); + OPT_STAT_INC(traces_executed); _Py_CODEUNIT *ip_offset = (_Py_CODEUNIT *)_PyFrame_GetCode(frame)->co_code_adaptive; int pc = 0; int opcode; int oparg; uint64_t operand; +#ifdef Py_STATS + uint64_t trace_uop_execution_counter = 0; +#endif for (;;) { opcode = self->trace[pc].opcode; @@ -81,7 +95,12 @@ _PyUopExecute(_PyExecutorObject *executor, _PyInterpreterFrame *frame, PyObject operand, (int)(stack_pointer - _PyFrame_Stackbase(frame))); pc++; - OBJECT_STAT_INC(optimization_uops_executed); + OPT_STAT_INC(uops_executed); + UOP_EXE_INC(opcode); +#ifdef Py_STATS + trace_uop_execution_counter++; +#endif + switch (opcode) { #include "executor_cases.c.h" @@ -114,6 +133,7 @@ error: // On ERROR_IF we return NULL as the frame. // The caller recovers the frame from tstate->current_frame. DPRINTF(2, "Error: [Opcode %d, operand %" PRIu64 "]\n", opcode, operand); + OPT_HIST(trace_uop_execution_counter, trace_run_length_hist); _PyFrame_SetStackPointer(frame, stack_pointer); Py_DECREF(self); return NULL; @@ -122,6 +142,7 @@ deoptimize: // On DEOPT_IF we just repeat the last instruction. // This presumes nothing was popped from the stack (nor pushed). DPRINTF(2, "DEOPT: [Opcode %d, operand %" PRIu64 "]\n", opcode, operand); + OPT_HIST(trace_uop_execution_counter, trace_run_length_hist); frame->prev_instr--; // Back up to just before destination _PyFrame_SetStackPointer(frame, stack_pointer); Py_DECREF(self); diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index c6eeee4..e2f4f98 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -3289,6 +3289,7 @@ frame->prev_instr--; // Back up to just before destination _PyFrame_SetStackPointer(frame, stack_pointer); Py_DECREF(self); + OPT_HIST(trace_uop_execution_counter, trace_run_length_hist); return frame; break; } diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 221d477..eac1368 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -2988,7 +2988,7 @@ // Double-check that the opcode isn't instrumented or something: here->op.code == JUMP_BACKWARD) { - OBJECT_STAT_INC(optimization_attempts); + OPT_STAT_INC(attempts); int optimized = _PyOptimizer_BackEdge(frame, here, next_instr, stack_pointer); if (optimized < 0) goto error; if (optimized) { diff --git a/Python/optimizer.c b/Python/optimizer.c index fbdbf72..f8796eb 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -461,6 +461,7 @@ translate_bytecode_to_trace( if (trace_length + (n) > max_length) { \ DPRINTF(2, "No room for %s (need %d, got %d)\n", \ (opname), (n), max_length - trace_length); \ + OPT_STAT_INC(trace_too_long); \ goto done; \ } \ reserved = (n); // Keep ADD_TO_TRACE / ADD_TO_STUB honest @@ -472,6 +473,7 @@ translate_bytecode_to_trace( #define TRACE_STACK_PUSH() \ if (trace_stack_depth >= TRACE_STACK_SIZE) { \ DPRINTF(2, "Trace stack overflow\n"); \ + OPT_STAT_INC(trace_stack_overflow); \ ADD_TO_TRACE(_SET_IP, 0, 0); \ goto done; \ } \ @@ -572,6 +574,7 @@ pop_jump_if_bool: ADD_TO_TRACE(_JUMP_TO_TOP, 0, 0); } else { + OPT_STAT_INC(inner_loop); DPRINTF(2, "JUMP_BACKWARD not to top ends trace\n"); } goto done; @@ -638,7 +641,9 @@ pop_jump_if_bool: // LOAD_CONST + _POP_FRAME. if (trace_stack_depth == 0) { DPRINTF(2, "Trace stack underflow\n"); - goto done;} + OPT_STAT_INC(trace_stack_underflow); + goto done; + } } uint32_t orig_oparg = oparg; // For OPARG_TOP/BOTTOM for (int i = 0; i < nuops; i++) { @@ -713,6 +718,7 @@ pop_jump_if_bool: PyUnicode_AsUTF8(new_code->co_qualname), PyUnicode_AsUTF8(new_code->co_filename), new_code->co_firstlineno); + OPT_STAT_INC(recursive_call); ADD_TO_TRACE(_SET_IP, 0, 0); goto done; } @@ -744,6 +750,7 @@ pop_jump_if_bool: break; } DPRINTF(2, "Unsupported opcode %s\n", uop_name(opcode)); + OPT_UNSUPPORTED_OPCODE(opcode); goto done; // Break out of loop } // End default @@ -891,7 +898,8 @@ uop_optimize( // Error or nothing translated return trace_length; } - OBJECT_STAT_INC(optimization_traces_created); + OPT_HIST(trace_length, trace_length_hist); + OPT_STAT_INC(traces_created); char *uop_optimize = Py_GETENV("PYTHONUOPSOPTIMIZE"); if (uop_optimize != NULL && *uop_optimize > '0') { trace_length = _Py_uop_analyze_and_optimize(code, trace, trace_length, curr_stackentries); @@ -901,6 +909,7 @@ uop_optimize( if (executor == NULL) { return -1; } + OPT_HIST(trace_length, optimized_trace_length_hist); executor->base.execute = _PyUopExecute; memcpy(executor->trace, trace, trace_length * sizeof(_PyUOpInstruction)); *exec_ptr = (_PyExecutorObject *)executor; diff --git a/Python/specialize.c b/Python/specialize.c index d9b748c..ff732eb 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -199,10 +199,6 @@ print_object_stats(FILE *out, ObjectStats *stats) fprintf(out, "Object method cache collisions: %" PRIu64 "\n", stats->type_cache_collisions); fprintf(out, "Object method cache dunder hits: %" PRIu64 "\n", stats->type_cache_dunder_hits); fprintf(out, "Object method cache dunder misses: %" PRIu64 "\n", stats->type_cache_dunder_misses); - fprintf(out, "Optimization attempts: %" PRIu64 "\n", stats->optimization_attempts); - fprintf(out, "Optimization traces created: %" PRIu64 "\n", stats->optimization_traces_created); - fprintf(out, "Optimization traces executed: %" PRIu64 "\n", stats->optimization_traces_executed); - fprintf(out, "Optimization uops executed: %" PRIu64 "\n", stats->optimization_uops_executed); } static void @@ -216,12 +212,62 @@ print_gc_stats(FILE *out, GCStats *stats) } static void +print_histogram(FILE *out, const char *name, uint64_t hist[_Py_UOP_HIST_SIZE]) +{ + for (int i = 0; i < _Py_UOP_HIST_SIZE; i++) { + fprintf(out, "%s[%" PRIu64"]: %" PRIu64 "\n", name, (uint64_t)1 << i, hist[i]); + } +} + +static void +print_optimization_stats(FILE *out, OptimizationStats *stats) +{ + fprintf(out, "Optimization attempts: %" PRIu64 "\n", stats->attempts); + fprintf(out, "Optimization traces created: %" PRIu64 "\n", stats->traces_created); + fprintf(out, "Optimization traces executed: %" PRIu64 "\n", stats->traces_executed); + fprintf(out, "Optimization uops executed: %" PRIu64 "\n", stats->uops_executed); + fprintf(out, "Optimization trace stack overflow: %" PRIu64 "\n", stats->trace_stack_overflow); + fprintf(out, "Optimization trace stack underflow: %" PRIu64 "\n", stats->trace_stack_underflow); + fprintf(out, "Optimization trace too long: %" PRIu64 "\n", stats->trace_too_long); + fprintf(out, "Optimization inner loop: %" PRIu64 "\n", stats->inner_loop); + fprintf(out, "Optimization recursive call: %" PRIu64 "\n", stats->recursive_call); + + print_histogram(out, "Trace length", stats->trace_length_hist); + print_histogram(out, "Trace run length", stats->trace_run_length_hist); + print_histogram(out, "Optimized trace length", stats->optimized_trace_length_hist); + + const char* const* names; + for (int i = 0; i < 512; i++) { + if (i < 256) { + names = _PyOpcode_OpName; + } else { + names = _PyOpcode_uop_name; + } + if (stats->opcode[i].execution_count) { + fprintf(out, "uops[%s].execution_count : %" PRIu64 "\n", names[i], stats->opcode[i].execution_count); + } + } + + for (int i = 0; i < 256; i++) { + if (stats->unsupported_opcode[i]) { + fprintf( + out, + "unsupported_opcode[%s].count : %" PRIu64 "\n", + _PyOpcode_OpName[i], + stats->unsupported_opcode[i] + ); + } + } +} + +static void print_stats(FILE *out, PyStats *stats) { print_spec_stats(out, stats->opcode_stats); print_call_stats(out, &stats->call_stats); print_object_stats(out, &stats->object_stats); print_gc_stats(out, stats->gc_stats); + print_optimization_stats(out, &stats->optimization_stats); } void |