From e561e9805854980a61967d07869b4ec4205b32c8 Mon Sep 17 00:00:00 2001
From: Michael Droettboom
Date: Wed, 4 Oct 2023 17:52:28 -0400
Subject: GH-109329: Add tier 2 stats (GH-109913)

---
 Include/cpython/pystats.h        |  28 ++-
 Include/internal/pycore_code.h   |  15 ++
 Python/bytecodes.c               |   3 +-
 Python/executor.c                |  25 ++-
 Python/executor_cases.c.h        |   1 +
 Python/generated_cases.c.h       |   2 +-
 Python/optimizer.c               |  13 +-
 Python/specialize.c              |  54 ++++-
 Tools/scripts/summarize_stats.py | 470 +++++++++++++++++++++++++++++----------
 9 files changed, 483 insertions(+), 128 deletions(-)

diff --git a/Include/cpython/pystats.h b/Include/cpython/pystats.h
index 150e16f..056406e 100644
--- a/Include/cpython/pystats.h
+++ b/Include/cpython/pystats.h
@@ -86,10 +86,6 @@ typedef struct _object_stats {
     uint64_t type_cache_dunder_hits;
     uint64_t type_cache_dunder_misses;
     uint64_t type_cache_collisions;
-    uint64_t optimization_attempts;
-    uint64_t optimization_traces_created;
-    uint64_t optimization_traces_executed;
-    uint64_t optimization_uops_executed;
     /* Temporary value used during GC */
     uint64_t object_visits;
 } ObjectStats;
@@ -100,10 +96,34 @@ typedef struct _gc_stats {
     uint64_t objects_collected;
 } GCStats;
 
+typedef struct _uop_stats {
+    uint64_t execution_count;
+} UOpStats;
+
+#define _Py_UOP_HIST_SIZE 32
+
+typedef struct _optimization_stats {
+    uint64_t attempts;
+    uint64_t traces_created;
+    uint64_t traces_executed;
+    uint64_t uops_executed;
+    uint64_t trace_stack_overflow;
+    uint64_t trace_stack_underflow;
+    uint64_t trace_too_long;
+    uint64_t inner_loop;
+    uint64_t recursive_call;
+    UOpStats opcode[512];
+    uint64_t unsupported_opcode[256];
+    uint64_t trace_length_hist[_Py_UOP_HIST_SIZE];
+    uint64_t trace_run_length_hist[_Py_UOP_HIST_SIZE];
+    uint64_t optimized_trace_length_hist[_Py_UOP_HIST_SIZE];
+} OptimizationStats;
+
 typedef struct _stats {
     OpcodeStats opcode_stats[256];
     CallStats call_stats;
     ObjectStats object_stats;
+    OptimizationStats optimization_stats;
     GCStats *gc_stats;
 } PyStats;
diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h
index a77fa11..d31d836 100644
--- a/Include/internal/pycore_code.h
+++ b/Include/internal/pycore_code.h
@@ -282,6 +282,17 @@ extern int _PyStaticCode_Init(PyCodeObject *co);
 #define EVAL_CALL_STAT_INC_IF_FUNCTION(name, callable) \
     do { if (_Py_stats && PyFunction_Check(callable)) _Py_stats->call_stats.eval_calls[name]++; } while (0)
 #define GC_STAT_ADD(gen, name, n) do { if (_Py_stats) _Py_stats->gc_stats[(gen)].name += (n); } while (0)
+#define OPT_STAT_INC(name) do { if (_Py_stats) _Py_stats->optimization_stats.name++; } while (0)
+#define UOP_EXE_INC(opname) do { if (_Py_stats) _Py_stats->optimization_stats.opcode[opname].execution_count++; } while (0)
+#define OPT_UNSUPPORTED_OPCODE(opname) do { if (_Py_stats) _Py_stats->optimization_stats.unsupported_opcode[opname]++; } while (0)
+#define OPT_HIST(length, name) \
+    do { \
+        if (_Py_stats) { \
+            int bucket = _Py_bit_length(length >= 1 ? length - 1 : 0); \
+            bucket = (bucket >= _Py_UOP_HIST_SIZE) ? _Py_UOP_HIST_SIZE - 1 : bucket; \
+            _Py_stats->optimization_stats.name[bucket]++; \
+        } \
+    } while (0)
 
 // Export for '_opcode' shared extension
 PyAPI_FUNC(PyObject*) _Py_GetSpecializationStats(void);
@@ -296,6 +307,10 @@ PyAPI_FUNC(PyObject*) _Py_GetSpecializationStats(void);
 #define EVAL_CALL_STAT_INC(name) ((void)0)
 #define EVAL_CALL_STAT_INC_IF_FUNCTION(name, callable) ((void)0)
 #define GC_STAT_ADD(gen, name, n) ((void)0)
+#define OPT_STAT_INC(name) ((void)0)
+#define UOP_EXE_INC(opname) ((void)0)
+#define OPT_UNSUPPORTED_OPCODE(opname) ((void)0)
+#define OPT_HIST(length, name) ((void)0)
 #endif // !Py_STATS
 
 // Utility functions for reading/writing 32/64-bit values in the inline caches.
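[Note: the following commentary is not part of the patch. The OPT_HIST macro
above buckets a value by powers of two: _Py_bit_length(length - 1) is
ceil(log2(length)) for length >= 1, so bucket i collects values up to 2**i,
clamped into the last bucket. A minimal Python sketch of the same bucketing,
with invented names:

    _Py_UOP_HIST_SIZE = 32

    def opt_hist_bucket(length: int) -> int:
        # bit_length(length - 1) == ceil(log2(length)) for length >= 1
        bucket = (length - 1).bit_length() if length >= 1 else 0
        # Clamp oversized values into the final bucket, as the macro does
        return min(bucket, _Py_UOP_HIST_SIZE - 1)

    assert [opt_hist_bucket(n) for n in (1, 2, 3, 4, 5, 8, 9)] == [0, 1, 2, 2, 3, 3, 4]
]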
diff --git a/Python/bytecodes.c b/Python/bytecodes.c
index 6482252..9b733ce 100644
--- a/Python/bytecodes.c
+++ b/Python/bytecodes.c
@@ -2278,7 +2278,7 @@ dummy_func(
             // Double-check that the opcode isn't instrumented or something:
             here->op.code == JUMP_BACKWARD)
         {
-            OBJECT_STAT_INC(optimization_attempts);
+            OPT_STAT_INC(attempts);
             int optimized = _PyOptimizer_BackEdge(frame, here, next_instr, stack_pointer);
             ERROR_IF(optimized < 0, error);
             if (optimized) {
@@ -3965,6 +3965,7 @@ dummy_func(
             frame->prev_instr--;  // Back up to just before destination
             _PyFrame_SetStackPointer(frame, stack_pointer);
             Py_DECREF(self);
+            OPT_HIST(trace_uop_execution_counter, trace_run_length_hist);
             return frame;
         }
 
diff --git a/Python/executor.c b/Python/executor.c
index ac91042..6da3af0 100644
--- a/Python/executor.c
+++ b/Python/executor.c
@@ -2,6 +2,7 @@
 
 #include "opcode.h"
 
+#include "pycore_bitutils.h"
 #include "pycore_call.h"
 #include "pycore_ceval.h"
 #include "pycore_dict.h"
@@ -30,6 +31,16 @@
         goto deoptimize; \
     }
 
+#ifdef Py_STATS
+// Disable these macros that apply to Tier 1 stats when we are in Tier 2
+#undef STAT_INC
+#define STAT_INC(opname, name) ((void)0)
+#undef STAT_DEC
+#define STAT_DEC(opname, name) ((void)0)
+#undef CALL_STAT_INC
+#define CALL_STAT_INC(name) ((void)0)
+#endif
+
 #undef ENABLE_SPECIALIZATION
 #define ENABLE_SPECIALIZATION 0
 
@@ -62,12 +73,15 @@ _PyUopExecute(_PyExecutorObject *executor, _PyInterpreterFrame *frame, PyObject
 
     CHECK_EVAL_BREAKER();
 
-    OBJECT_STAT_INC(optimization_traces_executed);
+    OPT_STAT_INC(traces_executed);
     _Py_CODEUNIT *ip_offset = (_Py_CODEUNIT *)_PyFrame_GetCode(frame)->co_code_adaptive;
     int pc = 0;
     int opcode;
     int oparg;
     uint64_t operand;
+#ifdef Py_STATS
+    uint64_t trace_uop_execution_counter = 0;
+#endif
 
     for (;;) {
         opcode = self->trace[pc].opcode;
@@ -81,7 +95,12 @@ _PyUopExecute(_PyExecutorObject *executor, _PyInterpreterFrame *frame, PyObject
             operand,
             (int)(stack_pointer - _PyFrame_Stackbase(frame)));
         pc++;
-        OBJECT_STAT_INC(optimization_uops_executed);
+        OPT_STAT_INC(uops_executed);
+        UOP_EXE_INC(opcode);
+#ifdef Py_STATS
+        trace_uop_execution_counter++;
+#endif
+
         switch (opcode) {
 
 #include "executor_cases.c.h"
@@ -114,6 +133,7 @@ error:
     // On ERROR_IF we return NULL as the frame.
     // The caller recovers the frame from tstate->current_frame.
     DPRINTF(2, "Error: [Opcode %d, operand %" PRIu64 "]\n", opcode, operand);
+    OPT_HIST(trace_uop_execution_counter, trace_run_length_hist);
     _PyFrame_SetStackPointer(frame, stack_pointer);
     Py_DECREF(self);
     return NULL;
@@ -122,6 +142,7 @@ deoptimize:
     // On DEOPT_IF we just repeat the last instruction.
     // This presumes nothing was popped from the stack (nor pushed).
     DPRINTF(2, "DEOPT: [Opcode %d, operand %" PRIu64 "]\n", opcode, operand);
+    OPT_HIST(trace_uop_execution_counter, trace_run_length_hist);
     frame->prev_instr--;  // Back up to just before destination
     _PyFrame_SetStackPointer(frame, stack_pointer);
     Py_DECREF(self);
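[Note: the following commentary is not part of the patch. The executor changes
above keep a per-run counter, trace_uop_execution_counter, incremented once per
executed uop; every way a run can end (error, deoptimization, or leaving the
trace through _POP_FRAME) records the final count in trace_run_length_hist.
A rough, runnable Python model of that control flow, with invented names:

    from dataclasses import dataclass

    @dataclass
    class Uop:
        exits: bool = False          # stands in for DEOPT_IF / error exits

    def run_trace(uops, histogram):
        executed = 0                 # trace_uop_execution_counter
        for uop in uops:
            executed += 1            # bumped once per dispatched uop
            if uop.exits:
                break                # DEOPT/error paths leave the loop
        histogram.append(executed)   # every exit path records the run length

    hist = []
    run_trace([Uop(), Uop(), Uop(exits=True)], hist)
    assert hist == [3]
]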
diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h
index c6eeee4..e2f4f98 100644
--- a/Python/executor_cases.c.h
+++ b/Python/executor_cases.c.h
@@ -3289,6 +3289,7 @@
             frame->prev_instr--;  // Back up to just before destination
             _PyFrame_SetStackPointer(frame, stack_pointer);
             Py_DECREF(self);
+            OPT_HIST(trace_uop_execution_counter, trace_run_length_hist);
             return frame;
             break;
         }
 
diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h
index 221d477..eac1368 100644
--- a/Python/generated_cases.c.h
+++ b/Python/generated_cases.c.h
@@ -2988,7 +2988,7 @@
                 // Double-check that the opcode isn't instrumented or something:
                 here->op.code == JUMP_BACKWARD)
             {
-                OBJECT_STAT_INC(optimization_attempts);
+                OPT_STAT_INC(attempts);
                 int optimized = _PyOptimizer_BackEdge(frame, here, next_instr, stack_pointer);
                 if (optimized < 0) goto error;
                 if (optimized) {
 
diff --git a/Python/optimizer.c b/Python/optimizer.c
index fbdbf72..f8796eb 100644
--- a/Python/optimizer.c
+++ b/Python/optimizer.c
@@ -461,6 +461,7 @@ translate_bytecode_to_trace(
         if (trace_length + (n) > max_length) { \
             DPRINTF(2, "No room for %s (need %d, got %d)\n", \
                     (opname), (n), max_length - trace_length); \
+            OPT_STAT_INC(trace_too_long); \
            goto done; \
         } \
         reserved = (n);  // Keep ADD_TO_TRACE / ADD_TO_STUB honest
@@ -472,6 +473,7 @@ translate_bytecode_to_trace(
 #define TRACE_STACK_PUSH() \
     if (trace_stack_depth >= TRACE_STACK_SIZE) { \
         DPRINTF(2, "Trace stack overflow\n"); \
+        OPT_STAT_INC(trace_stack_overflow); \
         ADD_TO_TRACE(_SET_IP, 0, 0); \
         goto done; \
     } \
@@ -572,6 +574,7 @@ pop_jump_if_bool:
                         ADD_TO_TRACE(_JUMP_TO_TOP, 0, 0);
                     }
                     else {
+                        OPT_STAT_INC(inner_loop);
                         DPRINTF(2, "JUMP_BACKWARD not to top ends trace\n");
                     }
                     goto done;
@@ -638,7 +641,9 @@ pop_jump_if_bool:
                             // LOAD_CONST + _POP_FRAME.
                             if (trace_stack_depth == 0) {
                                 DPRINTF(2, "Trace stack underflow\n");
-                                goto done;}
+                                OPT_STAT_INC(trace_stack_underflow);
+                                goto done;
+                            }
                         }
                         uint32_t orig_oparg = oparg;  // For OPARG_TOP/BOTTOM
                         for (int i = 0; i < nuops; i++) {
@@ -713,6 +718,7 @@ pop_jump_if_bool:
                                     PyUnicode_AsUTF8(new_code->co_qualname),
                                     PyUnicode_AsUTF8(new_code->co_filename),
                                     new_code->co_firstlineno);
+                                OPT_STAT_INC(recursive_call);
                                 ADD_TO_TRACE(_SET_IP, 0, 0);
                                 goto done;
                             }
@@ -744,6 +750,7 @@ pop_jump_if_bool:
                     break;
                 }
                 DPRINTF(2, "Unsupported opcode %s\n", uop_name(opcode));
+                OPT_UNSUPPORTED_OPCODE(opcode);
                 goto done;  // Break out of loop
             }  // End default
@@ -891,7 +898,8 @@ uop_optimize(
         // Error or nothing translated
         return trace_length;
     }
-    OBJECT_STAT_INC(optimization_traces_created);
+    OPT_HIST(trace_length, trace_length_hist);
+    OPT_STAT_INC(traces_created);
     char *uop_optimize = Py_GETENV("PYTHONUOPSOPTIMIZE");
     if (uop_optimize != NULL && *uop_optimize > '0') {
         trace_length = _Py_uop_analyze_and_optimize(code, trace, trace_length, curr_stackentries);
@@ -901,6 +909,7 @@ uop_optimize(
     if (executor == NULL) {
         return -1;
     }
+    OPT_HIST(trace_length, optimized_trace_length_hist);
     executor->base.execute = _PyUopExecute;
     memcpy(executor->trace, trace, trace_length * sizeof(_PyUOpInstruction));
     *exec_ptr = (_PyExecutorObject *)executor;
length", stats->optimized_trace_length_hist); + + const char* const* names; + for (int i = 0; i < 512; i++) { + if (i < 256) { + names = _PyOpcode_OpName; + } else { + names = _PyOpcode_uop_name; + } + if (stats->opcode[i].execution_count) { + fprintf(out, "uops[%s].execution_count : %" PRIu64 "\n", names[i], stats->opcode[i].execution_count); + } + } + + for (int i = 0; i < 256; i++) { + if (stats->unsupported_opcode[i]) { + fprintf( + out, + "unsupported_opcode[%s].count : %" PRIu64 "\n", + _PyOpcode_OpName[i], + stats->unsupported_opcode[i] + ); + } + } +} + +static void print_stats(FILE *out, PyStats *stats) { print_spec_stats(out, stats->opcode_stats); print_call_stats(out, &stats->call_stats); print_object_stats(out, &stats->object_stats); print_gc_stats(out, stats->gc_stats); + print_optimization_stats(out, &stats->optimization_stats); } void diff --git a/Tools/scripts/summarize_stats.py b/Tools/scripts/summarize_stats.py index 3b2bdd8..b9cc2f9 100644 --- a/Tools/scripts/summarize_stats.py +++ b/Tools/scripts/summarize_stats.py @@ -13,6 +13,7 @@ import os.path from datetime import date import itertools import sys +import re if os.name == "nt": DEFAULT_DIR = "c:\\temp\\py_stats\\" @@ -21,6 +22,7 @@ else: TOTAL = "specialization.hit", "specialization.miss", "execution_count" + def format_ratio(num, den): """ Format a ratio as a percentage. When the denominator is 0, returns the empty @@ -31,6 +33,7 @@ def format_ratio(num, den): else: return f"{num/den:.01%}" + def percentage_to_float(s): """ Converts a percentage string to a float. The empty string is returned as 0.0 @@ -41,6 +44,7 @@ def percentage_to_float(s): assert s[-1] == "%" return float(s[:-1]) + def join_rows(a_rows, b_rows): """ Joins two tables together, side-by-side, where the first column in each is a @@ -79,40 +83,53 @@ def join_rows(a_rows, b_rows): keys = list(a_data.keys()) + [k for k in b_data.keys() if k not in a_data] return [(k, *a_data.get(k, default), *b_data.get(k, default)) for k in keys] + def calculate_specialization_stats(family_stats, total): rows = [] for key in sorted(family_stats): if key.startswith("specialization.failure_kinds"): continue if key in ("specialization.hit", "specialization.miss"): - label = key[len("specialization."):] + label = key[len("specialization.") :] elif key == "execution_count": continue - elif key in ("specialization.success", "specialization.failure", "specializable"): + elif key in ( + "specialization.success", + "specialization.failure", + "specializable", + ): continue elif key.startswith("pair"): continue else: label = key - rows.append((f"{label:>12}", f"{family_stats[key]:>12}", format_ratio(family_stats[key], total))) + rows.append( + ( + f"{label:>12}", + f"{family_stats[key]:>12}", + format_ratio(family_stats[key], total), + ) + ) return rows + def calculate_specialization_success_failure(family_stats): total_attempts = 0 - for key in ("specialization.success", "specialization.failure"): + for key in ("specialization.success", "specialization.failure"): total_attempts += family_stats.get(key, 0) rows = [] if total_attempts: - for key in ("specialization.success", "specialization.failure"): - label = key[len("specialization."):] + for key in ("specialization.success", "specialization.failure"): + label = key[len("specialization.") :] label = label[0].upper() + label[1:] val = family_stats.get(key, 0) rows.append((label, val, format_ratio(val, total_attempts))) return rows + def calculate_specialization_failure_kinds(name, family_stats, defines): total_failures = 
diff --git a/Tools/scripts/summarize_stats.py b/Tools/scripts/summarize_stats.py
index 3b2bdd8..b9cc2f9 100644
--- a/Tools/scripts/summarize_stats.py
+++ b/Tools/scripts/summarize_stats.py
@@ -13,6 +13,7 @@ import os.path
 from datetime import date
 import itertools
 import sys
+import re
 
 if os.name == "nt":
     DEFAULT_DIR = "c:\\temp\\py_stats\\"
@@ -21,6 +22,7 @@ else:
 
 TOTAL = "specialization.hit", "specialization.miss", "execution_count"
 
+
 def format_ratio(num, den):
     """
     Format a ratio as a percentage. When the denominator is 0, returns the empty
@@ -31,6 +33,7 @@ def format_ratio(num, den):
     else:
         return f"{num/den:.01%}"
 
+
 def percentage_to_float(s):
     """
     Converts a percentage string to a float. The empty string is returned as 0.0
@@ -41,6 +44,7 @@ def percentage_to_float(s):
     assert s[-1] == "%"
     return float(s[:-1])
 
+
 def join_rows(a_rows, b_rows):
     """
     Joins two tables together, side-by-side, where the first column in each is a
@@ -79,40 +83,53 @@ def join_rows(a_rows, b_rows):
     keys = list(a_data.keys()) + [k for k in b_data.keys() if k not in a_data]
     return [(k, *a_data.get(k, default), *b_data.get(k, default)) for k in keys]
 
+
 def calculate_specialization_stats(family_stats, total):
     rows = []
     for key in sorted(family_stats):
         if key.startswith("specialization.failure_kinds"):
             continue
         if key in ("specialization.hit", "specialization.miss"):
-            label = key[len("specialization."):]
+            label = key[len("specialization.") :]
         elif key == "execution_count":
             continue
-        elif key in ("specialization.success", "specialization.failure", "specializable"):
+        elif key in (
+            "specialization.success",
+            "specialization.failure",
+            "specializable",
+        ):
             continue
         elif key.startswith("pair"):
             continue
         else:
             label = key
-        rows.append((f"{label:>12}", f"{family_stats[key]:>12}", format_ratio(family_stats[key], total)))
+        rows.append(
+            (
+                f"{label:>12}",
+                f"{family_stats[key]:>12}",
+                format_ratio(family_stats[key], total),
+            )
+        )
     return rows
 
+
 def calculate_specialization_success_failure(family_stats):
     total_attempts = 0
-    for key in ("specialization.success",  "specialization.failure"):
+    for key in ("specialization.success", "specialization.failure"):
         total_attempts += family_stats.get(key, 0)
     rows = []
     if total_attempts:
-        for key in ("specialization.success",  "specialization.failure"):
-            label = key[len("specialization."):]
+        for key in ("specialization.success", "specialization.failure"):
+            label = key[len("specialization.") :]
             label = label[0].upper() + label[1:]
             val = family_stats.get(key, 0)
             rows.append((label, val, format_ratio(val, total_attempts)))
     return rows
 
+
 def calculate_specialization_failure_kinds(name, family_stats, defines):
     total_failures = family_stats.get("specialization.failure", 0)
-    failure_kinds = [ 0 ] * 40
+    failure_kinds = [0] * 40
     for key in family_stats:
         if not key.startswith("specialization.failure_kind"):
             continue
@@ -125,9 +142,16 @@ def calculate_specialization_failure_kinds(name, family_stats, defines):
     for value, index in failures:
         if not value:
             continue
-        rows.append((kind_to_text(index, defines, name), value, format_ratio(value, total_failures)))
+        rows.append(
+            (
+                kind_to_text(index, defines, name),
+                value,
+                format_ratio(value, total_failures),
+            )
+        )
     return rows
 
+
 def print_specialization_stats(name, family_stats, defines):
     if "specializable" not in family_stats:
         return
@@ -144,7 +168,10 @@ def print_specialization_stats(name, family_stats, defines):
         rows = calculate_specialization_failure_kinds(name, family_stats, defines)
         emit_table(("Failure kind", "Count:", "Ratio:"), rows)
 
-def print_comparative_specialization_stats(name, base_family_stats, head_family_stats, defines):
+
+def print_comparative_specialization_stats(
+    name, base_family_stats, head_family_stats, defines
+):
     if "specializable" not in base_family_stats:
         return
 
@@ -157,21 +184,34 @@ def print_comparative_specialization_stats(name, base_family_stats, head_family_
         head_rows = calculate_specialization_stats(head_family_stats, head_total)
         emit_table(
             ("Kind", "Base Count", "Base Ratio", "Head Count", "Head Ratio"),
-            join_rows(base_rows, head_rows)
+            join_rows(base_rows, head_rows),
         )
         base_rows = calculate_specialization_success_failure(base_family_stats)
         head_rows = calculate_specialization_success_failure(head_family_stats)
         rows = join_rows(base_rows, head_rows)
         if rows:
             print_title("Specialization attempts", 4)
-            emit_table(("", "Base Count:", "Base Ratio:", "Head Count:", "Head Ratio:"), rows)
-        base_rows = calculate_specialization_failure_kinds(name, base_family_stats, defines)
-        head_rows = calculate_specialization_failure_kinds(name, head_family_stats, defines)
         emit_table(
-            ("Failure kind", "Base Count:", "Base Ratio:", "Head Count:", "Head Ratio:"),
-            join_rows(base_rows, head_rows)
+                ("", "Base Count:", "Base Ratio:", "Head Count:", "Head Ratio:"), rows
+            )
+        base_rows = calculate_specialization_failure_kinds(
+            name, base_family_stats, defines
+        )
+        head_rows = calculate_specialization_failure_kinds(
+            name, head_family_stats, defines
+        )
+        emit_table(
+            (
+                "Failure kind",
+                "Base Count:",
+                "Base Ratio:",
+                "Head Count:",
+                "Head Ratio:",
+            ),
+            join_rows(base_rows, head_rows),
         )
 
+
 def gather_stats(input):
     # Note the output of this function must be JSON-serializable
 
stats["_defines"] = get_defines() @@ -211,15 +255,17 @@ def gather_stats(input): else: raise ValueError(f"{input:r} is not a file or directory path") -def extract_opcode_stats(stats): + +def extract_opcode_stats(stats, prefix): opcode_stats = collections.defaultdict(dict) for key, value in stats.items(): - if not key.startswith("opcode"): + if not key.startswith(prefix): continue - name, _, rest = key[7:].partition("]") + name, _, rest = key[len(prefix) + 1 :].partition("]") opcode_stats[name][rest.strip(".")] = value return opcode_stats + def parse_kinds(spec_src, prefix="SPEC_FAIL"): defines = collections.defaultdict(list) start = "#define " + prefix + "_" @@ -227,14 +273,16 @@ def parse_kinds(spec_src, prefix="SPEC_FAIL"): line = line.strip() if not line.startswith(start): continue - line = line[len(start):] + line = line[len(start) :] name, val = line.split() defines[int(val.strip())].append(name.strip()) return defines + def pretty(defname): return defname.replace("_", " ").lower() + def kind_to_text(kind, defines, opname): if kind <= 8: return pretty(defines[kind][0]) @@ -248,9 +296,10 @@ def kind_to_text(kind, defines, opname): opname = "SUBSCR" for name in defines[kind]: if name.startswith(opname): - return pretty(name[len(opname)+1:]) + return pretty(name[len(opname) + 1 :]) return "kind " + str(kind) + def categorized_counts(opcode_stats, specialized_instructions): basic = 0 specialized = 0 @@ -258,7 +307,7 @@ def categorized_counts(opcode_stats, specialized_instructions): for name, opcode_stat in opcode_stats.items(): if "execution_count" not in opcode_stat: continue - count = opcode_stat['execution_count'] + count = opcode_stat["execution_count"] if "specializable" in opcode_stat: not_specialized += count elif name in specialized_instructions: @@ -269,12 +318,13 @@ def categorized_counts(opcode_stats, specialized_instructions): basic += count return basic, not_specialized, specialized + def print_title(name, level=2): - print("#"*level, name) + print("#" * level, name) print() -class Section: +class Section: def __init__(self, title, level=2, summary=None): self.title = title self.level = level @@ -295,12 +345,14 @@ class Section: print("") print() + def to_str(x): if isinstance(x, int): return format(x, ",d") else: return str(x) + def emit_table(header, rows): width = len(header) header_line = "|" @@ -320,11 +372,28 @@ def emit_table(header, rows): print("|", " | ".join(to_str(i) for i in row), "|") print() + +def emit_histogram(title, stats, key, total): + rows = [] + for k, v in stats.items(): + if k.startswith(key): + entry = int(re.match(r".+\[([0-9]+)\]", k).groups()[0]) + rows.append((f"<= {entry}", int(v), format_ratio(int(v), total))) + # Don't include larger buckets with 0 entries + for j in range(len(rows) - 1, -1, -1): + if rows[j][1] != 0: + break + rows = rows[: j + 1] + + print(f"**{title}**\n") + emit_table(("Range", "Count:", "Ratio:"), rows) + + def calculate_execution_counts(opcode_stats, total): counts = [] for name, opcode_stat in opcode_stats.items(): if "execution_count" in opcode_stat: - count = opcode_stat['execution_count'] + count = opcode_stat["execution_count"] miss = 0 if "specializable" not in opcode_stat: miss = opcode_stat.get("specialization.miss") @@ -332,53 +401,61 @@ def calculate_execution_counts(opcode_stats, total): counts.sort(reverse=True) cumulative = 0 rows = [] - for (count, name, miss) in counts: + for count, name, miss in counts: cumulative += count if miss: miss = format_ratio(miss, count) else: miss = "" - rows.append((name, 
@@ -320,11 +372,28 @@ def emit_table(header, rows):
         print("|", " | ".join(to_str(i) for i in row), "|")
     print()
 
+
+def emit_histogram(title, stats, key, total):
+    rows = []
+    for k, v in stats.items():
+        if k.startswith(key):
+            entry = int(re.match(r".+\[([0-9]+)\]", k).groups()[0])
+            rows.append((f"<= {entry}", int(v), format_ratio(int(v), total)))
+    # Don't include larger buckets with 0 entries
+    for j in range(len(rows) - 1, -1, -1):
+        if rows[j][1] != 0:
+            break
+    rows = rows[: j + 1]
+
+    print(f"**{title}**\n")
+    emit_table(("Range", "Count:", "Ratio:"), rows)
+
+
 def calculate_execution_counts(opcode_stats, total):
     counts = []
     for name, opcode_stat in opcode_stats.items():
         if "execution_count" in opcode_stat:
-            count = opcode_stat['execution_count']
+            count = opcode_stat["execution_count"]
             miss = 0
             if "specializable" not in opcode_stat:
                 miss = opcode_stat.get("specialization.miss")
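[Note: the following commentary is not part of the patch. emit_histogram()
above recovers the bucket's upper bound from keys of the form "Name[bound]"
with a regex, then renders it as an inclusive "<=" range. A worked example of
that parse:

    import re

    key, value = "Trace length[8]", 42
    entry = int(re.match(r".+\[([0-9]+)\]", key).groups()[0])
    assert entry == 8      # rendered as the "<= 8" row of the table
]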
@@ -332,53 +401,61 @@ def calculate_execution_counts(opcode_stats, total):
     counts.sort(reverse=True)
     cumulative = 0
     rows = []
-    for (count, name, miss) in counts:
+    for count, name, miss in counts:
         cumulative += count
         if miss:
             miss = format_ratio(miss, count)
         else:
             miss = ""
-        rows.append((name, count, format_ratio(count, total),
-                     format_ratio(cumulative, total), miss))
+        rows.append(
+            (
+                name,
+                count,
+                format_ratio(count, total),
+                format_ratio(cumulative, total),
+                miss,
+            )
+        )
     return rows
 
+
 def emit_execution_counts(opcode_stats, total):
     with Section("Execution counts", summary="execution counts for all instructions"):
         rows = calculate_execution_counts(opcode_stats, total)
-        emit_table(
-            ("Name", "Count:", "Self:", "Cumulative:", "Miss ratio:"),
-            rows
-        )
+        emit_table(("Name", "Count:", "Self:", "Cumulative:", "Miss ratio:"), rows)
+
+
+def _emit_comparative_execution_counts(base_rows, head_rows):
+    base_data = {x[0]: x[1:] for x in base_rows}
+    head_data = {x[0]: x[1:] for x in head_rows}
+    opcodes = base_data.keys() | head_data.keys()
+
+    rows = []
+    default = [0, "0.0%", "0.0%", 0]
+    for opcode in opcodes:
+        base_entry = base_data.get(opcode, default)
+        head_entry = head_data.get(opcode, default)
+        if base_entry[0] == 0:
+            change = 1
+        else:
+            change = (head_entry[0] - base_entry[0]) / base_entry[0]
+        rows.append((opcode, base_entry[0], head_entry[0], f"{change:0.1%}"))
+
+    rows.sort(key=lambda x: abs(percentage_to_float(x[-1])), reverse=True)
+
+    emit_table(("Name", "Base Count:", "Head Count:", "Change:"), rows)
+
 
 def emit_comparative_execution_counts(
-    base_opcode_stats, base_total, head_opcode_stats, head_total
+    base_opcode_stats, base_total, head_opcode_stats, head_total, level=2
 ):
-    with Section("Execution counts", summary="execution counts for all instructions"):
+    with Section(
+        "Execution counts", summary="execution counts for all instructions", level=level
+    ):
         base_rows = calculate_execution_counts(base_opcode_stats, base_total)
         head_rows = calculate_execution_counts(head_opcode_stats, head_total)
-        base_data = dict((x[0], x[1:]) for x in base_rows)
-        head_data = dict((x[0], x[1:]) for x in head_rows)
-        opcodes = set(base_data.keys()) | set(head_data.keys())
-
-        rows = []
-        default = [0, "0.0%", "0.0%", 0]
-        for opcode in opcodes:
-            base_entry = base_data.get(opcode, default)
-            head_entry = head_data.get(opcode, default)
-            if base_entry[0] == 0:
-                change = 1
-            else:
-                change = (head_entry[0] - base_entry[0]) / base_entry[0]
-            rows.append(
-                (opcode, base_entry[0], head_entry[0],
-                 f"{100*change:0.1f}%"))
-
-        rows.sort(key=lambda x: -abs(percentage_to_float(x[-1])))
+        _emit_comparative_execution_counts(base_rows, head_rows)
 
-        emit_table(
-            ("Name", "Base Count:", "Head Count:", "Change:"),
-            rows
-        )
 
 def get_defines():
     spec_path = os.path.join(os.path.dirname(__file__), "../../Python/specialize.c")
@@ -386,12 +463,16 @@ def get_defines():
     defines = parse_kinds(spec_src)
     return defines
 
+
 def emit_specialization_stats(opcode_stats, defines):
     with Section("Specialization stats", summary="specialization stats by family"):
         for name, opcode_stat in opcode_stats.items():
             print_specialization_stats(name, opcode_stat, defines)
 
-def emit_comparative_specialization_stats(base_opcode_stats, head_opcode_stats, defines):
+
+def emit_comparative_specialization_stats(
+    base_opcode_stats, head_opcode_stats, defines
+):
     with Section("Specialization stats", summary="specialization stats by family"):
         opcodes = set(base_opcode_stats.keys()) & set(head_opcode_stats.keys())
         for opcode in opcodes:
@@ -399,6 +480,7 @@ def emit_comparative_specialization_stats(base_opcode_stats, head_opcode_stats,
             print_comparative_specialization_stats(
                 opcode, base_opcode_stats[opcode], head_opcode_stats[opcode], defines
             )
 
+
 def calculate_specialization_effectiveness(
     opcode_stats, total, specialized_instructions
 ):
@@ -411,11 +493,17 @@ calculate_specialization_effectiveness(
         ("Specialized", specialized, format_ratio(specialized, total)),
     ]
 
+
 def emit_specialization_overview(opcode_stats, total, specialized_instructions):
     with Section("Specialization effectiveness"):
-        rows = calculate_specialization_effectiveness(opcode_stats, total, specialized_instructions)
+        rows = calculate_specialization_effectiveness(
+            opcode_stats, total, specialized_instructions
+        )
         emit_table(("Instructions", "Count:", "Ratio:"), rows)
-    for title, field in (("Deferred", "specialization.deferred"), ("Misses", "specialization.miss")):
+    for title, field in (
+        ("Deferred", "specialization.deferred"),
+        ("Misses", "specialization.miss"),
+    ):
         total = 0
         counts = []
         for name, opcode_stat in opcode_stats.items():
@@ -428,11 +516,19 @@ def emit_specialization_overview(opcode_stats, total, specialized_instructions):
         counts.sort(reverse=True)
         if total:
             with Section(f"{title} by instruction", 3):
-                rows = [ (name, count, format_ratio(count, total)) for (count, name) in counts[:10] ]
+                rows = [
+                    (name, count, format_ratio(count, total))
+                    for (count, name) in counts[:10]
+                ]
                 emit_table(("Name", "Count:", "Ratio:"), rows)
 
+
 def emit_comparative_specialization_overview(
-    base_opcode_stats, base_total, head_opcode_stats, head_total, specialized_instructions
+    base_opcode_stats,
+    base_total,
+    head_opcode_stats,
+    head_total,
+    specialized_instructions,
 ):
     with Section("Specialization effectiveness"):
         base_rows = calculate_specialization_effectiveness(
@@ -442,16 +538,26 @@ def emit_comparative_specialization_overview(
             head_opcode_stats, head_total, specialized_instructions
         )
         emit_table(
-            ("Instructions", "Base Count:", "Base Ratio:", "Head Count:", "Head Ratio:"),
-            join_rows(base_rows, head_rows)
+            (
+                "Instructions",
+                "Base Count:",
+                "Base Ratio:",
+                "Head Count:",
+                "Head Ratio:",
+            ),
+            join_rows(base_rows, head_rows),
         )
 
+
 def get_stats_defines():
-    stats_path = os.path.join(os.path.dirname(__file__), "../../Include/cpython/pystats.h")
+    stats_path = os.path.join(
+        os.path.dirname(__file__), "../../Include/cpython/pystats.h"
+    )
     with open(stats_path) as stats_src:
         defines = parse_kinds(stats_src, prefix="EVAL_CALL")
     return defines
 
+
 def calculate_call_stats(stats, defines):
     total = 0
     for key, value in stats.items():
@@ -463,7 +569,7 @@ def calculate_call_stats(stats, defines):
             rows.append((key, value, format_ratio(value, total)))
         elif key.startswith("Calls "):
             name, index = key[:-1].split("[")
-            index =  int(index)
+            index = int(index)
             label = name + " (" + pretty(defines[index][0]) + ")"
             rows.append((label, value, format_ratio(value, total)))
     for key, value in stats.items():
@@ -471,11 +577,13 @@ def calculate_call_stats(stats, defines):
             rows.append((key, value, format_ratio(value, total)))
     return rows
 
+
 def emit_call_stats(stats, defines):
     with Section("Call stats", summary="Inlined calls and frame stats"):
         rows = calculate_call_stats(stats, defines)
         emit_table(("", "Count:", "Ratio:"), rows)
 
+
 def emit_comparative_call_stats(base_stats, head_stats, defines):
     with Section("Call stats", summary="Inlined calls and frame stats"):
         base_rows = calculate_call_stats(base_stats, defines)
@@ -483,15 +591,21 @@ def emit_comparative_call_stats(base_stats, head_stats, defines):
         rows = join_rows(base_rows, head_rows)
         rows.sort(key=lambda x: -percentage_to_float(x[-1]))
         emit_table(
-            ("", "Base Count:", "Base Ratio:", "Head Count:", "Head Ratio:"),
-            rows
+            ("", "Base Count:", "Base Ratio:", "Head Count:", "Head Ratio:"), rows
         )
 
+
 def calculate_object_stats(stats):
     total_materializations = stats.get("Object new values")
-    total_allocations = stats.get("Object allocations") + stats.get("Object allocations from freelist")
-    total_increfs = stats.get("Object interpreter increfs") + stats.get("Object increfs")
-    total_decrefs = stats.get("Object interpreter decrefs") + stats.get("Object decrefs")
+    total_allocations = stats.get("Object allocations") + stats.get(
+        "Object allocations from freelist"
+    )
+    total_increfs = stats.get("Object interpreter increfs") + stats.get(
+        "Object increfs"
+    )
+    total_decrefs = stats.get("Object interpreter decrefs") + stats.get(
+        "Object decrefs"
+    )
     rows = []
     for key, value in stats.items():
         if key.startswith("Object"):
@@ -499,9 +613,9 @@ def calculate_object_stats(stats):
                 ratio = format_ratio(value, total_materializations)
             elif "allocations" in key:
                 ratio = format_ratio(value, total_allocations)
-            elif "increfs" in key:
+            elif "increfs" in key:
                 ratio = format_ratio(value, total_increfs)
-            elif "decrefs" in key:
+            elif "decrefs" in key:
                 ratio = format_ratio(value, total_decrefs)
             else:
                 ratio = ""
@@ -510,6 +624,7 @@ def calculate_object_stats(stats):
             rows.append((label, value, ratio))
     return rows
 
+
 def calculate_gc_stats(stats):
     gc_stats = []
     for key, value in stats.items():
@@ -526,40 +641,58 @@ def calculate_gc_stats(stats):
         for (i, gen) in enumerate(gc_stats)
     ]
 
+
 def emit_object_stats(stats):
     with Section("Object stats", summary="allocations, frees and dict materializatons"):
         rows = calculate_object_stats(stats)
-        emit_table(("", "Count:", "Ratio:"), rows)
+        emit_table(("", "Count:", "Ratio:"), rows)
 
+
 def emit_comparative_object_stats(base_stats, head_stats):
     with Section("Object stats", summary="allocations, frees and dict materializatons"):
         base_rows = calculate_object_stats(base_stats)
         head_rows = calculate_object_stats(head_stats)
-        emit_table(("", "Base Count:", "Base Ratio:", "Head Count:", "Head Ratio:"), join_rows(base_rows, head_rows))
+        emit_table(
+            ("", "Base Count:", "Base Ratio:", "Head Count:", "Head Ratio:"),
+            join_rows(base_rows, head_rows),
+        )
 
+
 def emit_gc_stats(stats):
     with Section("GC stats", summary="GC collections and effectiveness"):
         rows = calculate_gc_stats(stats)
-        emit_table(("Generation:", "Collections:", "Objects collected:", "Object visits:"), rows)
+        emit_table(
+            ("Generation:", "Collections:", "Objects collected:", "Object visits:"),
+            rows,
+        )
 
+
 def emit_comparative_gc_stats(base_stats, head_stats):
     with Section("GC stats", summary="GC collections and effectiveness"):
         base_rows = calculate_gc_stats(base_stats)
         head_rows = calculate_gc_stats(head_stats)
         emit_table(
-            ("Generation:",
-             "Base collections:", "Head collections:",
-             "Base objects collected:", "Head objects collected:",
-             "Base object visits:", "Head object visits:"),
-            join_rows(base_rows, head_rows))
+            (
+                "Generation:",
+                "Base collections:",
+                "Head collections:",
+                "Base objects collected:",
+                "Head objects collected:",
+                "Base object visits:",
+                "Head object visits:",
+            ),
+            join_rows(base_rows, head_rows),
+        )
 
+
 def get_total(opcode_stats):
     total = 0
     for opcode_stat in opcode_stats.values():
         if "execution_count" in opcode_stat:
-            total += opcode_stat['execution_count']
+            total += opcode_stat["execution_count"]
     return total
 
+
 def emit_pair_counts(opcode_stats, total):
     pair_counts = []
     for name_i, opcode_stat in opcode_stats.items():
@@ -572,15 +705,22 @@ def emit_pair_counts(opcode_stats, total):
     pair_counts.sort(reverse=True)
     cumulative = 0
     rows = []
-    for (count, pair) in itertools.islice(pair_counts, 100):
+    for count, pair in itertools.islice(pair_counts, 100):
         name_i, name_j = pair
         cumulative += count
-        rows.append((f"{name_i} {name_j}", count, format_ratio(count, total),
-                     format_ratio(cumulative, total)))
-    emit_table(("Pair", "Count:", "Self:", "Cumulative:"),
-        rows
-    )
-    with Section("Predecessor/Successor Pairs", summary="Top 5 predecessors and successors of each opcode"):
+        rows.append(
+            (
+                f"{name_i} {name_j}",
+                count,
+                format_ratio(count, total),
+                format_ratio(cumulative, total),
+            )
+        )
+    emit_table(("Pair", "Count:", "Self:", "Cumulative:"), rows)
+    with Section(
+        "Predecessor/Successor Pairs",
+        summary="Top 5 predecessors and successors of each opcode",
+    ):
         predecessors = collections.defaultdict(collections.Counter)
         successors = collections.defaultdict(collections.Counter)
         total_predecessors = collections.Counter()
@@ -598,38 +738,135 @@ def emit_pair_counts(opcode_stats, total):
             continue
         pred_rows = succ_rows = ()
         if total1:
-            pred_rows = [(pred, count, f"{count/total1:.1%}")
-                         for (pred, count) in predecessors[name].most_common(5)]
+            pred_rows = [
+                (pred, count, f"{count/total1:.1%}")
+                for (pred, count) in predecessors[name].most_common(5)
+            ]
         if total2:
-            succ_rows = [(succ, count, f"{count/total2:.1%}")
-                         for (succ, count) in successors[name].most_common(5)]
+            succ_rows = [
+                (succ, count, f"{count/total2:.1%}")
+                for (succ, count) in successors[name].most_common(5)
+            ]
         with Section(name, 3, f"Successors and predecessors for {name}"):
-            emit_table(("Predecessors", "Count:", "Percentage:"),
-                pred_rows
-            )
-            emit_table(("Successors", "Count:", "Percentage:"),
-                succ_rows
-            )
+            emit_table(("Predecessors", "Count:", "Percentage:"), pred_rows)
+            emit_table(("Successors", "Count:", "Percentage:"), succ_rows)
+
+
+def calculate_optimization_stats(stats):
+    attempts = stats["Optimization attempts"]
+    created = stats["Optimization traces created"]
+    executed = stats["Optimization traces executed"]
+    uops = stats["Optimization uops executed"]
+    trace_stack_overflow = stats["Optimization trace stack overflow"]
+    trace_stack_underflow = stats["Optimization trace stack underflow"]
+    trace_too_long = stats["Optimization trace too long"]
+    inner_loop = stats["Optimization inner loop"]
+    recursive_call = stats["Optimization recursive call"]
+
+    return [
+        ("Optimization attempts", attempts, ""),
+        ("Traces created", created, format_ratio(created, attempts)),
+        ("Traces executed", executed, ""),
+        ("Uops executed", uops, int(uops / (executed or 1))),
+        ("Trace stack overflow", trace_stack_overflow, ""),
+        ("Trace stack underflow", trace_stack_underflow, ""),
+        ("Trace too long", trace_too_long, ""),
+        ("Inner loop found", inner_loop, ""),
+        ("Recursive call", recursive_call, ""),
+    ]
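[Note: the following commentary is not part of the patch. Two of the values
built by calculate_optimization_stats() above are worth spelling out: the
"Traces created" ratio is taken against attempts, and the "Uops executed" row
carries the mean number of uops per executed trace, with `executed or 1`
guarding against division by zero. A worked example with made-up numbers:

    attempts, created, executed, uops = 1000, 250, 40_000, 3_200_000
    assert f"{created/attempts:.01%}" == "25.0%"   # "Traces created" ratio
    assert int(uops / (executed or 1)) == 80       # mean uops per trace run
]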
+
+
+def calculate_uop_execution_counts(opcode_stats):
+    total = 0
+    counts = []
+    for name, opcode_stat in opcode_stats.items():
+        if "execution_count" in opcode_stat:
+            count = opcode_stat["execution_count"]
+            counts.append((count, name))
+            total += count
+    counts.sort(reverse=True)
+    cumulative = 0
+    rows = []
+    for count, name in counts:
+        cumulative += count
+        rows.append(
+            (name, count, format_ratio(count, total), format_ratio(cumulative, total))
+        )
+    return rows
+
+
+def emit_optimization_stats(stats):
+    if "Optimization attempts" not in stats:
+        return
+
+    uop_stats = extract_opcode_stats(stats, "uops")
+
+    with Section(
+        "Optimization (Tier 2) stats", summary="statistics about the Tier 2 optimizer"
+    ):
+        with Section("Overall stats", level=3):
+            rows = calculate_optimization_stats(stats)
+            emit_table(("", "Count:", "Ratio:"), rows)
+
+        emit_histogram(
+            "Trace length histogram",
+            stats,
+            "Trace length",
+            stats["Optimization traces created"],
+        )
+        emit_histogram(
+            "Optimized trace length histogram",
+            stats,
+            "Optimized trace length",
+            stats["Optimization traces created"],
+        )
+        emit_histogram(
+            "Trace run length histogram",
+            stats,
+            "Trace run length",
+            stats["Optimization traces executed"],
+        )
+
+        with Section("Uop stats", level=3):
+            rows = calculate_uop_execution_counts(uop_stats)
+            emit_table(("Uop", "Count:", "Self:", "Cumulative:"), rows)
+
+        with Section("Unsupported opcodes", level=3):
+            unsupported_opcodes = extract_opcode_stats(stats, "unsupported_opcode")
+            data = []
+            for opcode, entry in unsupported_opcodes.items():
+                data.append((entry["count"], opcode))
+            data.sort(reverse=True)
+            rows = [(x[1], x[0]) for x in data]
+            emit_table(("Opcode", "Count"), rows)
+
+
+def emit_comparative_optimization_stats(base_stats, head_stats):
+    print("## Comparative optimization stats not implemented\n\n")
+
 
 def output_single_stats(stats):
-    opcode_stats = extract_opcode_stats(stats)
+    opcode_stats = extract_opcode_stats(stats, "opcode")
     total = get_total(opcode_stats)
     emit_execution_counts(opcode_stats, total)
     emit_pair_counts(opcode_stats, total)
     emit_specialization_stats(opcode_stats, stats["_defines"])
-    emit_specialization_overview(opcode_stats, total, stats["_specialized_instructions"])
+    emit_specialization_overview(
+        opcode_stats, total, stats["_specialized_instructions"]
+    )
     emit_call_stats(stats, stats["_stats_defines"])
     emit_object_stats(stats)
     emit_gc_stats(stats)
+    emit_optimization_stats(stats)
     with Section("Meta stats", summary="Meta statistics"):
-        emit_table(("", "Count:"), [('Number of data files', stats['__nfiles__'])])
+        emit_table(("", "Count:"), [("Number of data files", stats["__nfiles__"])])
 
 
 def output_comparative_stats(base_stats, head_stats):
-    base_opcode_stats = extract_opcode_stats(base_stats)
+    base_opcode_stats = extract_opcode_stats(base_stats, "opcode")
     base_total = get_total(base_opcode_stats)
-    head_opcode_stats = extract_opcode_stats(head_stats)
+    head_opcode_stats = extract_opcode_stats(head_stats, "opcode")
     head_total = get_total(head_opcode_stats)
 
     emit_comparative_execution_counts(
@@ -639,12 +876,17 @@ def output_comparative_stats(base_stats, head_stats):
         base_opcode_stats, head_opcode_stats, head_stats["_defines"]
     )
     emit_comparative_specialization_overview(
-        base_opcode_stats, base_total, head_opcode_stats, head_total,
-        head_stats["_specialized_instructions"]
+        base_opcode_stats,
+        base_total,
+        head_opcode_stats,
+        head_total,
+        head_stats["_specialized_instructions"],
     )
     emit_comparative_call_stats(base_stats, head_stats, head_stats["_stats_defines"])
     emit_comparative_object_stats(base_stats, head_stats)
     emit_comparative_gc_stats(base_stats, head_stats)
+    emit_comparative_optimization_stats(base_stats, head_stats)
+
 
 def output_stats(inputs, json_output=None):
     if len(inputs) == 1:
@@ -654,9 +896,7 @@ def output_stats(inputs, json_output=None):
         output_single_stats(stats)
     elif len(inputs) == 2:
         if json_output is not None:
-            raise ValueError(
-                "Can not output to JSON when there are multiple inputs"
-            )
+            raise ValueError("Can not output to JSON when there are multiple inputs")
 
         base_stats = gather_stats(inputs[0])
         head_stats = gather_stats(inputs[1])
@@ -665,6 +905,7 @@ def output_stats(inputs, json_output=None):
     print("---")
     print("Stats gathered on:", date.today())
 
+
 def main():
     parser = argparse.ArgumentParser(description="Summarize pystats results")
@@ -680,14 +921,14 @@ def main():
         If one source is provided, its stats are printed.
         If two sources are provided, comparative stats are printed.
         Default is {DEFAULT_DIR}.
-        """
+        """,
     )
 
     parser.add_argument(
         "--json-output",
        nargs="?",
         type=argparse.FileType("w"),
-        help="Output complete raw results to the given JSON file."
+        help="Output complete raw results to the given JSON file.",
     )
 
     args = parser.parse_args()
@@ -697,5 +938,6 @@ def main():
 
     output_stats(args.inputs, json_output=args.json_output)
 
+
 if __name__ == "__main__":
     main()
--
cgit v0.12
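[Note: the following commentary is not part of the patch. For context on how
these tables are produced: with a CPython interpreter configured with
--enable-pystats, each run dumps counter files under /tmp/py_stats (or
c:\temp\py_stats on Windows, per DEFAULT_DIR above), and the script aggregates
whatever it finds there. Typical invocations, with illustrative paths:

    python Tools/scripts/summarize_stats.py                      # summarize DEFAULT_DIR
    python Tools/scripts/summarize_stats.py --json-output out.json
    python Tools/scripts/summarize_stats.py base.json head.json  # comparative tables

With two inputs the script emits the comparative Base/Head tables, except for
the Tier 2 section, which this commit leaves as a stub
(emit_comparative_optimization_stats).
]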