 Include/cpython/pystats.h        |  28
 Include/internal/pycore_code.h   |  15
 Python/bytecodes.c               |   3
 Python/executor.c                |  25
 Python/executor_cases.c.h        |   1
 Python/generated_cases.c.h       |   2
 Python/optimizer.c               |  13
 Python/specialize.c              |  54
 Tools/scripts/summarize_stats.py | 470
 9 files changed, 483 insertions(+), 128 deletions(-)
diff --git a/Include/cpython/pystats.h b/Include/cpython/pystats.h
index 150e16f..056406e 100644
--- a/Include/cpython/pystats.h
+++ b/Include/cpython/pystats.h
@@ -86,10 +86,6 @@ typedef struct _object_stats {
uint64_t type_cache_dunder_hits;
uint64_t type_cache_dunder_misses;
uint64_t type_cache_collisions;
- uint64_t optimization_attempts;
- uint64_t optimization_traces_created;
- uint64_t optimization_traces_executed;
- uint64_t optimization_uops_executed;
/* Temporary value used during GC */
uint64_t object_visits;
} ObjectStats;
@@ -100,10 +96,34 @@ typedef struct _gc_stats {
uint64_t objects_collected;
} GCStats;
+typedef struct _uop_stats {
+ uint64_t execution_count;
+} UOpStats;
+
+#define _Py_UOP_HIST_SIZE 32
+
+typedef struct _optimization_stats {
+ uint64_t attempts;
+ uint64_t traces_created;
+ uint64_t traces_executed;
+ uint64_t uops_executed;
+ uint64_t trace_stack_overflow;
+ uint64_t trace_stack_underflow;
+ uint64_t trace_too_long;
+ uint64_t inner_loop;
+ uint64_t recursive_call;
+ UOpStats opcode[512];
+ uint64_t unsupported_opcode[256];
+ uint64_t trace_length_hist[_Py_UOP_HIST_SIZE];
+ uint64_t trace_run_length_hist[_Py_UOP_HIST_SIZE];
+ uint64_t optimized_trace_length_hist[_Py_UOP_HIST_SIZE];
+} OptimizationStats;
+
typedef struct _stats {
OpcodeStats opcode_stats[256];
CallStats call_stats;
ObjectStats object_stats;
+ OptimizationStats optimization_stats;
GCStats *gc_stats;
} PyStats;
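
The new OptimizationStats table is sized at 512 entries so one index space covers both ordinary opcodes that appear in traces (values below 256) and Tier 2 uops (256 and up); print_optimization_stats in specialize.c picks the name table accordingly. A minimal sketch of that split, assuming _PyOpcode_uop_name is indexed by the full opcode value as the printing code implies:

    # Hypothetical helper mirroring the name lookup in print_optimization_stats:
    # slots below 256 are plain opcodes, 256-511 are Tier 2 uops.
    def uop_display_name(index, opcode_names, uop_names):
        names = opcode_names if index < 256 else uop_names
        return names[index]
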
diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h
index a77fa11..d31d836 100644
--- a/Include/internal/pycore_code.h
+++ b/Include/internal/pycore_code.h
@@ -282,6 +282,17 @@ extern int _PyStaticCode_Init(PyCodeObject *co);
#define EVAL_CALL_STAT_INC_IF_FUNCTION(name, callable) \
do { if (_Py_stats && PyFunction_Check(callable)) _Py_stats->call_stats.eval_calls[name]++; } while (0)
#define GC_STAT_ADD(gen, name, n) do { if (_Py_stats) _Py_stats->gc_stats[(gen)].name += (n); } while (0)
+#define OPT_STAT_INC(name) do { if (_Py_stats) _Py_stats->optimization_stats.name++; } while (0)
+#define UOP_EXE_INC(opname) do { if (_Py_stats) _Py_stats->optimization_stats.opcode[opname].execution_count++; } while (0)
+#define OPT_UNSUPPORTED_OPCODE(opname) do { if (_Py_stats) _Py_stats->optimization_stats.unsupported_opcode[opname]++; } while (0)
+#define OPT_HIST(length, name) \
+ do { \
+ if (_Py_stats) { \
+ int bucket = _Py_bit_length(length >= 1 ? length - 1 : 0); \
+ bucket = (bucket >= _Py_UOP_HIST_SIZE) ? _Py_UOP_HIST_SIZE - 1 : bucket; \
+ _Py_stats->optimization_stats.name[bucket]++; \
+ } \
+ } while (0)
// Export for '_opcode' shared extension
PyAPI_FUNC(PyObject*) _Py_GetSpecializationStats(void);
@@ -296,6 +307,10 @@ PyAPI_FUNC(PyObject*) _Py_GetSpecializationStats(void);
#define EVAL_CALL_STAT_INC(name) ((void)0)
#define EVAL_CALL_STAT_INC_IF_FUNCTION(name, callable) ((void)0)
#define GC_STAT_ADD(gen, name, n) ((void)0)
+#define OPT_STAT_INC(name) ((void)0)
+#define UOP_EXE_INC(opname) ((void)0)
+#define OPT_UNSUPPORTED_OPCODE(opname) ((void)0)
+#define OPT_HIST(length, name) ((void)0)
#endif // !Py_STATS
// Utility functions for reading/writing 32/64-bit values in the inline caches.
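
OPT_HIST buckets a length into power-of-two histogram slots: the bucket index is the bit length of length - 1, clamped to the last of the _Py_UOP_HIST_SIZE slots, so bucket i collects lengths up to 2**i. A minimal Python model, assuming int.bit_length() matches _Py_bit_length():

    # Sketch of OPT_HIST's bucketing: lengths 1 and 2 land in buckets 0 and 1,
    # 3-4 in bucket 2, 5-8 in bucket 3, and so on; bucket i holds lengths <= 2**i.
    _PY_UOP_HIST_SIZE = 32

    def hist_bucket(length):
        bucket = (length - 1).bit_length() if length >= 1 else 0
        return min(bucket, _PY_UOP_HIST_SIZE - 1)

    assert [hist_bucket(n) for n in (1, 2, 3, 4, 5, 8, 9)] == [0, 1, 2, 2, 3, 3, 4]
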
diff --git a/Python/bytecodes.c b/Python/bytecodes.c
index 6482252..9b733ce 100644
--- a/Python/bytecodes.c
+++ b/Python/bytecodes.c
@@ -2278,7 +2278,7 @@ dummy_func(
// Double-check that the opcode isn't instrumented or something:
here->op.code == JUMP_BACKWARD)
{
- OBJECT_STAT_INC(optimization_attempts);
+ OPT_STAT_INC(attempts);
int optimized = _PyOptimizer_BackEdge(frame, here, next_instr, stack_pointer);
ERROR_IF(optimized < 0, error);
if (optimized) {
@@ -3965,6 +3965,7 @@ dummy_func(
frame->prev_instr--; // Back up to just before destination
_PyFrame_SetStackPointer(frame, stack_pointer);
Py_DECREF(self);
+ OPT_HIST(trace_uop_execution_counter, trace_run_length_hist);
return frame;
}
diff --git a/Python/executor.c b/Python/executor.c
index ac91042..6da3af0 100644
--- a/Python/executor.c
+++ b/Python/executor.c
@@ -2,6 +2,7 @@
#include "opcode.h"
+#include "pycore_bitutils.h"
#include "pycore_call.h"
#include "pycore_ceval.h"
#include "pycore_dict.h"
@@ -30,6 +31,16 @@
goto deoptimize; \
}
+#ifdef Py_STATS
+// Disable these macros that apply to Tier 1 stats when we are in Tier 2
+#undef STAT_INC
+#define STAT_INC(opname, name) ((void)0)
+#undef STAT_DEC
+#define STAT_DEC(opname, name) ((void)0)
+#undef CALL_STAT_INC
+#define CALL_STAT_INC(name) ((void)0)
+#endif
+
#undef ENABLE_SPECIALIZATION
#define ENABLE_SPECIALIZATION 0
@@ -62,12 +73,15 @@ _PyUopExecute(_PyExecutorObject *executor, _PyInterpreterFrame *frame, PyObject
CHECK_EVAL_BREAKER();
- OBJECT_STAT_INC(optimization_traces_executed);
+ OPT_STAT_INC(traces_executed);
_Py_CODEUNIT *ip_offset = (_Py_CODEUNIT *)_PyFrame_GetCode(frame)->co_code_adaptive;
int pc = 0;
int opcode;
int oparg;
uint64_t operand;
+#ifdef Py_STATS
+ uint64_t trace_uop_execution_counter = 0;
+#endif
for (;;) {
opcode = self->trace[pc].opcode;
@@ -81,7 +95,12 @@ _PyUopExecute(_PyExecutorObject *executor, _PyInterpreterFrame *frame, PyObject
operand,
(int)(stack_pointer - _PyFrame_Stackbase(frame)));
pc++;
- OBJECT_STAT_INC(optimization_uops_executed);
+ OPT_STAT_INC(uops_executed);
+ UOP_EXE_INC(opcode);
+#ifdef Py_STATS
+ trace_uop_execution_counter++;
+#endif
+
switch (opcode) {
#include "executor_cases.c.h"
@@ -114,6 +133,7 @@ error:
// On ERROR_IF we return NULL as the frame.
// The caller recovers the frame from tstate->current_frame.
DPRINTF(2, "Error: [Opcode %d, operand %" PRIu64 "]\n", opcode, operand);
+ OPT_HIST(trace_uop_execution_counter, trace_run_length_hist);
_PyFrame_SetStackPointer(frame, stack_pointer);
Py_DECREF(self);
return NULL;
@@ -122,6 +142,7 @@ deoptimize:
// On DEOPT_IF we just repeat the last instruction.
// This presumes nothing was popped from the stack (nor pushed).
DPRINTF(2, "DEOPT: [Opcode %d, operand %" PRIu64 "]\n", opcode, operand);
+ OPT_HIST(trace_uop_execution_counter, trace_run_length_hist);
frame->prev_instr--; // Back up to just before destination
_PyFrame_SetStackPointer(frame, stack_pointer);
Py_DECREF(self);
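
Inside the Tier 2 interpreter, the Tier 1 macros STAT_INC, STAT_DEC and CALL_STAT_INC are stubbed out so uop execution does not double-count into the Tier 1 tables, while trace_uop_execution_counter tracks how many uops each trace run executes and is folded into trace_run_length_hist on every exit path (error, deopt, and the _POP_FRAME return). A rough model of that accounting, with hypothetical names:

    # Rough model of the per-run accounting: each uop bumps the run counter
    # (OPT_STAT_INC(uops_executed) / UOP_EXE_INC in the real code), and the
    # run length is bucketed on exit via OPT_HIST(..., trace_run_length_hist).
    def run_trace(trace, run_length_hist):
        executed = 0
        for uop in trace:
            executed += 1
        bucket = min((executed - 1).bit_length() if executed >= 1 else 0, 31)
        run_length_hist[bucket] += 1
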
diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h
index c6eeee4..e2f4f98 100644
--- a/Python/executor_cases.c.h
+++ b/Python/executor_cases.c.h
@@ -3289,6 +3289,7 @@
frame->prev_instr--; // Back up to just before destination
_PyFrame_SetStackPointer(frame, stack_pointer);
Py_DECREF(self);
+ OPT_HIST(trace_uop_execution_counter, trace_run_length_hist);
return frame;
break;
}
diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h
index 221d477..eac1368 100644
--- a/Python/generated_cases.c.h
+++ b/Python/generated_cases.c.h
@@ -2988,7 +2988,7 @@
// Double-check that the opcode isn't instrumented or something:
here->op.code == JUMP_BACKWARD)
{
- OBJECT_STAT_INC(optimization_attempts);
+ OPT_STAT_INC(attempts);
int optimized = _PyOptimizer_BackEdge(frame, here, next_instr, stack_pointer);
if (optimized < 0) goto error;
if (optimized) {
diff --git a/Python/optimizer.c b/Python/optimizer.c
index fbdbf72..f8796eb 100644
--- a/Python/optimizer.c
+++ b/Python/optimizer.c
@@ -461,6 +461,7 @@ translate_bytecode_to_trace(
if (trace_length + (n) > max_length) { \
DPRINTF(2, "No room for %s (need %d, got %d)\n", \
(opname), (n), max_length - trace_length); \
+ OPT_STAT_INC(trace_too_long); \
goto done; \
} \
reserved = (n); // Keep ADD_TO_TRACE / ADD_TO_STUB honest
@@ -472,6 +473,7 @@ translate_bytecode_to_trace(
#define TRACE_STACK_PUSH() \
if (trace_stack_depth >= TRACE_STACK_SIZE) { \
DPRINTF(2, "Trace stack overflow\n"); \
+ OPT_STAT_INC(trace_stack_overflow); \
ADD_TO_TRACE(_SET_IP, 0, 0); \
goto done; \
} \
@@ -572,6 +574,7 @@ pop_jump_if_bool:
ADD_TO_TRACE(_JUMP_TO_TOP, 0, 0);
}
else {
+ OPT_STAT_INC(inner_loop);
DPRINTF(2, "JUMP_BACKWARD not to top ends trace\n");
}
goto done;
@@ -638,7 +641,9 @@ pop_jump_if_bool:
// LOAD_CONST + _POP_FRAME.
if (trace_stack_depth == 0) {
DPRINTF(2, "Trace stack underflow\n");
- goto done;}
+ OPT_STAT_INC(trace_stack_underflow);
+ goto done;
+ }
}
uint32_t orig_oparg = oparg; // For OPARG_TOP/BOTTOM
for (int i = 0; i < nuops; i++) {
@@ -713,6 +718,7 @@ pop_jump_if_bool:
PyUnicode_AsUTF8(new_code->co_qualname),
PyUnicode_AsUTF8(new_code->co_filename),
new_code->co_firstlineno);
+ OPT_STAT_INC(recursive_call);
ADD_TO_TRACE(_SET_IP, 0, 0);
goto done;
}
@@ -744,6 +750,7 @@ pop_jump_if_bool:
break;
}
DPRINTF(2, "Unsupported opcode %s\n", uop_name(opcode));
+ OPT_UNSUPPORTED_OPCODE(opcode);
goto done; // Break out of loop
} // End default
@@ -891,7 +898,8 @@ uop_optimize(
// Error or nothing translated
return trace_length;
}
- OBJECT_STAT_INC(optimization_traces_created);
+ OPT_HIST(trace_length, trace_length_hist);
+ OPT_STAT_INC(traces_created);
char *uop_optimize = Py_GETENV("PYTHONUOPSOPTIMIZE");
if (uop_optimize != NULL && *uop_optimize > '0') {
trace_length = _Py_uop_analyze_and_optimize(code, trace, trace_length, curr_stackentries);
@@ -901,6 +909,7 @@ uop_optimize(
if (executor == NULL) {
return -1;
}
+ OPT_HIST(trace_length, optimized_trace_length_hist);
executor->base.execute = _PyUopExecute;
memcpy(executor->trace, trace, trace_length * sizeof(_PyUOpInstruction));
*exec_ptr = (_PyExecutorObject *)executor;
diff --git a/Python/specialize.c b/Python/specialize.c
index d9b748c..ff732eb 100644
--- a/Python/specialize.c
+++ b/Python/specialize.c
@@ -199,10 +199,6 @@ print_object_stats(FILE *out, ObjectStats *stats)
fprintf(out, "Object method cache collisions: %" PRIu64 "\n", stats->type_cache_collisions);
fprintf(out, "Object method cache dunder hits: %" PRIu64 "\n", stats->type_cache_dunder_hits);
fprintf(out, "Object method cache dunder misses: %" PRIu64 "\n", stats->type_cache_dunder_misses);
- fprintf(out, "Optimization attempts: %" PRIu64 "\n", stats->optimization_attempts);
- fprintf(out, "Optimization traces created: %" PRIu64 "\n", stats->optimization_traces_created);
- fprintf(out, "Optimization traces executed: %" PRIu64 "\n", stats->optimization_traces_executed);
- fprintf(out, "Optimization uops executed: %" PRIu64 "\n", stats->optimization_uops_executed);
}
static void
@@ -216,12 +212,62 @@ print_gc_stats(FILE *out, GCStats *stats)
}
static void
+print_histogram(FILE *out, const char *name, uint64_t hist[_Py_UOP_HIST_SIZE])
+{
+ for (int i = 0; i < _Py_UOP_HIST_SIZE; i++) {
+ fprintf(out, "%s[%" PRIu64"]: %" PRIu64 "\n", name, (uint64_t)1 << i, hist[i]);
+ }
+}
+
+static void
+print_optimization_stats(FILE *out, OptimizationStats *stats)
+{
+ fprintf(out, "Optimization attempts: %" PRIu64 "\n", stats->attempts);
+ fprintf(out, "Optimization traces created: %" PRIu64 "\n", stats->traces_created);
+ fprintf(out, "Optimization traces executed: %" PRIu64 "\n", stats->traces_executed);
+ fprintf(out, "Optimization uops executed: %" PRIu64 "\n", stats->uops_executed);
+ fprintf(out, "Optimization trace stack overflow: %" PRIu64 "\n", stats->trace_stack_overflow);
+ fprintf(out, "Optimization trace stack underflow: %" PRIu64 "\n", stats->trace_stack_underflow);
+ fprintf(out, "Optimization trace too long: %" PRIu64 "\n", stats->trace_too_long);
+ fprintf(out, "Optimization inner loop: %" PRIu64 "\n", stats->inner_loop);
+ fprintf(out, "Optimization recursive call: %" PRIu64 "\n", stats->recursive_call);
+
+ print_histogram(out, "Trace length", stats->trace_length_hist);
+ print_histogram(out, "Trace run length", stats->trace_run_length_hist);
+ print_histogram(out, "Optimized trace length", stats->optimized_trace_length_hist);
+
+ const char* const* names;
+ for (int i = 0; i < 512; i++) {
+ if (i < 256) {
+ names = _PyOpcode_OpName;
+ } else {
+ names = _PyOpcode_uop_name;
+ }
+ if (stats->opcode[i].execution_count) {
+ fprintf(out, "uops[%s].execution_count : %" PRIu64 "\n", names[i], stats->opcode[i].execution_count);
+ }
+ }
+
+ for (int i = 0; i < 256; i++) {
+ if (stats->unsupported_opcode[i]) {
+ fprintf(
+ out,
+ "unsupported_opcode[%s].count : %" PRIu64 "\n",
+ _PyOpcode_OpName[i],
+ stats->unsupported_opcode[i]
+ );
+ }
+ }
+}
+
+static void
print_stats(FILE *out, PyStats *stats)
{
print_spec_stats(out, stats->opcode_stats);
print_call_stats(out, &stats->call_stats);
print_object_stats(out, &stats->object_stats);
print_gc_stats(out, stats->gc_stats);
+ print_optimization_stats(out, &stats->optimization_stats);
}
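
print_histogram labels each bucket with its inclusive upper bound, 1 << i, which is exactly what the summarize script later parses back out of the brackets. A hedged rendering of the output shape, with hypothetical counts:

    # Python rendering of print_histogram's line format (counts hypothetical):
    #   Trace length[1]: 0
    #   Trace length[2]: 3
    #   Trace length[4]: 17
    def render_histogram(name, hist):
        return [f"{name}[{1 << i}]: {count}" for i, count in enumerate(hist)]
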
void
diff --git a/Tools/scripts/summarize_stats.py b/Tools/scripts/summarize_stats.py
index 3b2bdd8..b9cc2f9 100644
--- a/Tools/scripts/summarize_stats.py
+++ b/Tools/scripts/summarize_stats.py
@@ -13,6 +13,7 @@ import os.path
from datetime import date
import itertools
import sys
+import re
if os.name == "nt":
DEFAULT_DIR = "c:\\temp\\py_stats\\"
@@ -21,6 +22,7 @@ else:
TOTAL = "specialization.hit", "specialization.miss", "execution_count"
+
def format_ratio(num, den):
"""
Format a ratio as a percentage. When the denominator is 0, returns the empty
@@ -31,6 +33,7 @@ def format_ratio(num, den):
else:
return f"{num/den:.01%}"
+
def percentage_to_float(s):
"""
Converts a percentage string to a float. The empty string is returned as 0.0
@@ -41,6 +44,7 @@ def percentage_to_float(s):
assert s[-1] == "%"
return float(s[:-1])
+
def join_rows(a_rows, b_rows):
"""
Joins two tables together, side-by-side, where the first column in each is a
@@ -79,40 +83,53 @@ def join_rows(a_rows, b_rows):
keys = list(a_data.keys()) + [k for k in b_data.keys() if k not in a_data]
return [(k, *a_data.get(k, default), *b_data.get(k, default)) for k in keys]
+
def calculate_specialization_stats(family_stats, total):
rows = []
for key in sorted(family_stats):
if key.startswith("specialization.failure_kinds"):
continue
if key in ("specialization.hit", "specialization.miss"):
- label = key[len("specialization."):]
+ label = key[len("specialization.") :]
elif key == "execution_count":
continue
- elif key in ("specialization.success", "specialization.failure", "specializable"):
+ elif key in (
+ "specialization.success",
+ "specialization.failure",
+ "specializable",
+ ):
continue
elif key.startswith("pair"):
continue
else:
label = key
- rows.append((f"{label:>12}", f"{family_stats[key]:>12}", format_ratio(family_stats[key], total)))
+ rows.append(
+ (
+ f"{label:>12}",
+ f"{family_stats[key]:>12}",
+ format_ratio(family_stats[key], total),
+ )
+ )
return rows
+
def calculate_specialization_success_failure(family_stats):
total_attempts = 0
- for key in ("specialization.success", "specialization.failure"):
+ for key in ("specialization.success", "specialization.failure"):
total_attempts += family_stats.get(key, 0)
rows = []
if total_attempts:
- for key in ("specialization.success", "specialization.failure"):
- label = key[len("specialization."):]
+ for key in ("specialization.success", "specialization.failure"):
+ label = key[len("specialization.") :]
label = label[0].upper() + label[1:]
val = family_stats.get(key, 0)
rows.append((label, val, format_ratio(val, total_attempts)))
return rows
+
def calculate_specialization_failure_kinds(name, family_stats, defines):
total_failures = family_stats.get("specialization.failure", 0)
- failure_kinds = [ 0 ] * 40
+ failure_kinds = [0] * 40
for key in family_stats:
if not key.startswith("specialization.failure_kind"):
continue
@@ -125,9 +142,16 @@ def calculate_specialization_failure_kinds(name, family_stats, defines):
for value, index in failures:
if not value:
continue
- rows.append((kind_to_text(index, defines, name), value, format_ratio(value, total_failures)))
+ rows.append(
+ (
+ kind_to_text(index, defines, name),
+ value,
+ format_ratio(value, total_failures),
+ )
+ )
return rows
+
def print_specialization_stats(name, family_stats, defines):
if "specializable" not in family_stats:
return
@@ -144,7 +168,10 @@ def print_specialization_stats(name, family_stats, defines):
rows = calculate_specialization_failure_kinds(name, family_stats, defines)
emit_table(("Failure kind", "Count:", "Ratio:"), rows)
-def print_comparative_specialization_stats(name, base_family_stats, head_family_stats, defines):
+
+def print_comparative_specialization_stats(
+ name, base_family_stats, head_family_stats, defines
+):
if "specializable" not in base_family_stats:
return
@@ -157,21 +184,34 @@ def print_comparative_specialization_stats(name, base_family_stats, head_family_
head_rows = calculate_specialization_stats(head_family_stats, head_total)
emit_table(
("Kind", "Base Count", "Base Ratio", "Head Count", "Head Ratio"),
- join_rows(base_rows, head_rows)
+ join_rows(base_rows, head_rows),
)
base_rows = calculate_specialization_success_failure(base_family_stats)
head_rows = calculate_specialization_success_failure(head_family_stats)
rows = join_rows(base_rows, head_rows)
if rows:
print_title("Specialization attempts", 4)
- emit_table(("", "Base Count:", "Base Ratio:", "Head Count:", "Head Ratio:"), rows)
- base_rows = calculate_specialization_failure_kinds(name, base_family_stats, defines)
- head_rows = calculate_specialization_failure_kinds(name, head_family_stats, defines)
emit_table(
- ("Failure kind", "Base Count:", "Base Ratio:", "Head Count:", "Head Ratio:"),
- join_rows(base_rows, head_rows)
+ ("", "Base Count:", "Base Ratio:", "Head Count:", "Head Ratio:"), rows
+ )
+ base_rows = calculate_specialization_failure_kinds(
+ name, base_family_stats, defines
+ )
+ head_rows = calculate_specialization_failure_kinds(
+ name, head_family_stats, defines
+ )
+ emit_table(
+ (
+ "Failure kind",
+ "Base Count:",
+ "Base Ratio:",
+ "Head Count:",
+ "Head Ratio:",
+ ),
+ join_rows(base_rows, head_rows),
)
+
def gather_stats(input):
# Note the output of this function must be JSON-serializable
@@ -179,7 +219,9 @@ def gather_stats(input):
with open(input, "r") as fd:
stats = json.load(fd)
- stats["_stats_defines"] = {int(k): v for k, v in stats["_stats_defines"].items()}
+ stats["_stats_defines"] = {
+ int(k): v for k, v in stats["_stats_defines"].items()
+ }
stats["_defines"] = {int(k): v for k, v in stats["_defines"].items()}
return stats
@@ -191,18 +233,20 @@ def gather_stats(input):
try:
key, value = line.split(":")
except ValueError:
- print(f"Unparsable line: '{line.strip()}' in {filename}", file=sys.stderr)
+ print(
+ f"Unparsable line: '{line.strip()}' in {filename}",
+ file=sys.stderr,
+ )
continue
key = key.strip()
value = int(value)
stats[key] += value
- stats['__nfiles__'] += 1
+ stats["__nfiles__"] += 1
import opcode
stats["_specialized_instructions"] = [
- op for op in opcode._specialized_opmap.keys()
- if "__" not in op
+ op for op in opcode._specialized_opmap.keys() if "__" not in op
]
stats["_stats_defines"] = get_stats_defines()
stats["_defines"] = get_defines()
@@ -211,15 +255,17 @@ def gather_stats(input):
else:
raise ValueError(f"{input:r} is not a file or directory path")
-def extract_opcode_stats(stats):
+
+def extract_opcode_stats(stats, prefix):
opcode_stats = collections.defaultdict(dict)
for key, value in stats.items():
- if not key.startswith("opcode"):
+ if not key.startswith(prefix):
continue
- name, _, rest = key[7:].partition("]")
+ name, _, rest = key[len(prefix) + 1 :].partition("]")
opcode_stats[name][rest.strip(".")] = value
return opcode_stats
+
def parse_kinds(spec_src, prefix="SPEC_FAIL"):
defines = collections.defaultdict(list)
start = "#define " + prefix + "_"
@@ -227,14 +273,16 @@ def parse_kinds(spec_src, prefix="SPEC_FAIL"):
line = line.strip()
if not line.startswith(start):
continue
- line = line[len(start):]
+ line = line[len(start) :]
name, val = line.split()
defines[int(val.strip())].append(name.strip())
return defines
+
def pretty(defname):
return defname.replace("_", " ").lower()
+
def kind_to_text(kind, defines, opname):
if kind <= 8:
return pretty(defines[kind][0])
@@ -248,9 +296,10 @@ def kind_to_text(kind, defines, opname):
opname = "SUBSCR"
for name in defines[kind]:
if name.startswith(opname):
- return pretty(name[len(opname)+1:])
+ return pretty(name[len(opname) + 1 :])
return "kind " + str(kind)
+
def categorized_counts(opcode_stats, specialized_instructions):
basic = 0
specialized = 0
@@ -258,7 +307,7 @@ def categorized_counts(opcode_stats, specialized_instructions):
for name, opcode_stat in opcode_stats.items():
if "execution_count" not in opcode_stat:
continue
- count = opcode_stat['execution_count']
+ count = opcode_stat["execution_count"]
if "specializable" in opcode_stat:
not_specialized += count
elif name in specialized_instructions:
@@ -269,12 +318,13 @@ def categorized_counts(opcode_stats, specialized_instructions):
basic += count
return basic, not_specialized, specialized
+
def print_title(name, level=2):
- print("#"*level, name)
+ print("#" * level, name)
print()
-class Section:
+class Section:
def __init__(self, title, level=2, summary=None):
self.title = title
self.level = level
@@ -295,12 +345,14 @@ class Section:
print("</details>")
print()
+
def to_str(x):
if isinstance(x, int):
return format(x, ",d")
else:
return str(x)
+
def emit_table(header, rows):
width = len(header)
header_line = "|"
@@ -320,11 +372,28 @@ def emit_table(header, rows):
print("|", " | ".join(to_str(i) for i in row), "|")
print()
+
+def emit_histogram(title, stats, key, total):
+ rows = []
+ for k, v in stats.items():
+ if k.startswith(key):
+ entry = int(re.match(r".+\[([0-9]+)\]", k).groups()[0])
+ rows.append((f"<= {entry}", int(v), format_ratio(int(v), total)))
+ # Don't include larger buckets with 0 entries
+ for j in range(len(rows) - 1, -1, -1):
+ if rows[j][1] != 0:
+ break
+ rows = rows[: j + 1]
+
+ print(f"**{title}**\n")
+ emit_table(("Range", "Count:", "Ratio:"), rows)
+
+
def calculate_execution_counts(opcode_stats, total):
counts = []
for name, opcode_stat in opcode_stats.items():
if "execution_count" in opcode_stat:
- count = opcode_stat['execution_count']
+ count = opcode_stat["execution_count"]
miss = 0
if "specializable" not in opcode_stat:
miss = opcode_stat.get("specialization.miss")
@@ -332,53 +401,61 @@ def calculate_execution_counts(opcode_stats, total):
counts.sort(reverse=True)
cumulative = 0
rows = []
- for (count, name, miss) in counts:
+ for count, name, miss in counts:
cumulative += count
if miss:
miss = format_ratio(miss, count)
else:
miss = ""
- rows.append((name, count, format_ratio(count, total),
- format_ratio(cumulative, total), miss))
+ rows.append(
+ (
+ name,
+ count,
+ format_ratio(count, total),
+ format_ratio(cumulative, total),
+ miss,
+ )
+ )
return rows
+
def emit_execution_counts(opcode_stats, total):
with Section("Execution counts", summary="execution counts for all instructions"):
rows = calculate_execution_counts(opcode_stats, total)
- emit_table(
- ("Name", "Count:", "Self:", "Cumulative:", "Miss ratio:"),
- rows
- )
+ emit_table(("Name", "Count:", "Self:", "Cumulative:", "Miss ratio:"), rows)
+
+
+def _emit_comparative_execution_counts(base_rows, head_rows):
+ base_data = {x[0]: x[1:] for x in base_rows}
+ head_data = {x[0]: x[1:] for x in head_rows}
+ opcodes = base_data.keys() | head_data.keys()
+
+ rows = []
+ default = [0, "0.0%", "0.0%", 0]
+ for opcode in opcodes:
+ base_entry = base_data.get(opcode, default)
+ head_entry = head_data.get(opcode, default)
+ if base_entry[0] == 0:
+ change = 1
+ else:
+ change = (head_entry[0] - base_entry[0]) / base_entry[0]
+ rows.append((opcode, base_entry[0], head_entry[0], f"{change:0.1%}"))
+
+ rows.sort(key=lambda x: abs(percentage_to_float(x[-1])), reverse=True)
+
+ emit_table(("Name", "Base Count:", "Head Count:", "Change:"), rows)
+
def emit_comparative_execution_counts(
- base_opcode_stats, base_total, head_opcode_stats, head_total
+ base_opcode_stats, base_total, head_opcode_stats, head_total, level=2
):
- with Section("Execution counts", summary="execution counts for all instructions"):
+ with Section(
+ "Execution counts", summary="execution counts for all instructions", level=level
+ ):
base_rows = calculate_execution_counts(base_opcode_stats, base_total)
head_rows = calculate_execution_counts(head_opcode_stats, head_total)
- base_data = dict((x[0], x[1:]) for x in base_rows)
- head_data = dict((x[0], x[1:]) for x in head_rows)
- opcodes = set(base_data.keys()) | set(head_data.keys())
-
- rows = []
- default = [0, "0.0%", "0.0%", 0]
- for opcode in opcodes:
- base_entry = base_data.get(opcode, default)
- head_entry = head_data.get(opcode, default)
- if base_entry[0] == 0:
- change = 1
- else:
- change = (head_entry[0] - base_entry[0]) / base_entry[0]
- rows.append(
- (opcode, base_entry[0], head_entry[0],
- f"{100*change:0.1f}%"))
-
- rows.sort(key=lambda x: -abs(percentage_to_float(x[-1])))
+ _emit_comparative_execution_counts(base_rows, head_rows)
- emit_table(
- ("Name", "Base Count:", "Head Count:", "Change:"),
- rows
- )
def get_defines():
spec_path = os.path.join(os.path.dirname(__file__), "../../Python/specialize.c")
@@ -386,12 +463,16 @@ def get_defines():
defines = parse_kinds(spec_src)
return defines
+
def emit_specialization_stats(opcode_stats, defines):
with Section("Specialization stats", summary="specialization stats by family"):
for name, opcode_stat in opcode_stats.items():
print_specialization_stats(name, opcode_stat, defines)
-def emit_comparative_specialization_stats(base_opcode_stats, head_opcode_stats, defines):
+
+def emit_comparative_specialization_stats(
+ base_opcode_stats, head_opcode_stats, defines
+):
with Section("Specialization stats", summary="specialization stats by family"):
opcodes = set(base_opcode_stats.keys()) & set(head_opcode_stats.keys())
for opcode in opcodes:
@@ -399,6 +480,7 @@ def emit_comparative_specialization_stats(base_opcode_stats, head_opcode_stats,
opcode, base_opcode_stats[opcode], head_opcode_stats[opcode], defines
)
+
def calculate_specialization_effectiveness(
opcode_stats, total, specialized_instructions
):
@@ -411,11 +493,17 @@ def calculate_specialization_effectiveness(
("Specialized", specialized, format_ratio(specialized, total)),
]
+
def emit_specialization_overview(opcode_stats, total, specialized_instructions):
with Section("Specialization effectiveness"):
- rows = calculate_specialization_effectiveness(opcode_stats, total, specialized_instructions)
+ rows = calculate_specialization_effectiveness(
+ opcode_stats, total, specialized_instructions
+ )
emit_table(("Instructions", "Count:", "Ratio:"), rows)
- for title, field in (("Deferred", "specialization.deferred"), ("Misses", "specialization.miss")):
+ for title, field in (
+ ("Deferred", "specialization.deferred"),
+ ("Misses", "specialization.miss"),
+ ):
total = 0
counts = []
for name, opcode_stat in opcode_stats.items():
@@ -428,11 +516,19 @@ def emit_specialization_overview(opcode_stats, total, specialized_instructions):
counts.sort(reverse=True)
if total:
with Section(f"{title} by instruction", 3):
- rows = [ (name, count, format_ratio(count, total)) for (count, name) in counts[:10] ]
+ rows = [
+ (name, count, format_ratio(count, total))
+ for (count, name) in counts[:10]
+ ]
emit_table(("Name", "Count:", "Ratio:"), rows)
+
def emit_comparative_specialization_overview(
- base_opcode_stats, base_total, head_opcode_stats, head_total, specialized_instructions
+ base_opcode_stats,
+ base_total,
+ head_opcode_stats,
+ head_total,
+ specialized_instructions,
):
with Section("Specialization effectiveness"):
base_rows = calculate_specialization_effectiveness(
@@ -442,16 +538,26 @@ def emit_comparative_specialization_overview(
head_opcode_stats, head_total, specialized_instructions
)
emit_table(
- ("Instructions", "Base Count:", "Base Ratio:", "Head Count:", "Head Ratio:"),
- join_rows(base_rows, head_rows)
+ (
+ "Instructions",
+ "Base Count:",
+ "Base Ratio:",
+ "Head Count:",
+ "Head Ratio:",
+ ),
+ join_rows(base_rows, head_rows),
)
+
def get_stats_defines():
- stats_path = os.path.join(os.path.dirname(__file__), "../../Include/cpython/pystats.h")
+ stats_path = os.path.join(
+ os.path.dirname(__file__), "../../Include/cpython/pystats.h"
+ )
with open(stats_path) as stats_src:
defines = parse_kinds(stats_src, prefix="EVAL_CALL")
return defines
+
def calculate_call_stats(stats, defines):
total = 0
for key, value in stats.items():
@@ -463,7 +569,7 @@ def calculate_call_stats(stats, defines):
rows.append((key, value, format_ratio(value, total)))
elif key.startswith("Calls "):
name, index = key[:-1].split("[")
- index = int(index)
+ index = int(index)
label = name + " (" + pretty(defines[index][0]) + ")"
rows.append((label, value, format_ratio(value, total)))
for key, value in stats.items():
@@ -471,11 +577,13 @@ def calculate_call_stats(stats, defines):
rows.append((key, value, format_ratio(value, total)))
return rows
+
def emit_call_stats(stats, defines):
with Section("Call stats", summary="Inlined calls and frame stats"):
rows = calculate_call_stats(stats, defines)
emit_table(("", "Count:", "Ratio:"), rows)
+
def emit_comparative_call_stats(base_stats, head_stats, defines):
with Section("Call stats", summary="Inlined calls and frame stats"):
base_rows = calculate_call_stats(base_stats, defines)
@@ -483,15 +591,21 @@ def emit_comparative_call_stats(base_stats, head_stats, defines):
rows = join_rows(base_rows, head_rows)
rows.sort(key=lambda x: -percentage_to_float(x[-1]))
emit_table(
- ("", "Base Count:", "Base Ratio:", "Head Count:", "Head Ratio:"),
- rows
+ ("", "Base Count:", "Base Ratio:", "Head Count:", "Head Ratio:"), rows
)
+
def calculate_object_stats(stats):
total_materializations = stats.get("Object new values")
- total_allocations = stats.get("Object allocations") + stats.get("Object allocations from freelist")
- total_increfs = stats.get("Object interpreter increfs") + stats.get("Object increfs")
- total_decrefs = stats.get("Object interpreter decrefs") + stats.get("Object decrefs")
+ total_allocations = stats.get("Object allocations") + stats.get(
+ "Object allocations from freelist"
+ )
+ total_increfs = stats.get("Object interpreter increfs") + stats.get(
+ "Object increfs"
+ )
+ total_decrefs = stats.get("Object interpreter decrefs") + stats.get(
+ "Object decrefs"
+ )
rows = []
for key, value in stats.items():
if key.startswith("Object"):
@@ -499,9 +613,9 @@ def calculate_object_stats(stats):
ratio = format_ratio(value, total_materializations)
elif "allocations" in key:
ratio = format_ratio(value, total_allocations)
- elif "increfs" in key:
+ elif "increfs" in key:
ratio = format_ratio(value, total_increfs)
- elif "decrefs" in key:
+ elif "decrefs" in key:
ratio = format_ratio(value, total_decrefs)
else:
ratio = ""
@@ -510,6 +624,7 @@ def calculate_object_stats(stats):
rows.append((label, value, ratio))
return rows
+
def calculate_gc_stats(stats):
gc_stats = []
for key, value in stats.items():
@@ -526,40 +641,58 @@ def calculate_gc_stats(stats):
for (i, gen) in enumerate(gc_stats)
]
+
def emit_object_stats(stats):
with Section("Object stats", summary="allocations, frees and dict materializatons"):
rows = calculate_object_stats(stats)
- emit_table(("", "Count:", "Ratio:"), rows)
+ emit_table(("", "Count:", "Ratio:"), rows)
+
def emit_comparative_object_stats(base_stats, head_stats):
with Section("Object stats", summary="allocations, frees and dict materializatons"):
base_rows = calculate_object_stats(base_stats)
head_rows = calculate_object_stats(head_stats)
- emit_table(("", "Base Count:", "Base Ratio:", "Head Count:", "Head Ratio:"), join_rows(base_rows, head_rows))
+ emit_table(
+ ("", "Base Count:", "Base Ratio:", "Head Count:", "Head Ratio:"),
+ join_rows(base_rows, head_rows),
+ )
+
def emit_gc_stats(stats):
with Section("GC stats", summary="GC collections and effectiveness"):
rows = calculate_gc_stats(stats)
- emit_table(("Generation:", "Collections:", "Objects collected:", "Object visits:"), rows)
+ emit_table(
+ ("Generation:", "Collections:", "Objects collected:", "Object visits:"),
+ rows,
+ )
+
def emit_comparative_gc_stats(base_stats, head_stats):
with Section("GC stats", summary="GC collections and effectiveness"):
base_rows = calculate_gc_stats(base_stats)
head_rows = calculate_gc_stats(head_stats)
emit_table(
- ("Generation:",
- "Base collections:", "Head collections:",
- "Base objects collected:", "Head objects collected:",
- "Base object visits:", "Head object visits:"),
- join_rows(base_rows, head_rows))
+ (
+ "Generation:",
+ "Base collections:",
+ "Head collections:",
+ "Base objects collected:",
+ "Head objects collected:",
+ "Base object visits:",
+ "Head object visits:",
+ ),
+ join_rows(base_rows, head_rows),
+ )
+
def get_total(opcode_stats):
total = 0
for opcode_stat in opcode_stats.values():
if "execution_count" in opcode_stat:
- total += opcode_stat['execution_count']
+ total += opcode_stat["execution_count"]
return total
+
def emit_pair_counts(opcode_stats, total):
pair_counts = []
for name_i, opcode_stat in opcode_stats.items():
@@ -572,15 +705,22 @@ def emit_pair_counts(opcode_stats, total):
pair_counts.sort(reverse=True)
cumulative = 0
rows = []
- for (count, pair) in itertools.islice(pair_counts, 100):
+ for count, pair in itertools.islice(pair_counts, 100):
name_i, name_j = pair
cumulative += count
- rows.append((f"{name_i} {name_j}", count, format_ratio(count, total),
- format_ratio(cumulative, total)))
- emit_table(("Pair", "Count:", "Self:", "Cumulative:"),
- rows
- )
- with Section("Predecessor/Successor Pairs", summary="Top 5 predecessors and successors of each opcode"):
+ rows.append(
+ (
+ f"{name_i} {name_j}",
+ count,
+ format_ratio(count, total),
+ format_ratio(cumulative, total),
+ )
+ )
+ emit_table(("Pair", "Count:", "Self:", "Cumulative:"), rows)
+ with Section(
+ "Predecessor/Successor Pairs",
+ summary="Top 5 predecessors and successors of each opcode",
+ ):
predecessors = collections.defaultdict(collections.Counter)
successors = collections.defaultdict(collections.Counter)
total_predecessors = collections.Counter()
@@ -598,38 +738,135 @@ def emit_pair_counts(opcode_stats, total):
continue
pred_rows = succ_rows = ()
if total1:
- pred_rows = [(pred, count, f"{count/total1:.1%}")
- for (pred, count) in predecessors[name].most_common(5)]
+ pred_rows = [
+ (pred, count, f"{count/total1:.1%}")
+ for (pred, count) in predecessors[name].most_common(5)
+ ]
if total2:
- succ_rows = [(succ, count, f"{count/total2:.1%}")
- for (succ, count) in successors[name].most_common(5)]
+ succ_rows = [
+ (succ, count, f"{count/total2:.1%}")
+ for (succ, count) in successors[name].most_common(5)
+ ]
with Section(name, 3, f"Successors and predecessors for {name}"):
- emit_table(("Predecessors", "Count:", "Percentage:"),
- pred_rows
- )
- emit_table(("Successors", "Count:", "Percentage:"),
- succ_rows
- )
+ emit_table(("Predecessors", "Count:", "Percentage:"), pred_rows)
+ emit_table(("Successors", "Count:", "Percentage:"), succ_rows)
+
+
+def calculate_optimization_stats(stats):
+ attempts = stats["Optimization attempts"]
+ created = stats["Optimization traces created"]
+ executed = stats["Optimization traces executed"]
+ uops = stats["Optimization uops executed"]
+ trace_stack_overflow = stats["Optimization trace stack overflow"]
+ trace_stack_underflow = stats["Optimization trace stack underflow"]
+ trace_too_long = stats["Optimization trace too long"]
+ inner_loop = stats["Optimization inner loop"]
+ recursive_call = stats["Optimization recursive call"]
+
+ return [
+ ("Optimization attempts", attempts, ""),
+ ("Traces created", created, format_ratio(created, attempts)),
+ ("Traces executed", executed, ""),
+ ("Uops executed", uops, int(uops / (executed or 1))),
+ ("Trace stack overflow", trace_stack_overflow, ""),
+ ("Trace stack underflow", trace_stack_underflow, ""),
+ ("Trace too long", trace_too_long, ""),
+ ("Inner loop found", inner_loop, ""),
+ ("Recursive call", recursive_call, ""),
+ ]
+
+
+def calculate_uop_execution_counts(opcode_stats):
+ total = 0
+ counts = []
+ for name, opcode_stat in opcode_stats.items():
+ if "execution_count" in opcode_stat:
+ count = opcode_stat["execution_count"]
+ counts.append((count, name))
+ total += count
+ counts.sort(reverse=True)
+ cumulative = 0
+ rows = []
+ for count, name in counts:
+ cumulative += count
+ rows.append(
+ (name, count, format_ratio(count, total), format_ratio(cumulative, total))
+ )
+ return rows
+
+
+def emit_optimization_stats(stats):
+ if "Optimization attempts" not in stats:
+ return
+
+ uop_stats = extract_opcode_stats(stats, "uops")
+
+ with Section(
+ "Optimization (Tier 2) stats", summary="statistics about the Tier 2 optimizer"
+ ):
+ with Section("Overall stats", level=3):
+ rows = calculate_optimization_stats(stats)
+ emit_table(("", "Count:", "Ratio:"), rows)
+
+ emit_histogram(
+ "Trace length histogram",
+ stats,
+ "Trace length",
+ stats["Optimization traces created"],
+ )
+ emit_histogram(
+ "Optimized trace length histogram",
+ stats,
+ "Optimized trace length",
+ stats["Optimization traces created"],
+ )
+ emit_histogram(
+ "Trace run length histogram",
+ stats,
+ "Trace run length",
+ stats["Optimization traces executed"],
+ )
+
+ with Section("Uop stats", level=3):
+ rows = calculate_uop_execution_counts(uop_stats)
+ emit_table(("Uop", "Count:", "Self:", "Cumulative:"), rows)
+
+ with Section("Unsupported opcodes", level=3):
+ unsupported_opcodes = extract_opcode_stats(stats, "unsupported_opcode")
+ data = []
+ for opcode, entry in unsupported_opcodes.items():
+ data.append((entry["count"], opcode))
+ data.sort(reverse=True)
+ rows = [(x[1], x[0]) for x in data]
+ emit_table(("Opcode", "Count"), rows)
+
+
+def emit_comparative_optimization_stats(base_stats, head_stats):
+ print("## Comparative optimization stats not implemented\n\n")
+
def output_single_stats(stats):
- opcode_stats = extract_opcode_stats(stats)
+ opcode_stats = extract_opcode_stats(stats, "opcode")
total = get_total(opcode_stats)
emit_execution_counts(opcode_stats, total)
emit_pair_counts(opcode_stats, total)
emit_specialization_stats(opcode_stats, stats["_defines"])
- emit_specialization_overview(opcode_stats, total, stats["_specialized_instructions"])
+ emit_specialization_overview(
+ opcode_stats, total, stats["_specialized_instructions"]
+ )
emit_call_stats(stats, stats["_stats_defines"])
emit_object_stats(stats)
emit_gc_stats(stats)
+ emit_optimization_stats(stats)
with Section("Meta stats", summary="Meta statistics"):
- emit_table(("", "Count:"), [('Number of data files', stats['__nfiles__'])])
+ emit_table(("", "Count:"), [("Number of data files", stats["__nfiles__"])])
def output_comparative_stats(base_stats, head_stats):
- base_opcode_stats = extract_opcode_stats(base_stats)
+ base_opcode_stats = extract_opcode_stats(base_stats, "opcode")
base_total = get_total(base_opcode_stats)
- head_opcode_stats = extract_opcode_stats(head_stats)
+ head_opcode_stats = extract_opcode_stats(head_stats, "opcode")
head_total = get_total(head_opcode_stats)
emit_comparative_execution_counts(
@@ -639,12 +876,17 @@ def output_comparative_stats(base_stats, head_stats):
base_opcode_stats, head_opcode_stats, head_stats["_defines"]
)
emit_comparative_specialization_overview(
- base_opcode_stats, base_total, head_opcode_stats, head_total,
- head_stats["_specialized_instructions"]
+ base_opcode_stats,
+ base_total,
+ head_opcode_stats,
+ head_total,
+ head_stats["_specialized_instructions"],
)
emit_comparative_call_stats(base_stats, head_stats, head_stats["_stats_defines"])
emit_comparative_object_stats(base_stats, head_stats)
emit_comparative_gc_stats(base_stats, head_stats)
+ emit_comparative_optimization_stats(base_stats, head_stats)
+
def output_stats(inputs, json_output=None):
if len(inputs) == 1:
@@ -654,9 +896,7 @@ def output_stats(inputs, json_output=None):
output_single_stats(stats)
elif len(inputs) == 2:
if json_output is not None:
- raise ValueError(
- "Can not output to JSON when there are multiple inputs"
- )
+ raise ValueError("Can not output to JSON when there are multiple inputs")
base_stats = gather_stats(inputs[0])
head_stats = gather_stats(inputs[1])
@@ -665,6 +905,7 @@ def output_stats(inputs, json_output=None):
print("---")
print("Stats gathered on:", date.today())
+
def main():
parser = argparse.ArgumentParser(description="Summarize pystats results")
@@ -680,14 +921,14 @@ def main():
If one source is provided, its stats are printed.
If two sources are provided, comparative stats are printed.
Default is {DEFAULT_DIR}.
- """
+ """,
)
parser.add_argument(
"--json-output",
nargs="?",
type=argparse.FileType("w"),
- help="Output complete raw results to the given JSON file."
+ help="Output complete raw results to the given JSON file.",
)
args = parser.parse_args()
@@ -697,5 +938,6 @@ def main():
output_stats(args.inputs, json_output=args.json_output)
+
if __name__ == "__main__":
main()
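
On the analysis side, emit_histogram recovers each bucket's upper bound from the stats key with a regex and trims empty trailing buckets before tabulating. A round-trip sketch under the same assumptions (counts hypothetical):

    import re

    # Keys as written by print_histogram, parsed the way emit_histogram does.
    stats = {"Trace length[1]": 0, "Trace length[2]": 3,
             "Trace length[4]": 17, "Trace length[8]": 0}
    rows = []
    for key, count in stats.items():
        if key.startswith("Trace length"):
            bound = int(re.match(r".+\[([0-9]+)\]", key).group(1))
            rows.append((f"<= {bound}", count))
    while rows and rows[-1][1] == 0:  # drop empty high buckets, as the script does
        rows.pop()
    assert rows == [("<= 1", 0), ("<= 2", 3), ("<= 4", 17)]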