"""Print a summary of specialization stats for all files in the default stats folders. """ import collections import os.path import opcode from datetime import date import itertools import argparse if os.name == "nt": DEFAULT_DIR = "c:\\temp\\py_stats\\" else: DEFAULT_DIR = "/tmp/py_stats/" #Create list of all instruction names specialized = iter(opcode._specialized_instructions) opname = ["<0>"] for name in opcode.opname[1:]: if name.startswith("<"): try: name = next(specialized) except StopIteration: pass opname.append(name) TOTAL = "specialization.deferred", "specialization.hit", "specialization.miss", "execution_count" def print_specialization_stats(name, family_stats, defines): if "specializable" not in family_stats: return total = sum(family_stats.get(kind, 0) for kind in TOTAL) if total == 0: return with Section(name, 3, f"specialization stats for {name} family"): rows = [] for key in sorted(family_stats): if key.startswith("specialization.failure_kinds"): continue if key in ("specialization.hit", "specialization.miss"): label = key[len("specialization."):] elif key == "execution_count": label = "unquickened" elif key in ("specialization.success", "specialization.failure", "specializable"): continue elif key.startswith("pair"): continue else: label = key rows.append((f"{label:>12}", f"{family_stats[key]:>12}", f"{100*family_stats[key]/total:0.1f}%")) emit_table(("Kind", "Count", "Ratio"), rows) print_title("Specialization attempts", 4) total_attempts = 0 for key in ("specialization.success", "specialization.failure"): total_attempts += family_stats.get(key, 0) rows = [] for key in ("specialization.success", "specialization.failure"): label = key[len("specialization."):] label = label[0].upper() + label[1:] val = family_stats.get(key, 0) rows.append((label, val, f"{100*val/total_attempts:0.1f}%")) emit_table(("", "Count:", "Ratio:"), rows) total_failures = family_stats.get("specialization.failure", 0) failure_kinds = [ 0 ] * 30 for key in family_stats: if not key.startswith("specialization.failure_kind"): continue _, index = key[:-1].split("[") index = int(index) failure_kinds[index] = family_stats[key] failures = [(value, index) for (index, value) in enumerate(failure_kinds)] failures.sort(reverse=True) rows = [] for value, index in failures: if not value: continue rows.append((kind_to_text(index, defines, name), value, f"{100*value/total_failures:0.1f}%")) emit_table(("Failure kind", "Count:", "Ratio:"), rows) def gather_stats(): stats = collections.Counter() for filename in os.listdir(DEFAULT_DIR): with open(os.path.join(DEFAULT_DIR, filename)) as fd: for line in fd: key, value = line.split(":") key = key.strip() value = int(value) stats[key] += value return stats def extract_opcode_stats(stats): opcode_stats = [ {} for _ in range(256) ] for key, value in stats.items(): if not key.startswith("opcode"): continue n, _, rest = key[7:].partition("]") opcode_stats[int(n)][rest.strip(".")] = value return opcode_stats def parse_kinds(spec_src): defines = collections.defaultdict(list) for line in spec_src: line = line.strip() if not line.startswith("#define SPEC_FAIL_"): continue line = line[len("#define SPEC_FAIL_"):] name, val = line.split() defines[int(val.strip())].append(name.strip()) return defines def pretty(defname): return defname.replace("_", " ").lower() def kind_to_text(kind, defines, opname): if kind < 7: return pretty(defines[kind][0]) if opname.endswith("ATTR"): opname = "ATTR" if opname.endswith("SUBSCR"): opname = "SUBSCR" if opname.startswith("PRECALL"): opname = "CALL" for name in defines[kind]: if name.startswith(opname): return pretty(name[len(opname)+1:]) return "kind " + str(kind) def categorized_counts(opcode_stats): basic = 0 specialized = 0 not_specialized = 0 specialized_instructions = { op for op in opcode._specialized_instructions if "__" not in op and "ADAPTIVE" not in op} adaptive_instructions = { op for op in opcode._specialized_instructions if "ADAPTIVE" in op} for i, opcode_stat in enumerate(opcode_stats): if "execution_count" not in opcode_stat: continue count = opcode_stat['execution_count'] name = opname[i] if "specializable" in opcode_stat: not_specialized += count elif name in adaptive_instructions: not_specialized += count elif name in specialized_instructions: miss = opcode_stat.get("specialization.miss", 0) not_specialized += miss specialized += count - miss else: basic += count return basic, not_specialized, specialized def print_title(name, level=2): print("#"*level, name) print() class Section: def __init__(self, title, level=2, summary=None): self.title = title self.level = level if summary is None: self.summary = title.lower() else: self.summary = summary def __enter__(self): print_title(self.title, self.level) print("
") print("", self.summary, "") print() return self def __exit__(*args): print() print("
") print() def emit_table(header, rows): width = len(header) header_line = "|" under_line = "|" for item in header: under = "---" if item.endswith(":"): item = item[:-1] under += ":" header_line += item + " | " under_line += under + "|" print(header_line) print(under_line) for row in rows: if width is not None and len(row) != width: raise ValueError("Wrong number of elements in row '" + str(rows) + "'") print("|", " | ".join(str(i) for i in row), "|") print() def emit_execution_counts(opcode_stats, total): with Section("Execution counts", summary="execution counts for all instructions"): counts = [] for i, opcode_stat in enumerate(opcode_stats): if "execution_count" in opcode_stat: count = opcode_stat['execution_count'] miss = 0 if "specializable" not in opcode_stat: miss = opcode_stat.get("specialization.miss") counts.append((count, opname[i], miss)) counts.sort(reverse=True) cumulative = 0 rows = [] for (count, name, miss) in counts: cumulative += count if miss: miss = f"{100*miss/count:0.1f}%" else: miss = "" rows.append((name, count, f"{100*count/total:0.1f}%", f"{100*cumulative/total:0.1f}%", miss)) emit_table( ("Name", "Count:", "Self:", "Cumulative:", "Miss ratio:"), rows ) def emit_specialization_stats(opcode_stats): spec_path = os.path.join(os.path.dirname(__file__), "../../Python/specialize.c") with open(spec_path) as spec_src: defines = parse_kinds(spec_src) with Section("Specialization stats", summary="specialization stats by family"): for i, opcode_stat in enumerate(opcode_stats): name = opname[i] print_specialization_stats(name, opcode_stat, defines) def emit_specialization_overview(opcode_stats, total): basic, not_specialized, specialized = categorized_counts(opcode_stats) with Section("Specialization effectiveness"): emit_table(("Instructions", "Count:", "Ratio:"), ( ("Basic", basic, f"{basic*100/total:0.1f}%"), ("Not specialized", not_specialized, f"{not_specialized*100/total:0.1f}%"), ("Specialized", specialized, f"{specialized*100/total:0.1f}%"), )) def emit_call_stats(stats): with Section("Call stats", summary="Inlined calls and frame stats"): total = 0 for key, value in stats.items(): if "Calls to" in key: total += value rows = [] for key, value in stats.items(): if "Calls to" in key: rows.append((key, value, f"{100*value/total:0.1f}%")) for key, value in stats.items(): if key.startswith("Frame"): rows.append((key, value, f"{100*value/total:0.1f}%")) emit_table(("", "Count:", "Ratio:"), rows) def emit_object_stats(stats): with Section("Object stats", summary="allocations, frees and dict materializatons"): total = stats.get("Object new values") rows = [] for key, value in stats.items(): if key.startswith("Object"): if "materialize" in key: materialize = f"{100*value/total:0.1f}%" else: materialize = "" label = key[6:].strip() label = label[0].upper() + label[1:] rows.append((label, value, materialize)) emit_table(("", "Count:", "Ratio:"), rows) def get_total(opcode_stats): total = 0 for opcode_stat in opcode_stats: if "execution_count" in opcode_stat: total += opcode_stat['execution_count'] return total def emit_pair_counts(opcode_stats, total): with Section("Pair counts", summary="Pair counts for top 100 pairs"): pair_counts = [] for i, opcode_stat in enumerate(opcode_stats): if i == 0: continue for key, value in opcode_stat.items(): if key.startswith("pair_count"): x, _, _ = key[11:].partition("]") if value: pair_counts.append((value, (i, int(x)))) pair_counts.sort(reverse=True) cumulative = 0 rows = [] for (count, pair) in itertools.islice(pair_counts, 100): i, j = pair cumulative += count rows.append((opname[i] + " " + opname[j], count, f"{100*count/total:0.1f}%", f"{100*cumulative/total:0.1f}%")) emit_table(("Pair", "Count:", "Self:", "Cumulative:"), rows ) def main(): stats = gather_stats() opcode_stats = extract_opcode_stats(stats) total = get_total(opcode_stats) emit_execution_counts(opcode_stats, total) emit_pair_counts(opcode_stats, total) emit_specialization_stats(opcode_stats) emit_specialization_overview(opcode_stats, total) emit_call_stats(stats) emit_object_stats(stats) print("---") print("Stats gathered on:", date.today()) if __name__ == "__main__": main()