summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Michael Droettboom <mdboom@gmail.com> 2022-12-12 14:50:43 (GMT)
committer GitHub <noreply@github.com> 2022-12-12 14:50:43 (GMT)
commit 1583c6e326a8454d3c806763620e1329bf6b7cbe (patch)
tree d18e4187fa1f991f73d730439325e1532f6ee001
parent e4ea33b17807d99ed737f800d9b0006957c008d2 (diff)
download cpython-1583c6e326a8454d3c806763620e1329bf6b7cbe.zip
cpython-1583c6e326a8454d3c806763620e1329bf6b7cbe.tar.gz
cpython-1583c6e326a8454d3c806763620e1329bf6b7cbe.tar.bz2
GH-100143: Improve collecting pystats for parts of runs (GH-100144)
* pystats off by default
* Add -Xpystats flag
* Always dump pystats, even if turned off
-rw-r--r-- Misc/NEWS.d/next/Core and Builtins/2022-12-09-14-27-36.gh-issue-100143.5g9rb4.rst 3
-rw-r--r-- Python/initconfig.c 15
-rw-r--r-- Python/specialize.c 7
-rw-r--r-- Tools/scripts/summarize_stats.py 48
4 files changed, 48 insertions, 25 deletions
diff --git a/Misc/NEWS.d/next/Core and Builtins/2022-12-09-14-27-36.gh-issue-100143.5g9rb4.rst b/Misc/NEWS.d/next/Core and Builtins/2022-12-09-14-27-36.gh-issue-100143.5g9rb4.rst
new file mode 100644
index 0000000..20a25f8
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2022-12-09-14-27-36.gh-issue-100143.5g9rb4.rst
@@ -0,0 +1,3 @@
+When built with ``--enable-pystats``, stats collection is now off by
+default. To enable it early at startup, pass the ``-Xpystats`` flag. Stats
+are now always dumped, even if switched off.
diff --git a/Python/initconfig.c b/Python/initconfig.c
index 64ae987..d05099c 100644
--- a/Python/initconfig.c
+++ b/Python/initconfig.c
@@ -129,7 +129,14 @@ The following implementation-specific options are available:\n\
\n\
-X int_max_str_digits=number: limit the size of int<->str conversions.\n\
This helps avoid denial of service attacks when parsing untrusted data.\n\
- The default is sys.int_info.default_max_str_digits. 0 disables.";
+ The default is sys.int_info.default_max_str_digits. 0 disables."
+
+#ifdef Py_STATS
+"\n\
+\n\
+-X pystats: Enable pystats collection at startup."
+#endif
+;
/* Envvars that don't have equivalent command-line options are listed first */
static const char usage_envvars[] =
@@ -2186,6 +2193,12 @@ config_read(PyConfig *config, int compute_path_config)
config->show_ref_count = 1;
}
+#ifdef Py_STATS
+ if (config_get_xoption(config, L"pystats")) {
+ _py_stats = &_py_stats_struct;
+ }
+#endif
+
status = config_read_complex_options(config);
if (_PyStatus_EXCEPTION(status)) {
return status;
diff --git a/Python/specialize.c b/Python/specialize.c
index 7545a77..785088e 100644
--- a/Python/specialize.c
+++ b/Python/specialize.c
@@ -18,7 +18,7 @@
#ifdef Py_STATS
PyStats _py_stats_struct = { 0 };
-PyStats *_py_stats = &_py_stats_struct;
+PyStats *_py_stats = NULL;
#define ADD_STAT_TO_DICT(res, field) \
do { \
@@ -205,9 +205,6 @@ _Py_StatsClear(void)
void
_Py_PrintSpecializationStats(int to_file)
{
- if (_py_stats == NULL) {
- return;
- }
FILE *out = stderr;
if (to_file) {
/* Write to a file instead of stderr. */
@@ -238,7 +235,7 @@ _Py_PrintSpecializationStats(int to_file)
else {
fprintf(out, "Specialization stats:\n");
}
- print_stats(out, _py_stats);
+ print_stats(out, &_py_stats_struct);
if (out != stderr) {
fclose(out);
}
diff --git a/Tools/scripts/summarize_stats.py b/Tools/scripts/summarize_stats.py
index c15501b..c30a60e 100644
--- a/Tools/scripts/summarize_stats.py
+++ b/Tools/scripts/summarize_stats.py
@@ -34,6 +34,16 @@ opmap = dict(sorted(opmap.items()))
TOTAL = "specialization.deferred", "specialization.hit", "specialization.miss", "execution_count"
+def format_ratio(num, den):
+ """
+ Format a ratio as a percentage. When the denominator is 0, returns the empty
+ string.
+ """
+ if den == 0:
+ return ""
+ else:
+ return f"{num/den:.01%}"
+
def join_rows(a_rows, b_rows):
"""
Joins two tables together, side-by-side, where the first column in each is a
@@ -87,7 +97,7 @@ def calculate_specialization_stats(family_stats, total):
continue
else:
label = key
- rows.append((f"{label:>12}", f"{family_stats[key]:>12}", f"{100*family_stats[key]/total:0.1f}%"))
+ rows.append((f"{label:>12}", f"{family_stats[key]:>12}", format_ratio(family_stats[key], total)))
return rows
def calculate_specialization_success_failure(family_stats):
@@ -100,7 +110,7 @@ def calculate_specialization_success_failure(family_stats):
label = key[len("specialization."):]
label = label[0].upper() + label[1:]
val = family_stats.get(key, 0)
- rows.append((label, val, f"{100*val/total_attempts:0.1f}%"))
+ rows.append((label, val, format_ratio(val, total_attempts)))
return rows
def calculate_specialization_failure_kinds(name, family_stats, defines):
@@ -118,7 +128,7 @@ def calculate_specialization_failure_kinds(name, family_stats, defines):
for value, index in failures:
if not value:
continue
- rows.append((kind_to_text(index, defines, name), value, f"{100*value/total_failures:0.1f}%"))
+ rows.append((kind_to_text(index, defines, name), value, format_ratio(value, total_failures)))
return rows
def print_specialization_stats(name, family_stats, defines):
@@ -318,11 +328,11 @@ def calculate_execution_counts(opcode_stats, total):
for (count, name, miss) in counts:
cumulative += count
if miss:
- miss = f"{100*miss/count:0.1f}%"
+ miss = format_ratio(miss, count)
else:
miss = ""
- rows.append((name, count, f"{100*count/total:0.1f}%",
- f"{100*cumulative/total:0.1f}%", miss))
+ rows.append((name, count, format_ratio(count, total),
+ format_ratio(cumulative, total), miss))
return rows
def emit_execution_counts(opcode_stats, total):
@@ -386,9 +396,9 @@ def emit_comparative_specialization_stats(base_opcode_stats, head_opcode_stats):
def calculate_specialization_effectiveness(opcode_stats, total):
basic, not_specialized, specialized = categorized_counts(opcode_stats)
return [
- ("Basic", basic, f"{basic*100/total:0.1f}%"),
- ("Not specialized", not_specialized, f"{not_specialized*100/total:0.1f}%"),
- ("Specialized", specialized, f"{specialized*100/total:0.1f}%"),
+ ("Basic", basic, format_ratio(basic, total)),
+ ("Not specialized", not_specialized, format_ratio(not_specialized, total)),
+ ("Specialized", specialized, format_ratio(specialized, total)),
]
def emit_specialization_overview(opcode_stats, total):
@@ -405,7 +415,7 @@ def emit_specialization_overview(opcode_stats, total):
counts.sort(reverse=True)
if total:
with Section(f"{title} by instruction", 3):
- rows = [ (name, count, f"{100*count/total:0.1f}%") for (count, name) in counts[:10] ]
+ rows = [ (name, count, format_ratio(count, total)) for (count, name) in counts[:10] ]
emit_table(("Name", "Count:", "Ratio:"), rows)
def emit_comparative_specialization_overview(base_opcode_stats, base_total, head_opcode_stats, head_total):
@@ -432,15 +442,15 @@ def calculate_call_stats(stats):
rows = []
for key, value in stats.items():
if "Calls to" in key:
- rows.append((key, value, f"{100*value/total:0.1f}%"))
+ rows.append((key, value, format_ratio(value, total)))
elif key.startswith("Calls "):
name, index = key[:-1].split("[")
index = int(index)
label = name + " (" + pretty(defines[index][0]) + ")"
- rows.append((label, value, f"{100*value/total:0.1f}%"))
+ rows.append((label, value, format_ratio(value, total)))
for key, value in stats.items():
if key.startswith("Frame"):
- rows.append((key, value, f"{100*value/total:0.1f}%"))
+ rows.append((key, value, format_ratio(value, total)))
return rows
def emit_call_stats(stats):
@@ -468,13 +478,13 @@ def calculate_object_stats(stats):
for key, value in stats.items():
if key.startswith("Object"):
if "materialize" in key:
- ratio = f"{100*value/total_materializations:0.1f}%"
+ ratio = format_ratio(value, total_materializations)
elif "allocations" in key:
- ratio = f"{100*value/total_allocations:0.1f}%"
+ ratio = format_ratio(value, total_allocations)
elif "increfs" in key:
- ratio = f"{100*value/total_increfs:0.1f}%"
+ ratio = format_ratio(value, total_increfs)
elif "decrefs" in key:
- ratio = f"{100*value/total_decrefs:0.1f}%"
+ ratio = format_ratio(value, total_decrefs)
else:
ratio = ""
label = key[6:].strip()
@@ -517,8 +527,8 @@ def emit_pair_counts(opcode_stats, total):
for (count, pair) in itertools.islice(pair_counts, 100):
i, j = pair
cumulative += count
- rows.append((opname[i] + " " + opname[j], count, f"{100*count/total:0.1f}%",
- f"{100*cumulative/total:0.1f}%"))
+ rows.append((opname[i] + " " + opname[j], count, format_ratio(count, total),
+ format_ratio(cumulative, total)))
emit_table(("Pair", "Count:", "Self:", "Cumulative:"),
rows
)