author     Mark Shannon <mark@hotpy.org>  2024-02-13 14:16:37 (GMT)
committer  GitHub <noreply@github.com>    2024-02-13 14:16:37 (GMT)
commit     f9f6156c5affc039d4ee6b6f4999daf0d5896428 (patch)
tree       063a0d3da50ec32d1bdd265d0b305f9365ced7da /Python
parent     7cce8576226249461baa91c4a89770a1823b44a4 (diff)
GH-113710: Backedge counter improvements. (GH-115166)
Diffstat (limited to 'Python')
-rw-r--r--   Python/bytecodes.c          29
-rw-r--r--   Python/generated_cases.c.h  29
-rw-r--r--   Python/optimizer.c          48
-rw-r--r--   Python/pylifecycle.c         4
-rw-r--r--   Python/pystate.c            10
5 files changed, 70 insertions, 50 deletions
diff --git a/Python/bytecodes.c b/Python/bytecodes.c
index f7c7e36..2ad5878 100644
--- a/Python/bytecodes.c
+++ b/Python/bytecodes.c
@@ -2318,13 +2318,16 @@ dummy_func(
assert(oparg <= INSTR_OFFSET());
JUMPBY(-oparg);
#if ENABLE_SPECIALIZATION
- this_instr[1].cache += (1 << OPTIMIZER_BITS_IN_COUNTER);
+ uint16_t counter = this_instr[1].cache;
+ this_instr[1].cache = counter + (1 << OPTIMIZER_BITS_IN_COUNTER);
/* We are using unsigned values, but we really want signed values, so
- * do the 2s complement comparison manually */
- uint16_t ucounter = this_instr[1].cache + (1 << 15);
- uint16_t threshold = tstate->interp->optimizer_backedge_threshold + (1 << 15);
+ * do the 2s complement adjustment manually */
+ uint32_t offset_counter = counter ^ (1 << 15);
+ uint32_t threshold = tstate->interp->optimizer_backedge_threshold;
+ assert((threshold & OPTIMIZER_BITS_MASK) == 0);
+ // Use '>=' not '>' so that the optimizer/backoff bits do not affect the result.
// Double-check that the opcode isn't instrumented or something:
- if (ucounter > threshold && this_instr->op.code == JUMP_BACKWARD) {
+ if (offset_counter >= threshold && this_instr->op.code == JUMP_BACKWARD) {
OPT_STAT_INC(attempts);
_Py_CODEUNIT *start = this_instr;
/* Back up over EXTENDED_ARGs so optimizer sees the whole instruction */
@@ -2338,18 +2341,18 @@ dummy_func(
// Rewind and enter the executor:
assert(start->op.code == ENTER_EXECUTOR);
next_instr = start;
- this_instr[1].cache &= ((1 << OPTIMIZER_BITS_IN_COUNTER) - 1);
+ this_instr[1].cache &= OPTIMIZER_BITS_MASK;
}
else {
- int backoff = this_instr[1].cache & ((1 << OPTIMIZER_BITS_IN_COUNTER) - 1);
- if (backoff < MINIMUM_TIER2_BACKOFF) {
- backoff = MINIMUM_TIER2_BACKOFF;
+ int backoff = this_instr[1].cache & OPTIMIZER_BITS_MASK;
+ backoff++;
+ if (backoff < MIN_TIER2_BACKOFF) {
+ backoff = MIN_TIER2_BACKOFF;
}
- else if (backoff < 15 - OPTIMIZER_BITS_IN_COUNTER) {
- backoff++;
+ else if (backoff > MAX_TIER2_BACKOFF) {
+ backoff = MAX_TIER2_BACKOFF;
}
- assert(backoff <= 15 - OPTIMIZER_BITS_IN_COUNTER);
- this_instr[1].cache = ((1 << 16) - ((1 << OPTIMIZER_BITS_IN_COUNTER) << backoff)) | backoff;
+ this_instr[1].cache = ((UINT16_MAX << OPTIMIZER_BITS_IN_COUNTER) << backoff) | backoff;
}
}
#endif /* ENABLE_SPECIALIZATION */
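
The hunk above packs two fields into the 16-bit this_instr[1].cache word: a backoff
exponent in the low OPTIMIZER_BITS_IN_COUNTER bits and a trip counter in the remaining
high bits, incremented by adding 1 << OPTIMIZER_BITS_IN_COUNTER. XOR-ing the counter
with 1 << 15 maps signed ordering onto unsigned ordering, so a single unsigned compare
against a pre-offset threshold decides whether to attempt optimization. A minimal
standalone sketch of that arithmetic (the constant values are assumptions mirroring
CPython's internal headers, not part of this diff; the threshold of 16 matches the
uop optimizer's new backedge_threshold below):

    #include <stdint.h>
    #include <stdio.h>

    #define OPTIMIZER_BITS_IN_COUNTER 4   /* assumed value */

    int main(void)
    {
        uint16_t cache = 0;               /* counter = 0, backoff = 0 */
        /* Pre-shifted, pre-offset threshold for 16 backedges
         * (see shift_and_offset_threshold in optimizer.c below). */
        uint32_t threshold = (16 << OPTIMIZER_BITS_IN_COUNTER) + (1 << 15);

        int backedges = 0;
        for (;;) {
            uint16_t counter = cache;
            cache = counter + (1 << OPTIMIZER_BITS_IN_COUNTER); /* +1 in counter field */
            uint32_t offset_counter = counter ^ (1 << 15);      /* signed -> unsigned */
            backedges++;
            if (offset_counter >= threshold) {
                break;                    /* optimization would be attempted here */
            }
        }
        printf("attempt fired on backedge %d\n", backedges);   /* prints 17 */
        return 0;
    }
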
diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h
index afb6650..a49223e 100644
--- a/Python/generated_cases.c.h
+++ b/Python/generated_cases.c.h
@@ -3263,13 +3263,16 @@
assert(oparg <= INSTR_OFFSET());
JUMPBY(-oparg);
#if ENABLE_SPECIALIZATION
- this_instr[1].cache += (1 << OPTIMIZER_BITS_IN_COUNTER);
+ uint16_t counter = this_instr[1].cache;
+ this_instr[1].cache = counter + (1 << OPTIMIZER_BITS_IN_COUNTER);
/* We are using unsigned values, but we really want signed values, so
- * do the 2s complement comparison manually */
- uint16_t ucounter = this_instr[1].cache + (1 << 15);
- uint16_t threshold = tstate->interp->optimizer_backedge_threshold + (1 << 15);
+ * do the 2s complement adjustment manually */
+ uint32_t offset_counter = counter ^ (1 << 15);
+ uint32_t threshold = tstate->interp->optimizer_backedge_threshold;
+ assert((threshold & OPTIMIZER_BITS_MASK) == 0);
+ // Use '>=' not '>' so that the optimizer/backoff bits do not affect the result.
// Double-check that the opcode isn't instrumented or something:
- if (ucounter > threshold && this_instr->op.code == JUMP_BACKWARD) {
+ if (offset_counter >= threshold && this_instr->op.code == JUMP_BACKWARD) {
OPT_STAT_INC(attempts);
_Py_CODEUNIT *start = this_instr;
/* Back up over EXTENDED_ARGs so optimizer sees the whole instruction */
@@ -3283,18 +3286,18 @@
// Rewind and enter the executor:
assert(start->op.code == ENTER_EXECUTOR);
next_instr = start;
- this_instr[1].cache &= ((1 << OPTIMIZER_BITS_IN_COUNTER) - 1);
+ this_instr[1].cache &= OPTIMIZER_BITS_MASK;
}
else {
- int backoff = this_instr[1].cache & ((1 << OPTIMIZER_BITS_IN_COUNTER) - 1);
- if (backoff < MINIMUM_TIER2_BACKOFF) {
- backoff = MINIMUM_TIER2_BACKOFF;
+ int backoff = this_instr[1].cache & OPTIMIZER_BITS_MASK;
+ backoff++;
+ if (backoff < MIN_TIER2_BACKOFF) {
+ backoff = MIN_TIER2_BACKOFF;
}
- else if (backoff < 15 - OPTIMIZER_BITS_IN_COUNTER) {
- backoff++;
+ else if (backoff > MAX_TIER2_BACKOFF) {
+ backoff = MAX_TIER2_BACKOFF;
}
- assert(backoff <= 15 - OPTIMIZER_BITS_IN_COUNTER);
- this_instr[1].cache = ((1 << 16) - ((1 << OPTIMIZER_BITS_IN_COUNTER) << backoff)) | backoff;
+ this_instr[1].cache = ((UINT16_MAX << OPTIMIZER_BITS_IN_COUNTER) << backoff) | backoff;
}
}
#endif /* ENABLE_SPECIALIZATION */
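
Python/generated_cases.c.h is generated from Python/bytecodes.c, which is why this hunk
repeats the bytecodes.c change verbatim. The re-arm expression on the failure path is
the subtle part: it seeds the counter field with -(1 << backoff) in two's complement, so
each bump of the backoff exponent roughly doubles the wait before the next attempt. A
hedged sketch (same assumed constant as above; the 4..11 range assumes
MIN_TIER2_BACKOFF = 4 and MAX_TIER2_BACKOFF = 15 - OPTIMIZER_BITS_IN_COUNTER, per the
assertion removed in the old code):

    #include <stdint.h>
    #include <stdio.h>

    #define OPTIMIZER_BITS_IN_COUNTER 4   /* assumed, as above */
    #define OPTIMIZER_BITS_MASK ((1 << OPTIMIZER_BITS_IN_COUNTER) - 1)

    int main(void)
    {
        for (int backoff = 4; backoff <= 11; backoff++) {
            uint16_t cache = (uint16_t)
                (((UINT16_MAX << OPTIMIZER_BITS_IN_COUNTER) << backoff) | backoff);
            /* The counter field, read as a signed value, starts at -(1 << backoff):
             * that many more increments are needed before it crosses zero again. */
            int counter = (int16_t)(cache & ~OPTIMIZER_BITS_MASK)
                          / (1 << OPTIMIZER_BITS_IN_COUNTER);
            printf("backoff=%2d  cache=0x%04x  counter=%6d\n", backoff, cache, counter);
        }
        return 0;
    }
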
diff --git a/Python/optimizer.c b/Python/optimizer.c
index f31f831..13df8c1 100644
--- a/Python/optimizer.c
+++ b/Python/optimizer.c
@@ -109,6 +109,9 @@ never_optimize(
_PyExecutorObject **exec,
int Py_UNUSED(stack_entries))
{
+ /* Although it should be benign for this to be called,
+ * it shouldn't happen, so fail in debug builds. */
+ assert(0 && "never optimize should never be called");
return 0;
}
@@ -120,13 +123,19 @@ PyTypeObject _PyDefaultOptimizer_Type = {
.tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_DISALLOW_INSTANTIATION,
};
-_PyOptimizerObject _PyOptimizer_Default = {
+static _PyOptimizerObject _PyOptimizer_Default = {
PyObject_HEAD_INIT(&_PyDefaultOptimizer_Type)
.optimize = never_optimize,
- .resume_threshold = INT16_MAX,
- .backedge_threshold = INT16_MAX,
+ .resume_threshold = OPTIMIZER_UNREACHABLE_THRESHOLD,
+ .backedge_threshold = OPTIMIZER_UNREACHABLE_THRESHOLD,
};
+static uint32_t
+shift_and_offset_threshold(uint16_t threshold)
+{
+ return (threshold << OPTIMIZER_BITS_IN_COUNTER) + (1 << 15);
+}
+
_PyOptimizerObject *
PyUnstable_GetOptimizer(void)
{
@@ -134,24 +143,33 @@ PyUnstable_GetOptimizer(void)
if (interp->optimizer == &_PyOptimizer_Default) {
return NULL;
}
- assert(interp->optimizer_backedge_threshold == interp->optimizer->backedge_threshold);
- assert(interp->optimizer_resume_threshold == interp->optimizer->resume_threshold);
+ assert(interp->optimizer_backedge_threshold ==
+ shift_and_offset_threshold(interp->optimizer->backedge_threshold));
+ assert(interp->optimizer_resume_threshold ==
+ shift_and_offset_threshold(interp->optimizer->resume_threshold));
Py_INCREF(interp->optimizer);
return interp->optimizer;
}
-void
-PyUnstable_SetOptimizer(_PyOptimizerObject *optimizer)
+_PyOptimizerObject *
+_Py_SetOptimizer(PyInterpreterState *interp, _PyOptimizerObject *optimizer)
{
- PyInterpreterState *interp = _PyInterpreterState_GET();
if (optimizer == NULL) {
optimizer = &_PyOptimizer_Default;
}
_PyOptimizerObject *old = interp->optimizer;
Py_INCREF(optimizer);
interp->optimizer = optimizer;
- interp->optimizer_backedge_threshold = optimizer->backedge_threshold;
- interp->optimizer_resume_threshold = optimizer->resume_threshold;
+ interp->optimizer_backedge_threshold = shift_and_offset_threshold(optimizer->backedge_threshold);
+ interp->optimizer_resume_threshold = shift_and_offset_threshold(optimizer->resume_threshold);
+ return old;
+}
+
+void
+PyUnstable_SetOptimizer(_PyOptimizerObject *optimizer)
+{
+ PyInterpreterState *interp = _PyInterpreterState_GET();
+ _PyOptimizerObject *old = _Py_SetOptimizer(interp, optimizer);
Py_DECREF(old);
}
@@ -860,10 +878,10 @@ PyUnstable_Optimizer_NewUOpOptimizer(void)
return NULL;
}
opt->optimize = uop_optimize;
- opt->resume_threshold = INT16_MAX;
- // Need at least 3 iterations to settle specializations.
- // A few lower bits of the counter are reserved for other flags.
- opt->backedge_threshold = 16 << OPTIMIZER_BITS_IN_COUNTER;
+ opt->resume_threshold = OPTIMIZER_UNREACHABLE_THRESHOLD;
+ // Need a few iterations to settle specializations,
+ // and to amortize the cost of optimization.
+ opt->backedge_threshold = 16;
return (PyObject *)opt;
}
@@ -950,7 +968,7 @@ PyUnstable_Optimizer_NewCounter(void)
return NULL;
}
opt->base.optimize = counter_optimize;
- opt->base.resume_threshold = INT16_MAX;
+ opt->base.resume_threshold = OPTIMIZER_UNREACHABLE_THRESHOLD;
opt->base.backedge_threshold = 0;
opt->count = 0;
return (PyObject *)opt;
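
The thresholds cached on the interpreter state are now stored pre-shifted and pre-offset
by shift_and_offset_threshold(), so the JUMP_BACKWARD fast path above pays for the
adjustment once at set-optimizer time rather than on every backedge. It also makes
OPTIMIZER_UNREACHABLE_THRESHOLD genuinely unreachable: offset_counter can never exceed
UINT16_MAX, while the shifted-and-offset threshold lands well above it. A sketch
(OPTIMIZER_UNREACHABLE_THRESHOLD is assumed to be UINT16_MAX; its value is not shown in
this diff):

    #include <stdint.h>
    #include <stdio.h>

    #define OPTIMIZER_BITS_IN_COUNTER 4                   /* assumed, as above */
    #define OPTIMIZER_UNREACHABLE_THRESHOLD UINT16_MAX    /* assumed value */

    /* Mirror of the new static helper in optimizer.c. */
    static uint32_t
    shift_and_offset_threshold(uint16_t threshold)
    {
        return (threshold << OPTIMIZER_BITS_IN_COUNTER) + (1 << 15);
    }

    int main(void)
    {
        uint32_t t = shift_and_offset_threshold(OPTIMIZER_UNREACHABLE_THRESHOLD);
        /* offset_counter = counter ^ (1 << 15) never exceeds 65535, so a
         * threshold of 1081328 can never be met. */
        printf("unreachable threshold = %u\n", t);
        return 0;
    }
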
diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c
index 2300180..7e4c07b 100644
--- a/Python/pylifecycle.c
+++ b/Python/pylifecycle.c
@@ -1627,8 +1627,8 @@ finalize_modules(PyThreadState *tstate)
// Invalidate all executors and turn off tier 2 optimizer
_Py_Executors_InvalidateAll(interp);
- Py_XDECREF(interp->optimizer);
- interp->optimizer = &_PyOptimizer_Default;
+ _PyOptimizerObject *old = _Py_SetOptimizer(interp, NULL);
+ Py_XDECREF(old);
// Stop watching __builtin__ modifications
PyDict_Unwatch(0, interp->builtins);
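
The refactor centralizes the swap: _Py_SetOptimizer() takes a new reference to the
optimizer it installs (falling back to the default when passed NULL), recomputes both
cached thresholds, and returns the previously installed optimizer with its reference
transferred to the caller. A toy model of that ownership contract (the types and
refcounting stubs here are illustrative assumptions, not CPython API):

    #include <stdio.h>

    typedef struct Optimizer { int refcnt; const char *name; } Optimizer;
    typedef struct Interp { Optimizer *optimizer; } Optimizer_Interp;

    static Optimizer default_opt = { 1, "default" };

    static void opt_incref(Optimizer *o) { o->refcnt++; }
    static void opt_decref(Optimizer *o) { o->refcnt--; }

    /* Mirrors _Py_SetOptimizer: installs opt (or the default when NULL),
     * takes a new reference to it, and returns the old optimizer with its
     * reference handed to the caller. */
    static Optimizer *
    set_optimizer(Optimizer_Interp *interp, Optimizer *opt)
    {
        if (opt == NULL) {
            opt = &default_opt;
        }
        Optimizer *old = interp->optimizer;
        opt_incref(opt);
        interp->optimizer = opt;
        return old;
    }

    int main(void)
    {
        Optimizer_Interp interp = { &default_opt };
        Optimizer mine = { 1, "uops" };

        Optimizer *old = set_optimizer(&interp, &mine); /* as in PyUnstable_SetOptimizer */
        opt_decref(old);                                /* caller owns the returned ref */

        old = set_optimizer(&interp, NULL);             /* as in finalize_modules */
        opt_decref(old);
        printf("installed: %s\n", interp.optimizer->name);
        return 0;
    }
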
diff --git a/Python/pystate.c b/Python/pystate.c
index 937c430..996f465 100644
--- a/Python/pystate.c
+++ b/Python/pystate.c
@@ -625,9 +625,7 @@ init_interpreter(PyInterpreterState *interp,
}
interp->sys_profile_initialized = false;
interp->sys_trace_initialized = false;
- interp->optimizer = &_PyOptimizer_Default;
- interp->optimizer_backedge_threshold = _PyOptimizer_Default.backedge_threshold;
- interp->optimizer_resume_threshold = _PyOptimizer_Default.backedge_threshold;
+ (void)_Py_SetOptimizer(interp, NULL);
interp->next_func_version = 1;
interp->executor_list_head = NULL;
if (interp != &runtime->_main_interpreter) {
@@ -780,10 +778,8 @@ interpreter_clear(PyInterpreterState *interp, PyThreadState *tstate)
tstate->_status.cleared = 0;
}
- Py_CLEAR(interp->optimizer);
- interp->optimizer = &_PyOptimizer_Default;
- interp->optimizer_backedge_threshold = _PyOptimizer_Default.backedge_threshold;
- interp->optimizer_resume_threshold = _PyOptimizer_Default.backedge_threshold;
+ _PyOptimizerObject *old = _Py_SetOptimizer(interp, NULL);
+ Py_DECREF(old);
/* It is possible that any of the objects below have a finalizer
that runs Python code or otherwise relies on a thread state
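
Note the asymmetry between the two pystate.c call sites: init_interpreter() casts the
result of _Py_SetOptimizer() to void, presumably because no optimizer has been installed
yet at that point and there is no reference to release, while interpreter_clear() must
Py_DECREF the returned optimizer, matching the ownership contract sketched above.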