author     Guido van Rossum <guido@python.org>    2024-04-04 15:03:27 (GMT)
committer  GitHub <noreply@github.com>            2024-04-04 15:03:27 (GMT)
commit     060a96f1a9a901b01ed304aa82b886d248ca1cb6
tree       cb3e95ecac1f90440b7d3752c4aad015ea734bf0
parent     63bbe77d9bb2be4db83ed09b96dd22f2a44ef55b
gh-116968: Reimplement Tier 2 counters (#117144)
Introduce a unified 16-bit backoff counter type (``_Py_BackoffCounter``), shared between the Tier 1 adaptive specializer and the Tier 2 optimizer. The API used for adaptive specialization counters is changed, but the behavior is (supposed to be) identical.

The behavior of the Tier 2 counters is changed:
- There are no longer dynamic thresholds (we never varied these).
- All counters now use the same exponential backoff.
- The counter for ``JUMP_BACKWARD`` starts counting down from 16.
- The ``temperature`` in side exits starts counting down from 64.
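
For orientation, here is a minimal sketch of how a counter of this kind can be laid out and manipulated. It uses the helper names that appear in the diff below (``forge_backoff_counter``, ``backoff_counter_triggers``, ``advance_backoff_counter``, ``restart_backoff_counter``, ``pause_backoff_counter``), but the field layout and constants are illustrative assumptions, not a verbatim copy of pycore_backoff.h:

    #include <stdbool.h>
    #include <stdint.h>

    /* Sketch only: 16 bits split into a countdown value and a backoff
     * exponent. The real layout lives in pycore_backoff.h. */
    typedef union {
        uint16_t as_counter;
        struct {
            uint16_t value : 12;    /* counts down toward zero */
            uint16_t backoff : 4;   /* log2 of the next restart threshold */
        };
    } _Py_BackoffCounter;

    /* Reinterpret a raw 16-bit cache entry as a counter. */
    static inline _Py_BackoffCounter
    forge_backoff_counter(uint16_t counter)
    {
        _Py_BackoffCounter result = {.as_counter = counter};
        return result;
    }

    /* The counter "triggers" (specialize / optimize now) at zero. */
    static inline bool
    backoff_counter_triggers(_Py_BackoffCounter c)
    {
        return c.value == 0;
    }

    /* One execution elapsed: move one step closer to triggering. */
    static inline _Py_BackoffCounter
    advance_backoff_counter(_Py_BackoffCounter c)
    {
        if (c.value != 0) {
            c.value--;
        }
        return c;
    }

    /* An attempt failed: wait roughly twice as long before retrying. */
    static inline _Py_BackoffCounter
    restart_backoff_counter(_Py_BackoffCounter c)
    {
        if (c.backoff < 12) {
            c.backoff++;
        }
        c.value = (uint16_t)((1u << c.backoff) - 1);
        return c;
    }

    /* Set the low bit so the next advance is a net no-op; used to keep
     * instrumented instructions from ever triggering specialization. */
    static inline _Py_BackoffCounter
    pause_backoff_counter(_Py_BackoffCounter c)
    {
        c.value |= 1;
        return c;
    }

The key invariant: each failed specialization or optimization attempt roughly doubles the wait before the next one, which is the exponential backoff the message refers to.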
Diffstat (limited to 'Python')
-rw-r--r--  Python/bytecodes.c          101
-rw-r--r--  Python/ceval.c                5
-rw-r--r--  Python/ceval_macros.h        31
-rw-r--r--  Python/executor_cases.c.h    11
-rw-r--r--  Python/generated_cases.c.h   87
-rw-r--r--  Python/instrumentation.c      8
-rw-r--r--  Python/optimizer.c           37
-rw-r--r--  Python/specialize.c           8
8 files changed, 117 insertions, 171 deletions
diff --git a/Python/bytecodes.c b/Python/bytecodes.c
index fa53c96..8af48d9 100644
--- a/Python/bytecodes.c
+++ b/Python/bytecodes.c
@@ -8,6 +8,7 @@
#include "Python.h"
#include "pycore_abstract.h" // _PyIndex_Check()
+#include "pycore_backoff.h"
#include "pycore_cell.h" // PyCell_GetRef()
#include "pycore_code.h"
#include "pycore_emscripten_signal.h" // _Py_CHECK_EMSCRIPTEN_SIGNALS
@@ -326,13 +327,13 @@ dummy_func(
specializing op(_SPECIALIZE_TO_BOOL, (counter/1, value -- value)) {
#if ENABLE_SPECIALIZATION
- if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
+ if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
next_instr = this_instr;
_Py_Specialize_ToBool(value, next_instr);
DISPATCH_SAME_OPARG();
}
STAT_INC(TO_BOOL, deferred);
- DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
+ ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
#endif /* ENABLE_SPECIALIZATION */
}
@@ -551,13 +552,13 @@ dummy_func(
specializing op(_SPECIALIZE_BINARY_SUBSCR, (counter/1, container, sub -- container, sub)) {
#if ENABLE_SPECIALIZATION
- if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
+ if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
next_instr = this_instr;
_Py_Specialize_BinarySubscr(container, sub, next_instr);
DISPATCH_SAME_OPARG();
}
STAT_INC(BINARY_SUBSCR, deferred);
- DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
+ ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
#endif /* ENABLE_SPECIALIZATION */
}
@@ -698,13 +699,13 @@ dummy_func(
specializing op(_SPECIALIZE_STORE_SUBSCR, (counter/1, container, sub -- container, sub)) {
#if ENABLE_SPECIALIZATION
- if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
+ if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
next_instr = this_instr;
_Py_Specialize_StoreSubscr(container, sub, next_instr);
DISPATCH_SAME_OPARG();
}
STAT_INC(STORE_SUBSCR, deferred);
- DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
+ ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
#endif /* ENABLE_SPECIALIZATION */
}
@@ -982,13 +983,13 @@ dummy_func(
specializing op(_SPECIALIZE_SEND, (counter/1, receiver, unused -- receiver, unused)) {
#if ENABLE_SPECIALIZATION
- if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
+ if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
next_instr = this_instr;
_Py_Specialize_Send(receiver, next_instr);
DISPATCH_SAME_OPARG();
}
STAT_INC(SEND, deferred);
- DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
+ ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
#endif /* ENABLE_SPECIALIZATION */
}
@@ -1211,13 +1212,13 @@ dummy_func(
specializing op(_SPECIALIZE_UNPACK_SEQUENCE, (counter/1, seq -- seq)) {
#if ENABLE_SPECIALIZATION
- if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
+ if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
next_instr = this_instr;
_Py_Specialize_UnpackSequence(seq, next_instr, oparg);
DISPATCH_SAME_OPARG();
}
STAT_INC(UNPACK_SEQUENCE, deferred);
- DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
+ ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
#endif /* ENABLE_SPECIALIZATION */
(void)seq;
(void)counter;
@@ -1280,14 +1281,14 @@ dummy_func(
specializing op(_SPECIALIZE_STORE_ATTR, (counter/1, owner -- owner)) {
#if ENABLE_SPECIALIZATION
- if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
+ if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
PyObject *name = GETITEM(FRAME_CO_NAMES, oparg);
next_instr = this_instr;
_Py_Specialize_StoreAttr(owner, next_instr, name);
DISPATCH_SAME_OPARG();
}
STAT_INC(STORE_ATTR, deferred);
- DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
+ ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
#endif /* ENABLE_SPECIALIZATION */
}
@@ -1398,14 +1399,14 @@ dummy_func(
specializing op(_SPECIALIZE_LOAD_GLOBAL, (counter/1 -- )) {
#if ENABLE_SPECIALIZATION
- if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
+ if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
PyObject *name = GETITEM(FRAME_CO_NAMES, oparg>>1);
next_instr = this_instr;
_Py_Specialize_LoadGlobal(GLOBALS(), BUILTINS(), next_instr, name);
DISPATCH_SAME_OPARG();
}
STAT_INC(LOAD_GLOBAL, deferred);
- DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
+ ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
#endif /* ENABLE_SPECIALIZATION */
}
@@ -1711,7 +1712,7 @@ dummy_func(
inst(INSTRUMENTED_LOAD_SUPER_ATTR, (unused/1, unused, unused, unused -- unused, unused if (oparg & 1))) {
// cancel out the decrement that will happen in LOAD_SUPER_ATTR; we
// don't want to specialize instrumented instructions
- INCREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
+ PAUSE_ADAPTIVE_COUNTER(this_instr[1].counter);
GO_TO_INSTRUCTION(LOAD_SUPER_ATTR);
}
@@ -1723,13 +1724,13 @@ dummy_func(
specializing op(_SPECIALIZE_LOAD_SUPER_ATTR, (counter/1, global_super, class, unused -- global_super, class, unused)) {
#if ENABLE_SPECIALIZATION
int load_method = oparg & 1;
- if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
+ if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
next_instr = this_instr;
_Py_Specialize_LoadSuperAttr(global_super, class, next_instr, load_method);
DISPATCH_SAME_OPARG();
}
STAT_INC(LOAD_SUPER_ATTR, deferred);
- DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
+ ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
#endif /* ENABLE_SPECIALIZATION */
}
@@ -1836,14 +1837,14 @@ dummy_func(
specializing op(_SPECIALIZE_LOAD_ATTR, (counter/1, owner -- owner)) {
#if ENABLE_SPECIALIZATION
- if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
+ if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
PyObject *name = GETITEM(FRAME_CO_NAMES, oparg>>1);
next_instr = this_instr;
_Py_Specialize_LoadAttr(owner, next_instr, name);
DISPATCH_SAME_OPARG();
}
STAT_INC(LOAD_ATTR, deferred);
- DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
+ ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
#endif /* ENABLE_SPECIALIZATION */
}
@@ -2157,13 +2158,13 @@ dummy_func(
specializing op(_SPECIALIZE_COMPARE_OP, (counter/1, left, right -- left, right)) {
#if ENABLE_SPECIALIZATION
- if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
+ if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
next_instr = this_instr;
_Py_Specialize_CompareOp(left, right, next_instr, oparg);
DISPATCH_SAME_OPARG();
}
STAT_INC(COMPARE_OP, deferred);
- DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
+ ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
#endif /* ENABLE_SPECIALIZATION */
}
@@ -2254,13 +2255,13 @@ dummy_func(
specializing op(_SPECIALIZE_CONTAINS_OP, (counter/1, left, right -- left, right)) {
#if ENABLE_SPECIALIZATION
- if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
+ if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
next_instr = this_instr;
_Py_Specialize_ContainsOp(right, next_instr);
DISPATCH_SAME_OPARG();
}
STAT_INC(CONTAINS_OP, deferred);
- DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
+ ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
#endif /* ENABLE_SPECIALIZATION */
}
@@ -2340,16 +2341,8 @@ dummy_func(
assert(oparg <= INSTR_OFFSET());
JUMPBY(-oparg);
#if ENABLE_SPECIALIZATION
- uint16_t counter = this_instr[1].cache;
- this_instr[1].cache = counter + (1 << OPTIMIZER_BITS_IN_COUNTER);
- /* We are using unsigned values, but we really want signed values, so
- * do the 2s complement adjustment manually */
- uint32_t offset_counter = counter ^ (1 << 15);
- uint32_t threshold = tstate->interp->optimizer_backedge_threshold;
- assert((threshold & OPTIMIZER_BITS_MASK) == 0);
- // Use '>=' not '>' so that the optimizer/backoff bits do not effect the result.
- // Double-check that the opcode isn't instrumented or something:
- if (offset_counter >= threshold && this_instr->op.code == JUMP_BACKWARD) {
+ _Py_BackoffCounter counter = this_instr[1].counter;
+ if (backoff_counter_triggers(counter) && this_instr->op.code == JUMP_BACKWARD) {
_Py_CODEUNIT *start = this_instr;
/* Back up over EXTENDED_ARGs so optimizer sees the whole instruction */
while (oparg > 255) {
@@ -2365,17 +2358,12 @@ dummy_func(
GOTO_TIER_TWO(executor);
}
else {
- int backoff = this_instr[1].cache & OPTIMIZER_BITS_MASK;
- backoff++;
- if (backoff < MIN_TIER2_BACKOFF) {
- backoff = MIN_TIER2_BACKOFF;
- }
- else if (backoff > MAX_TIER2_BACKOFF) {
- backoff = MAX_TIER2_BACKOFF;
- }
- this_instr[1].cache = ((UINT16_MAX << OPTIMIZER_BITS_IN_COUNTER) << backoff) | backoff;
+ this_instr[1].counter = restart_backoff_counter(counter);
}
}
+ else {
+ ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
+ }
#endif /* ENABLE_SPECIALIZATION */
}
@@ -2535,13 +2523,13 @@ dummy_func(
specializing op(_SPECIALIZE_FOR_ITER, (counter/1, iter -- iter)) {
#if ENABLE_SPECIALIZATION
- if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
+ if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
next_instr = this_instr;
_Py_Specialize_ForIter(iter, next_instr, oparg);
DISPATCH_SAME_OPARG();
}
STAT_INC(FOR_ITER, deferred);
- DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
+ ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
#endif /* ENABLE_SPECIALIZATION */
}
@@ -3001,7 +2989,7 @@ dummy_func(
tstate, PY_MONITORING_EVENT_CALL,
frame, this_instr, function, arg);
ERROR_IF(err, error);
- INCREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
+ PAUSE_ADAPTIVE_COUNTER(this_instr[1].counter);
GO_TO_INSTRUCTION(CALL);
}
@@ -3030,13 +3018,13 @@ dummy_func(
specializing op(_SPECIALIZE_CALL, (counter/1, callable, self_or_null, args[oparg] -- callable, self_or_null, args[oparg])) {
#if ENABLE_SPECIALIZATION
- if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
+ if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
next_instr = this_instr;
_Py_Specialize_Call(callable, next_instr, oparg + (self_or_null != NULL));
DISPATCH_SAME_OPARG();
}
STAT_INC(CALL, deferred);
- DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
+ ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
#endif /* ENABLE_SPECIALIZATION */
}
@@ -3933,13 +3921,13 @@ dummy_func(
specializing op(_SPECIALIZE_BINARY_OP, (counter/1, lhs, rhs -- lhs, rhs)) {
#if ENABLE_SPECIALIZATION
- if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
+ if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
next_instr = this_instr;
_Py_Specialize_BinaryOp(lhs, rhs, next_instr, oparg, LOCALS_ARRAY);
DISPATCH_SAME_OPARG();
}
STAT_INC(BINARY_OP, deferred);
- DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
+ ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
#endif /* ENABLE_SPECIALIZATION */
assert(NB_ADD <= oparg);
assert(oparg <= NB_INPLACE_XOR);
@@ -3965,7 +3953,7 @@ dummy_func(
ERROR_IF(next_opcode < 0, error);
next_instr = this_instr;
if (_PyOpcode_Caches[next_opcode]) {
- INCREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
+ PAUSE_ADAPTIVE_COUNTER(next_instr[1].counter);
}
assert(next_opcode > 0 && next_opcode < 256);
opcode = next_opcode;
@@ -4157,21 +4145,22 @@ dummy_func(
tier2 op(_COLD_EXIT, (--)) {
_PyExecutorObject *previous = (_PyExecutorObject *)tstate->previous_executor;
_PyExitData *exit = &previous->exits[oparg];
- exit->temperature++;
PyCodeObject *code = _PyFrame_GetCode(frame);
_Py_CODEUNIT *target = _PyCode_CODE(code) + exit->target;
- if (exit->temperature < (int32_t)tstate->interp->optimizer_side_threshold) {
+ _Py_BackoffCounter temperature = exit->temperature;
+ if (!backoff_counter_triggers(temperature)) {
+ exit->temperature = advance_backoff_counter(temperature);
GOTO_TIER_ONE(target);
}
_PyExecutorObject *executor;
if (target->op.code == ENTER_EXECUTOR) {
executor = code->co_executors->executors[target->op.arg];
Py_INCREF(executor);
- } else {
+ }
+ else {
int optimized = _PyOptimizer_Optimize(frame, target, stack_pointer, &executor);
if (optimized <= 0) {
- int32_t new_temp = -1 * tstate->interp->optimizer_side_threshold;
- exit->temperature = (new_temp < INT16_MIN) ? INT16_MIN : new_temp;
+ exit->temperature = restart_backoff_counter(temperature);
if (optimized < 0) {
Py_DECREF(previous);
tstate->previous_executor = Py_None;
@@ -4181,7 +4170,7 @@ dummy_func(
}
}
/* We need two references. One to store in exit->executor and
- * one to keep the executor alive when executing. */
+ * one to keep the executor alive when executing. */
Py_INCREF(executor);
exit->executor = executor;
GOTO_TIER_TWO(executor);
diff --git a/Python/ceval.c b/Python/ceval.c
index f3b7316..57ae08e 100644
--- a/Python/ceval.c
+++ b/Python/ceval.c
@@ -4,6 +4,7 @@
#include "Python.h"
#include "pycore_abstract.h" // _PyIndex_Check()
+#include "pycore_backoff.h"
#include "pycore_call.h" // _PyObject_CallNoArgs()
#include "pycore_cell.h" // PyCell_GetRef()
#include "pycore_ceval.h"
@@ -822,7 +823,7 @@ resume_frame:
_PyBinaryOpCache *cache = (_PyBinaryOpCache *)(next_instr+1);
/* Prevent the underlying instruction from specializing
* and overwriting the instrumentation. */
- INCREMENT_ADAPTIVE_COUNTER(cache->counter);
+ PAUSE_ADAPTIVE_COUNTER(cache->counter);
}
opcode = original_opcode;
DISPATCH_GOTO();
@@ -1099,7 +1100,7 @@ exit_to_trace:
printf("SIDE EXIT: [UOp ");
_PyUOpPrint(&next_uop[-1]);
printf(", exit %u, temp %d, target %d -> %s]\n",
- exit_index, exit->temperature, exit->target,
+ exit_index, exit->temperature.as_counter, exit->target,
_PyOpcode_OpName[_PyCode_CODE(_PyFrame_GetCode(frame))[exit->target].op.code]);
}
#endif
diff --git a/Python/ceval_macros.h b/Python/ceval_macros.h
index 1194c11..224cd1d 100644
--- a/Python/ceval_macros.h
+++ b/Python/ceval_macros.h
@@ -262,7 +262,7 @@ GETITEM(PyObject *v, Py_ssize_t i) {
STAT_INC(opcode, miss); \
STAT_INC((INSTNAME), miss); \
/* The counter is always the first cache entry: */ \
- if (ADAPTIVE_COUNTER_IS_ZERO(next_instr->cache)) { \
+ if (ADAPTIVE_COUNTER_TRIGGERS(next_instr->cache)) { \
STAT_INC((INSTNAME), deopt); \
} \
} while (0)
@@ -290,29 +290,28 @@ GETITEM(PyObject *v, Py_ssize_t i) {
dtrace_function_entry(frame); \
}
-#define ADAPTIVE_COUNTER_IS_ZERO(COUNTER) \
- (((COUNTER) >> ADAPTIVE_BACKOFF_BITS) == 0)
-
-#define ADAPTIVE_COUNTER_IS_MAX(COUNTER) \
- (((COUNTER) >> ADAPTIVE_BACKOFF_BITS) == ((1 << MAX_BACKOFF_VALUE) - 1))
+/* This takes a uint16_t instead of a _Py_BackoffCounter,
+ * because it is used directly on the cache entry in generated code,
+ * which is always an integral type. */
+#define ADAPTIVE_COUNTER_TRIGGERS(COUNTER) \
+ backoff_counter_triggers(forge_backoff_counter((COUNTER)))
#ifdef Py_GIL_DISABLED
-#define DECREMENT_ADAPTIVE_COUNTER(COUNTER) \
- do { \
- /* gh-115999 tracks progress on addressing this. */ \
+#define ADVANCE_ADAPTIVE_COUNTER(COUNTER) \
+ do { \
+ /* gh-115999 tracks progress on addressing this. */ \
static_assert(0, "The specializing interpreter is not yet thread-safe"); \
} while (0);
#else
-#define DECREMENT_ADAPTIVE_COUNTER(COUNTER) \
- do { \
- assert(!ADAPTIVE_COUNTER_IS_ZERO((COUNTER))); \
- (COUNTER) -= (1 << ADAPTIVE_BACKOFF_BITS); \
+#define ADVANCE_ADAPTIVE_COUNTER(COUNTER) \
+ do { \
+ (COUNTER) = advance_backoff_counter((COUNTER)); \
} while (0);
#endif
-#define INCREMENT_ADAPTIVE_COUNTER(COUNTER) \
- do { \
- (COUNTER) += (1 << ADAPTIVE_BACKOFF_BITS); \
+#define PAUSE_ADAPTIVE_COUNTER(COUNTER) \
+ do { \
+ (COUNTER) = pause_backoff_counter((COUNTER)); \
} while (0);
#define UNBOUNDLOCAL_ERROR_MSG \
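
Assuming the sketch given under the commit message, the macros in this hunk reduce to a simple life cycle. The hypothetical driver below (not part of the patch) prints the exponentially growing gaps between optimization attempts:

    /* Hypothetical driver, reusing the sketched helpers above. */
    #include <stdio.h>

    int main(void)
    {
        _Py_BackoffCounter c = forge_backoff_counter(0);  /* triggers at once */
        for (int failure = 1; failure <= 4; failure++) {
            c = restart_backoff_counter(c);      /* attempt failed: back off */
            printf("failure %d: retry after %u more executions\n",
                   failure, (unsigned)c.value);
            while (!backoff_counter_triggers(c)) {
                c = advance_backoff_counter(c);  /* one tick per execution */
            }
        }
        return 0;
    }

With a fresh counter this prints gaps of 1, 3, 7, 15 executions: each failure roughly doubles the wait, so code that never becomes optimizable quickly stops paying the optimizer's cost.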
diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h
index 9847679..8c3d41b 100644
--- a/Python/executor_cases.c.h
+++ b/Python/executor_cases.c.h
@@ -3694,21 +3694,22 @@
oparg = CURRENT_OPARG();
_PyExecutorObject *previous = (_PyExecutorObject *)tstate->previous_executor;
_PyExitData *exit = &previous->exits[oparg];
- exit->temperature++;
PyCodeObject *code = _PyFrame_GetCode(frame);
_Py_CODEUNIT *target = _PyCode_CODE(code) + exit->target;
- if (exit->temperature < (int32_t)tstate->interp->optimizer_side_threshold) {
+ _Py_BackoffCounter temperature = exit->temperature;
+ if (!backoff_counter_triggers(temperature)) {
+ exit->temperature = advance_backoff_counter(temperature);
GOTO_TIER_ONE(target);
}
_PyExecutorObject *executor;
if (target->op.code == ENTER_EXECUTOR) {
executor = code->co_executors->executors[target->op.arg];
Py_INCREF(executor);
- } else {
+ }
+ else {
int optimized = _PyOptimizer_Optimize(frame, target, stack_pointer, &executor);
if (optimized <= 0) {
- int32_t new_temp = -1 * tstate->interp->optimizer_side_threshold;
- exit->temperature = (new_temp < INT16_MIN) ? INT16_MIN : new_temp;
+ exit->temperature = restart_backoff_counter(temperature);
if (optimized < 0) {
Py_DECREF(previous);
tstate->previous_executor = Py_None;
diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h
index 6ee794a..0116acd 100644
--- a/Python/generated_cases.c.h
+++ b/Python/generated_cases.c.h
@@ -115,13 +115,13 @@
uint16_t counter = read_u16(&this_instr[1].cache);
(void)counter;
#if ENABLE_SPECIALIZATION
- if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
+ if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
next_instr = this_instr;
_Py_Specialize_BinaryOp(lhs, rhs, next_instr, oparg, LOCALS_ARRAY);
DISPATCH_SAME_OPARG();
}
STAT_INC(BINARY_OP, deferred);
- DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
+ ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
#endif /* ENABLE_SPECIALIZATION */
assert(NB_ADD <= oparg);
assert(oparg <= NB_INPLACE_XOR);
@@ -432,13 +432,13 @@
uint16_t counter = read_u16(&this_instr[1].cache);
(void)counter;
#if ENABLE_SPECIALIZATION
- if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
+ if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
next_instr = this_instr;
_Py_Specialize_BinarySubscr(container, sub, next_instr);
DISPATCH_SAME_OPARG();
}
STAT_INC(BINARY_SUBSCR, deferred);
- DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
+ ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
#endif /* ENABLE_SPECIALIZATION */
}
// _BINARY_SUBSCR
@@ -760,13 +760,13 @@
uint16_t counter = read_u16(&this_instr[1].cache);
(void)counter;
#if ENABLE_SPECIALIZATION
- if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
+ if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
next_instr = this_instr;
_Py_Specialize_Call(callable, next_instr, oparg + (self_or_null != NULL));
DISPATCH_SAME_OPARG();
}
STAT_INC(CALL, deferred);
- DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
+ ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
#endif /* ENABLE_SPECIALIZATION */
}
/* Skip 2 cache entries */
@@ -2036,13 +2036,13 @@
uint16_t counter = read_u16(&this_instr[1].cache);
(void)counter;
#if ENABLE_SPECIALIZATION
- if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
+ if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
next_instr = this_instr;
_Py_Specialize_CompareOp(left, right, next_instr, oparg);
DISPATCH_SAME_OPARG();
}
STAT_INC(COMPARE_OP, deferred);
- DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
+ ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
#endif /* ENABLE_SPECIALIZATION */
}
// _COMPARE_OP
@@ -2185,13 +2185,13 @@
uint16_t counter = read_u16(&this_instr[1].cache);
(void)counter;
#if ENABLE_SPECIALIZATION
- if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
+ if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
next_instr = this_instr;
_Py_Specialize_ContainsOp(right, next_instr);
DISPATCH_SAME_OPARG();
}
STAT_INC(CONTAINS_OP, deferred);
- DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
+ ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
#endif /* ENABLE_SPECIALIZATION */
}
// _CONTAINS_OP
@@ -2596,13 +2596,13 @@
uint16_t counter = read_u16(&this_instr[1].cache);
(void)counter;
#if ENABLE_SPECIALIZATION
- if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
+ if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
next_instr = this_instr;
_Py_Specialize_ForIter(iter, next_instr, oparg);
DISPATCH_SAME_OPARG();
}
STAT_INC(FOR_ITER, deferred);
- DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
+ ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
#endif /* ENABLE_SPECIALIZATION */
}
// _FOR_ITER
@@ -3026,7 +3026,7 @@
tstate, PY_MONITORING_EVENT_CALL,
frame, this_instr, function, arg);
if (err) goto error;
- INCREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
+ PAUSE_ADAPTIVE_COUNTER(this_instr[1].counter);
GO_TO_INSTRUCTION(CALL);
}
@@ -3142,7 +3142,7 @@
if (next_opcode < 0) goto error;
next_instr = this_instr;
if (_PyOpcode_Caches[next_opcode]) {
- INCREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
+ PAUSE_ADAPTIVE_COUNTER(next_instr[1].counter);
}
assert(next_opcode > 0 && next_opcode < 256);
opcode = next_opcode;
@@ -3177,7 +3177,7 @@
/* Skip 1 cache entry */
// cancel out the decrement that will happen in LOAD_SUPER_ATTR; we
// don't want to specialize instrumented instructions
- INCREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
+ PAUSE_ADAPTIVE_COUNTER(this_instr[1].counter);
GO_TO_INSTRUCTION(LOAD_SUPER_ATTR);
}
@@ -3415,16 +3415,8 @@
assert(oparg <= INSTR_OFFSET());
JUMPBY(-oparg);
#if ENABLE_SPECIALIZATION
- uint16_t counter = this_instr[1].cache;
- this_instr[1].cache = counter + (1 << OPTIMIZER_BITS_IN_COUNTER);
- /* We are using unsigned values, but we really want signed values, so
- * do the 2s complement adjustment manually */
- uint32_t offset_counter = counter ^ (1 << 15);
- uint32_t threshold = tstate->interp->optimizer_backedge_threshold;
- assert((threshold & OPTIMIZER_BITS_MASK) == 0);
- // Use '>=' not '>' so that the optimizer/backoff bits do not effect the result.
- // Double-check that the opcode isn't instrumented or something:
- if (offset_counter >= threshold && this_instr->op.code == JUMP_BACKWARD) {
+ _Py_BackoffCounter counter = this_instr[1].counter;
+ if (backoff_counter_triggers(counter) && this_instr->op.code == JUMP_BACKWARD) {
_Py_CODEUNIT *start = this_instr;
/* Back up over EXTENDED_ARGs so optimizer sees the whole instruction */
while (oparg > 255) {
@@ -3440,17 +3432,12 @@
GOTO_TIER_TWO(executor);
}
else {
- int backoff = this_instr[1].cache & OPTIMIZER_BITS_MASK;
- backoff++;
- if (backoff < MIN_TIER2_BACKOFF) {
- backoff = MIN_TIER2_BACKOFF;
- }
- else if (backoff > MAX_TIER2_BACKOFF) {
- backoff = MAX_TIER2_BACKOFF;
- }
- this_instr[1].cache = ((UINT16_MAX << OPTIMIZER_BITS_IN_COUNTER) << backoff) | backoff;
+ this_instr[1].counter = restart_backoff_counter(counter);
}
}
+ else {
+ ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
+ }
#endif /* ENABLE_SPECIALIZATION */
DISPATCH();
}
@@ -3543,14 +3530,14 @@
uint16_t counter = read_u16(&this_instr[1].cache);
(void)counter;
#if ENABLE_SPECIALIZATION
- if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
+ if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
PyObject *name = GETITEM(FRAME_CO_NAMES, oparg>>1);
next_instr = this_instr;
_Py_Specialize_LoadAttr(owner, next_instr, name);
DISPATCH_SAME_OPARG();
}
STAT_INC(LOAD_ATTR, deferred);
- DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
+ ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
#endif /* ENABLE_SPECIALIZATION */
}
/* Skip 8 cache entries */
@@ -4238,14 +4225,14 @@
uint16_t counter = read_u16(&this_instr[1].cache);
(void)counter;
#if ENABLE_SPECIALIZATION
- if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
+ if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
PyObject *name = GETITEM(FRAME_CO_NAMES, oparg>>1);
next_instr = this_instr;
_Py_Specialize_LoadGlobal(GLOBALS(), BUILTINS(), next_instr, name);
DISPATCH_SAME_OPARG();
}
STAT_INC(LOAD_GLOBAL, deferred);
- DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
+ ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
#endif /* ENABLE_SPECIALIZATION */
}
/* Skip 1 cache entry */
@@ -4442,13 +4429,13 @@
(void)counter;
#if ENABLE_SPECIALIZATION
int load_method = oparg & 1;
- if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
+ if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
next_instr = this_instr;
_Py_Specialize_LoadSuperAttr(global_super, class, next_instr, load_method);
DISPATCH_SAME_OPARG();
}
STAT_INC(LOAD_SUPER_ATTR, deferred);
- DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
+ ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
#endif /* ENABLE_SPECIALIZATION */
}
// _LOAD_SUPER_ATTR
@@ -5083,13 +5070,13 @@
uint16_t counter = read_u16(&this_instr[1].cache);
(void)counter;
#if ENABLE_SPECIALIZATION
- if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
+ if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
next_instr = this_instr;
_Py_Specialize_Send(receiver, next_instr);
DISPATCH_SAME_OPARG();
}
STAT_INC(SEND, deferred);
- DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
+ ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
#endif /* ENABLE_SPECIALIZATION */
}
// _SEND
@@ -5271,14 +5258,14 @@
uint16_t counter = read_u16(&this_instr[1].cache);
(void)counter;
#if ENABLE_SPECIALIZATION
- if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
+ if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
PyObject *name = GETITEM(FRAME_CO_NAMES, oparg);
next_instr = this_instr;
_Py_Specialize_StoreAttr(owner, next_instr, name);
DISPATCH_SAME_OPARG();
}
STAT_INC(STORE_ATTR, deferred);
- DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
+ ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
#endif /* ENABLE_SPECIALIZATION */
}
/* Skip 3 cache entries */
@@ -5562,13 +5549,13 @@
uint16_t counter = read_u16(&this_instr[1].cache);
(void)counter;
#if ENABLE_SPECIALIZATION
- if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
+ if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
next_instr = this_instr;
_Py_Specialize_StoreSubscr(container, sub, next_instr);
DISPATCH_SAME_OPARG();
}
STAT_INC(STORE_SUBSCR, deferred);
- DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
+ ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
#endif /* ENABLE_SPECIALIZATION */
}
// _STORE_SUBSCR
@@ -5665,13 +5652,13 @@
uint16_t counter = read_u16(&this_instr[1].cache);
(void)counter;
#if ENABLE_SPECIALIZATION
- if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
+ if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
next_instr = this_instr;
_Py_Specialize_ToBool(value, next_instr);
DISPATCH_SAME_OPARG();
}
STAT_INC(TO_BOOL, deferred);
- DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
+ ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
#endif /* ENABLE_SPECIALIZATION */
}
/* Skip 2 cache entries */
@@ -5882,13 +5869,13 @@
uint16_t counter = read_u16(&this_instr[1].cache);
(void)counter;
#if ENABLE_SPECIALIZATION
- if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
+ if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
next_instr = this_instr;
_Py_Specialize_UnpackSequence(seq, next_instr, oparg);
DISPATCH_SAME_OPARG();
}
STAT_INC(UNPACK_SEQUENCE, deferred);
- DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
+ ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
#endif /* ENABLE_SPECIALIZATION */
(void)seq;
(void)counter;
diff --git a/Python/instrumentation.c b/Python/instrumentation.c
index 018cd66..0f60290 100644
--- a/Python/instrumentation.c
+++ b/Python/instrumentation.c
@@ -590,7 +590,7 @@ de_instrument(PyCodeObject *code, int i, int event)
CHECK(_PyOpcode_Deopt[deinstrumented] == deinstrumented);
*opcode_ptr = deinstrumented;
if (_PyOpcode_Caches[deinstrumented]) {
- instr[1].cache = adaptive_counter_warmup();
+ instr[1].counter = adaptive_counter_warmup();
}
}
@@ -611,7 +611,7 @@ de_instrument_line(PyCodeObject *code, int i)
CHECK(original_opcode == _PyOpcode_Deopt[original_opcode]);
instr->op.code = original_opcode;
if (_PyOpcode_Caches[original_opcode]) {
- instr[1].cache = adaptive_counter_warmup();
+ instr[1].counter = adaptive_counter_warmup();
}
assert(instr->op.code != INSTRUMENTED_LINE);
}
@@ -634,7 +634,7 @@ de_instrument_per_instruction(PyCodeObject *code, int i)
CHECK(original_opcode == _PyOpcode_Deopt[original_opcode]);
*opcode_ptr = original_opcode;
if (_PyOpcode_Caches[original_opcode]) {
- instr[1].cache = adaptive_counter_warmup();
+ instr[1].counter = adaptive_counter_warmup();
}
assert(*opcode_ptr != INSTRUMENTED_INSTRUCTION);
assert(instr->op.code != INSTRUMENTED_INSTRUCTION);
@@ -667,7 +667,7 @@ instrument(PyCodeObject *code, int i)
assert(instrumented);
*opcode_ptr = instrumented;
if (_PyOpcode_Caches[deopt]) {
- instr[1].cache = adaptive_counter_warmup();
+ instr[1].counter = adaptive_counter_warmup();
}
}
}
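
Each of the (de-)instrumentation paths above resets the cache entry to a warmup counter. A plausible sketch, assuming the layout from earlier (the real constants live in pycore_backoff.h):

    /* Sketch: a short warmup so (de-)instrumented code re-specializes
     * after only a couple of executions. Constants are assumptions. */
    static inline _Py_BackoffCounter
    adaptive_counter_warmup(void)
    {
        _Py_BackoffCounter c = {.value = 1, .backoff = 1};
        return c;
    }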
diff --git a/Python/optimizer.c b/Python/optimizer.c
index 38ab6d3..5c69d9d 100644
--- a/Python/optimizer.c
+++ b/Python/optimizer.c
@@ -1,6 +1,7 @@
#include "Python.h"
#include "opcode.h"
#include "pycore_interp.h"
+#include "pycore_backoff.h"
#include "pycore_bitutils.h" // _Py_popcount32()
#include "pycore_object.h" // _PyObject_GC_UNTRACK()
#include "pycore_opcode_metadata.h" // _PyOpcode_OpName[]
@@ -110,9 +111,7 @@ never_optimize(
_PyExecutorObject **exec,
int Py_UNUSED(stack_entries))
{
- /* Although it should be benign for this to be called,
- * it shouldn't happen, so fail in debug builds. */
- assert(0 && "never optimize should never be called");
+ // This may be called if the optimizer is reset
return 0;
}
@@ -127,25 +126,12 @@ PyTypeObject _PyDefaultOptimizer_Type = {
static _PyOptimizerObject _PyOptimizer_Default = {
PyObject_HEAD_INIT(&_PyDefaultOptimizer_Type)
.optimize = never_optimize,
- .resume_threshold = OPTIMIZER_UNREACHABLE_THRESHOLD,
- .backedge_threshold = OPTIMIZER_UNREACHABLE_THRESHOLD,
- .side_threshold = OPTIMIZER_UNREACHABLE_THRESHOLD,
};
-static uint32_t
-shift_and_offset_threshold(uint32_t threshold)
-{
- return (threshold << OPTIMIZER_BITS_IN_COUNTER) + (1 << 15);
-}
-
_PyOptimizerObject *
PyUnstable_GetOptimizer(void)
{
PyInterpreterState *interp = _PyInterpreterState_GET();
- assert(interp->optimizer_backedge_threshold ==
- shift_and_offset_threshold(interp->optimizer->backedge_threshold));
- assert(interp->optimizer_resume_threshold ==
- shift_and_offset_threshold(interp->optimizer->resume_threshold));
if (interp->optimizer == &_PyOptimizer_Default) {
return NULL;
}
@@ -190,13 +176,6 @@ _Py_SetOptimizer(PyInterpreterState *interp, _PyOptimizerObject *optimizer)
}
Py_INCREF(optimizer);
interp->optimizer = optimizer;
- interp->optimizer_backedge_threshold = shift_and_offset_threshold(optimizer->backedge_threshold);
- interp->optimizer_resume_threshold = shift_and_offset_threshold(optimizer->resume_threshold);
- interp->optimizer_side_threshold = optimizer->side_threshold;
- if (optimizer == &_PyOptimizer_Default) {
- assert(interp->optimizer_backedge_threshold > (1 << 16));
- assert(interp->optimizer_resume_threshold > (1 << 16));
- }
return old;
}
@@ -1109,7 +1088,7 @@ make_executor_from_uops(_PyUOpInstruction *buffer, int length, const _PyBloomFil
assert(exit_count < COLD_EXIT_COUNT);
for (int i = 0; i < exit_count; i++) {
executor->exits[i].executor = &COLD_EXITS[i];
- executor->exits[i].temperature = 0;
+ executor->exits[i].temperature = initial_temperature_backoff_counter();
}
int next_exit = exit_count-1;
_PyUOpInstruction *dest = (_PyUOpInstruction *)&executor->trace[length];
@@ -1291,11 +1270,6 @@ PyUnstable_Optimizer_NewUOpOptimizer(void)
return NULL;
}
opt->optimize = uop_optimize;
- opt->resume_threshold = OPTIMIZER_UNREACHABLE_THRESHOLD;
- // Need a few iterations to settle specializations,
- // and to ammortize the cost of optimization.
- opt->side_threshold = 16;
- opt->backedge_threshold = 16;
return (PyObject *)opt;
}
@@ -1385,9 +1359,6 @@ PyUnstable_Optimizer_NewCounter(void)
return NULL;
}
opt->base.optimize = counter_optimize;
- opt->base.resume_threshold = OPTIMIZER_UNREACHABLE_THRESHOLD;
- opt->base.side_threshold = OPTIMIZER_UNREACHABLE_THRESHOLD;
- opt->base.backedge_threshold = 0;
opt->count = 0;
return (PyObject *)opt;
}
@@ -1554,7 +1525,7 @@ _Py_ExecutorClear(_PyExecutorObject *executor)
for (uint32_t i = 0; i < executor->exit_count; i++) {
Py_DECREF(executor->exits[i].executor);
executor->exits[i].executor = &COLD_EXITS[i];
- executor->exits[i].temperature = INT16_MIN;
+ executor->exits[i].temperature = initial_unreachable_backoff_counter();
}
_Py_CODEUNIT *instruction = &_PyCode_CODE(code)[executor->vm_data.index];
assert(instruction->op.code == ENTER_EXECUTOR);
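
The two initializers introduced here plausibly look like the following, given the commit message's statement that side-exit temperature counts down from 64 (2**6); the "unreachable" variant is sketched as a sentinel, on the assumption that the real advance step special-cases it:

    /* Sketch: side exits warm up over 64 executions before optimizing. */
    static inline _Py_BackoffCounter
    initial_temperature_backoff_counter(void)
    {
        _Py_BackoffCounter c = {.value = 64, .backoff = 6};
        return c;
    }

    /* Sketch: sentinel for cleared executors; assumes advance_backoff_counter
     * leaves this pattern alone so it never counts down to zero. */
    static inline _Py_BackoffCounter
    initial_unreachable_backoff_counter(void)
    {
        _Py_BackoffCounter c = {.value = 0xFFF, .backoff = 0xF};
        return c;
    }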
diff --git a/Python/specialize.c b/Python/specialize.c
index f1e32d0..0b4b199 100644
--- a/Python/specialize.c
+++ b/Python/specialize.c
@@ -419,22 +419,20 @@ _PyCode_Quicken(PyCodeObject *code)
int caches = _PyOpcode_Caches[opcode];
if (caches) {
// The initial value depends on the opcode
- int initial_value;
switch (opcode) {
case JUMP_BACKWARD:
- initial_value = 0;
+ instructions[i + 1].counter = initial_jump_backoff_counter();
break;
case POP_JUMP_IF_FALSE:
case POP_JUMP_IF_TRUE:
case POP_JUMP_IF_NONE:
case POP_JUMP_IF_NOT_NONE:
- initial_value = 0x5555; // Alternating 0, 1 bits
+ instructions[i + 1].cache = 0x5555; // Alternating 0, 1 bits
break;
default:
- initial_value = adaptive_counter_warmup();
+ instructions[i + 1].counter = adaptive_counter_warmup();
break;
}
- instructions[i + 1].cache = initial_value;
i += caches;
}
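
Finally, the ``JUMP_BACKWARD`` initializer referenced in this hunk: per the commit message the counter starts counting down from 16 (2**4), so a matching sketch is:

    /* Sketch: back edges trigger the Tier 2 optimizer after 16 jumps. */
    static inline _Py_BackoffCounter
    initial_jump_backoff_counter(void)
    {
        _Py_BackoffCounter c = {.value = 16, .backoff = 4};
        return c;
    }

Note that the ``POP_JUMP_IF_*`` caches keep their raw 0x5555 bit pattern and are still written through the integral ``.cache`` view rather than ``.counter``, since they are branch-direction histories, not backoff counters.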
}