author | Guido van Rossum <guido@python.org> | 2024-04-04 15:03:27 (GMT)
---|---|---
committer | GitHub <noreply@github.com> | 2024-04-04 15:03:27 (GMT)
commit | 060a96f1a9a901b01ed304aa82b886d248ca1cb6 (patch) |
tree | cb3e95ecac1f90440b7d3752c4aad015ea734bf0 /Python |
parent | 63bbe77d9bb2be4db83ed09b96dd22f2a44ef55b (diff) |
gh-116968: Reimplement Tier 2 counters (#117144)
Introduce a unified 16-bit backoff counter type (``_Py_BackoffCounter``),
shared between the Tier 1 adaptive specializer and the Tier 2 optimizer. The
API used for adaptive specialization counters is changed but the behavior is
(supposed to be) identical.
The behavior of the Tier 2 counters is changed:
- There are no longer dynamic thresholds (we never varied these).
- All counters now use the same exponential backoff.
- The counter for ``JUMP_BACKWARD`` starts counting down from 16.
- The ``temperature`` in side exits starts counting down from 64.
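The mechanics behind these numbers: each counter packs a countdown value together with a small backoff exponent into a single 16-bit cache entry. The counter "triggers" when the countdown reaches zero, and a failed optimization attempt restarts the countdown at roughly twice its previous length. The sketch below is illustrative only; the field widths, names, and helpers are assumptions for this sketch, not the actual ``pycore_backoff.h`` API.

```c
#include <stdint.h>

/* Illustrative 16-bit backoff counter: a countdown value plus a small
 * backoff exponent that sets how long the next countdown lasts.
 * Field widths, names, and helpers are assumptions for this sketch. */
typedef struct {
    uint16_t value   : 12;  // remaining ticks until the counter triggers
    uint16_t backoff : 4;   // log2 of the next countdown length
} backoff_counter_t;

static backoff_counter_t
make_counter(uint16_t value, uint16_t backoff)
{
    return (backoff_counter_t){ .value = value, .backoff = backoff };
}

// One event (e.g. one backward jump): count down toward zero.
static backoff_counter_t
advance_counter(backoff_counter_t c)
{
    if (c.value != 0) {
        c.value--;
    }
    return c;
}

// The counter "triggers" once the countdown is exhausted.
static int
counter_triggers(backoff_counter_t c)
{
    return c.value == 0;
}

// After a failed optimization attempt: restart the countdown,
// exponentially longer than last time (capped by the field width).
static backoff_counter_t
restart_counter(backoff_counter_t c)
{
    if (c.backoff < 12) {
        c.backoff++;
    }
    return make_counter((uint16_t)((1 << c.backoff) - 1), c.backoff);
}
```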
Diffstat (limited to 'Python')
-rw-r--r-- | Python/bytecodes.c | 101
-rw-r--r-- | Python/ceval.c | 5
-rw-r--r-- | Python/ceval_macros.h | 31
-rw-r--r-- | Python/executor_cases.c.h | 11
-rw-r--r-- | Python/generated_cases.c.h | 87
-rw-r--r-- | Python/instrumentation.c | 8
-rw-r--r-- | Python/optimizer.c | 37
-rw-r--r-- | Python/specialize.c | 8
8 files changed, 117 insertions, 171 deletions
diff --git a/Python/bytecodes.c b/Python/bytecodes.c
index fa53c96..8af48d9 100644
--- a/Python/bytecodes.c
+++ b/Python/bytecodes.c
@@ -8,6 +8,7 @@
 #include "Python.h"
 #include "pycore_abstract.h"      // _PyIndex_Check()
+#include "pycore_backoff.h"
 #include "pycore_cell.h"          // PyCell_GetRef()
 #include "pycore_code.h"
 #include "pycore_emscripten_signal.h"  // _Py_CHECK_EMSCRIPTEN_SIGNALS
@@ -326,13 +327,13 @@ dummy_func(
         specializing op(_SPECIALIZE_TO_BOOL, (counter/1, value -- value)) {
             #if ENABLE_SPECIALIZATION
-            if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
+            if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
                 next_instr = this_instr;
                 _Py_Specialize_ToBool(value, next_instr);
                 DISPATCH_SAME_OPARG();
             }
             STAT_INC(TO_BOOL, deferred);
-            DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
+            ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
             #endif /* ENABLE_SPECIALIZATION */
         }
@@ -551,13 +552,13 @@ dummy_func(
         specializing op(_SPECIALIZE_BINARY_SUBSCR, (counter/1, container, sub -- container, sub)) {
             #if ENABLE_SPECIALIZATION
-            if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
+            if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
                 next_instr = this_instr;
                 _Py_Specialize_BinarySubscr(container, sub, next_instr);
                 DISPATCH_SAME_OPARG();
             }
             STAT_INC(BINARY_SUBSCR, deferred);
-            DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
+            ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
             #endif /* ENABLE_SPECIALIZATION */
         }
@@ -698,13 +699,13 @@ dummy_func(
         specializing op(_SPECIALIZE_STORE_SUBSCR, (counter/1, container, sub -- container, sub)) {
             #if ENABLE_SPECIALIZATION
-            if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
+            if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
                 next_instr = this_instr;
                 _Py_Specialize_StoreSubscr(container, sub, next_instr);
                 DISPATCH_SAME_OPARG();
             }
             STAT_INC(STORE_SUBSCR, deferred);
-            DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
+            ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
             #endif /* ENABLE_SPECIALIZATION */
         }
@@ -982,13 +983,13 @@ dummy_func(
         specializing op(_SPECIALIZE_SEND, (counter/1, receiver, unused -- receiver, unused)) {
             #if ENABLE_SPECIALIZATION
-            if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
+            if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
                 next_instr = this_instr;
                 _Py_Specialize_Send(receiver, next_instr);
                 DISPATCH_SAME_OPARG();
             }
             STAT_INC(SEND, deferred);
-            DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
+            ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
             #endif /* ENABLE_SPECIALIZATION */
         }
@@ -1211,13 +1212,13 @@ dummy_func(
         specializing op(_SPECIALIZE_UNPACK_SEQUENCE, (counter/1, seq -- seq)) {
             #if ENABLE_SPECIALIZATION
-            if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
+            if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
                 next_instr = this_instr;
                 _Py_Specialize_UnpackSequence(seq, next_instr, oparg);
                 DISPATCH_SAME_OPARG();
             }
             STAT_INC(UNPACK_SEQUENCE, deferred);
-            DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
+            ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
             #endif /* ENABLE_SPECIALIZATION */
             (void)seq;
             (void)counter;
@@ -1280,14 +1281,14 @@ dummy_func(
         specializing op(_SPECIALIZE_STORE_ATTR, (counter/1, owner -- owner)) {
             #if ENABLE_SPECIALIZATION
-            if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
+            if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
                 PyObject *name = GETITEM(FRAME_CO_NAMES, oparg);
                 next_instr = this_instr;
                 _Py_Specialize_StoreAttr(owner, next_instr, name);
                 DISPATCH_SAME_OPARG();
             }
             STAT_INC(STORE_ATTR, deferred);
-            DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
+            ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
             #endif /* ENABLE_SPECIALIZATION */
         }
@@ -1398,14 +1399,14 @@ dummy_func(
         specializing op(_SPECIALIZE_LOAD_GLOBAL, (counter/1 -- )) {
             #if ENABLE_SPECIALIZATION
-            if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
+            if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
                 PyObject *name = GETITEM(FRAME_CO_NAMES, oparg>>1);
                 next_instr = this_instr;
                 _Py_Specialize_LoadGlobal(GLOBALS(), BUILTINS(), next_instr, name);
                 DISPATCH_SAME_OPARG();
             }
             STAT_INC(LOAD_GLOBAL, deferred);
-            DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
+            ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
             #endif /* ENABLE_SPECIALIZATION */
         }
@@ -1711,7 +1712,7 @@ dummy_func(
         inst(INSTRUMENTED_LOAD_SUPER_ATTR, (unused/1, unused, unused, unused -- unused, unused if (oparg & 1))) {
             // cancel out the decrement that will happen in LOAD_SUPER_ATTR; we
             // don't want to specialize instrumented instructions
-            INCREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
+            PAUSE_ADAPTIVE_COUNTER(this_instr[1].counter);
             GO_TO_INSTRUCTION(LOAD_SUPER_ATTR);
         }
@@ -1723,13 +1724,13 @@ dummy_func(
         specializing op(_SPECIALIZE_LOAD_SUPER_ATTR, (counter/1, global_super, class, unused -- global_super, class, unused)) {
             #if ENABLE_SPECIALIZATION
             int load_method = oparg & 1;
-            if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
+            if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
                 next_instr = this_instr;
                 _Py_Specialize_LoadSuperAttr(global_super, class, next_instr, load_method);
                 DISPATCH_SAME_OPARG();
             }
             STAT_INC(LOAD_SUPER_ATTR, deferred);
-            DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
+            ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
             #endif /* ENABLE_SPECIALIZATION */
         }
@@ -1836,14 +1837,14 @@ dummy_func(
         specializing op(_SPECIALIZE_LOAD_ATTR, (counter/1, owner -- owner)) {
             #if ENABLE_SPECIALIZATION
-            if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
+            if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
                 PyObject *name = GETITEM(FRAME_CO_NAMES, oparg>>1);
                 next_instr = this_instr;
                 _Py_Specialize_LoadAttr(owner, next_instr, name);
                 DISPATCH_SAME_OPARG();
             }
             STAT_INC(LOAD_ATTR, deferred);
-            DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
+            ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
             #endif /* ENABLE_SPECIALIZATION */
         }
@@ -2157,13 +2158,13 @@ dummy_func(
         specializing op(_SPECIALIZE_COMPARE_OP, (counter/1, left, right -- left, right)) {
             #if ENABLE_SPECIALIZATION
-            if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
+            if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
                 next_instr = this_instr;
                 _Py_Specialize_CompareOp(left, right, next_instr, oparg);
                 DISPATCH_SAME_OPARG();
             }
             STAT_INC(COMPARE_OP, deferred);
-            DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
+            ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
             #endif /* ENABLE_SPECIALIZATION */
         }
@@ -2254,13 +2255,13 @@ dummy_func(
         specializing op(_SPECIALIZE_CONTAINS_OP, (counter/1, left, right -- left, right)) {
             #if ENABLE_SPECIALIZATION
-            if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
+            if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
                 next_instr = this_instr;
                 _Py_Specialize_ContainsOp(right, next_instr);
                 DISPATCH_SAME_OPARG();
             }
             STAT_INC(CONTAINS_OP, deferred);
-            DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
+            ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
             #endif /* ENABLE_SPECIALIZATION */
         }
@@ -2340,16 +2341,8 @@ dummy_func(
             assert(oparg <= INSTR_OFFSET());
             JUMPBY(-oparg);
             #if ENABLE_SPECIALIZATION
-            uint16_t counter = this_instr[1].cache;
-            this_instr[1].cache = counter + (1 << OPTIMIZER_BITS_IN_COUNTER);
-            /* We are using unsigned values, but we really want signed values, so
-             * do the 2s complement adjustment manually */
-            uint32_t offset_counter = counter ^ (1 << 15);
-            uint32_t threshold = tstate->interp->optimizer_backedge_threshold;
-            assert((threshold & OPTIMIZER_BITS_MASK) == 0);
-            // Use '>=' not '>' so that the optimizer/backoff bits do not effect the result.
-            // Double-check that the opcode isn't instrumented or something:
-            if (offset_counter >= threshold && this_instr->op.code == JUMP_BACKWARD) {
+            _Py_BackoffCounter counter = this_instr[1].counter;
+            if (backoff_counter_triggers(counter) && this_instr->op.code == JUMP_BACKWARD) {
                 _Py_CODEUNIT *start = this_instr;
                 /* Back up over EXTENDED_ARGs so optimizer sees the whole instruction */
                 while (oparg > 255) {
@@ -2365,17 +2358,12 @@ dummy_func(
                     GOTO_TIER_TWO(executor);
                 }
                 else {
-                    int backoff = this_instr[1].cache & OPTIMIZER_BITS_MASK;
-                    backoff++;
-                    if (backoff < MIN_TIER2_BACKOFF) {
-                        backoff = MIN_TIER2_BACKOFF;
-                    }
-                    else if (backoff > MAX_TIER2_BACKOFF) {
-                        backoff = MAX_TIER2_BACKOFF;
-                    }
-                    this_instr[1].cache = ((UINT16_MAX << OPTIMIZER_BITS_IN_COUNTER) << backoff) | backoff;
+                    this_instr[1].counter = restart_backoff_counter(counter);
                 }
             }
+            else {
+                ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
+            }
             #endif /* ENABLE_SPECIALIZATION */
         }
@@ -2535,13 +2523,13 @@ dummy_func(
         specializing op(_SPECIALIZE_FOR_ITER, (counter/1, iter -- iter)) {
             #if ENABLE_SPECIALIZATION
-            if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
+            if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
                 next_instr = this_instr;
                 _Py_Specialize_ForIter(iter, next_instr, oparg);
                 DISPATCH_SAME_OPARG();
             }
             STAT_INC(FOR_ITER, deferred);
-            DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
+            ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
             #endif /* ENABLE_SPECIALIZATION */
         }
@@ -3001,7 +2989,7 @@ dummy_func(
                 tstate, PY_MONITORING_EVENT_CALL, frame, this_instr, function, arg);
             ERROR_IF(err, error);
-            INCREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
+            PAUSE_ADAPTIVE_COUNTER(this_instr[1].counter);
             GO_TO_INSTRUCTION(CALL);
         }
@@ -3030,13 +3018,13 @@ dummy_func(
         specializing op(_SPECIALIZE_CALL, (counter/1, callable, self_or_null, args[oparg] -- callable, self_or_null, args[oparg])) {
             #if ENABLE_SPECIALIZATION
-            if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
+            if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
                 next_instr = this_instr;
                 _Py_Specialize_Call(callable, next_instr, oparg + (self_or_null != NULL));
                 DISPATCH_SAME_OPARG();
             }
             STAT_INC(CALL, deferred);
-            DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
+            ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
             #endif /* ENABLE_SPECIALIZATION */
         }
@@ -3933,13 +3921,13 @@ dummy_func(
         specializing op(_SPECIALIZE_BINARY_OP, (counter/1, lhs, rhs -- lhs, rhs)) {
             #if ENABLE_SPECIALIZATION
-            if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
+            if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
                 next_instr = this_instr;
                 _Py_Specialize_BinaryOp(lhs, rhs, next_instr, oparg, LOCALS_ARRAY);
                 DISPATCH_SAME_OPARG();
             }
             STAT_INC(BINARY_OP, deferred);
-            DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
+            ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
             #endif /* ENABLE_SPECIALIZATION */
             assert(NB_ADD <= oparg);
             assert(oparg <= NB_INPLACE_XOR);
@@ -3965,7 +3953,7 @@ dummy_func(
             ERROR_IF(next_opcode < 0, error);
             next_instr = this_instr;
             if (_PyOpcode_Caches[next_opcode]) {
-                INCREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
+                PAUSE_ADAPTIVE_COUNTER(next_instr[1].counter);
             }
             assert(next_opcode > 0 && next_opcode < 256);
             opcode = next_opcode;
@@ -4157,21 +4145,22 @@ dummy_func(
         tier2 op(_COLD_EXIT, (--)) {
             _PyExecutorObject *previous = (_PyExecutorObject *)tstate->previous_executor;
             _PyExitData *exit = &previous->exits[oparg];
-            exit->temperature++;
             PyCodeObject *code = _PyFrame_GetCode(frame);
             _Py_CODEUNIT *target = _PyCode_CODE(code) + exit->target;
-            if (exit->temperature < (int32_t)tstate->interp->optimizer_side_threshold) {
+            _Py_BackoffCounter temperature = exit->temperature;
+            if (!backoff_counter_triggers(temperature)) {
+                exit->temperature = advance_backoff_counter(temperature);
                 GOTO_TIER_ONE(target);
             }
             _PyExecutorObject *executor;
             if (target->op.code == ENTER_EXECUTOR) {
                 executor = code->co_executors->executors[target->op.arg];
                 Py_INCREF(executor);
-            } else {
+            }
+            else {
                 int optimized = _PyOptimizer_Optimize(frame, target, stack_pointer, &executor);
                 if (optimized <= 0) {
-                    int32_t new_temp = -1 * tstate->interp->optimizer_side_threshold;
-                    exit->temperature = (new_temp < INT16_MIN) ? INT16_MIN : new_temp;
+                    exit->temperature = restart_backoff_counter(temperature);
                     if (optimized < 0) {
                         Py_DECREF(previous);
                         tstate->previous_executor = Py_None;
@@ -4181,7 +4170,7 @@ dummy_func(
                 }
             }
             /* We need two references. One to store in exit->executor and
-            * one to keep the executor alive when executing. */
+             * one to keep the executor alive when executing. */
             Py_INCREF(executor);
             exit->executor = executor;
             GOTO_TIER_TWO(executor);
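The ``JUMP_BACKWARD`` hunks above replace the hand-rolled threshold arithmetic with three cases: trigger and try to optimize, restart the counter when optimization fails, or simply advance. A control-flow sketch, reusing the illustrative helpers from the first example (``try_optimize_trace`` and ``enter_tier_two`` are hypothetical stand-ins, not CPython APIs):

```c
/* Control-flow sketch of the new JUMP_BACKWARD logic, reusing the
 * illustrative counter helpers from the first sketch. The two functions
 * below are hypothetical stand-ins, not CPython APIs. */
int try_optimize_trace(void);   // returns nonzero if a trace was built
void enter_tier_two(void);      // hand control to the Tier 2 executor

void jump_backward_sketch(backoff_counter_t *slot)
{
    backoff_counter_t counter = *slot;
    if (counter_triggers(counter)) {
        if (try_optimize_trace()) {
            enter_tier_two();
        }
        else {
            // Optimization failed: wait exponentially longer next time.
            *slot = restart_counter(counter);
        }
    }
    else {
        // Not hot yet: one plain countdown tick per backward jump.
        *slot = advance_counter(counter);
    }
}
```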
diff --git a/Python/ceval.c b/Python/ceval.c
index f3b7316..57ae08e 100644
--- a/Python/ceval.c
+++ b/Python/ceval.c
@@ -4,6 +4,7 @@
 #include "Python.h"
 #include "pycore_abstract.h"      // _PyIndex_Check()
+#include "pycore_backoff.h"
 #include "pycore_call.h"          // _PyObject_CallNoArgs()
 #include "pycore_cell.h"          // PyCell_GetRef()
 #include "pycore_ceval.h"
@@ -822,7 +823,7 @@ resume_frame:
             _PyBinaryOpCache *cache = (_PyBinaryOpCache *)(next_instr+1);
             /* Prevent the underlying instruction from specializing
              * and overwriting the instrumentation. */
-            INCREMENT_ADAPTIVE_COUNTER(cache->counter);
+            PAUSE_ADAPTIVE_COUNTER(cache->counter);
         }
         opcode = original_opcode;
         DISPATCH_GOTO();
@@ -1099,7 +1100,7 @@ exit_to_trace:
         printf("SIDE EXIT: [UOp ");
         _PyUOpPrint(&next_uop[-1]);
         printf(", exit %u, temp %d, target %d -> %s]\n",
-               exit_index, exit->temperature, exit->target,
+               exit_index, exit->temperature.as_counter, exit->target,
                _PyOpcode_OpName[_PyCode_CODE(_PyFrame_GetCode(frame))[exit->target].op.code]);
     }
 #endif
diff --git a/Python/ceval_macros.h b/Python/ceval_macros.h
index 1194c11..224cd1d 100644
--- a/Python/ceval_macros.h
+++ b/Python/ceval_macros.h
@@ -262,7 +262,7 @@ GETITEM(PyObject *v, Py_ssize_t i) {
         STAT_INC(opcode, miss); \
         STAT_INC((INSTNAME), miss); \
         /* The counter is always the first cache entry: */ \
-        if (ADAPTIVE_COUNTER_IS_ZERO(next_instr->cache)) { \
+        if (ADAPTIVE_COUNTER_TRIGGERS(next_instr->cache)) { \
             STAT_INC((INSTNAME), deopt); \
         } \
     } while (0)
@@ -290,29 +290,28 @@ GETITEM(PyObject *v, Py_ssize_t i) {
         dtrace_function_entry(frame); \
     }

-#define ADAPTIVE_COUNTER_IS_ZERO(COUNTER) \
-    (((COUNTER) >> ADAPTIVE_BACKOFF_BITS) == 0)
-
-#define ADAPTIVE_COUNTER_IS_MAX(COUNTER) \
-    (((COUNTER) >> ADAPTIVE_BACKOFF_BITS) == ((1 << MAX_BACKOFF_VALUE) - 1))
+/* This takes a uint16_t instead of a _Py_BackoffCounter,
+ * because it is used directly on the cache entry in generated code,
+ * which is always an integral type. */
+#define ADAPTIVE_COUNTER_TRIGGERS(COUNTER) \
+    backoff_counter_triggers(forge_backoff_counter((COUNTER)))

 #ifdef Py_GIL_DISABLED
-#define DECREMENT_ADAPTIVE_COUNTER(COUNTER) \
-    do { \
-        /* gh-115999 tracks progress on addressing this. */ \
+#define ADVANCE_ADAPTIVE_COUNTER(COUNTER) \
+    do { \
+        /* gh-115999 tracks progress on addressing this. */ \
         static_assert(0, "The specializing interpreter is not yet thread-safe"); \
     } while (0);
 #else
-#define DECREMENT_ADAPTIVE_COUNTER(COUNTER) \
-    do { \
-        assert(!ADAPTIVE_COUNTER_IS_ZERO((COUNTER))); \
-        (COUNTER) -= (1 << ADAPTIVE_BACKOFF_BITS); \
+#define ADVANCE_ADAPTIVE_COUNTER(COUNTER) \
+    do { \
+        (COUNTER) = advance_backoff_counter((COUNTER)); \
     } while (0);
 #endif

-#define INCREMENT_ADAPTIVE_COUNTER(COUNTER) \
-    do { \
-        (COUNTER) += (1 << ADAPTIVE_BACKOFF_BITS); \
+#define PAUSE_ADAPTIVE_COUNTER(COUNTER) \
+    do { \
+        (COUNTER) = pause_backoff_counter((COUNTER)); \
     } while (0);

 #define UNBOUNDLOCAL_ERROR_MSG \
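As the new comment in ``ceval_macros.h`` explains, ``ADAPTIVE_COUNTER_TRIGGERS`` takes a plain ``uint16_t`` because generated code reads the raw cache entry. "Forging" a counter from those bits can be a simple bit copy, as in this sketch (again with the illustrative struct, not CPython's real helper):

```c
#include <stdint.h>
#include <string.h>

/* Sketch of "forging" a counter from a raw 16-bit cache entry: the
 * counter struct and the cache slot share one representation, so a bit
 * copy converts between them. Relies on the compiler packing the two
 * bitfields into a single 16-bit unit (an assumption of this sketch). */
_Static_assert(sizeof(backoff_counter_t) == sizeof(uint16_t),
               "counter must fit the cache entry exactly");

static backoff_counter_t
forge_counter(uint16_t raw)
{
    backoff_counter_t c;
    memcpy(&c, &raw, sizeof c);
    return c;
}

static uint16_t
counter_bits(backoff_counter_t c)
{
    uint16_t raw;
    memcpy(&raw, &c, sizeof raw);
    return raw;
}
```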
diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h
index 9847679..8c3d41b 100644
--- a/Python/executor_cases.c.h
+++ b/Python/executor_cases.c.h
@@ -3694,21 +3694,22 @@
             oparg = CURRENT_OPARG();
             _PyExecutorObject *previous = (_PyExecutorObject *)tstate->previous_executor;
             _PyExitData *exit = &previous->exits[oparg];
-            exit->temperature++;
             PyCodeObject *code = _PyFrame_GetCode(frame);
             _Py_CODEUNIT *target = _PyCode_CODE(code) + exit->target;
-            if (exit->temperature < (int32_t)tstate->interp->optimizer_side_threshold) {
+            _Py_BackoffCounter temperature = exit->temperature;
+            if (!backoff_counter_triggers(temperature)) {
+                exit->temperature = advance_backoff_counter(temperature);
                 GOTO_TIER_ONE(target);
             }
             _PyExecutorObject *executor;
             if (target->op.code == ENTER_EXECUTOR) {
                 executor = code->co_executors->executors[target->op.arg];
                 Py_INCREF(executor);
-            } else {
+            }
+            else {
                 int optimized = _PyOptimizer_Optimize(frame, target, stack_pointer, &executor);
                 if (optimized <= 0) {
-                    int32_t new_temp = -1 * tstate->interp->optimizer_side_threshold;
-                    exit->temperature = (new_temp < INT16_MIN) ? INT16_MIN : new_temp;
+                    exit->temperature = restart_backoff_counter(temperature);
                     if (optimized < 0) {
                         Py_DECREF(previous);
                         tstate->previous_executor = Py_None;
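The ``_COLD_EXIT`` rewrite gives side exits the same policy as ``JUMP_BACKWARD``: each time the exit is taken, its ``temperature`` advances; once it triggers, the interpreter tries to attach an executor and backs off exponentially on failure. A sketch of that policy, with ``try_build_executor`` as a hypothetical stand-in:

```c
/* Sketch of the side-exit "temperature" policy, using the illustrative
 * helpers from the first sketch. try_build_executor is a hypothetical
 * stand-in for the real optimizer entry point. */
void *try_build_executor(void);

typedef struct {
    backoff_counter_t temperature;  // starts counting down from 64
    void *executor;                 // stand-in for _PyExecutorObject *
} side_exit_t;

void cold_exit_sketch(side_exit_t *ex)
{
    backoff_counter_t t = ex->temperature;
    if (!counter_triggers(t)) {
        // Still cold: record one more visit and resume in Tier 1.
        ex->temperature = advance_counter(t);
        return;
    }
    ex->executor = try_build_executor();
    if (ex->executor == NULL) {
        // Couldn't build an executor: back off before the next attempt.
        ex->temperature = restart_counter(t);
    }
}
```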
diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h
index 6ee794a..0116acd 100644
--- a/Python/generated_cases.c.h
+++ b/Python/generated_cases.c.h
@@ -115,13 +115,13 @@
             uint16_t counter = read_u16(&this_instr[1].cache);
             (void)counter;
             #if ENABLE_SPECIALIZATION
-            if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
+            if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
                 next_instr = this_instr;
                 _Py_Specialize_BinaryOp(lhs, rhs, next_instr, oparg, LOCALS_ARRAY);
                 DISPATCH_SAME_OPARG();
             }
             STAT_INC(BINARY_OP, deferred);
-            DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
+            ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
             #endif /* ENABLE_SPECIALIZATION */
             assert(NB_ADD <= oparg);
             assert(oparg <= NB_INPLACE_XOR);
@@ -432,13 +432,13 @@
             uint16_t counter = read_u16(&this_instr[1].cache);
             (void)counter;
             #if ENABLE_SPECIALIZATION
-            if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
+            if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
                 next_instr = this_instr;
                 _Py_Specialize_BinarySubscr(container, sub, next_instr);
                 DISPATCH_SAME_OPARG();
             }
             STAT_INC(BINARY_SUBSCR, deferred);
-            DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
+            ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
             #endif /* ENABLE_SPECIALIZATION */
         }
         // _BINARY_SUBSCR
@@ -760,13 +760,13 @@
             uint16_t counter = read_u16(&this_instr[1].cache);
             (void)counter;
             #if ENABLE_SPECIALIZATION
-            if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
+            if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
                 next_instr = this_instr;
                 _Py_Specialize_Call(callable, next_instr, oparg + (self_or_null != NULL));
                 DISPATCH_SAME_OPARG();
             }
             STAT_INC(CALL, deferred);
-            DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
+            ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
             #endif /* ENABLE_SPECIALIZATION */
         }
         /* Skip 2 cache entries */
@@ -2036,13 +2036,13 @@
             uint16_t counter = read_u16(&this_instr[1].cache);
             (void)counter;
             #if ENABLE_SPECIALIZATION
-            if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
+            if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
                 next_instr = this_instr;
                 _Py_Specialize_CompareOp(left, right, next_instr, oparg);
                 DISPATCH_SAME_OPARG();
             }
             STAT_INC(COMPARE_OP, deferred);
-            DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
+            ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
             #endif /* ENABLE_SPECIALIZATION */
         }
         // _COMPARE_OP
@@ -2185,13 +2185,13 @@
             uint16_t counter = read_u16(&this_instr[1].cache);
             (void)counter;
             #if ENABLE_SPECIALIZATION
-            if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
+            if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
                 next_instr = this_instr;
                 _Py_Specialize_ContainsOp(right, next_instr);
                 DISPATCH_SAME_OPARG();
             }
             STAT_INC(CONTAINS_OP, deferred);
-            DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
+            ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
             #endif /* ENABLE_SPECIALIZATION */
         }
         // _CONTAINS_OP
@@ -2596,13 +2596,13 @@
             uint16_t counter = read_u16(&this_instr[1].cache);
             (void)counter;
             #if ENABLE_SPECIALIZATION
-            if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
+            if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
                 next_instr = this_instr;
                 _Py_Specialize_ForIter(iter, next_instr, oparg);
                 DISPATCH_SAME_OPARG();
             }
             STAT_INC(FOR_ITER, deferred);
-            DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
+            ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
             #endif /* ENABLE_SPECIALIZATION */
         }
         // _FOR_ITER
@@ -3026,7 +3026,7 @@
                 tstate, PY_MONITORING_EVENT_CALL, frame, this_instr, function, arg);
             if (err) goto error;
-            INCREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
+            PAUSE_ADAPTIVE_COUNTER(this_instr[1].counter);
             GO_TO_INSTRUCTION(CALL);
         }
@@ -3142,7 +3142,7 @@
             if (next_opcode < 0) goto error;
             next_instr = this_instr;
             if (_PyOpcode_Caches[next_opcode]) {
-                INCREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
+                PAUSE_ADAPTIVE_COUNTER(next_instr[1].counter);
             }
             assert(next_opcode > 0 && next_opcode < 256);
             opcode = next_opcode;
@@ -3177,7 +3177,7 @@
             /* Skip 1 cache entry */
             // cancel out the decrement that will happen in LOAD_SUPER_ATTR; we
             // don't want to specialize instrumented instructions
-            INCREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
+            PAUSE_ADAPTIVE_COUNTER(this_instr[1].counter);
             GO_TO_INSTRUCTION(LOAD_SUPER_ATTR);
         }
@@ -3415,16 +3415,8 @@
             assert(oparg <= INSTR_OFFSET());
             JUMPBY(-oparg);
             #if ENABLE_SPECIALIZATION
-            uint16_t counter = this_instr[1].cache;
-            this_instr[1].cache = counter + (1 << OPTIMIZER_BITS_IN_COUNTER);
-            /* We are using unsigned values, but we really want signed values, so
-             * do the 2s complement adjustment manually */
-            uint32_t offset_counter = counter ^ (1 << 15);
-            uint32_t threshold = tstate->interp->optimizer_backedge_threshold;
-            assert((threshold & OPTIMIZER_BITS_MASK) == 0);
-            // Use '>=' not '>' so that the optimizer/backoff bits do not effect the result.
-            // Double-check that the opcode isn't instrumented or something:
-            if (offset_counter >= threshold && this_instr->op.code == JUMP_BACKWARD) {
+            _Py_BackoffCounter counter = this_instr[1].counter;
+            if (backoff_counter_triggers(counter) && this_instr->op.code == JUMP_BACKWARD) {
                 _Py_CODEUNIT *start = this_instr;
                 /* Back up over EXTENDED_ARGs so optimizer sees the whole instruction */
                 while (oparg > 255) {
@@ -3440,17 +3432,12 @@
                     GOTO_TIER_TWO(executor);
                 }
                 else {
-                    int backoff = this_instr[1].cache & OPTIMIZER_BITS_MASK;
-                    backoff++;
-                    if (backoff < MIN_TIER2_BACKOFF) {
-                        backoff = MIN_TIER2_BACKOFF;
-                    }
-                    else if (backoff > MAX_TIER2_BACKOFF) {
-                        backoff = MAX_TIER2_BACKOFF;
-                    }
-                    this_instr[1].cache = ((UINT16_MAX << OPTIMIZER_BITS_IN_COUNTER) << backoff) | backoff;
+                    this_instr[1].counter = restart_backoff_counter(counter);
                 }
             }
+            else {
+                ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
+            }
             #endif /* ENABLE_SPECIALIZATION */
             DISPATCH();
         }
@@ -3543,14 +3530,14 @@
             uint16_t counter = read_u16(&this_instr[1].cache);
             (void)counter;
             #if ENABLE_SPECIALIZATION
-            if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
+            if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
                 PyObject *name = GETITEM(FRAME_CO_NAMES, oparg>>1);
                 next_instr = this_instr;
                 _Py_Specialize_LoadAttr(owner, next_instr, name);
                 DISPATCH_SAME_OPARG();
             }
             STAT_INC(LOAD_ATTR, deferred);
-            DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
+            ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
             #endif /* ENABLE_SPECIALIZATION */
         }
         /* Skip 8 cache entries */
@@ -4238,14 +4225,14 @@
             uint16_t counter = read_u16(&this_instr[1].cache);
             (void)counter;
             #if ENABLE_SPECIALIZATION
-            if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
+            if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
                 PyObject *name = GETITEM(FRAME_CO_NAMES, oparg>>1);
                 next_instr = this_instr;
                 _Py_Specialize_LoadGlobal(GLOBALS(), BUILTINS(), next_instr, name);
                 DISPATCH_SAME_OPARG();
             }
             STAT_INC(LOAD_GLOBAL, deferred);
-            DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
+            ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
             #endif /* ENABLE_SPECIALIZATION */
         }
         /* Skip 1 cache entry */
@@ -4442,13 +4429,13 @@
             (void)counter;
             #if ENABLE_SPECIALIZATION
             int load_method = oparg & 1;
-            if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
+            if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
                 next_instr = this_instr;
                 _Py_Specialize_LoadSuperAttr(global_super, class, next_instr, load_method);
                 DISPATCH_SAME_OPARG();
             }
             STAT_INC(LOAD_SUPER_ATTR, deferred);
-            DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
+            ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
             #endif /* ENABLE_SPECIALIZATION */
         }
         // _LOAD_SUPER_ATTR
@@ -5083,13 +5070,13 @@
             uint16_t counter = read_u16(&this_instr[1].cache);
             (void)counter;
             #if ENABLE_SPECIALIZATION
-            if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
+            if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
                 next_instr = this_instr;
                 _Py_Specialize_Send(receiver, next_instr);
                 DISPATCH_SAME_OPARG();
             }
             STAT_INC(SEND, deferred);
-            DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
+            ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
             #endif /* ENABLE_SPECIALIZATION */
         }
         // _SEND
@@ -5271,14 +5258,14 @@
             uint16_t counter = read_u16(&this_instr[1].cache);
             (void)counter;
             #if ENABLE_SPECIALIZATION
-            if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
+            if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
                 PyObject *name = GETITEM(FRAME_CO_NAMES, oparg);
                 next_instr = this_instr;
                 _Py_Specialize_StoreAttr(owner, next_instr, name);
                 DISPATCH_SAME_OPARG();
             }
             STAT_INC(STORE_ATTR, deferred);
-            DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
+            ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
             #endif /* ENABLE_SPECIALIZATION */
         }
         /* Skip 3 cache entries */
@@ -5562,13 +5549,13 @@
             uint16_t counter = read_u16(&this_instr[1].cache);
             (void)counter;
             #if ENABLE_SPECIALIZATION
-            if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
+            if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
                 next_instr = this_instr;
                 _Py_Specialize_StoreSubscr(container, sub, next_instr);
                 DISPATCH_SAME_OPARG();
             }
             STAT_INC(STORE_SUBSCR, deferred);
-            DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
+            ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
             #endif /* ENABLE_SPECIALIZATION */
         }
         // _STORE_SUBSCR
@@ -5665,13 +5652,13 @@
             uint16_t counter = read_u16(&this_instr[1].cache);
             (void)counter;
             #if ENABLE_SPECIALIZATION
-            if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
+            if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
                 next_instr = this_instr;
                 _Py_Specialize_ToBool(value, next_instr);
                 DISPATCH_SAME_OPARG();
             }
             STAT_INC(TO_BOOL, deferred);
-            DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
+            ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
             #endif /* ENABLE_SPECIALIZATION */
         }
         /* Skip 2 cache entries */
@@ -5882,13 +5869,13 @@
             uint16_t counter = read_u16(&this_instr[1].cache);
             (void)counter;
             #if ENABLE_SPECIALIZATION
-            if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
+            if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
                 next_instr = this_instr;
                 _Py_Specialize_UnpackSequence(seq, next_instr, oparg);
                 DISPATCH_SAME_OPARG();
             }
             STAT_INC(UNPACK_SEQUENCE, deferred);
-            DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
+            ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
             #endif /* ENABLE_SPECIALIZATION */
             (void)seq;
             (void)counter;
diff --git a/Python/instrumentation.c b/Python/instrumentation.c
index 018cd66..0f60290 100644
--- a/Python/instrumentation.c
+++ b/Python/instrumentation.c
@@ -590,7 +590,7 @@ de_instrument(PyCodeObject *code, int i, int event)
     CHECK(_PyOpcode_Deopt[deinstrumented] == deinstrumented);
     *opcode_ptr = deinstrumented;
     if (_PyOpcode_Caches[deinstrumented]) {
-        instr[1].cache = adaptive_counter_warmup();
+        instr[1].counter = adaptive_counter_warmup();
     }
 }
@@ -611,7 +611,7 @@ de_instrument_line(PyCodeObject *code, int i)
     CHECK(original_opcode == _PyOpcode_Deopt[original_opcode]);
     instr->op.code = original_opcode;
     if (_PyOpcode_Caches[original_opcode]) {
-        instr[1].cache = adaptive_counter_warmup();
+        instr[1].counter = adaptive_counter_warmup();
     }
     assert(instr->op.code != INSTRUMENTED_LINE);
 }
@@ -634,7 +634,7 @@ de_instrument_per_instruction(PyCodeObject *code, int i)
     CHECK(original_opcode == _PyOpcode_Deopt[original_opcode]);
     *opcode_ptr = original_opcode;
     if (_PyOpcode_Caches[original_opcode]) {
-        instr[1].cache = adaptive_counter_warmup();
+        instr[1].counter = adaptive_counter_warmup();
     }
     assert(*opcode_ptr != INSTRUMENTED_INSTRUCTION);
     assert(instr->op.code != INSTRUMENTED_INSTRUCTION);
@@ -667,7 +667,7 @@ instrument(PyCodeObject *code, int i)
         assert(instrumented);
         *opcode_ptr = instrumented;
         if (_PyOpcode_Caches[deopt]) {
-            instr[1].cache = adaptive_counter_warmup();
+            instr[1].counter = adaptive_counter_warmup();
         }
     }
 }
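Instrumentation interacts with the counters in two ways in the hunks above: instrumented instructions "pause" their counter so the underlying instruction cannot specialize behind the instrumentation, and (de-)instrumenting resets the counter to a short warmup. One plausible shape for those helpers, as assumptions of this sketch rather than the real implementation:

```c
/* Two helpers the instrumentation code above relies on, in sketch form.
 * The concrete behavior and values are assumptions, not CPython's. */

// "Pause": undo one tick so the pending advance in the unspecialized
// instruction cannot make the counter trigger.
static backoff_counter_t
pause_counter(backoff_counter_t c)
{
    if (c.value < 0xFFF) {  // saturate at the 12-bit field maximum
        c.value++;
    }
    return c;
}

// Warmup: a short initial countdown so fresh (or freshly
// de-instrumented) code gets a chance to specialize quickly.
static backoff_counter_t
warmup_counter(void)
{
    return make_counter(1, 1);
}
```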
diff --git a/Python/optimizer.c b/Python/optimizer.c
index 38ab6d3..5c69d9d 100644
--- a/Python/optimizer.c
+++ b/Python/optimizer.c
@@ -1,6 +1,7 @@
 #include "Python.h"
 #include "opcode.h"
 #include "pycore_interp.h"
+#include "pycore_backoff.h"
 #include "pycore_bitutils.h"        // _Py_popcount32()
 #include "pycore_object.h"          // _PyObject_GC_UNTRACK()
 #include "pycore_opcode_metadata.h" // _PyOpcode_OpName[]
@@ -110,9 +111,7 @@ never_optimize(
     _PyExecutorObject **exec,
     int Py_UNUSED(stack_entries))
 {
-    /* Although it should be benign for this to be called,
-     * it shouldn't happen, so fail in debug builds. */
-    assert(0 && "never optimize should never be called");
+    // This may be called if the optimizer is reset
     return 0;
 }
@@ -127,25 +126,12 @@ PyTypeObject _PyDefaultOptimizer_Type = {
 static _PyOptimizerObject _PyOptimizer_Default = {
     PyObject_HEAD_INIT(&_PyDefaultOptimizer_Type)
     .optimize = never_optimize,
-    .resume_threshold = OPTIMIZER_UNREACHABLE_THRESHOLD,
-    .backedge_threshold = OPTIMIZER_UNREACHABLE_THRESHOLD,
-    .side_threshold = OPTIMIZER_UNREACHABLE_THRESHOLD,
 };

-static uint32_t
-shift_and_offset_threshold(uint32_t threshold)
-{
-    return (threshold << OPTIMIZER_BITS_IN_COUNTER) + (1 << 15);
-}
-
 _PyOptimizerObject *
 PyUnstable_GetOptimizer(void)
 {
     PyInterpreterState *interp = _PyInterpreterState_GET();
-    assert(interp->optimizer_backedge_threshold ==
-           shift_and_offset_threshold(interp->optimizer->backedge_threshold));
-    assert(interp->optimizer_resume_threshold ==
-           shift_and_offset_threshold(interp->optimizer->resume_threshold));
     if (interp->optimizer == &_PyOptimizer_Default) {
         return NULL;
     }
@@ -190,13 +176,6 @@ _Py_SetOptimizer(PyInterpreterState *interp, _PyOptimizerObject *optimizer)
     }
     Py_INCREF(optimizer);
     interp->optimizer = optimizer;
-    interp->optimizer_backedge_threshold = shift_and_offset_threshold(optimizer->backedge_threshold);
-    interp->optimizer_resume_threshold = shift_and_offset_threshold(optimizer->resume_threshold);
-    interp->optimizer_side_threshold = optimizer->side_threshold;
-    if (optimizer == &_PyOptimizer_Default) {
-        assert(interp->optimizer_backedge_threshold > (1 << 16));
-        assert(interp->optimizer_resume_threshold > (1 << 16));
-    }
     return old;
 }
@@ -1109,7 +1088,7 @@ make_executor_from_uops(_PyUOpInstruction *buffer, int length, const _PyBloomFil
     assert(exit_count < COLD_EXIT_COUNT);
     for (int i = 0; i < exit_count; i++) {
         executor->exits[i].executor = &COLD_EXITS[i];
-        executor->exits[i].temperature = 0;
+        executor->exits[i].temperature = initial_temperature_backoff_counter();
     }
     int next_exit = exit_count-1;
     _PyUOpInstruction *dest = (_PyUOpInstruction *)&executor->trace[length];
@@ -1291,11 +1270,6 @@ PyUnstable_Optimizer_NewUOpOptimizer(void)
         return NULL;
     }
     opt->optimize = uop_optimize;
-    opt->resume_threshold = OPTIMIZER_UNREACHABLE_THRESHOLD;
-    // Need a few iterations to settle specializations,
-    // and to ammortize the cost of optimization.
-    opt->side_threshold = 16;
-    opt->backedge_threshold = 16;
     return (PyObject *)opt;
 }
@@ -1385,9 +1359,6 @@ PyUnstable_Optimizer_NewCounter(void)
         return NULL;
     }
     opt->base.optimize = counter_optimize;
-    opt->base.resume_threshold = OPTIMIZER_UNREACHABLE_THRESHOLD;
-    opt->base.side_threshold = OPTIMIZER_UNREACHABLE_THRESHOLD;
-    opt->base.backedge_threshold = 0;
     opt->count = 0;
     return (PyObject *)opt;
 }
@@ -1554,7 +1525,7 @@ _Py_ExecutorClear(_PyExecutorObject *executor)
     for (uint32_t i = 0; i < executor->exit_count; i++) {
         Py_DECREF(executor->exits[i].executor);
         executor->exits[i].executor = &COLD_EXITS[i];
-        executor->exits[i].temperature = INT16_MIN;
+        executor->exits[i].temperature = initial_unreachable_backoff_counter();
     }
     _Py_CODEUNIT *instruction = &_PyCode_CODE(code)[executor->vm_data.index];
     assert(instruction->op.code == ENTER_EXECUTOR);
diff --git a/Python/specialize.c b/Python/specialize.c
index f1e32d0..0b4b199 100644
--- a/Python/specialize.c
+++ b/Python/specialize.c
@@ -419,22 +419,20 @@ _PyCode_Quicken(PyCodeObject *code)
         int caches = _PyOpcode_Caches[opcode];
         if (caches) {
             // The initial value depends on the opcode
-            int initial_value;
             switch (opcode) {
                 case JUMP_BACKWARD:
-                    initial_value = 0;
+                    instructions[i + 1].counter = initial_jump_backoff_counter();
                     break;
                 case POP_JUMP_IF_FALSE:
                 case POP_JUMP_IF_TRUE:
                 case POP_JUMP_IF_NONE:
                 case POP_JUMP_IF_NOT_NONE:
-                    initial_value = 0x5555;  // Alternating 0, 1 bits
+                    instructions[i + 1].cache = 0x5555;  // Alternating 0, 1 bits
                     break;
                 default:
-                    initial_value = adaptive_counter_warmup();
+                    instructions[i + 1].counter = adaptive_counter_warmup();
                     break;
             }
-            instructions[i + 1].cache = initial_value;
             i += caches;
         }
     }
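Putting the quickening values together: a ``JUMP_BACKWARD`` site starts with a countdown of 16, and every failed optimization attempt roughly doubles the wait before the next one. A worked example using the sketch helpers defined earlier:

```c
#include <stdio.h>

/* Worked example: retry intervals for a JUMP_BACKWARD site whose
 * optimization attempts keep failing, using the sketch helpers above. */
int main(void)
{
    backoff_counter_t c = make_counter(16, 4);  // initial threshold: 16
    for (int attempt = 1; attempt <= 5; attempt++) {
        int backedges = 0;
        while (!counter_triggers(c)) {
            c = advance_counter(c);
            backedges++;
        }
        // Prints 16, then 31, 63, 127, 255: roughly doubling each time.
        printf("attempt %d after %d backedges\n", attempt, backedges);
        c = restart_counter(c);
    }
    return 0;
}
```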