author     Mark Shannon <mark@hotpy.org>    2024-02-13 14:16:37 (GMT)
committer  GitHub <noreply@github.com>      2024-02-13 14:16:37 (GMT)
commit     f9f6156c5affc039d4ee6b6f4999daf0d5896428 (patch)
tree       063a0d3da50ec32d1bdd265d0b305f9365ced7da /Python
parent     7cce8576226249461baa91c4a89770a1823b44a4 (diff)
GH-113710: Backedge counter improvements. (GH-115166)
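
In outline: the 16-bit inline cache entry of JUMP_BACKWARD now holds both the backedge counter (high bits) and a backoff exponent (low bits), and thresholds are shifted and offset once, when an optimizer is installed, instead of on every backedge. The following minimal sketch shows the assumed layout and threshold encoding; the macro values are defined in internal headers outside the Python/ directory covered by this diff, so the numbers here are illustrative assumptions, not the authoritative definitions.

    #include <stdint.h>

    /* Assumed values: the real macros live in CPython's internal headers,
     * which are outside this Python/-limited diff. */
    #define OPTIMIZER_BITS_IN_COUNTER 4
    #define OPTIMIZER_BITS_MASK ((1 << OPTIMIZER_BITS_IN_COUNTER) - 1)

    /* JUMP_BACKWARD's 16-bit cache entry, as assumed here:
     *   bits 15..4: counter, bumped by (1 << 4) on every backedge
     *   bits  3..0: backoff exponent, grown on failed optimization attempts
     * Thresholds are pre-shifted into counter space and offset by 1 << 15,
     * so the hot path needs one unsigned '>=' comparison per backedge. */
    static uint32_t
    shift_and_offset_threshold(uint16_t threshold)
    {
        return ((uint32_t)threshold << OPTIMIZER_BITS_IN_COUNTER) + (1 << 15);
    }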
Diffstat (limited to 'Python')
-rw-r--r--   Python/bytecodes.c          | 29
-rw-r--r--   Python/generated_cases.c.h  | 29
-rw-r--r--   Python/optimizer.c          | 48
-rw-r--r--   Python/pylifecycle.c        |  4
-rw-r--r--   Python/pystate.c            | 10
5 files changed, 70 insertions(+), 50 deletions(-)
diff --git a/Python/bytecodes.c b/Python/bytecodes.c
index f7c7e36..2ad5878 100644
--- a/Python/bytecodes.c
+++ b/Python/bytecodes.c
@@ -2318,13 +2318,16 @@ dummy_func(
             assert(oparg <= INSTR_OFFSET());
             JUMPBY(-oparg);
             #if ENABLE_SPECIALIZATION
-            this_instr[1].cache += (1 << OPTIMIZER_BITS_IN_COUNTER);
+            uint16_t counter = this_instr[1].cache;
+            this_instr[1].cache = counter + (1 << OPTIMIZER_BITS_IN_COUNTER);
             /* We are using unsigned values, but we really want signed values, so
-             * do the 2s complement comparison manually */
-            uint16_t ucounter = this_instr[1].cache + (1 << 15);
-            uint16_t threshold = tstate->interp->optimizer_backedge_threshold + (1 << 15);
+             * do the 2s complement adjustment manually */
+            uint32_t offset_counter = counter ^ (1 << 15);
+            uint32_t threshold = tstate->interp->optimizer_backedge_threshold;
+            assert((threshold & OPTIMIZER_BITS_MASK) == 0);
+            // Use '>=' not '>' so that the optimizer/backoff bits do not affect the result.
             // Double-check that the opcode isn't instrumented or something:
-            if (ucounter > threshold && this_instr->op.code == JUMP_BACKWARD) {
+            if (offset_counter >= threshold && this_instr->op.code == JUMP_BACKWARD) {
                 OPT_STAT_INC(attempts);
                 _Py_CODEUNIT *start = this_instr;
                 /* Back up over EXTENDED_ARGs so optimizer sees the whole instruction */
@@ -2338,18 +2341,18 @@ dummy_func(
                     // Rewind and enter the executor:
                     assert(start->op.code == ENTER_EXECUTOR);
                     next_instr = start;
-                    this_instr[1].cache &= ((1 << OPTIMIZER_BITS_IN_COUNTER) - 1);
+                    this_instr[1].cache &= OPTIMIZER_BITS_MASK;
                 }
                 else {
-                    int backoff = this_instr[1].cache & ((1 << OPTIMIZER_BITS_IN_COUNTER) - 1);
-                    if (backoff < MINIMUM_TIER2_BACKOFF) {
-                        backoff = MINIMUM_TIER2_BACKOFF;
+                    int backoff = this_instr[1].cache & OPTIMIZER_BITS_MASK;
+                    backoff++;
+                    if (backoff < MIN_TIER2_BACKOFF) {
+                        backoff = MIN_TIER2_BACKOFF;
                     }
-                    else if (backoff < 15 - OPTIMIZER_BITS_IN_COUNTER) {
-                        backoff++;
+                    else if (backoff > MAX_TIER2_BACKOFF) {
+                        backoff = MAX_TIER2_BACKOFF;
                     }
-                    assert(backoff <= 15 - OPTIMIZER_BITS_IN_COUNTER);
-                    this_instr[1].cache = ((1 << 16) - ((1 << OPTIMIZER_BITS_IN_COUNTER) << backoff)) | backoff;
+                    this_instr[1].cache = ((UINT16_MAX << OPTIMIZER_BITS_IN_COUNTER) << backoff) | backoff;
                 }
             }
             #endif /* ENABLE_SPECIALIZATION */
diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h
index afb6650..a49223e 100644
--- a/Python/generated_cases.c.h
+++ b/Python/generated_cases.c.h
@@ -3263,13 +3263,16 @@
             assert(oparg <= INSTR_OFFSET());
             JUMPBY(-oparg);
             #if ENABLE_SPECIALIZATION
-            this_instr[1].cache += (1 << OPTIMIZER_BITS_IN_COUNTER);
+            uint16_t counter = this_instr[1].cache;
+            this_instr[1].cache = counter + (1 << OPTIMIZER_BITS_IN_COUNTER);
             /* We are using unsigned values, but we really want signed values, so
-             * do the 2s complement comparison manually */
-            uint16_t ucounter = this_instr[1].cache + (1 << 15);
-            uint16_t threshold = tstate->interp->optimizer_backedge_threshold + (1 << 15);
+             * do the 2s complement adjustment manually */
+            uint32_t offset_counter = counter ^ (1 << 15);
+            uint32_t threshold = tstate->interp->optimizer_backedge_threshold;
+            assert((threshold & OPTIMIZER_BITS_MASK) == 0);
+            // Use '>=' not '>' so that the optimizer/backoff bits do not affect the result.
             // Double-check that the opcode isn't instrumented or something:
-            if (ucounter > threshold && this_instr->op.code == JUMP_BACKWARD) {
+            if (offset_counter >= threshold && this_instr->op.code == JUMP_BACKWARD) {
                 OPT_STAT_INC(attempts);
                 _Py_CODEUNIT *start = this_instr;
                 /* Back up over EXTENDED_ARGs so optimizer sees the whole instruction */
@@ -3283,18 +3286,18 @@
                     // Rewind and enter the executor:
                     assert(start->op.code == ENTER_EXECUTOR);
                     next_instr = start;
-                    this_instr[1].cache &= ((1 << OPTIMIZER_BITS_IN_COUNTER) - 1);
+                    this_instr[1].cache &= OPTIMIZER_BITS_MASK;
                 }
                 else {
-                    int backoff = this_instr[1].cache & ((1 << OPTIMIZER_BITS_IN_COUNTER) - 1);
-                    if (backoff < MINIMUM_TIER2_BACKOFF) {
-                        backoff = MINIMUM_TIER2_BACKOFF;
+                    int backoff = this_instr[1].cache & OPTIMIZER_BITS_MASK;
+                    backoff++;
+                    if (backoff < MIN_TIER2_BACKOFF) {
+                        backoff = MIN_TIER2_BACKOFF;
                     }
-                    else if (backoff < 15 - OPTIMIZER_BITS_IN_COUNTER) {
-                        backoff++;
+                    else if (backoff > MAX_TIER2_BACKOFF) {
+                        backoff = MAX_TIER2_BACKOFF;
                     }
-                    assert(backoff <= 15 - OPTIMIZER_BITS_IN_COUNTER);
-                    this_instr[1].cache = ((1 << 16) - ((1 << OPTIMIZER_BITS_IN_COUNTER) << backoff)) | backoff;
+                    this_instr[1].cache = ((UINT16_MAX << OPTIMIZER_BITS_IN_COUNTER) << backoff) | backoff;
                 }
             }
             #endif /* ENABLE_SPECIALIZATION */
diff --git a/Python/optimizer.c b/Python/optimizer.c
index f31f831..13df8c1 100644
--- a/Python/optimizer.c
+++ b/Python/optimizer.c
@@ -109,6 +109,9 @@ never_optimize(
     _PyExecutorObject **exec,
     int Py_UNUSED(stack_entries))
 {
+    /* Although it should be benign for this to be called,
+     * it shouldn't happen, so fail in debug builds. */
+    assert(0 && "never optimize should never be called");
     return 0;
 }
 
@@ -120,13 +123,19 @@ PyTypeObject _PyDefaultOptimizer_Type = {
     .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_DISALLOW_INSTANTIATION,
 };
 
-_PyOptimizerObject _PyOptimizer_Default = {
+static _PyOptimizerObject _PyOptimizer_Default = {
     PyObject_HEAD_INIT(&_PyDefaultOptimizer_Type)
     .optimize = never_optimize,
-    .resume_threshold = INT16_MAX,
-    .backedge_threshold = INT16_MAX,
+    .resume_threshold = OPTIMIZER_UNREACHABLE_THRESHOLD,
+    .backedge_threshold = OPTIMIZER_UNREACHABLE_THRESHOLD,
 };
 
+static uint32_t
+shift_and_offset_threshold(uint16_t threshold)
+{
+    return (threshold << OPTIMIZER_BITS_IN_COUNTER) + (1 << 15);
+}
+
 _PyOptimizerObject *
 PyUnstable_GetOptimizer(void)
 {
@@ -134,24 +143,33 @@ PyUnstable_GetOptimizer(void)
     if (interp->optimizer == &_PyOptimizer_Default) {
         return NULL;
     }
-    assert(interp->optimizer_backedge_threshold == interp->optimizer->backedge_threshold);
-    assert(interp->optimizer_resume_threshold == interp->optimizer->resume_threshold);
+    assert(interp->optimizer_backedge_threshold ==
+           shift_and_offset_threshold(interp->optimizer->backedge_threshold));
+    assert(interp->optimizer_resume_threshold ==
+           shift_and_offset_threshold(interp->optimizer->resume_threshold));
     Py_INCREF(interp->optimizer);
     return interp->optimizer;
 }
 
-void
-PyUnstable_SetOptimizer(_PyOptimizerObject *optimizer)
+_PyOptimizerObject *
+_Py_SetOptimizer(PyInterpreterState *interp, _PyOptimizerObject *optimizer)
 {
-    PyInterpreterState *interp = _PyInterpreterState_GET();
     if (optimizer == NULL) {
         optimizer = &_PyOptimizer_Default;
     }
     _PyOptimizerObject *old = interp->optimizer;
     Py_INCREF(optimizer);
     interp->optimizer = optimizer;
-    interp->optimizer_backedge_threshold = optimizer->backedge_threshold;
-    interp->optimizer_resume_threshold = optimizer->resume_threshold;
+    interp->optimizer_backedge_threshold =
+        shift_and_offset_threshold(optimizer->backedge_threshold);
+    interp->optimizer_resume_threshold =
+        shift_and_offset_threshold(optimizer->resume_threshold);
+    return old;
+}
+
+void
+PyUnstable_SetOptimizer(_PyOptimizerObject *optimizer)
+{
+    PyInterpreterState *interp = _PyInterpreterState_GET();
+    _PyOptimizerObject *old = _Py_SetOptimizer(interp, optimizer);
     Py_DECREF(old);
 }
 
@@ -860,10 +878,10 @@ PyUnstable_Optimizer_NewUOpOptimizer(void)
         return NULL;
     }
     opt->optimize = uop_optimize;
-    opt->resume_threshold = INT16_MAX;
-    // Need at least 3 iterations to settle specializations.
-    // A few lower bits of the counter are reserved for other flags.
-    opt->backedge_threshold = 16 << OPTIMIZER_BITS_IN_COUNTER;
+    opt->resume_threshold = OPTIMIZER_UNREACHABLE_THRESHOLD;
+    // Need a few iterations to settle specializations,
+    // and to amortize the cost of optimization.
+    opt->backedge_threshold = 16;
     return (PyObject *)opt;
 }
 
@@ -950,7 +968,7 @@ PyUnstable_Optimizer_NewCounter(void)
         return NULL;
     }
     opt->base.optimize = counter_optimize;
-    opt->base.resume_threshold = INT16_MAX;
+    opt->base.resume_threshold = OPTIMIZER_UNREACHABLE_THRESHOLD;
     opt->base.backedge_threshold = 0;
     opt->count = 0;
     return (PyObject *)opt;
diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c
index 2300180..7e4c07b 100644
--- a/Python/pylifecycle.c
+++ b/Python/pylifecycle.c
@@ -1627,8 +1627,8 @@ finalize_modules(PyThreadState *tstate)
 
     // Invalidate all executors and turn off tier 2 optimizer
     _Py_Executors_InvalidateAll(interp);
-    Py_XDECREF(interp->optimizer);
-    interp->optimizer = &_PyOptimizer_Default;
+    _PyOptimizerObject *old = _Py_SetOptimizer(interp, NULL);
+    Py_XDECREF(old);
 
     // Stop watching __builtin__ modifications
     PyDict_Unwatch(0, interp->builtins);
diff --git a/Python/pystate.c b/Python/pystate.c
index 937c430..996f465 100644
--- a/Python/pystate.c
+++ b/Python/pystate.c
@@ -625,9 +625,7 @@ init_interpreter(PyInterpreterState *interp,
     }
     interp->sys_profile_initialized = false;
     interp->sys_trace_initialized = false;
-    interp->optimizer = &_PyOptimizer_Default;
-    interp->optimizer_backedge_threshold = _PyOptimizer_Default.backedge_threshold;
-    interp->optimizer_resume_threshold = _PyOptimizer_Default.backedge_threshold;
+    (void)_Py_SetOptimizer(interp, NULL);
     interp->next_func_version = 1;
     interp->executor_list_head = NULL;
     if (interp != &runtime->_main_interpreter) {
@@ -780,10 +778,8 @@ interpreter_clear(PyInterpreterState *interp, PyThreadState *tstate)
         tstate->_status.cleared = 0;
     }
 
-    Py_CLEAR(interp->optimizer);
-    interp->optimizer = &_PyOptimizer_Default;
-    interp->optimizer_backedge_threshold = _PyOptimizer_Default.backedge_threshold;
-    interp->optimizer_resume_threshold = _PyOptimizer_Default.backedge_threshold;
+    _PyOptimizerObject *old = _Py_SetOptimizer(interp, NULL);
+    Py_DECREF(old);
 
     /* It is possible that any of the objects below have a finalizer
        that runs Python code or otherwise relies on a thread state
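
To see what the new backoff handling does over time, the following self-contained sketch simulates the JUMP_BACKWARD counter update under the same assumed constants as above. It illustrates the scheme rather than reproducing the interpreter's actual code; the MIN/MAX backoff values are assumptions, since their definitions are outside this Python/-limited diff.

    /* Standalone simulation of the counter/backoff scheme (C99).
     * Assumed constants; CPython defines the real ones in internal headers. */
    #include <stdint.h>
    #include <stdio.h>

    #define OPTIMIZER_BITS_IN_COUNTER 4
    #define OPTIMIZER_BITS_MASK ((1 << OPTIMIZER_BITS_IN_COUNTER) - 1)
    #define MIN_TIER2_BACKOFF 4
    #define MAX_TIER2_BACKOFF (15 - OPTIMIZER_BITS_IN_COUNTER)
    /* Threshold of 16, pre-shifted into counter space and offset by 1 << 15,
     * as shift_and_offset_threshold() does when an optimizer is installed. */
    #define BACKEDGE_THRESHOLD \
        (((uint32_t)16 << OPTIMIZER_BITS_IN_COUNTER) + (1 << 15))

    /* Count backedges until the unsigned '>=' threshold check fires. */
    static int
    backedges_until_attempt(uint16_t *cache)
    {
        for (int n = 1; ; n++) {
            uint16_t counter = *cache;
            *cache = counter + (1 << OPTIMIZER_BITS_IN_COUNTER);
            uint32_t offset_counter = counter ^ (1 << 15);
            if (offset_counter >= BACKEDGE_THRESHOLD) {
                return n;
            }
        }
    }

    /* On a failed attempt: bump the backoff exponent (clamped to
     * [MIN, MAX]) and reload the counter so the next attempt happens
     * roughly 2**backoff backedges later. */
    static void
    note_failed_attempt(uint16_t *cache)
    {
        int backoff = (*cache & OPTIMIZER_BITS_MASK) + 1;
        if (backoff < MIN_TIER2_BACKOFF) {
            backoff = MIN_TIER2_BACKOFF;
        }
        else if (backoff > MAX_TIER2_BACKOFF) {
            backoff = MAX_TIER2_BACKOFF;
        }
        *cache = (uint16_t)(((UINT16_MAX << OPTIMIZER_BITS_IN_COUNTER) << backoff)
                            | backoff);
    }

    int
    main(void)
    {
        uint16_t cache = 0;
        for (int attempt = 1; attempt <= 6; attempt++) {
            int n = backedges_until_attempt(&cache);
            printf("attempt %d after %d backedges (backoff=%d)\n",
                   attempt, n, cache & OPTIMIZER_BITS_MASK);
            note_failed_attempt(&cache);  /* pretend optimization failed */
        }
        return 0;
    }

With these assumed values the attempt intervals come out as 17, 33, 49, 81, 145, 273 backedges: after the clamp to MIN_TIER2_BACKOFF, each failure roughly doubles the wait before the next optimization attempt, which is the point of the per-failure backoff increment this commit introduces.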