diff options
author | Mark Shannon <mark@hotpy.org> | 2021-10-20 18:53:48 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-10-20 18:53:48 (GMT) |
commit | bc85eb7a4f16e9e2b6fb713be2466ebb132fd7f2 (patch) | |
tree | fb3984e35d68d41b840f4b431df18772e75742bd | |
parent | d89fb9a5a610a257014d112bdceef73d7df14082 (diff) | |
download | cpython-bc85eb7a4f16e9e2b6fb713be2466ebb132fd7f2.zip cpython-bc85eb7a4f16e9e2b6fb713be2466ebb132fd7f2.tar.gz cpython-bc85eb7a4f16e9e2b6fb713be2466ebb132fd7f2.tar.bz2 |
bpo-45527: Don't count cache hits, just misses. (GH-29092)
-rw-r--r-- | Include/internal/pycore_code.h | 47 | ||||
-rw-r--r-- | Python/ceval.c | 53 | ||||
-rw-r--r-- | Python/specialize.c | 40 |
3 files changed, 33 insertions, 107 deletions
diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h index 482bd7e..622829f 100644 --- a/Include/internal/pycore_code.h +++ b/Include/internal/pycore_code.h @@ -253,53 +253,6 @@ PyAPI_FUNC(PyObject *) _PyCode_GetVarnames(PyCodeObject *); PyAPI_FUNC(PyObject *) _PyCode_GetCellvars(PyCodeObject *); PyAPI_FUNC(PyObject *) _PyCode_GetFreevars(PyCodeObject *); - -/* Cache hits and misses */ - -static inline uint8_t -saturating_increment(uint8_t c) -{ - return c<<1; -} - -static inline uint8_t -saturating_decrement(uint8_t c) -{ - return (c>>1) + 128; -} - -static inline uint8_t -saturating_zero(void) -{ - return 255; -} - -/* Starting value for saturating counter. - * Technically this should be 1, but that is likely to - * cause a bit of thrashing when we optimize then get an immediate miss. - * We want to give the counter a change to stabilize, so we start at 3. - */ -static inline uint8_t -saturating_start(void) -{ - return saturating_zero()<<3; -} - -static inline void -record_cache_hit(_PyAdaptiveEntry *entry) { - entry->counter = saturating_increment(entry->counter); -} - -static inline void -record_cache_miss(_PyAdaptiveEntry *entry) { - entry->counter = saturating_decrement(entry->counter); -} - -static inline int -too_many_cache_misses(_PyAdaptiveEntry *entry) { - return entry->counter == saturating_zero(); -} - #define ADAPTIVE_CACHE_BACKOFF 64 static inline void diff --git a/Python/ceval.c b/Python/ceval.c index f4186da..adc7b53 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -13,7 +13,7 @@ #include "pycore_abstract.h" // _PyIndex_Check() #include "pycore_call.h" // _PyObject_FastCallDictTstate() #include "pycore_ceval.h" // _PyEval_SignalAsyncExc() -#include "pycore_code.h" // saturating_increment() +#include "pycore_code.h" #include "pycore_initconfig.h" // _PyStatus_OK() #include "pycore_long.h" // _PyLong_GetZero() #include "pycore_object.h" // _PyObject_GC_TRACK() @@ -1452,11 +1452,6 @@ eval_frame_handle_pending(PyThreadState *tstate) #define UPDATE_PREV_INSTR_OPARG(instr, oparg) ((uint8_t*)(instr))[-1] = (oparg) -static inline void -record_hit_inline(_Py_CODEUNIT *next_instr, int oparg) -{ - UPDATE_PREV_INSTR_OPARG(next_instr, saturating_increment(oparg)); -} #define GLOBALS() frame->f_globals #define BUILTINS() frame->f_builtins @@ -1480,7 +1475,6 @@ record_hit_inline(_Py_CODEUNIT *next_instr, int oparg) res = ep->me_value; \ DEOPT_IF(res == NULL, LOAD_##attr_or_method); \ STAT_INC(LOAD_##attr_or_method, hit); \ - record_cache_hit(cache0); \ Py_INCREF(res); static int @@ -1976,7 +1970,6 @@ check_eval_breaker: DEOPT_IF(!PyLong_CheckExact(left), BINARY_MULTIPLY); DEOPT_IF(!PyLong_CheckExact(right), BINARY_MULTIPLY); STAT_INC(BINARY_MULTIPLY, hit); - record_hit_inline(next_instr, oparg); PyObject *prod = _PyLong_Multiply((PyLongObject *)left, (PyLongObject *)right); SET_SECOND(prod); Py_DECREF(right); @@ -1994,7 +1987,6 @@ check_eval_breaker: DEOPT_IF(!PyFloat_CheckExact(left), BINARY_MULTIPLY); DEOPT_IF(!PyFloat_CheckExact(right), BINARY_MULTIPLY); STAT_INC(BINARY_MULTIPLY, hit); - record_hit_inline(next_instr, oparg); double dprod = ((PyFloatObject *)left)->ob_fval * ((PyFloatObject *)right)->ob_fval; PyObject *prod = PyFloat_FromDouble(dprod); @@ -2103,7 +2095,6 @@ check_eval_breaker: DEOPT_IF(!PyUnicode_CheckExact(left), BINARY_ADD); DEOPT_IF(Py_TYPE(right) != Py_TYPE(left), BINARY_ADD); STAT_INC(BINARY_ADD, hit); - record_hit_inline(next_instr, oparg); PyObject *res = PyUnicode_Concat(left, right); STACK_SHRINK(1); SET_TOP(res); @@ -2132,7 +2123,6 @@ check_eval_breaker: PyObject *var = GETLOCAL(next_oparg); DEOPT_IF(var != left, BINARY_ADD); STAT_INC(BINARY_ADD, hit); - record_hit_inline(next_instr, oparg); GETLOCAL(next_oparg) = NULL; Py_DECREF(left); STACK_SHRINK(1); @@ -2150,7 +2140,6 @@ check_eval_breaker: DEOPT_IF(!PyFloat_CheckExact(left), BINARY_ADD); DEOPT_IF(Py_TYPE(right) != Py_TYPE(left), BINARY_ADD); STAT_INC(BINARY_ADD, hit); - record_hit_inline(next_instr, oparg); double dsum = ((PyFloatObject *)left)->ob_fval + ((PyFloatObject *)right)->ob_fval; PyObject *sum = PyFloat_FromDouble(dsum); @@ -2170,7 +2159,6 @@ check_eval_breaker: DEOPT_IF(!PyLong_CheckExact(left), BINARY_ADD); DEOPT_IF(Py_TYPE(right) != Py_TYPE(left), BINARY_ADD); STAT_INC(BINARY_ADD, hit); - record_hit_inline(next_instr, oparg); PyObject *sum = _PyLong_Add((PyLongObject *)left, (PyLongObject *)right); SET_SECOND(sum); Py_DECREF(right); @@ -2241,8 +2229,6 @@ check_eval_breaker: assert(((PyLongObject *)_PyLong_GetZero())->ob_digit[0] == 0); Py_ssize_t index = ((PyLongObject*)sub)->ob_digit[0]; DEOPT_IF(index >= PyList_GET_SIZE(list), BINARY_SUBSCR); - - record_hit_inline(next_instr, oparg); STAT_INC(BINARY_SUBSCR, hit); PyObject *res = PyList_GET_ITEM(list, index); assert(res != NULL); @@ -2266,8 +2252,6 @@ check_eval_breaker: assert(((PyLongObject *)_PyLong_GetZero())->ob_digit[0] == 0); Py_ssize_t index = ((PyLongObject*)sub)->ob_digit[0]; DEOPT_IF(index >= PyTuple_GET_SIZE(tuple), BINARY_SUBSCR); - - record_hit_inline(next_instr, oparg); STAT_INC(BINARY_SUBSCR, hit); PyObject *res = PyTuple_GET_ITEM(tuple, index); assert(res != NULL); @@ -2282,7 +2266,6 @@ check_eval_breaker: TARGET(BINARY_SUBSCR_DICT) { PyObject *dict = SECOND(); DEOPT_IF(!PyDict_CheckExact(SECOND()), BINARY_SUBSCR); - record_hit_inline(next_instr, oparg); STAT_INC(BINARY_SUBSCR, hit); PyObject *sub = TOP(); PyObject *res = PyDict_GetItemWithError(dict, sub); @@ -3258,7 +3241,6 @@ check_eval_breaker: PyDictKeyEntry *ep = DK_ENTRIES(dict->ma_keys) + cache0->index; PyObject *res = ep->me_value; DEOPT_IF(res == NULL, LOAD_GLOBAL); - record_cache_hit(cache0); STAT_INC(LOAD_GLOBAL, hit); Py_INCREF(res); PUSH(res); @@ -3279,7 +3261,6 @@ check_eval_breaker: PyDictKeyEntry *ep = DK_ENTRIES(bdict->ma_keys) + cache0->index; PyObject *res = ep->me_value; DEOPT_IF(res == NULL, LOAD_GLOBAL); - record_cache_hit(cache0); STAT_INC(LOAD_GLOBAL, hit); Py_INCREF(res); PUSH(res); @@ -3702,7 +3683,6 @@ check_eval_breaker: res = values->values[cache0->index]; DEOPT_IF(res == NULL, LOAD_ATTR); STAT_INC(LOAD_ATTR, hit); - record_cache_hit(cache0); Py_INCREF(res); SET_TOP(res); Py_DECREF(owner); @@ -3742,7 +3722,6 @@ check_eval_breaker: res = ep->me_value; DEOPT_IF(res == NULL, LOAD_ATTR); STAT_INC(LOAD_ATTR, hit); - record_cache_hit(cache0); Py_INCREF(res); SET_TOP(res); Py_DECREF(owner); @@ -3763,7 +3742,6 @@ check_eval_breaker: res = *(PyObject **)addr; DEOPT_IF(res == NULL, LOAD_ATTR); STAT_INC(LOAD_ATTR, hit); - record_cache_hit(cache0); Py_INCREF(res); SET_TOP(res); Py_DECREF(owner); @@ -3805,7 +3783,6 @@ check_eval_breaker: PyDictValues *values = *(PyDictValues **)(((char *)owner) + tp->tp_inline_values_offset); DEOPT_IF(values == NULL, STORE_ATTR); STAT_INC(STORE_ATTR, hit); - record_cache_hit(cache0); int index = cache0->index; STACK_SHRINK(1); PyObject *value = POP(); @@ -3843,7 +3820,6 @@ check_eval_breaker: PyObject *old_value = ep->me_value; DEOPT_IF(old_value == NULL, STORE_ATTR); STAT_INC(STORE_ATTR, hit); - record_cache_hit(cache0); STACK_SHRINK(1); PyObject *value = POP(); ep->me_value = value; @@ -3869,7 +3845,6 @@ check_eval_breaker: DEOPT_IF(tp->tp_version_tag != cache1->tp_version, STORE_ATTR); char *addr = (char *)owner + cache0->index; STAT_INC(STORE_ATTR, hit); - record_cache_hit(cache0); STACK_SHRINK(1); PyObject *value = POP(); PyObject *old_value = *(PyObject **)addr; @@ -4527,7 +4502,6 @@ check_eval_breaker: PyObject *self = TOP(); PyTypeObject *self_cls = Py_TYPE(self); SpecializedCacheEntry *caches = GET_CACHE(); - _PyAdaptiveEntry *cache0 = &caches[0].adaptive; _PyAttrCache *cache1 = &caches[-1].attr; _PyObjectCache *cache2 = &caches[-2].obj; @@ -4538,7 +4512,6 @@ check_eval_breaker: DEOPT_IF(dict != NULL, LOAD_METHOD); DEOPT_IF(((PyHeapTypeObject *)self_cls)->ht_cached_keys->dk_version != cache1->dk_version_or_hint, LOAD_METHOD); STAT_INC(LOAD_METHOD, hit); - record_cache_hit(cache0); PyObject *res = cache2->obj; assert(res != NULL); assert(_PyType_HasFeature(Py_TYPE(res), Py_TPFLAGS_METHOD_DESCRIPTOR)); @@ -4552,13 +4525,11 @@ check_eval_breaker: PyObject *self = TOP(); PyTypeObject *self_cls = Py_TYPE(self); SpecializedCacheEntry *caches = GET_CACHE(); - _PyAdaptiveEntry *cache0 = &caches[0].adaptive; _PyAttrCache *cache1 = &caches[-1].attr; _PyObjectCache *cache2 = &caches[-2].obj; DEOPT_IF(self_cls->tp_version_tag != cache1->tp_version, LOAD_METHOD); assert(self_cls->tp_dictoffset == 0); STAT_INC(LOAD_METHOD, hit); - record_cache_hit(cache0); PyObject *res = cache2->obj; assert(res != NULL); assert(_PyType_HasFeature(Py_TYPE(res), Py_TPFLAGS_METHOD_DESCRIPTOR)); @@ -4584,7 +4555,6 @@ check_eval_breaker: /* LOAD_METHOD, for class methods */ assert(cframe.use_tracing == 0); SpecializedCacheEntry *caches = GET_CACHE(); - _PyAdaptiveEntry *cache0 = &caches[0].adaptive; _PyAttrCache *cache1 = &caches[-1].attr; _PyObjectCache *cache2 = &caches[-2].obj; @@ -4595,7 +4565,6 @@ check_eval_breaker: assert(cache1->tp_version != 0); STAT_INC(LOAD_METHOD, hit); - record_cache_hit(cache0); PyObject *res = cache2->obj; assert(res != NULL); Py_INCREF(res); @@ -4751,7 +4720,6 @@ check_eval_breaker: /* PEP 523 */ DEOPT_IF(tstate->interp->eval_frame != NULL, CALL_FUNCTION); STAT_INC(CALL_FUNCTION, hit); - record_cache_hit(cache0); InterpreterFrame *new_frame = _PyThreadState_PushFrame( tstate, PyFunction_AS_FRAME_CONSTRUCTOR(func), NULL); if (new_frame == NULL) { @@ -4783,8 +4751,6 @@ check_eval_breaker: PyObject *callable = SECOND(); DEOPT_IF(!PyCFunction_CheckExact(callable), CALL_FUNCTION); DEOPT_IF(PyCFunction_GET_FLAGS(callable) != METH_O, CALL_FUNCTION); - _PyAdaptiveEntry *cache0 = &GET_CACHE()[0].adaptive; - record_cache_hit(cache0); STAT_INC(CALL_FUNCTION, hit); PyCFunction cfunc = PyCFunction_GET_FUNCTION(callable); @@ -4813,7 +4779,6 @@ check_eval_breaker: DEOPT_IF(!PyCFunction_CheckExact(callable), CALL_FUNCTION); DEOPT_IF(PyCFunction_GET_FLAGS(callable) != METH_FASTCALL, CALL_FUNCTION); - record_cache_hit(cache0); STAT_INC(CALL_FUNCTION, hit); PyCFunction cfunc = PyCFunction_GET_FUNCTION(callable); @@ -4845,13 +4810,11 @@ check_eval_breaker: assert(cframe.use_tracing == 0); /* len(o) */ SpecializedCacheEntry *caches = GET_CACHE(); - _PyAdaptiveEntry *cache0 = &caches[0].adaptive; + assert(caches[0].adaptive.original_oparg == 1); _PyObjectCache *cache1 = &caches[-1].obj; - assert(cache0->original_oparg == 1); PyObject *callable = SECOND(); DEOPT_IF(callable != cache1->obj, CALL_FUNCTION); - record_cache_hit(cache0); STAT_INC(CALL_FUNCTION, hit); Py_ssize_t len_i = PyObject_Length(TOP()); @@ -4875,13 +4838,11 @@ check_eval_breaker: assert(cframe.use_tracing == 0); /* isinstance(o, o2) */ SpecializedCacheEntry *caches = GET_CACHE(); - _PyAdaptiveEntry *cache0 = &caches[0].adaptive; + assert(caches[0].adaptive.original_oparg == 2); _PyObjectCache *cache1 = &caches[-1].obj; - assert(cache0->original_oparg == 2); PyObject *callable = THIRD(); DEOPT_IF(callable != cache1->obj, CALL_FUNCTION); - record_cache_hit(cache0); STAT_INC(CALL_FUNCTION, hit); int retval = PyObject_IsInstance(SECOND(), TOP()); @@ -5139,8 +5100,8 @@ opname ## _miss: \ { \ STAT_INC(opname, miss); \ _PyAdaptiveEntry *cache = &GET_CACHE()->adaptive; \ - record_cache_miss(cache); \ - if (too_many_cache_misses(cache)) { \ + cache->counter--; \ + if (cache->counter == 0) { \ next_instr[-1] = _Py_MAKECODEUNIT(opname ## _ADAPTIVE, _Py_OPARG(next_instr[-1])); \ STAT_INC(opname, deopt); \ cache_backoff(cache); \ @@ -5154,10 +5115,10 @@ opname ## _miss: \ opname ## _miss: \ { \ STAT_INC(opname, miss); \ - uint8_t oparg = saturating_decrement(_Py_OPARG(next_instr[-1])); \ + uint8_t oparg = _Py_OPARG(next_instr[-1])-1; \ UPDATE_PREV_INSTR_OPARG(next_instr, oparg); \ assert(_Py_OPARG(next_instr[-1]) == oparg); \ - if (oparg == saturating_zero()) /* too many cache misses */ { \ + if (oparg == 0) /* too many cache misses */ { \ oparg = ADAPTIVE_CACHE_BACKOFF; \ next_instr[-1] = _Py_MAKECODEUNIT(opname ## _ADAPTIVE, oparg); \ STAT_INC(opname, deopt); \ diff --git a/Python/specialize.c b/Python/specialize.c index 5cc7082..1627283 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -412,6 +412,18 @@ _Py_Quicken(PyCodeObject *code) { return 0; } +static inline int +initial_counter_value(void) { + /* Starting value for the counter. + * This value needs to be not too low, otherwise + * it would cause excessive de-optimization. + * Neither should it be too high, or that would delay + * de-optimization excessively when it is needed. + * A value around 50 seems to work, and we choose a + * prime number to avoid artifacts. + */ + return 53; +} /* Common */ @@ -770,7 +782,7 @@ fail: success: STAT_INC(LOAD_ATTR, specialization_success); assert(!PyErr_Occurred()); - cache0->counter = saturating_start(); + cache0->counter = initial_counter_value(); return 0; } @@ -852,7 +864,7 @@ fail: success: STAT_INC(STORE_ATTR, specialization_success); assert(!PyErr_Occurred()); - cache0->counter = saturating_start(); + cache0->counter = initial_counter_value(); return 0; } @@ -1010,7 +1022,7 @@ _Py_Specialize_LoadMethod(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, success: STAT_INC(LOAD_METHOD, specialization_success); assert(!PyErr_Occurred()); - cache0->counter = saturating_start(); + cache0->counter = initial_counter_value(); return 0; fail: STAT_INC(LOAD_METHOD, specialization_failure); @@ -1086,7 +1098,7 @@ fail: success: STAT_INC(LOAD_GLOBAL, specialization_success); assert(!PyErr_Occurred()); - cache0->counter = saturating_start(); + cache0->counter = initial_counter_value(); return 0; } @@ -1137,7 +1149,7 @@ _Py_Specialize_BinarySubscr( PyTypeObject *container_type = Py_TYPE(container); if (container_type == &PyList_Type) { if (PyLong_CheckExact(sub)) { - *instr = _Py_MAKECODEUNIT(BINARY_SUBSCR_LIST_INT, saturating_start()); + *instr = _Py_MAKECODEUNIT(BINARY_SUBSCR_LIST_INT, initial_counter_value()); goto success; } SPECIALIZATION_FAIL(BINARY_SUBSCR, @@ -1146,7 +1158,7 @@ _Py_Specialize_BinarySubscr( } if (container_type == &PyTuple_Type) { if (PyLong_CheckExact(sub)) { - *instr = _Py_MAKECODEUNIT(BINARY_SUBSCR_TUPLE_INT, saturating_start()); + *instr = _Py_MAKECODEUNIT(BINARY_SUBSCR_TUPLE_INT, initial_counter_value()); goto success; } SPECIALIZATION_FAIL(BINARY_SUBSCR, @@ -1154,7 +1166,7 @@ _Py_Specialize_BinarySubscr( goto fail; } if (container_type == &PyDict_Type) { - *instr = _Py_MAKECODEUNIT(BINARY_SUBSCR_DICT, saturating_start()); + *instr = _Py_MAKECODEUNIT(BINARY_SUBSCR_DICT, initial_counter_value()); goto success; } SPECIALIZATION_FAIL(BINARY_SUBSCR, @@ -1182,19 +1194,19 @@ _Py_Specialize_BinaryAdd(PyObject *left, PyObject *right, _Py_CODEUNIT *instr) if (left_type == &PyUnicode_Type) { int next_opcode = _Py_OPCODE(instr[1]); if (next_opcode == STORE_FAST) { - *instr = _Py_MAKECODEUNIT(BINARY_ADD_UNICODE_INPLACE_FAST, saturating_start()); + *instr = _Py_MAKECODEUNIT(BINARY_ADD_UNICODE_INPLACE_FAST, initial_counter_value()); } else { - *instr = _Py_MAKECODEUNIT(BINARY_ADD_UNICODE, saturating_start()); + *instr = _Py_MAKECODEUNIT(BINARY_ADD_UNICODE, initial_counter_value()); } goto success; } else if (left_type == &PyLong_Type) { - *instr = _Py_MAKECODEUNIT(BINARY_ADD_INT, saturating_start()); + *instr = _Py_MAKECODEUNIT(BINARY_ADD_INT, initial_counter_value()); goto success; } else if (left_type == &PyFloat_Type) { - *instr = _Py_MAKECODEUNIT(BINARY_ADD_FLOAT, saturating_start()); + *instr = _Py_MAKECODEUNIT(BINARY_ADD_FLOAT, initial_counter_value()); goto success; } @@ -1220,11 +1232,11 @@ _Py_Specialize_BinaryMultiply(PyObject *left, PyObject *right, _Py_CODEUNIT *ins goto fail; } if (PyLong_CheckExact(left)) { - *instr = _Py_MAKECODEUNIT(BINARY_MULTIPLY_INT, saturating_start()); + *instr = _Py_MAKECODEUNIT(BINARY_MULTIPLY_INT, initial_counter_value()); goto success; } else if (PyFloat_CheckExact(left)) { - *instr = _Py_MAKECODEUNIT(BINARY_MULTIPLY_FLOAT, saturating_start()); + *instr = _Py_MAKECODEUNIT(BINARY_MULTIPLY_FLOAT, initial_counter_value()); goto success; } else { @@ -1432,7 +1444,7 @@ _Py_Specialize_CallFunction( else { STAT_INC(CALL_FUNCTION, specialization_success); assert(!PyErr_Occurred()); - cache0->counter = saturating_start(); + cache0->counter = initial_counter_value(); } return 0; } |