From eecbc7c3900a7f40d8498b151db543a202c72f74 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Mon, 14 Jun 2021 11:04:09 +0100 Subject: bpo-44338: Port LOAD_GLOBAL to PEP 659 adaptive interpreter (GH-26638) * Add specializations of LOAD_GLOBAL. * Add more stats. * Remove old opcache; it is no longer used. * Add NEWS --- Include/cpython/code.h | 14 - Include/internal/pycore_code.h | 25 +- Include/opcode.h | 3 + Lib/opcode.py | 3 + .../2021-06-10-10-06-18.bpo-44338.c4Myr4.rst | 7 + Objects/codeobject.c | 74 +---- Python/ceval.c | 314 ++++++--------------- Python/opcode_targets.h | 6 +- Python/specialize.c | 100 ++++++- 9 files changed, 209 insertions(+), 337 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2021-06-10-10-06-18.bpo-44338.c4Myr4.rst diff --git a/Include/cpython/code.h b/Include/cpython/code.h index f6e789d..df79ddb 100644 --- a/Include/cpython/code.h +++ b/Include/cpython/code.h @@ -106,20 +106,6 @@ struct PyCodeObject { interpreter. */ union _cache_or_instruction *co_quickened; - /* Per opcodes just-in-time cache - * - * To reduce cache size, we use indirect mapping from opcode index to - * cache object: - * cache = co_opcache[co_opcache_map[next_instr - first_instr] - 1] - */ - - // co_opcache_map is indexed by (next_instr - first_instr). - // * 0 means there is no cache for this opcode. - // * n > 0 means there is cache in co_opcache[n-1]. - unsigned char *co_opcache_map; - _PyOpcache *co_opcache; - int co_opcache_flag; // used to determine when create a cache. - unsigned char co_opcache_size; // length of co_opcache. }; /* Masks for co_flags above */ diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h index bfc2deb..098fbe4 100644 --- a/Include/internal/pycore_code.h +++ b/Include/internal/pycore_code.h @@ -48,6 +48,11 @@ typedef struct { uint32_t dk_version_or_hint; } _PyLoadAttrCache; +typedef struct { + uint32_t module_keys_version; + uint32_t builtin_keys_version; +} _PyLoadGlobalCache; + /* Add specialized versions of entries to this union. * * Do not break the invariant: sizeof(SpecializedCacheEntry) == 8 @@ -62,6 +67,7 @@ typedef union { _PyEntryZero zero; _PyAdaptiveEntry adaptive; _PyLoadAttrCache load_attr; + _PyLoadGlobalCache load_global; } SpecializedCacheEntry; #define INSTRUCTIONS_PER_ENTRY (sizeof(SpecializedCacheEntry)/sizeof(_Py_CODEUNIT)) @@ -254,8 +260,6 @@ PyAPI_FUNC(PyCodeObject *) _PyCode_New(struct _PyCodeConstructor *); /* Private API */ -int _PyCode_InitOpcache(PyCodeObject *co); - /* Getters for internal PyCodeObject data. */ PyAPI_FUNC(PyObject *) _PyCode_GetVarnames(PyCodeObject *); PyAPI_FUNC(PyObject *) _PyCode_GetCellvars(PyCodeObject *); @@ -318,24 +322,25 @@ cache_backoff(_PyAdaptiveEntry *entry) { /* Specialization functions */ int _Py_Specialize_LoadAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, SpecializedCacheEntry *cache); +int _Py_Specialize_LoadGlobal(PyObject *globals, PyObject *builtins, _Py_CODEUNIT *instr, PyObject *name, SpecializedCacheEntry *cache); #define SPECIALIZATION_STATS 0 #if SPECIALIZATION_STATS -typedef struct _specialization_stats { +typedef struct _stats { uint64_t specialization_success; uint64_t specialization_failure; - uint64_t loadattr_hit; - uint64_t loadattr_deferred; - uint64_t loadattr_miss; - uint64_t loadattr_deopt; + uint64_t hit; + uint64_t deferred; + uint64_t miss; + uint64_t deopt; } SpecializationStats; -extern SpecializationStats _specialization_stats; -#define STAT_INC(name) _specialization_stats.name++ +extern SpecializationStats _specialization_stats[256]; +#define STAT_INC(opname, name) _specialization_stats[opname].name++ void _Py_PrintSpecializationStats(void); #else -#define STAT_INC(name) ((void)0) +#define STAT_INC(opname, name) ((void)0) #endif diff --git a/Include/opcode.h b/Include/opcode.h index 8f5be99..7f8376f 100644 --- a/Include/opcode.h +++ b/Include/opcode.h @@ -142,6 +142,9 @@ extern "C" { #define LOAD_ATTR_WITH_HINT 14 #define LOAD_ATTR_SLOT 18 #define LOAD_ATTR_MODULE 21 +#define LOAD_GLOBAL_ADAPTIVE 36 +#define LOAD_GLOBAL_MODULE 38 +#define LOAD_GLOBAL_BUILTIN 39 #ifdef NEED_OPCODE_JUMP_TABLES static uint32_t _PyOpcode_RelativeJump[8] = { 0U, diff --git a/Lib/opcode.py b/Lib/opcode.py index 265759e..7e5916a 100644 --- a/Lib/opcode.py +++ b/Lib/opcode.py @@ -226,4 +226,7 @@ _specialized_instructions = [ "LOAD_ATTR_WITH_HINT", "LOAD_ATTR_SLOT", "LOAD_ATTR_MODULE", + "LOAD_GLOBAL_ADAPTIVE", + "LOAD_GLOBAL_MODULE", + "LOAD_GLOBAL_BUILTIN", ] diff --git a/Misc/NEWS.d/next/Core and Builtins/2021-06-10-10-06-18.bpo-44338.c4Myr4.rst b/Misc/NEWS.d/next/Core and Builtins/2021-06-10-10-06-18.bpo-44338.c4Myr4.rst new file mode 100644 index 0000000..beaa3e5 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2021-06-10-10-06-18.bpo-44338.c4Myr4.rst @@ -0,0 +1,7 @@ +Implement adaptive specialization for LOAD_GLOBAL + +Two specialized forms of LOAD_GLOBAL are added: + +* LOAD_GLOBAL_MODULE + +* LOAD_GLOBAL_BUILTIN diff --git a/Objects/codeobject.c b/Objects/codeobject.c index 701a37d..e054c43 100644 --- a/Objects/codeobject.c +++ b/Objects/codeobject.c @@ -350,10 +350,7 @@ init_code(PyCodeObject *co, struct _PyCodeConstructor *con) /* not set */ co->co_weakreflist = NULL; co->co_extra = NULL; - co->co_opcache_map = NULL; - co->co_opcache = NULL; - co->co_opcache_flag = 0; - co->co_opcache_size = 0; + co->co_warmup = QUICKENING_INITIAL_WARMUP_VALUE; co->co_quickened = NULL; } @@ -912,55 +909,6 @@ new_linesiterator(PyCodeObject *code) return li; } - -/****************** - * the opcache - ******************/ - -int -_PyCode_InitOpcache(PyCodeObject *co) -{ - Py_ssize_t co_size = PyBytes_Size(co->co_code) / sizeof(_Py_CODEUNIT); - co->co_opcache_map = (unsigned char *)PyMem_Calloc(co_size, 1); - if (co->co_opcache_map == NULL) { - return -1; - } - - const _Py_CODEUNIT *opcodes = (const _Py_CODEUNIT*)PyBytes_AS_STRING(co->co_code); - Py_ssize_t opts = 0; - - for (Py_ssize_t i = 0; i < co_size;) { - unsigned char opcode = _Py_OPCODE(opcodes[i]); - i++; // 'i' is now aligned to (next_instr - first_instr) - - // TODO: LOAD_METHOD - if (opcode == LOAD_GLOBAL || opcode == LOAD_ATTR) { - opts++; - co->co_opcache_map[i] = (unsigned char)opts; - if (opts > 254) { - break; - } - } - } - - if (opts) { - co->co_opcache = (_PyOpcache *)PyMem_Calloc(opts, sizeof(_PyOpcache)); - if (co->co_opcache == NULL) { - PyMem_Free(co->co_opcache_map); - return -1; - } - } - else { - PyMem_Free(co->co_opcache_map); - co->co_opcache_map = NULL; - co->co_opcache = NULL; - } - - co->co_opcache_size = (unsigned char)opts; - return 0; -} - - /****************** * "extra" frame eval info (see PEP 523) ******************/ @@ -1207,15 +1155,6 @@ code_new_impl(PyTypeObject *type, int argcount, int posonlyargcount, static void code_dealloc(PyCodeObject *co) { - if (co->co_opcache != NULL) { - PyMem_Free(co->co_opcache); - } - if (co->co_opcache_map != NULL) { - PyMem_Free(co->co_opcache_map); - } - co->co_opcache_flag = 0; - co->co_opcache_size = 0; - if (co->co_extra != NULL) { PyInterpreterState *interp = _PyInterpreterState_GET(); _PyCodeObjectExtra *co_extra = co->co_extra; @@ -1442,12 +1381,11 @@ code_sizeof(PyCodeObject *co, PyObject *Py_UNUSED(args)) res += co->co_ncellvars * sizeof(Py_ssize_t); } - if (co->co_opcache != NULL) { - assert(co->co_opcache_map != NULL); - // co_opcache_map - res += PyBytes_GET_SIZE(co->co_code) / sizeof(_Py_CODEUNIT); - // co_opcache - res += co->co_opcache_size * sizeof(_PyOpcache); + if (co->co_quickened != NULL) { + Py_ssize_t count = co->co_quickened[0].entry.zero.cache_count; + count += (PyBytes_GET_SIZE(co->co_code)+sizeof(SpecializedCacheEntry)-1)/ + sizeof(SpecializedCacheEntry); + res += count * sizeof(SpecializedCacheEntry); } return PyLong_FromSsize_t(res); diff --git a/Python/ceval.c b/Python/ceval.c index c42404c..25d077c 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -13,7 +13,7 @@ #include "pycore_abstract.h" // _PyIndex_Check() #include "pycore_call.h" // _PyObject_FastCallDictTstate() #include "pycore_ceval.h" // _PyEval_SignalAsyncExc() -#include "pycore_code.h" // _PyCode_InitOpcache() +#include "pycore_code.h" #include "pycore_initconfig.h" // _PyStatus_OK() #include "pycore_object.h" // _PyObject_GC_TRACK() #include "pycore_moduleobject.h" @@ -109,7 +109,6 @@ static long dxp[256]; /* per opcode cache */ static int opcache_min_runs = 1024; /* create opcache when code executed this many times */ #define OPCODE_CACHE_MAX_TRIES 20 -#define OPCACHE_STATS 0 /* Enable stats */ // This function allows to deactivate the opcode cache. As different cache mechanisms may hold // references, this can mess with the reference leak detector functionality so the cache needs @@ -120,22 +119,6 @@ _PyEval_DeactivateOpCache(void) opcache_min_runs = 0; } -#if OPCACHE_STATS -static size_t opcache_code_objects = 0; -static size_t opcache_code_objects_extra_mem = 0; - -static size_t opcache_global_opts = 0; -static size_t opcache_global_hits = 0; -static size_t opcache_global_misses = 0; - -static size_t opcache_attr_opts = 0; -static size_t opcache_attr_hits = 0; -static size_t opcache_attr_misses = 0; -static size_t opcache_attr_deopts = 0; -static size_t opcache_attr_total = 0; -#endif - - #ifndef NDEBUG /* Ensure that tstate is valid: sanity check for PyEval_AcquireThread() and PyEval_RestoreThread(). Detect if tstate memory was freed. It can happen @@ -360,48 +343,8 @@ PyEval_InitThreads(void) void _PyEval_Fini(void) { -#if OPCACHE_STATS - fprintf(stderr, "-- Opcode cache number of objects = %zd\n", - opcache_code_objects); - - fprintf(stderr, "-- Opcode cache total extra mem = %zd\n", - opcache_code_objects_extra_mem); - - fprintf(stderr, "\n"); - - fprintf(stderr, "-- Opcode cache LOAD_GLOBAL hits = %zd (%d%%)\n", - opcache_global_hits, - (int) (100.0 * opcache_global_hits / - (opcache_global_hits + opcache_global_misses))); - - fprintf(stderr, "-- Opcode cache LOAD_GLOBAL misses = %zd (%d%%)\n", - opcache_global_misses, - (int) (100.0 * opcache_global_misses / - (opcache_global_hits + opcache_global_misses))); - - fprintf(stderr, "-- Opcode cache LOAD_GLOBAL opts = %zd\n", - opcache_global_opts); - - fprintf(stderr, "\n"); - - fprintf(stderr, "-- Opcode cache LOAD_ATTR hits = %zd (%d%%)\n", - opcache_attr_hits, - (int) (100.0 * opcache_attr_hits / - opcache_attr_total)); - - fprintf(stderr, "-- Opcode cache LOAD_ATTR misses = %zd (%d%%)\n", - opcache_attr_misses, - (int) (100.0 * opcache_attr_misses / - opcache_attr_total)); - - fprintf(stderr, "-- Opcode cache LOAD_ATTR opts = %zd\n", - opcache_attr_opts); - - fprintf(stderr, "-- Opcode cache LOAD_ATTR deopts = %zd\n", - opcache_attr_deopts); - - fprintf(stderr, "-- Opcode cache LOAD_ATTR total = %zd\n", - opcache_attr_total); +#if SPECIALIZATION_STATS + _Py_PrintSpecializationStats(); #endif } @@ -1448,108 +1391,11 @@ eval_frame_handle_pending(PyThreadState *tstate) GETLOCAL(i) = value; \ Py_XDECREF(tmp); } while (0) - /* macros for opcode cache */ -#define OPCACHE_CHECK() \ - do { \ - co_opcache = NULL; \ - if (co->co_opcache != NULL) { \ - unsigned char co_opcache_offset = \ - co->co_opcache_map[next_instr - first_instr]; \ - if (co_opcache_offset > 0) { \ - assert(co_opcache_offset <= co->co_opcache_size); \ - co_opcache = &co->co_opcache[co_opcache_offset - 1]; \ - assert(co_opcache != NULL); \ - } \ - } \ - } while (0) - -#define OPCACHE_DEOPT() \ - do { \ - if (co_opcache != NULL) { \ - co_opcache->optimized = -1; \ - unsigned char co_opcache_offset = \ - co->co_opcache_map[next_instr - first_instr]; \ - assert(co_opcache_offset <= co->co_opcache_size); \ - co->co_opcache_map[co_opcache_offset] = 0; \ - co_opcache = NULL; \ - } \ - } while (0) - -#define OPCACHE_DEOPT_LOAD_ATTR() \ - do { \ - if (co_opcache != NULL) { \ - OPCACHE_STAT_ATTR_DEOPT(); \ - OPCACHE_DEOPT(); \ - } \ - } while (0) - -#define OPCACHE_MAYBE_DEOPT_LOAD_ATTR() \ - do { \ - if (co_opcache != NULL && --co_opcache->optimized <= 0) { \ - OPCACHE_DEOPT_LOAD_ATTR(); \ - } \ - } while (0) - -#if OPCACHE_STATS - -#define OPCACHE_STAT_GLOBAL_HIT() \ - do { \ - if (co->co_opcache != NULL) opcache_global_hits++; \ - } while (0) - -#define OPCACHE_STAT_GLOBAL_MISS() \ - do { \ - if (co->co_opcache != NULL) opcache_global_misses++; \ - } while (0) - -#define OPCACHE_STAT_GLOBAL_OPT() \ - do { \ - if (co->co_opcache != NULL) opcache_global_opts++; \ - } while (0) - -#define OPCACHE_STAT_ATTR_HIT() \ - do { \ - if (co->co_opcache != NULL) opcache_attr_hits++; \ - } while (0) - -#define OPCACHE_STAT_ATTR_MISS() \ - do { \ - if (co->co_opcache != NULL) opcache_attr_misses++; \ - } while (0) - -#define OPCACHE_STAT_ATTR_OPT() \ - do { \ - if (co->co_opcache!= NULL) opcache_attr_opts++; \ - } while (0) - -#define OPCACHE_STAT_ATTR_DEOPT() \ - do { \ - if (co->co_opcache != NULL) opcache_attr_deopts++; \ - } while (0) - -#define OPCACHE_STAT_ATTR_TOTAL() \ - do { \ - if (co->co_opcache != NULL) opcache_attr_total++; \ - } while (0) - -#else /* OPCACHE_STATS */ - -#define OPCACHE_STAT_GLOBAL_HIT() -#define OPCACHE_STAT_GLOBAL_MISS() -#define OPCACHE_STAT_GLOBAL_OPT() - -#define OPCACHE_STAT_ATTR_HIT() -#define OPCACHE_STAT_ATTR_MISS() -#define OPCACHE_STAT_ATTR_OPT() -#define OPCACHE_STAT_ATTR_DEOPT() -#define OPCACHE_STAT_ATTR_TOTAL() - #define JUMP_TO_INSTRUCTION(op) goto PREDICT_ID(op) #define GET_CACHE() \ _GetSpecializedCacheEntryForInstruction(first_instr, INSTR_OFFSET(), oparg) -#endif #define DEOPT_IF(cond, instname) if (cond) { goto instname ## _miss; } @@ -1582,7 +1428,6 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) _Py_CODEUNIT *first_instr; PyObject *names; PyObject *consts; - _PyOpcache *co_opcache; #ifdef LLTRACE _Py_IDENTIFIER(__ltrace__); @@ -1690,21 +1535,6 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) f->f_stackdepth = -1; f->f_state = FRAME_EXECUTING; - if (co->co_opcache_flag < opcache_min_runs) { - co->co_opcache_flag++; - if (co->co_opcache_flag == opcache_min_runs) { - if (_PyCode_InitOpcache(co) < 0) { - goto exit_eval_frame; - } -#if OPCACHE_STATS - opcache_code_objects_extra_mem += - PyBytes_Size(co->co_code) / sizeof(_Py_CODEUNIT) + - sizeof(_PyOpcache) * co->co_opcache_size; - opcache_code_objects++; -#endif - } - } - #ifdef LLTRACE { int r = _PyDict_ContainsId(GLOBALS(), &PyId___ltrace__); @@ -2974,30 +2804,12 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) } case TARGET(LOAD_GLOBAL): { - PyObject *name; + PREDICTED(LOAD_GLOBAL); + PyObject *name = GETITEM(names, oparg); PyObject *v; if (PyDict_CheckExact(GLOBALS()) && PyDict_CheckExact(BUILTINS())) { - OPCACHE_CHECK(); - if (co_opcache != NULL && co_opcache->optimized > 0) { - _PyOpcache_LoadGlobal *lg = &co_opcache->u.lg; - - if (lg->globals_ver == - ((PyDictObject *)GLOBALS())->ma_version_tag - && lg->builtins_ver == - ((PyDictObject *)BUILTINS())->ma_version_tag) - { - PyObject *ptr = lg->ptr; - OPCACHE_STAT_GLOBAL_HIT(); - assert(ptr != NULL); - Py_INCREF(ptr); - PUSH(ptr); - DISPATCH(); - } - } - - name = GETITEM(names, oparg); v = _PyDict_LoadGlobal((PyDictObject *)GLOBALS(), (PyDictObject *)BUILTINS(), name); @@ -3010,25 +2822,6 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) } goto error; } - - if (co_opcache != NULL) { - _PyOpcache_LoadGlobal *lg = &co_opcache->u.lg; - - if (co_opcache->optimized == 0) { - /* Wasn't optimized before. */ - OPCACHE_STAT_GLOBAL_OPT(); - } else { - OPCACHE_STAT_GLOBAL_MISS(); - } - - co_opcache->optimized = 1; - lg->globals_ver = - ((PyDictObject *)GLOBALS())->ma_version_tag; - lg->builtins_ver = - ((PyDictObject *)BUILTINS())->ma_version_tag; - lg->ptr = v; /* borrowed */ - } - Py_INCREF(v); } else { @@ -3059,6 +2852,61 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) DISPATCH(); } + case TARGET(LOAD_GLOBAL_ADAPTIVE): { + SpecializedCacheEntry *cache = GET_CACHE(); + if (cache->adaptive.counter == 0) { + PyObject *name = GETITEM(names, cache->adaptive.original_oparg); + next_instr--; + if (_Py_Specialize_LoadGlobal(GLOBALS(), BUILTINS(), next_instr, name, cache) < 0) { + goto error; + } + DISPATCH(); + } + else { + STAT_INC(LOAD_GLOBAL, deferred); + cache->adaptive.counter--; + oparg = cache->adaptive.original_oparg; + JUMP_TO_INSTRUCTION(LOAD_GLOBAL); + } + } + + case TARGET(LOAD_GLOBAL_MODULE): { + DEOPT_IF(!PyDict_CheckExact(GLOBALS()), LOAD_GLOBAL); + PyDictObject *dict = (PyDictObject *)GLOBALS(); + SpecializedCacheEntry *caches = GET_CACHE(); + _PyAdaptiveEntry *cache0 = &caches[0].adaptive; + _PyLoadGlobalCache *cache1 = &caches[-1].load_global; + DEOPT_IF(dict->ma_keys->dk_version != cache1->module_keys_version, LOAD_GLOBAL); + PyDictKeyEntry *ep = DK_ENTRIES(dict->ma_keys) + cache0->index; + PyObject *res = ep->me_value; + DEOPT_IF(res == NULL, LOAD_GLOBAL); + record_cache_hit(cache0); + STAT_INC(LOAD_GLOBAL, hit); + Py_INCREF(res); + PUSH(res); + DISPATCH(); + } + + case TARGET(LOAD_GLOBAL_BUILTIN): { + DEOPT_IF(!PyDict_CheckExact(GLOBALS()), LOAD_GLOBAL); + DEOPT_IF(!PyDict_CheckExact(BUILTINS()), LOAD_GLOBAL); + PyDictObject *mdict = (PyDictObject *)GLOBALS(); + PyDictObject *bdict = (PyDictObject *)BUILTINS(); + SpecializedCacheEntry *caches = GET_CACHE(); + _PyAdaptiveEntry *cache0 = &caches[0].adaptive; + _PyLoadGlobalCache *cache1 = &caches[-1].load_global; + DEOPT_IF(mdict->ma_keys->dk_version != cache1->module_keys_version, LOAD_GLOBAL); + DEOPT_IF(bdict->ma_keys->dk_version != cache1->builtin_keys_version, LOAD_GLOBAL); + PyDictKeyEntry *ep = DK_ENTRIES(bdict->ma_keys) + cache0->index; + PyObject *res = ep->me_value; + DEOPT_IF(res == NULL, LOAD_GLOBAL); + record_cache_hit(cache0); + STAT_INC(LOAD_GLOBAL, hit); + Py_INCREF(res); + PUSH(res); + DISPATCH(); + } + case TARGET(DELETE_FAST): { PyObject *v = GETLOCAL(oparg); if (v != NULL) { @@ -3464,7 +3312,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) DISPATCH(); } else { - STAT_INC(loadattr_deferred); + STAT_INC(LOAD_ATTR, deferred); cache->adaptive.counter--; oparg = cache->adaptive.original_oparg; JUMP_TO_INSTRUCTION(LOAD_ATTR); @@ -3487,9 +3335,9 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) DEOPT_IF(dict->ma_keys->dk_version != cache1->dk_version_or_hint, LOAD_ATTR); res = dict->ma_values[cache0->index]; DEOPT_IF(res == NULL, LOAD_ATTR); - STAT_INC(loadattr_hit); + STAT_INC(LOAD_ATTR, hit); record_cache_hit(cache0); - STAT_INC(loadattr_hit); + STAT_INC(LOAD_ATTR, hit); Py_INCREF(res); SET_TOP(res); Py_DECREF(owner); @@ -3510,7 +3358,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) PyDictKeyEntry *ep = DK_ENTRIES(dict->ma_keys) + cache0->index; res = ep->me_value; DEOPT_IF(res == NULL, LOAD_ATTR); - STAT_INC(loadattr_hit); + STAT_INC(LOAD_ATTR, hit); record_cache_hit(cache0); Py_INCREF(res); SET_TOP(res); @@ -3538,7 +3386,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) DEOPT_IF(ep->me_key != name, LOAD_ATTR); res = ep->me_value; DEOPT_IF(res == NULL, LOAD_ATTR); - STAT_INC(loadattr_hit); + STAT_INC(LOAD_ATTR, hit); record_cache_hit(cache0); Py_INCREF(res); SET_TOP(res); @@ -3558,7 +3406,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) char *addr = (char *)owner + cache0->index; res = *(PyObject **)addr; DEOPT_IF(res == NULL, LOAD_ATTR); - STAT_INC(loadattr_hit); + STAT_INC(LOAD_ATTR, hit); record_cache_hit(cache0); Py_INCREF(res); SET_TOP(res); @@ -4445,22 +4293,26 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) or goto error. */ Py_UNREACHABLE(); -/* Cache misses */ +/* Specialization misses */ -LOAD_ATTR_miss: - { - STAT_INC(loadattr_miss); - _PyAdaptiveEntry *cache = &GET_CACHE()->adaptive; - record_cache_miss(cache); - if (too_many_cache_misses(cache)) { - next_instr[-1] = _Py_MAKECODEUNIT(LOAD_ATTR_ADAPTIVE, _Py_OPARG(next_instr[-1])); - STAT_INC(loadattr_deopt); - cache_backoff(cache); - } - oparg = cache->original_oparg; - JUMP_TO_INSTRUCTION(LOAD_ATTR); +#define MISS_WITH_CACHE(opname) \ +opname ## _miss: \ + { \ + STAT_INC(opname, miss); \ + _PyAdaptiveEntry *cache = &GET_CACHE()->adaptive; \ + record_cache_miss(cache); \ + if (too_many_cache_misses(cache)) { \ + next_instr[-1] = _Py_MAKECODEUNIT(opname ## _ADAPTIVE, _Py_OPARG(next_instr[-1])); \ + STAT_INC(opname, deopt); \ + cache_backoff(cache); \ + } \ + oparg = cache->original_oparg; \ + JUMP_TO_INSTRUCTION(opname); \ } +MISS_WITH_CACHE(LOAD_ATTR) +MISS_WITH_CACHE(LOAD_GLOBAL) + error: /* Double-check exception status. */ #ifdef NDEBUG diff --git a/Python/opcode_targets.h b/Python/opcode_targets.h index 47beee7..ecc95da 100644 --- a/Python/opcode_targets.h +++ b/Python/opcode_targets.h @@ -35,10 +35,10 @@ static void *opcode_targets[256] = { &&TARGET_MATCH_KEYS, &&TARGET_COPY_DICT_WITHOUT_KEYS, &&TARGET_PUSH_EXC_INFO, - &&_unknown_opcode, + &&TARGET_LOAD_GLOBAL_ADAPTIVE, &&TARGET_POP_EXCEPT_AND_RERAISE, - &&_unknown_opcode, - &&_unknown_opcode, + &&TARGET_LOAD_GLOBAL_MODULE, + &&TARGET_LOAD_GLOBAL_BUILTIN, &&_unknown_opcode, &&_unknown_opcode, &&_unknown_opcode, diff --git a/Python/specialize.c b/Python/specialize.c index d82122d..d98433b 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -33,18 +33,27 @@ Py_ssize_t _Py_QuickenedCount = 0; #if SPECIALIZATION_STATS -SpecializationStats _specialization_stats = { 0 }; +SpecializationStats _specialization_stats[256] = { 0 }; + +#define PRINT_STAT(name, field) fprintf(stderr, " %s." #field " : %" PRIu64 "\n", name, stats->field); + +static void +print_stats(SpecializationStats *stats, const char *name) +{ + PRINT_STAT(name, specialization_success); + PRINT_STAT(name, specialization_failure); + PRINT_STAT(name, hit); + PRINT_STAT(name, deferred); + PRINT_STAT(name, miss); + PRINT_STAT(name, deopt); +} -#define PRINT_STAT(name) fprintf(stderr, #name " : %" PRIu64" \n", _specialization_stats.name); void _Py_PrintSpecializationStats(void) { - PRINT_STAT(specialization_success); - PRINT_STAT(specialization_failure); - PRINT_STAT(loadattr_hit); - PRINT_STAT(loadattr_deferred); - PRINT_STAT(loadattr_miss); - PRINT_STAT(loadattr_deopt); + printf("Specialization stats:\n"); + print_stats(&_specialization_stats[LOAD_ATTR], "load_attr"); + print_stats(&_specialization_stats[LOAD_GLOBAL], "load_global"); } #endif @@ -77,11 +86,13 @@ get_cache_count(SpecializedCacheOrInstruction *quickened) { Values of zero are ignored. */ static uint8_t adaptive_opcodes[256] = { [LOAD_ATTR] = LOAD_ATTR_ADAPTIVE, + [LOAD_GLOBAL] = LOAD_GLOBAL_ADAPTIVE, }; /* The number of cache entries required for a "family" of instructions. */ static uint8_t cache_requirements[256] = { - [LOAD_ATTR] = 2, + [LOAD_ATTR] = 2, /* _PyAdaptiveEntry and _PyLoadAttrCache */ + [LOAD_GLOBAL] = 2, /* _PyAdaptiveEntry and _PyLoadGlobalCache */ }; /* Return the oparg for the cache_offset and instruction index. @@ -357,14 +368,81 @@ _Py_Specialize_LoadAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, Sp } fail: - STAT_INC(specialization_failure); + STAT_INC(LOAD_ATTR, specialization_failure); assert(!PyErr_Occurred()); cache_backoff(cache0); return 0; success: - STAT_INC(specialization_success); + STAT_INC(LOAD_ATTR, specialization_success); assert(!PyErr_Occurred()); cache0->counter = saturating_start(); return 0; } + +int +_Py_Specialize_LoadGlobal( + PyObject *globals, PyObject *builtins, + _Py_CODEUNIT *instr, PyObject *name, + SpecializedCacheEntry *cache) +{ + _PyAdaptiveEntry *cache0 = &cache->adaptive; + _PyLoadGlobalCache *cache1 = &cache[-1].load_global; + assert(PyUnicode_CheckExact(name)); + if (!PyDict_CheckExact(globals)) { + goto fail; + } + if (((PyDictObject *)globals)->ma_keys->dk_kind != DICT_KEYS_UNICODE) { + goto fail; + } + PyObject *value = NULL; + Py_ssize_t index = _PyDict_GetItemHint((PyDictObject *)globals, name, -1, &value); + assert (index != DKIX_ERROR); + if (index != DKIX_EMPTY) { + if (index != (uint16_t)index) { + goto fail; + } + uint32_t keys_version = _PyDictKeys_GetVersionForCurrentState((PyDictObject *)globals); + if (keys_version == 0) { + goto fail; + } + cache1->module_keys_version = keys_version; + cache0->index = (uint16_t)index; + *instr = _Py_MAKECODEUNIT(LOAD_GLOBAL_MODULE, _Py_OPARG(*instr)); + goto success; + } + if (!PyDict_CheckExact(builtins)) { + goto fail; + } + if (((PyDictObject *)builtins)->ma_keys->dk_kind != DICT_KEYS_UNICODE) { + goto fail; + } + index = _PyDict_GetItemHint((PyDictObject *)builtins, name, -1, &value); + assert (index != DKIX_ERROR); + if (index != (uint16_t)index) { + goto fail; + } + uint32_t globals_version = _PyDictKeys_GetVersionForCurrentState((PyDictObject *)globals); + if (globals_version == 0) { + goto fail; + } + uint32_t builtins_version = _PyDictKeys_GetVersionForCurrentState((PyDictObject *)builtins); + if (builtins_version == 0) { + goto fail; + } + cache1->module_keys_version = globals_version; + cache1->builtin_keys_version = builtins_version; + cache0->index = (uint16_t)index; + *instr = _Py_MAKECODEUNIT(LOAD_GLOBAL_BUILTIN, _Py_OPARG(*instr)); + goto success; +fail: + STAT_INC(LOAD_GLOBAL, specialization_failure); + assert(!PyErr_Occurred()); + cache_backoff(cache0); + return 0; +success: + STAT_INC(LOAD_GLOBAL, specialization_success); + assert(!PyErr_Occurred()); + cache0->counter = saturating_start(); + return 0; +} -- cgit v0.12