summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMark Shannon <mark@hotpy.org>2021-06-14 10:04:09 (GMT)
committerGitHub <noreply@github.com>2021-06-14 10:04:09 (GMT)
commiteecbc7c3900a7f40d8498b151db543a202c72f74 (patch)
tree6be5d67366f8df3e24c3dbed0786ec3c4a29bf1b
parentfafcfff9262ae9dee03a00006638dfcbcfc23a7b (diff)
downloadcpython-eecbc7c3900a7f40d8498b151db543a202c72f74.zip
cpython-eecbc7c3900a7f40d8498b151db543a202c72f74.tar.gz
cpython-eecbc7c3900a7f40d8498b151db543a202c72f74.tar.bz2
bpo-44338: Port LOAD_GLOBAL to PEP 659 adaptive interpreter (GH-26638)
* Add specializations of LOAD_GLOBAL.
* Add more stats.
* Remove old opcache; it is no longer used.
* Add NEWS
-rw-r--r--Include/cpython/code.h14
-rw-r--r--Include/internal/pycore_code.h25
-rw-r--r--Include/opcode.h3
-rw-r--r--Lib/opcode.py3
-rw-r--r--Misc/NEWS.d/next/Core and Builtins/2021-06-10-10-06-18.bpo-44338.c4Myr4.rst7
-rw-r--r--Objects/codeobject.c74
-rw-r--r--Python/ceval.c314
-rw-r--r--Python/opcode_targets.h6
-rw-r--r--Python/specialize.c100
9 files changed, 209 insertions, 337 deletions
diff --git a/Include/cpython/code.h b/Include/cpython/code.h
index f6e789d..df79ddb 100644
--- a/Include/cpython/code.h
+++ b/Include/cpython/code.h
@@ -106,20 +106,6 @@ struct PyCodeObject {
interpreter. */
union _cache_or_instruction *co_quickened;
- /* Per opcodes just-in-time cache
- *
- * To reduce cache size, we use indirect mapping from opcode index to
- * cache object:
- * cache = co_opcache[co_opcache_map[next_instr - first_instr] - 1]
- */
-
- // co_opcache_map is indexed by (next_instr - first_instr).
- // * 0 means there is no cache for this opcode.
- // * n > 0 means there is cache in co_opcache[n-1].
- unsigned char *co_opcache_map;
- _PyOpcache *co_opcache;
- int co_opcache_flag; // used to determine when create a cache.
- unsigned char co_opcache_size; // length of co_opcache.
};
/* Masks for co_flags above */
diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h
index bfc2deb..098fbe4 100644
--- a/Include/internal/pycore_code.h
+++ b/Include/internal/pycore_code.h
@@ -48,6 +48,11 @@ typedef struct {
uint32_t dk_version_or_hint;
} _PyLoadAttrCache;
+typedef struct {
+ uint32_t module_keys_version;
+ uint32_t builtin_keys_version;
+} _PyLoadGlobalCache;
+
/* Add specialized versions of entries to this union.
*
* Do not break the invariant: sizeof(SpecializedCacheEntry) == 8
@@ -62,6 +67,7 @@ typedef union {
_PyEntryZero zero;
_PyAdaptiveEntry adaptive;
_PyLoadAttrCache load_attr;
+ _PyLoadGlobalCache load_global;
} SpecializedCacheEntry;
#define INSTRUCTIONS_PER_ENTRY (sizeof(SpecializedCacheEntry)/sizeof(_Py_CODEUNIT))
@@ -254,8 +260,6 @@ PyAPI_FUNC(PyCodeObject *) _PyCode_New(struct _PyCodeConstructor *);
/* Private API */
-int _PyCode_InitOpcache(PyCodeObject *co);
-
/* Getters for internal PyCodeObject data. */
PyAPI_FUNC(PyObject *) _PyCode_GetVarnames(PyCodeObject *);
PyAPI_FUNC(PyObject *) _PyCode_GetCellvars(PyCodeObject *);
@@ -318,24 +322,25 @@ cache_backoff(_PyAdaptiveEntry *entry) {
/* Specialization functions */
int _Py_Specialize_LoadAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, SpecializedCacheEntry *cache);
+int _Py_Specialize_LoadGlobal(PyObject *globals, PyObject *builtins, _Py_CODEUNIT *instr, PyObject *name, SpecializedCacheEntry *cache);
#define SPECIALIZATION_STATS 0
#if SPECIALIZATION_STATS
-typedef struct _specialization_stats {
+typedef struct _stats {
uint64_t specialization_success;
uint64_t specialization_failure;
- uint64_t loadattr_hit;
- uint64_t loadattr_deferred;
- uint64_t loadattr_miss;
- uint64_t loadattr_deopt;
+ uint64_t hit;
+ uint64_t deferred;
+ uint64_t miss;
+ uint64_t deopt;
} SpecializationStats;
-extern SpecializationStats _specialization_stats;
-#define STAT_INC(name) _specialization_stats.name++
+extern SpecializationStats _specialization_stats[256];
+#define STAT_INC(opname, name) _specialization_stats[opname].name++
void _Py_PrintSpecializationStats(void);
#else
-#define STAT_INC(name) ((void)0)
+#define STAT_INC(opname, name) ((void)0)
#endif
diff --git a/Include/opcode.h b/Include/opcode.h
index 8f5be99..7f8376f 100644
--- a/Include/opcode.h
+++ b/Include/opcode.h
@@ -142,6 +142,9 @@ extern "C" {
#define LOAD_ATTR_WITH_HINT 14
#define LOAD_ATTR_SLOT 18
#define LOAD_ATTR_MODULE 21
+#define LOAD_GLOBAL_ADAPTIVE 36
+#define LOAD_GLOBAL_MODULE 38
+#define LOAD_GLOBAL_BUILTIN 39
#ifdef NEED_OPCODE_JUMP_TABLES
static uint32_t _PyOpcode_RelativeJump[8] = {
0U,
diff --git a/Lib/opcode.py b/Lib/opcode.py
index 265759e..7e5916a 100644
--- a/Lib/opcode.py
+++ b/Lib/opcode.py
@@ -226,4 +226,7 @@ _specialized_instructions = [
"LOAD_ATTR_WITH_HINT",
"LOAD_ATTR_SLOT",
"LOAD_ATTR_MODULE",
+ "LOAD_GLOBAL_ADAPTIVE",
+ "LOAD_GLOBAL_MODULE",
+ "LOAD_GLOBAL_BUILTIN",
]
diff --git a/Misc/NEWS.d/next/Core and Builtins/2021-06-10-10-06-18.bpo-44338.c4Myr4.rst b/Misc/NEWS.d/next/Core and Builtins/2021-06-10-10-06-18.bpo-44338.c4Myr4.rst
new file mode 100644
index 0000000..beaa3e5
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2021-06-10-10-06-18.bpo-44338.c4Myr4.rst
@@ -0,0 +1,7 @@
+Implement adaptive specialization for LOAD_GLOBAL
+
+Two specialized forms of LOAD_GLOBAL are added:
+
+* LOAD_GLOBAL_MODULE
+
+* LOAD_GLOBAL_BUILTIN
diff --git a/Objects/codeobject.c b/Objects/codeobject.c
index 701a37d..e054c43 100644
--- a/Objects/codeobject.c
+++ b/Objects/codeobject.c
@@ -350,10 +350,7 @@ init_code(PyCodeObject *co, struct _PyCodeConstructor *con)
/* not set */
co->co_weakreflist = NULL;
co->co_extra = NULL;
- co->co_opcache_map = NULL;
- co->co_opcache = NULL;
- co->co_opcache_flag = 0;
- co->co_opcache_size = 0;
+
co->co_warmup = QUICKENING_INITIAL_WARMUP_VALUE;
co->co_quickened = NULL;
}
@@ -912,55 +909,6 @@ new_linesiterator(PyCodeObject *code)
return li;
}
-
-/******************
- * the opcache
- ******************/
-
-int
-_PyCode_InitOpcache(PyCodeObject *co)
-{
- Py_ssize_t co_size = PyBytes_Size(co->co_code) / sizeof(_Py_CODEUNIT);
- co->co_opcache_map = (unsigned char *)PyMem_Calloc(co_size, 1);
- if (co->co_opcache_map == NULL) {
- return -1;
- }
-
- const _Py_CODEUNIT *opcodes = (const _Py_CODEUNIT*)PyBytes_AS_STRING(co->co_code);
- Py_ssize_t opts = 0;
-
- for (Py_ssize_t i = 0; i < co_size;) {
- unsigned char opcode = _Py_OPCODE(opcodes[i]);
- i++; // 'i' is now aligned to (next_instr - first_instr)
-
- // TODO: LOAD_METHOD
- if (opcode == LOAD_GLOBAL || opcode == LOAD_ATTR) {
- opts++;
- co->co_opcache_map[i] = (unsigned char)opts;
- if (opts > 254) {
- break;
- }
- }
- }
-
- if (opts) {
- co->co_opcache = (_PyOpcache *)PyMem_Calloc(opts, sizeof(_PyOpcache));
- if (co->co_opcache == NULL) {
- PyMem_Free(co->co_opcache_map);
- return -1;
- }
- }
- else {
- PyMem_Free(co->co_opcache_map);
- co->co_opcache_map = NULL;
- co->co_opcache = NULL;
- }
-
- co->co_opcache_size = (unsigned char)opts;
- return 0;
-}
-
-
/******************
* "extra" frame eval info (see PEP 523)
******************/
@@ -1207,15 +1155,6 @@ code_new_impl(PyTypeObject *type, int argcount, int posonlyargcount,
static void
code_dealloc(PyCodeObject *co)
{
- if (co->co_opcache != NULL) {
- PyMem_Free(co->co_opcache);
- }
- if (co->co_opcache_map != NULL) {
- PyMem_Free(co->co_opcache_map);
- }
- co->co_opcache_flag = 0;
- co->co_opcache_size = 0;
-
if (co->co_extra != NULL) {
PyInterpreterState *interp = _PyInterpreterState_GET();
_PyCodeObjectExtra *co_extra = co->co_extra;
@@ -1442,12 +1381,11 @@ code_sizeof(PyCodeObject *co, PyObject *Py_UNUSED(args))
res += co->co_ncellvars * sizeof(Py_ssize_t);
}
- if (co->co_opcache != NULL) {
- assert(co->co_opcache_map != NULL);
- // co_opcache_map
- res += PyBytes_GET_SIZE(co->co_code) / sizeof(_Py_CODEUNIT);
- // co_opcache
- res += co->co_opcache_size * sizeof(_PyOpcache);
+ if (co->co_quickened != NULL) {
+ Py_ssize_t count = co->co_quickened[0].entry.zero.cache_count;
+ count += (PyBytes_GET_SIZE(co->co_code)+sizeof(SpecializedCacheEntry)-1)/
+ sizeof(SpecializedCacheEntry);
+ res += count * sizeof(SpecializedCacheEntry);
}
return PyLong_FromSsize_t(res);
diff --git a/Python/ceval.c b/Python/ceval.c
index c42404c..25d077c 100644
--- a/Python/ceval.c
+++ b/Python/ceval.c
@@ -13,7 +13,7 @@
#include "pycore_abstract.h" // _PyIndex_Check()
#include "pycore_call.h" // _PyObject_FastCallDictTstate()
#include "pycore_ceval.h" // _PyEval_SignalAsyncExc()
-#include "pycore_code.h" // _PyCode_InitOpcache()
+#include "pycore_code.h"
#include "pycore_initconfig.h" // _PyStatus_OK()
#include "pycore_object.h" // _PyObject_GC_TRACK()
#include "pycore_moduleobject.h"
@@ -109,7 +109,6 @@ static long dxp[256];
/* per opcode cache */
static int opcache_min_runs = 1024; /* create opcache when code executed this many times */
#define OPCODE_CACHE_MAX_TRIES 20
-#define OPCACHE_STATS 0 /* Enable stats */
// This function allows to deactivate the opcode cache. As different cache mechanisms may hold
// references, this can mess with the reference leak detector functionality so the cache needs
@@ -120,22 +119,6 @@ _PyEval_DeactivateOpCache(void)
opcache_min_runs = 0;
}
-#if OPCACHE_STATS
-static size_t opcache_code_objects = 0;
-static size_t opcache_code_objects_extra_mem = 0;
-
-static size_t opcache_global_opts = 0;
-static size_t opcache_global_hits = 0;
-static size_t opcache_global_misses = 0;
-
-static size_t opcache_attr_opts = 0;
-static size_t opcache_attr_hits = 0;
-static size_t opcache_attr_misses = 0;
-static size_t opcache_attr_deopts = 0;
-static size_t opcache_attr_total = 0;
-#endif
-
-
#ifndef NDEBUG
/* Ensure that tstate is valid: sanity check for PyEval_AcquireThread() and
PyEval_RestoreThread(). Detect if tstate memory was freed. It can happen
@@ -360,48 +343,8 @@ PyEval_InitThreads(void)
void
_PyEval_Fini(void)
{
-#if OPCACHE_STATS
- fprintf(stderr, "-- Opcode cache number of objects = %zd\n",
- opcache_code_objects);
-
- fprintf(stderr, "-- Opcode cache total extra mem = %zd\n",
- opcache_code_objects_extra_mem);
-
- fprintf(stderr, "\n");
-
- fprintf(stderr, "-- Opcode cache LOAD_GLOBAL hits = %zd (%d%%)\n",
- opcache_global_hits,
- (int) (100.0 * opcache_global_hits /
- (opcache_global_hits + opcache_global_misses)));
-
- fprintf(stderr, "-- Opcode cache LOAD_GLOBAL misses = %zd (%d%%)\n",
- opcache_global_misses,
- (int) (100.0 * opcache_global_misses /
- (opcache_global_hits + opcache_global_misses)));
-
- fprintf(stderr, "-- Opcode cache LOAD_GLOBAL opts = %zd\n",
- opcache_global_opts);
-
- fprintf(stderr, "\n");
-
- fprintf(stderr, "-- Opcode cache LOAD_ATTR hits = %zd (%d%%)\n",
- opcache_attr_hits,
- (int) (100.0 * opcache_attr_hits /
- opcache_attr_total));
-
- fprintf(stderr, "-- Opcode cache LOAD_ATTR misses = %zd (%d%%)\n",
- opcache_attr_misses,
- (int) (100.0 * opcache_attr_misses /
- opcache_attr_total));
-
- fprintf(stderr, "-- Opcode cache LOAD_ATTR opts = %zd\n",
- opcache_attr_opts);
-
- fprintf(stderr, "-- Opcode cache LOAD_ATTR deopts = %zd\n",
- opcache_attr_deopts);
-
- fprintf(stderr, "-- Opcode cache LOAD_ATTR total = %zd\n",
- opcache_attr_total);
+#if SPECIALIZATION_STATS
+ _Py_PrintSpecializationStats();
#endif
}
@@ -1448,108 +1391,11 @@ eval_frame_handle_pending(PyThreadState *tstate)
GETLOCAL(i) = value; \
Py_XDECREF(tmp); } while (0)
- /* macros for opcode cache */
-#define OPCACHE_CHECK() \
- do { \
- co_opcache = NULL; \
- if (co->co_opcache != NULL) { \
- unsigned char co_opcache_offset = \
- co->co_opcache_map[next_instr - first_instr]; \
- if (co_opcache_offset > 0) { \
- assert(co_opcache_offset <= co->co_opcache_size); \
- co_opcache = &co->co_opcache[co_opcache_offset - 1]; \
- assert(co_opcache != NULL); \
- } \
- } \
- } while (0)
-
-#define OPCACHE_DEOPT() \
- do { \
- if (co_opcache != NULL) { \
- co_opcache->optimized = -1; \
- unsigned char co_opcache_offset = \
- co->co_opcache_map[next_instr - first_instr]; \
- assert(co_opcache_offset <= co->co_opcache_size); \
- co->co_opcache_map[co_opcache_offset] = 0; \
- co_opcache = NULL; \
- } \
- } while (0)
-
-#define OPCACHE_DEOPT_LOAD_ATTR() \
- do { \
- if (co_opcache != NULL) { \
- OPCACHE_STAT_ATTR_DEOPT(); \
- OPCACHE_DEOPT(); \
- } \
- } while (0)
-
-#define OPCACHE_MAYBE_DEOPT_LOAD_ATTR() \
- do { \
- if (co_opcache != NULL && --co_opcache->optimized <= 0) { \
- OPCACHE_DEOPT_LOAD_ATTR(); \
- } \
- } while (0)
-
-#if OPCACHE_STATS
-
-#define OPCACHE_STAT_GLOBAL_HIT() \
- do { \
- if (co->co_opcache != NULL) opcache_global_hits++; \
- } while (0)
-
-#define OPCACHE_STAT_GLOBAL_MISS() \
- do { \
- if (co->co_opcache != NULL) opcache_global_misses++; \
- } while (0)
-
-#define OPCACHE_STAT_GLOBAL_OPT() \
- do { \
- if (co->co_opcache != NULL) opcache_global_opts++; \
- } while (0)
-
-#define OPCACHE_STAT_ATTR_HIT() \
- do { \
- if (co->co_opcache != NULL) opcache_attr_hits++; \
- } while (0)
-
-#define OPCACHE_STAT_ATTR_MISS() \
- do { \
- if (co->co_opcache != NULL) opcache_attr_misses++; \
- } while (0)
-
-#define OPCACHE_STAT_ATTR_OPT() \
- do { \
- if (co->co_opcache!= NULL) opcache_attr_opts++; \
- } while (0)
-
-#define OPCACHE_STAT_ATTR_DEOPT() \
- do { \
- if (co->co_opcache != NULL) opcache_attr_deopts++; \
- } while (0)
-
-#define OPCACHE_STAT_ATTR_TOTAL() \
- do { \
- if (co->co_opcache != NULL) opcache_attr_total++; \
- } while (0)
-
-#else /* OPCACHE_STATS */
-
-#define OPCACHE_STAT_GLOBAL_HIT()
-#define OPCACHE_STAT_GLOBAL_MISS()
-#define OPCACHE_STAT_GLOBAL_OPT()
-
-#define OPCACHE_STAT_ATTR_HIT()
-#define OPCACHE_STAT_ATTR_MISS()
-#define OPCACHE_STAT_ATTR_OPT()
-#define OPCACHE_STAT_ATTR_DEOPT()
-#define OPCACHE_STAT_ATTR_TOTAL()
-
#define JUMP_TO_INSTRUCTION(op) goto PREDICT_ID(op)
#define GET_CACHE() \
_GetSpecializedCacheEntryForInstruction(first_instr, INSTR_OFFSET(), oparg)
-#endif
#define DEOPT_IF(cond, instname) if (cond) { goto instname ## _miss; }
@@ -1582,7 +1428,6 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag)
_Py_CODEUNIT *first_instr;
PyObject *names;
PyObject *consts;
- _PyOpcache *co_opcache;
#ifdef LLTRACE
_Py_IDENTIFIER(__ltrace__);
@@ -1690,21 +1535,6 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag)
f->f_stackdepth = -1;
f->f_state = FRAME_EXECUTING;
- if (co->co_opcache_flag < opcache_min_runs) {
- co->co_opcache_flag++;
- if (co->co_opcache_flag == opcache_min_runs) {
- if (_PyCode_InitOpcache(co) < 0) {
- goto exit_eval_frame;
- }
-#if OPCACHE_STATS
- opcache_code_objects_extra_mem +=
- PyBytes_Size(co->co_code) / sizeof(_Py_CODEUNIT) +
- sizeof(_PyOpcache) * co->co_opcache_size;
- opcache_code_objects++;
-#endif
- }
- }
-
#ifdef LLTRACE
{
int r = _PyDict_ContainsId(GLOBALS(), &PyId___ltrace__);
@@ -2974,30 +2804,12 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag)
}
case TARGET(LOAD_GLOBAL): {
- PyObject *name;
+ PREDICTED(LOAD_GLOBAL);
+ PyObject *name = GETITEM(names, oparg);
PyObject *v;
if (PyDict_CheckExact(GLOBALS())
&& PyDict_CheckExact(BUILTINS()))
{
- OPCACHE_CHECK();
- if (co_opcache != NULL && co_opcache->optimized > 0) {
- _PyOpcache_LoadGlobal *lg = &co_opcache->u.lg;
-
- if (lg->globals_ver ==
- ((PyDictObject *)GLOBALS())->ma_version_tag
- && lg->builtins_ver ==
- ((PyDictObject *)BUILTINS())->ma_version_tag)
- {
- PyObject *ptr = lg->ptr;
- OPCACHE_STAT_GLOBAL_HIT();
- assert(ptr != NULL);
- Py_INCREF(ptr);
- PUSH(ptr);
- DISPATCH();
- }
- }
-
- name = GETITEM(names, oparg);
v = _PyDict_LoadGlobal((PyDictObject *)GLOBALS(),
(PyDictObject *)BUILTINS(),
name);
@@ -3010,25 +2822,6 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag)
}
goto error;
}
-
- if (co_opcache != NULL) {
- _PyOpcache_LoadGlobal *lg = &co_opcache->u.lg;
-
- if (co_opcache->optimized == 0) {
- /* Wasn't optimized before. */
- OPCACHE_STAT_GLOBAL_OPT();
- } else {
- OPCACHE_STAT_GLOBAL_MISS();
- }
-
- co_opcache->optimized = 1;
- lg->globals_ver =
- ((PyDictObject *)GLOBALS())->ma_version_tag;
- lg->builtins_ver =
- ((PyDictObject *)BUILTINS())->ma_version_tag;
- lg->ptr = v; /* borrowed */
- }
-
Py_INCREF(v);
}
else {
@@ -3059,6 +2852,61 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag)
DISPATCH();
}
+ case TARGET(LOAD_GLOBAL_ADAPTIVE): {
+ SpecializedCacheEntry *cache = GET_CACHE();
+ if (cache->adaptive.counter == 0) {
+ PyObject *name = GETITEM(names, cache->adaptive.original_oparg);
+ next_instr--;
+ if (_Py_Specialize_LoadGlobal(GLOBALS(), BUILTINS(), next_instr, name, cache) < 0) {
+ goto error;
+ }
+ DISPATCH();
+ }
+ else {
+ STAT_INC(LOAD_GLOBAL, deferred);
+ cache->adaptive.counter--;
+ oparg = cache->adaptive.original_oparg;
+ JUMP_TO_INSTRUCTION(LOAD_GLOBAL);
+ }
+ }
+
+ case TARGET(LOAD_GLOBAL_MODULE): {
+ DEOPT_IF(!PyDict_CheckExact(GLOBALS()), LOAD_GLOBAL);
+ PyDictObject *dict = (PyDictObject *)GLOBALS();
+ SpecializedCacheEntry *caches = GET_CACHE();
+ _PyAdaptiveEntry *cache0 = &caches[0].adaptive;
+ _PyLoadGlobalCache *cache1 = &caches[-1].load_global;
+ DEOPT_IF(dict->ma_keys->dk_version != cache1->module_keys_version, LOAD_GLOBAL);
+ PyDictKeyEntry *ep = DK_ENTRIES(dict->ma_keys) + cache0->index;
+ PyObject *res = ep->me_value;
+ DEOPT_IF(res == NULL, LOAD_GLOBAL);
+ record_cache_hit(cache0);
+ STAT_INC(LOAD_GLOBAL, hit);
+ Py_INCREF(res);
+ PUSH(res);
+ DISPATCH();
+ }
+
+ case TARGET(LOAD_GLOBAL_BUILTIN): {
+ DEOPT_IF(!PyDict_CheckExact(GLOBALS()), LOAD_GLOBAL);
+ DEOPT_IF(!PyDict_CheckExact(BUILTINS()), LOAD_GLOBAL);
+ PyDictObject *mdict = (PyDictObject *)GLOBALS();
+ PyDictObject *bdict = (PyDictObject *)BUILTINS();
+ SpecializedCacheEntry *caches = GET_CACHE();
+ _PyAdaptiveEntry *cache0 = &caches[0].adaptive;
+ _PyLoadGlobalCache *cache1 = &caches[-1].load_global;
+ DEOPT_IF(mdict->ma_keys->dk_version != cache1->module_keys_version, LOAD_GLOBAL);
+ DEOPT_IF(bdict->ma_keys->dk_version != cache1->builtin_keys_version, LOAD_GLOBAL);
+ PyDictKeyEntry *ep = DK_ENTRIES(bdict->ma_keys) + cache0->index;
+ PyObject *res = ep->me_value;
+ DEOPT_IF(res == NULL, LOAD_GLOBAL);
+ record_cache_hit(cache0);
+ STAT_INC(LOAD_GLOBAL, hit);
+ Py_INCREF(res);
+ PUSH(res);
+ DISPATCH();
+ }
+
case TARGET(DELETE_FAST): {
PyObject *v = GETLOCAL(oparg);
if (v != NULL) {
@@ -3464,7 +3312,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag)
DISPATCH();
}
else {
- STAT_INC(loadattr_deferred);
+ STAT_INC(LOAD_ATTR, deferred);
cache->adaptive.counter--;
oparg = cache->adaptive.original_oparg;
JUMP_TO_INSTRUCTION(LOAD_ATTR);
@@ -3487,9 +3335,9 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag)
DEOPT_IF(dict->ma_keys->dk_version != cache1->dk_version_or_hint, LOAD_ATTR);
res = dict->ma_values[cache0->index];
DEOPT_IF(res == NULL, LOAD_ATTR);
- STAT_INC(loadattr_hit);
+ STAT_INC(LOAD_ATTR, hit);
record_cache_hit(cache0);
- STAT_INC(loadattr_hit);
+ STAT_INC(LOAD_ATTR, hit);
Py_INCREF(res);
SET_TOP(res);
Py_DECREF(owner);
@@ -3510,7 +3358,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag)
PyDictKeyEntry *ep = DK_ENTRIES(dict->ma_keys) + cache0->index;
res = ep->me_value;
DEOPT_IF(res == NULL, LOAD_ATTR);
- STAT_INC(loadattr_hit);
+ STAT_INC(LOAD_ATTR, hit);
record_cache_hit(cache0);
Py_INCREF(res);
SET_TOP(res);
@@ -3538,7 +3386,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag)
DEOPT_IF(ep->me_key != name, LOAD_ATTR);
res = ep->me_value;
DEOPT_IF(res == NULL, LOAD_ATTR);
- STAT_INC(loadattr_hit);
+ STAT_INC(LOAD_ATTR, hit);
record_cache_hit(cache0);
Py_INCREF(res);
SET_TOP(res);
@@ -3558,7 +3406,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag)
char *addr = (char *)owner + cache0->index;
res = *(PyObject **)addr;
DEOPT_IF(res == NULL, LOAD_ATTR);
- STAT_INC(loadattr_hit);
+ STAT_INC(LOAD_ATTR, hit);
record_cache_hit(cache0);
Py_INCREF(res);
SET_TOP(res);
@@ -4445,22 +4293,26 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag)
or goto error. */
Py_UNREACHABLE();
-/* Cache misses */
+/* Specialization misses */
-LOAD_ATTR_miss:
- {
- STAT_INC(loadattr_miss);
- _PyAdaptiveEntry *cache = &GET_CACHE()->adaptive;
- record_cache_miss(cache);
- if (too_many_cache_misses(cache)) {
- next_instr[-1] = _Py_MAKECODEUNIT(LOAD_ATTR_ADAPTIVE, _Py_OPARG(next_instr[-1]));
- STAT_INC(loadattr_deopt);
- cache_backoff(cache);
- }
- oparg = cache->original_oparg;
- JUMP_TO_INSTRUCTION(LOAD_ATTR);
+#define MISS_WITH_CACHE(opname) \
+opname ## _miss: \
+ { \
+ STAT_INC(opname, miss); \
+ _PyAdaptiveEntry *cache = &GET_CACHE()->adaptive; \
+ record_cache_miss(cache); \
+ if (too_many_cache_misses(cache)) { \
+ next_instr[-1] = _Py_MAKECODEUNIT(opname ## _ADAPTIVE, _Py_OPARG(next_instr[-1])); \
+ STAT_INC(opname, deopt); \
+ cache_backoff(cache); \
+ } \
+ oparg = cache->original_oparg; \
+ JUMP_TO_INSTRUCTION(opname); \
}
+MISS_WITH_CACHE(LOAD_ATTR)
+MISS_WITH_CACHE(LOAD_GLOBAL)
+
error:
/* Double-check exception status. */
#ifdef NDEBUG
diff --git a/Python/opcode_targets.h b/Python/opcode_targets.h
index 47beee7..ecc95da 100644
--- a/Python/opcode_targets.h
+++ b/Python/opcode_targets.h
@@ -35,10 +35,10 @@ static void *opcode_targets[256] = {
&&TARGET_MATCH_KEYS,
&&TARGET_COPY_DICT_WITHOUT_KEYS,
&&TARGET_PUSH_EXC_INFO,
- &&_unknown_opcode,
+ &&TARGET_LOAD_GLOBAL_ADAPTIVE,
&&TARGET_POP_EXCEPT_AND_RERAISE,
- &&_unknown_opcode,
- &&_unknown_opcode,
+ &&TARGET_LOAD_GLOBAL_MODULE,
+ &&TARGET_LOAD_GLOBAL_BUILTIN,
&&_unknown_opcode,
&&_unknown_opcode,
&&_unknown_opcode,
diff --git a/Python/specialize.c b/Python/specialize.c
index d82122d..d98433b 100644
--- a/Python/specialize.c
+++ b/Python/specialize.c
@@ -33,18 +33,27 @@
Py_ssize_t _Py_QuickenedCount = 0;
#if SPECIALIZATION_STATS
-SpecializationStats _specialization_stats = { 0 };
+SpecializationStats _specialization_stats[256] = { 0 };
+
+#define PRINT_STAT(name, field) fprintf(stderr, " %s." #field " : %" PRIu64 "\n", name, stats->field);
+
+static void
+print_stats(SpecializationStats *stats, const char *name)
+{
+ PRINT_STAT(name, specialization_success);
+ PRINT_STAT(name, specialization_failure);
+ PRINT_STAT(name, hit);
+ PRINT_STAT(name, deferred);
+ PRINT_STAT(name, miss);
+ PRINT_STAT(name, deopt);
+}
-#define PRINT_STAT(name) fprintf(stderr, #name " : %" PRIu64" \n", _specialization_stats.name);
void
_Py_PrintSpecializationStats(void)
{
- PRINT_STAT(specialization_success);
- PRINT_STAT(specialization_failure);
- PRINT_STAT(loadattr_hit);
- PRINT_STAT(loadattr_deferred);
- PRINT_STAT(loadattr_miss);
- PRINT_STAT(loadattr_deopt);
+ printf("Specialization stats:\n");
+ print_stats(&_specialization_stats[LOAD_ATTR], "load_attr");
+ print_stats(&_specialization_stats[LOAD_GLOBAL], "load_global");
}
#endif
@@ -77,11 +86,13 @@ get_cache_count(SpecializedCacheOrInstruction *quickened) {
Values of zero are ignored. */
static uint8_t adaptive_opcodes[256] = {
[LOAD_ATTR] = LOAD_ATTR_ADAPTIVE,
+ [LOAD_GLOBAL] = LOAD_GLOBAL_ADAPTIVE,
};
/* The number of cache entries required for a "family" of instructions. */
static uint8_t cache_requirements[256] = {
- [LOAD_ATTR] = 2,
+ [LOAD_ATTR] = 2, /* _PyAdaptiveEntry and _PyLoadAttrCache */
+ [LOAD_GLOBAL] = 2, /* _PyAdaptiveEntry and _PyLoadGlobalCache */
};
/* Return the oparg for the cache_offset and instruction index.
@@ -357,14 +368,81 @@ _Py_Specialize_LoadAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, Sp
}
fail:
- STAT_INC(specialization_failure);
+ STAT_INC(LOAD_ATTR, specialization_failure);
assert(!PyErr_Occurred());
cache_backoff(cache0);
return 0;
success:
- STAT_INC(specialization_success);
+ STAT_INC(LOAD_ATTR, specialization_success);
assert(!PyErr_Occurred());
cache0->counter = saturating_start();
return 0;
}
+
+int
+_Py_Specialize_LoadGlobal(
+ PyObject *globals, PyObject *builtins,
+ _Py_CODEUNIT *instr, PyObject *name,
+ SpecializedCacheEntry *cache)
+{
+ _PyAdaptiveEntry *cache0 = &cache->adaptive;
+ _PyLoadGlobalCache *cache1 = &cache[-1].load_global;
+ assert(PyUnicode_CheckExact(name));
+ if (!PyDict_CheckExact(globals)) {
+ goto fail;
+ }
+ if (((PyDictObject *)globals)->ma_keys->dk_kind != DICT_KEYS_UNICODE) {
+ goto fail;
+ }
+ PyObject *value = NULL;
+ Py_ssize_t index = _PyDict_GetItemHint((PyDictObject *)globals, name, -1, &value);
+ assert (index != DKIX_ERROR);
+ if (index != DKIX_EMPTY) {
+ if (index != (uint16_t)index) {
+ goto fail;
+ }
+ uint32_t keys_version = _PyDictKeys_GetVersionForCurrentState((PyDictObject *)globals);
+ if (keys_version == 0) {
+ goto fail;
+ }
+ cache1->module_keys_version = keys_version;
+ cache0->index = (uint16_t)index;
+ *instr = _Py_MAKECODEUNIT(LOAD_GLOBAL_MODULE, _Py_OPARG(*instr));
+ goto success;
+ }
+ if (!PyDict_CheckExact(builtins)) {
+ goto fail;
+ }
+ if (((PyDictObject *)builtins)->ma_keys->dk_kind != DICT_KEYS_UNICODE) {
+ goto fail;
+ }
+ index = _PyDict_GetItemHint((PyDictObject *)builtins, name, -1, &value);
+ assert (index != DKIX_ERROR);
+ if (index != (uint16_t)index) {
+ goto fail;
+ }
+ uint32_t globals_version = _PyDictKeys_GetVersionForCurrentState((PyDictObject *)globals);
+ if (globals_version == 0) {
+ goto fail;
+ }
+ uint32_t builtins_version = _PyDictKeys_GetVersionForCurrentState((PyDictObject *)builtins);
+ if (builtins_version == 0) {
+ goto fail;
+ }
+ cache1->module_keys_version = globals_version;
+ cache1->builtin_keys_version = builtins_version;
+ cache0->index = (uint16_t)index;
+ *instr = _Py_MAKECODEUNIT(LOAD_GLOBAL_BUILTIN, _Py_OPARG(*instr));
+ goto success;
+fail:
+ STAT_INC(LOAD_GLOBAL, specialization_failure);
+ assert(!PyErr_Occurred());
+ cache_backoff(cache0);
+ return 0;
+success:
+ STAT_INC(LOAD_GLOBAL, specialization_success);
+ assert(!PyErr_Occurred());
+ cache0->counter = saturating_start();
+ return 0;
+}