diff options
author | Brandt Bucher <brandtbucher@microsoft.com> | 2022-03-07 19:45:00 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-03-07 19:45:00 (GMT) |
commit | f193631387bfee99a812e39b05d5b7e6384b57f5 (patch) | |
tree | 31f161bd1e2f6469f32be8333705c82992486485 /Include | |
parent | 105b9ac00174d7bcc653f9e9dc5052215e197c77 (diff) | |
download | cpython-f193631387bfee99a812e39b05d5b7e6384b57f5.zip cpython-f193631387bfee99a812e39b05d5b7e6384b57f5.tar.gz cpython-f193631387bfee99a812e39b05d5b7e6384b57f5.tar.bz2 |
bpo-46841: Use inline caching for calls (GH-31709)
Diffstat (limited to 'Include')
-rw-r--r-- | Include/cpython/code.h | 2 | ||||
-rw-r--r-- | Include/internal/pycore_code.h | 141 | ||||
-rw-r--r-- | Include/internal/pycore_global_strings.h | 2 | ||||
-rw-r--r-- | Include/internal/pycore_interp.h | 2 | ||||
-rw-r--r-- | Include/internal/pycore_runtime_init.h | 2 | ||||
-rw-r--r-- | Include/opcode.h | 142 |
6 files changed, 105 insertions, 186 deletions
diff --git a/Include/cpython/code.h b/Include/cpython/code.h index 21f8fe7..f3e0761 100644 --- a/Include/cpython/code.h +++ b/Include/cpython/code.h @@ -105,7 +105,7 @@ struct PyCodeObject { /* Quickened instructions and cache, or NULL This should be treated as opaque by all code except the specializer and interpreter. */ - union _cache_or_instruction *co_quickened; + _Py_CODEUNIT *co_quickened; }; diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h index 2e03358..21c657a 100644 --- a/Include/internal/pycore_code.h +++ b/Include/internal/pycore_code.h @@ -8,50 +8,10 @@ extern "C" { * Specialization and quickening structs and helper functions */ -typedef struct { - int32_t cache_count; - int32_t _; /* Force 8 byte size */ -} _PyEntryZero; - -typedef struct { - uint8_t original_oparg; - uint8_t counter; - uint16_t index; - uint32_t version; -} _PyAdaptiveEntry; -typedef struct { - /* Borrowed ref */ - PyObject *obj; -} _PyObjectCache; - -typedef struct { - uint32_t func_version; - uint16_t min_args; - uint16_t defaults_len; -} _PyCallCache; - - -/* Add specialized versions of entries to this union. - * - * Do not break the invariant: sizeof(SpecializedCacheEntry) == 8 - * Preserving this invariant is necessary because: - - If any one form uses more space, then all must and on 64 bit machines - this is likely to double the memory consumption of caches - - The function for calculating the offset of caches assumes a 4:1 - cache:instruction size ratio. Changing that would need careful - analysis to choose a new function. - */ -typedef union { - _PyEntryZero zero; - _PyAdaptiveEntry adaptive; - _PyObjectCache obj; - _PyCallCache call; -} SpecializedCacheEntry; - -#define INSTRUCTIONS_PER_ENTRY (sizeof(SpecializedCacheEntry)/sizeof(_Py_CODEUNIT)) - -/* Inline caches */ +// Inline caches. If you change the number of cache entries for an instruction, +// you must *also* update the number of cache entries in Lib/opcode.py and bump +// the magic number in Lib/importlib/_bootstrap_external.py! #define CACHE_ENTRIES(cache) (sizeof(cache)/sizeof(_Py_CODEUNIT)) @@ -112,73 +72,22 @@ typedef struct { #define INLINE_CACHE_ENTRIES_LOAD_METHOD CACHE_ENTRIES(_PyLoadMethodCache) -/* Maximum size of code to quicken, in code units. */ -#define MAX_SIZE_TO_QUICKEN 5000 - -typedef union _cache_or_instruction { - _Py_CODEUNIT code[1]; - SpecializedCacheEntry entry; -} SpecializedCacheOrInstruction; +typedef struct { + _Py_CODEUNIT counter; + _Py_CODEUNIT func_version[2]; + _Py_CODEUNIT min_args; +} _PyCallCache; -/* Get pointer to the nth cache entry, from the first instruction and n. - * Cache entries are indexed backwards, with [count-1] first in memory, and [0] last. - * The zeroth entry immediately precedes the instructions. - */ -static inline SpecializedCacheEntry * -_GetSpecializedCacheEntry(const _Py_CODEUNIT *first_instr, Py_ssize_t n) -{ - SpecializedCacheOrInstruction *last_cache_plus_one = (SpecializedCacheOrInstruction *)first_instr; - assert(&last_cache_plus_one->code[0] == first_instr); - return &last_cache_plus_one[-1-n].entry; -} +#define INLINE_CACHE_ENTRIES_CALL CACHE_ENTRIES(_PyCallCache) -/* Following two functions form a pair. - * - * oparg_from_offset_and_index() is used to compute the oparg - * when quickening, so that offset_from_oparg_and_nexti() - * can be used at runtime to compute the offset. - * - * The relationship between the three values is currently - * offset == (index>>1) + oparg - * This relation is chosen based on the following observations: - * 1. typically 1 in 4 instructions need a cache - * 2. instructions that need a cache typically use 2 entries - * These observations imply: offset ≈ index/2 - * We use the oparg to fine tune the relation to avoid wasting space - * and allow consecutive instructions to use caches. - * - * If the number of cache entries < number of instructions/2 we will waste - * some small amoount of space. - * If the number of cache entries > (number of instructions/2) + 255, then - * some instructions will not be able to use a cache. - * In practice, we expect some small amount of wasted space in a shorter functions - * and only functions exceeding a 1000 lines or more not to have enugh cache space. - * - */ -static inline int -oparg_from_offset_and_nexti(int offset, int nexti) -{ - return offset-(nexti>>1); -} +typedef struct { + _Py_CODEUNIT counter; +} _PyPrecallCache; -static inline int -offset_from_oparg_and_nexti(int oparg, int nexti) -{ - return (nexti>>1)+oparg; -} +#define INLINE_CACHE_ENTRIES_PRECALL CACHE_ENTRIES(_PyPrecallCache) -/* Get pointer to the cache entry associated with an instruction. - * nexti is the index of the instruction plus one. - * nexti is used as it corresponds to the instruction pointer in the interpreter. - * This doesn't check that an entry has been allocated for that instruction. */ -static inline SpecializedCacheEntry * -_GetSpecializedCacheEntryForInstruction(const _Py_CODEUNIT *first_instr, int nexti, int oparg) -{ - return _GetSpecializedCacheEntry( - first_instr, - offset_from_oparg_and_nexti(oparg, nexti) - ); -} +/* Maximum size of code to quicken, in code units. */ +#define MAX_SIZE_TO_QUICKEN 10000 #define QUICKENING_WARMUP_DELAY 8 @@ -205,6 +114,13 @@ _Py_IncrementCountAndMaybeQuicken(PyCodeObject *code) extern Py_ssize_t _Py_QuickenedCount; +// Borrowed references to common callables: +struct callable_cache { + PyObject *isinstance; + PyObject *len; + PyObject *list_append; +}; + /* "Locals plus" for a code object is the set of locals + cell vars + * free vars. This relates to variable names as well as offsets into * the "fast locals" storage array of execution frames. The compiler @@ -332,11 +248,6 @@ extern int _PyLineTable_PreviousAddressRange(PyCodeAddressRange *range); #define ADAPTIVE_CACHE_BACKOFF 64 -static inline void -cache_backoff(_PyAdaptiveEntry *entry) { - entry->counter = ADAPTIVE_CACHE_BACKOFF; -} - /* Specialization functions */ extern int _Py_Specialize_LoadAttr(PyObject *owner, _Py_CODEUNIT *instr, @@ -348,10 +259,10 @@ extern int _Py_Specialize_LoadMethod(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name); extern int _Py_Specialize_BinarySubscr(PyObject *sub, PyObject *container, _Py_CODEUNIT *instr); extern int _Py_Specialize_StoreSubscr(PyObject *container, PyObject *sub, _Py_CODEUNIT *instr); -extern int _Py_Specialize_Call(PyObject *callable, _Py_CODEUNIT *instr, int nargs, - PyObject *kwnames, SpecializedCacheEntry *cache); -extern int _Py_Specialize_Precall(PyObject *callable, _Py_CODEUNIT *instr, int nargs, - PyObject *kwnames, SpecializedCacheEntry *cache, PyObject *builtins); +extern int _Py_Specialize_Call(PyObject *callable, _Py_CODEUNIT *instr, + int nargs, PyObject *kwnames); +extern int _Py_Specialize_Precall(PyObject *callable, _Py_CODEUNIT *instr, + int nargs, PyObject *kwnames, int oparg); extern void _Py_Specialize_BinaryOp(PyObject *lhs, PyObject *rhs, _Py_CODEUNIT *instr, int oparg); extern void _Py_Specialize_CompareOp(PyObject *lhs, PyObject *rhs, diff --git a/Include/internal/pycore_global_strings.h b/Include/internal/pycore_global_strings.h index 2a42dc1..74ebc14 100644 --- a/Include/internal/pycore_global_strings.h +++ b/Include/internal/pycore_global_strings.h @@ -269,6 +269,7 @@ struct _Py_global_strings { STRUCT_FOR_ID(inf) STRUCT_FOR_ID(intersection) STRUCT_FOR_ID(isatty) + STRUCT_FOR_ID(isinstance) STRUCT_FOR_ID(items) STRUCT_FOR_ID(iter) STRUCT_FOR_ID(join) @@ -278,6 +279,7 @@ struct _Py_global_strings { STRUCT_FOR_ID(last_type) STRUCT_FOR_ID(last_value) STRUCT_FOR_ID(latin1) + STRUCT_FOR_ID(len) STRUCT_FOR_ID(line) STRUCT_FOR_ID(lineno) STRUCT_FOR_ID(listcomp) diff --git a/Include/internal/pycore_interp.h b/Include/internal/pycore_interp.h index db8edff..d556279 100644 --- a/Include/internal/pycore_interp.h +++ b/Include/internal/pycore_interp.h @@ -12,6 +12,7 @@ extern "C" { #include "pycore_atomic.h" // _Py_atomic_address #include "pycore_ast_state.h" // struct ast_state +#include "pycore_code.h" // struct callable_cache #include "pycore_context.h" // struct _Py_context_state #include "pycore_dict.h" // struct _Py_dict_state #include "pycore_exceptions.h" // struct _Py_exc_state @@ -176,6 +177,7 @@ struct _is { struct ast_state ast; struct type_cache type_cache; + struct callable_cache callable_cache; /* The following fields are here to avoid allocation during init. The data is exposed through PyInterpreterState pointer fields. diff --git a/Include/internal/pycore_runtime_init.h b/Include/internal/pycore_runtime_init.h index 2f2bc65..8b1abcd 100644 --- a/Include/internal/pycore_runtime_init.h +++ b/Include/internal/pycore_runtime_init.h @@ -884,6 +884,7 @@ extern "C" { INIT_ID(inf), \ INIT_ID(intersection), \ INIT_ID(isatty), \ + INIT_ID(isinstance), \ INIT_ID(items), \ INIT_ID(iter), \ INIT_ID(join), \ @@ -893,6 +894,7 @@ extern "C" { INIT_ID(last_type), \ INIT_ID(last_value), \ INIT_ID(latin1), \ + INIT_ID(len), \ INIT_ID(line), \ INIT_ID(lineno), \ INIT_ID(listcomp), \ diff --git a/Include/opcode.h b/Include/opcode.h index 1b9eeac..930a975 100644 --- a/Include/opcode.h +++ b/Include/opcode.h @@ -7,9 +7,9 @@ extern "C" { /* Instruction opcodes for compiled code */ +#define CACHE 0 #define POP_TOP 1 #define PUSH_NULL 2 -#define CACHE 3 #define NOP 9 #define UNARY_POSITIVE 10 #define UNARY_NEGATIVE 11 @@ -114,75 +114,75 @@ extern "C" { #define PRECALL 166 #define CALL 171 #define KW_NAMES 172 -#define BINARY_OP_ADAPTIVE 4 -#define BINARY_OP_ADD_INT 5 -#define BINARY_OP_ADD_FLOAT 6 -#define BINARY_OP_ADD_UNICODE 7 -#define BINARY_OP_INPLACE_ADD_UNICODE 8 -#define BINARY_OP_MULTIPLY_INT 13 -#define BINARY_OP_MULTIPLY_FLOAT 14 -#define BINARY_OP_SUBTRACT_INT 16 -#define BINARY_OP_SUBTRACT_FLOAT 17 -#define COMPARE_OP_ADAPTIVE 18 -#define COMPARE_OP_FLOAT_JUMP 19 -#define COMPARE_OP_INT_JUMP 20 -#define COMPARE_OP_STR_JUMP 21 -#define BINARY_SUBSCR_ADAPTIVE 22 -#define BINARY_SUBSCR_GETITEM 23 -#define BINARY_SUBSCR_LIST_INT 24 -#define BINARY_SUBSCR_TUPLE_INT 26 -#define BINARY_SUBSCR_DICT 27 -#define STORE_SUBSCR_ADAPTIVE 28 -#define STORE_SUBSCR_LIST_INT 29 -#define STORE_SUBSCR_DICT 34 -#define CALL_ADAPTIVE 36 -#define CALL_PY_EXACT_ARGS 37 -#define CALL_PY_WITH_DEFAULTS 38 -#define JUMP_ABSOLUTE_QUICK 39 -#define LOAD_ATTR_ADAPTIVE 40 -#define LOAD_ATTR_INSTANCE_VALUE 41 -#define LOAD_ATTR_WITH_HINT 42 -#define LOAD_ATTR_SLOT 43 -#define LOAD_ATTR_MODULE 44 -#define LOAD_GLOBAL_ADAPTIVE 45 -#define LOAD_GLOBAL_MODULE 46 -#define LOAD_GLOBAL_BUILTIN 47 -#define LOAD_METHOD_ADAPTIVE 48 -#define LOAD_METHOD_CLASS 55 -#define LOAD_METHOD_MODULE 56 -#define LOAD_METHOD_NO_DICT 57 -#define LOAD_METHOD_WITH_DICT 58 -#define LOAD_METHOD_WITH_VALUES 59 -#define PRECALL_ADAPTIVE 62 -#define PRECALL_BUILTIN_CLASS 63 -#define PRECALL_NO_KW_BUILTIN_O 64 -#define PRECALL_NO_KW_BUILTIN_FAST 65 -#define PRECALL_BUILTIN_FAST_WITH_KEYWORDS 66 -#define PRECALL_NO_KW_LEN 67 -#define PRECALL_NO_KW_ISINSTANCE 72 -#define PRECALL_NO_KW_LIST_APPEND 73 -#define PRECALL_NO_KW_METHOD_DESCRIPTOR_O 76 -#define PRECALL_NO_KW_METHOD_DESCRIPTOR_NOARGS 77 -#define PRECALL_NO_KW_STR_1 78 -#define PRECALL_NO_KW_TUPLE_1 79 -#define PRECALL_NO_KW_TYPE_1 80 -#define PRECALL_NO_KW_METHOD_DESCRIPTOR_FAST 81 -#define PRECALL_BOUND_METHOD 140 -#define PRECALL_PYFUNC 141 -#define RESUME_QUICK 143 -#define STORE_ATTR_ADAPTIVE 150 -#define STORE_ATTR_INSTANCE_VALUE 153 -#define STORE_ATTR_SLOT 154 -#define STORE_ATTR_WITH_HINT 158 -#define UNPACK_SEQUENCE_ADAPTIVE 159 -#define UNPACK_SEQUENCE_LIST 161 -#define UNPACK_SEQUENCE_TUPLE 167 -#define UNPACK_SEQUENCE_TWO_TUPLE 168 -#define LOAD_FAST__LOAD_FAST 169 -#define STORE_FAST__LOAD_FAST 170 -#define LOAD_FAST__LOAD_CONST 173 -#define LOAD_CONST__LOAD_FAST 174 -#define STORE_FAST__STORE_FAST 175 +#define BINARY_OP_ADAPTIVE 3 +#define BINARY_OP_ADD_INT 4 +#define BINARY_OP_ADD_FLOAT 5 +#define BINARY_OP_ADD_UNICODE 6 +#define BINARY_OP_INPLACE_ADD_UNICODE 7 +#define BINARY_OP_MULTIPLY_INT 8 +#define BINARY_OP_MULTIPLY_FLOAT 13 +#define BINARY_OP_SUBTRACT_INT 14 +#define BINARY_OP_SUBTRACT_FLOAT 16 +#define COMPARE_OP_ADAPTIVE 17 +#define COMPARE_OP_FLOAT_JUMP 18 +#define COMPARE_OP_INT_JUMP 19 +#define COMPARE_OP_STR_JUMP 20 +#define BINARY_SUBSCR_ADAPTIVE 21 +#define BINARY_SUBSCR_GETITEM 22 +#define BINARY_SUBSCR_LIST_INT 23 +#define BINARY_SUBSCR_TUPLE_INT 24 +#define BINARY_SUBSCR_DICT 26 +#define STORE_SUBSCR_ADAPTIVE 27 +#define STORE_SUBSCR_LIST_INT 28 +#define STORE_SUBSCR_DICT 29 +#define CALL_ADAPTIVE 34 +#define CALL_PY_EXACT_ARGS 36 +#define CALL_PY_WITH_DEFAULTS 37 +#define JUMP_ABSOLUTE_QUICK 38 +#define LOAD_ATTR_ADAPTIVE 39 +#define LOAD_ATTR_INSTANCE_VALUE 40 +#define LOAD_ATTR_WITH_HINT 41 +#define LOAD_ATTR_SLOT 42 +#define LOAD_ATTR_MODULE 43 +#define LOAD_GLOBAL_ADAPTIVE 44 +#define LOAD_GLOBAL_MODULE 45 +#define LOAD_GLOBAL_BUILTIN 46 +#define LOAD_METHOD_ADAPTIVE 47 +#define LOAD_METHOD_CLASS 48 +#define LOAD_METHOD_MODULE 55 +#define LOAD_METHOD_NO_DICT 56 +#define LOAD_METHOD_WITH_DICT 57 +#define LOAD_METHOD_WITH_VALUES 58 +#define PRECALL_ADAPTIVE 59 +#define PRECALL_BUILTIN_CLASS 62 +#define PRECALL_NO_KW_BUILTIN_O 63 +#define PRECALL_NO_KW_BUILTIN_FAST 64 +#define PRECALL_BUILTIN_FAST_WITH_KEYWORDS 65 +#define PRECALL_NO_KW_LEN 66 +#define PRECALL_NO_KW_ISINSTANCE 67 +#define PRECALL_NO_KW_LIST_APPEND 72 +#define PRECALL_NO_KW_METHOD_DESCRIPTOR_O 73 +#define PRECALL_NO_KW_METHOD_DESCRIPTOR_NOARGS 76 +#define PRECALL_NO_KW_STR_1 77 +#define PRECALL_NO_KW_TUPLE_1 78 +#define PRECALL_NO_KW_TYPE_1 79 +#define PRECALL_NO_KW_METHOD_DESCRIPTOR_FAST 80 +#define PRECALL_BOUND_METHOD 81 +#define PRECALL_PYFUNC 140 +#define RESUME_QUICK 141 +#define STORE_ATTR_ADAPTIVE 143 +#define STORE_ATTR_INSTANCE_VALUE 150 +#define STORE_ATTR_SLOT 153 +#define STORE_ATTR_WITH_HINT 154 +#define UNPACK_SEQUENCE_ADAPTIVE 158 +#define UNPACK_SEQUENCE_LIST 159 +#define UNPACK_SEQUENCE_TUPLE 161 +#define UNPACK_SEQUENCE_TWO_TUPLE 167 +#define LOAD_FAST__LOAD_FAST 168 +#define STORE_FAST__LOAD_FAST 169 +#define LOAD_FAST__LOAD_CONST 170 +#define LOAD_CONST__LOAD_FAST 173 +#define STORE_FAST__STORE_FAST 174 #define DO_TRACING 255 extern const uint8_t _PyOpcode_InlineCacheEntries[256]; @@ -218,6 +218,8 @@ const uint8_t _PyOpcode_InlineCacheEntries[256] = { [LOAD_GLOBAL] = 5, [BINARY_OP] = 1, [LOAD_METHOD] = 10, + [PRECALL] = 1, + [CALL] = 4, }; #endif /* OPCODE_TABLES */ |