summary | refs | log | tree | commit | diff | stats
path: root/Include
diff options
context:
space:
mode:
author: Brandt Bucher <brandtbucher@microsoft.com> 2022-03-07 19:45:00 (GMT)
committer: GitHub <noreply@github.com> 2022-03-07 19:45:00 (GMT)
commit: f193631387bfee99a812e39b05d5b7e6384b57f5 (patch)
tree: 31f161bd1e2f6469f32be8333705c82992486485 /Include
parent: 105b9ac00174d7bcc653f9e9dc5052215e197c77 (diff)
download: cpython-f193631387bfee99a812e39b05d5b7e6384b57f5.zip
cpython-f193631387bfee99a812e39b05d5b7e6384b57f5.tar.gz
cpython-f193631387bfee99a812e39b05d5b7e6384b57f5.tar.bz2
bpo-46841: Use inline caching for calls (GH-31709)
Diffstat (limited to 'Include')
-rw-r--r-- Include/cpython/code.h 2
-rw-r--r-- Include/internal/pycore_code.h 141
-rw-r--r-- Include/internal/pycore_global_strings.h 2
-rw-r--r-- Include/internal/pycore_interp.h 2
-rw-r--r-- Include/internal/pycore_runtime_init.h 2
-rw-r--r-- Include/opcode.h 142
6 files changed, 105 insertions, 186 deletions
diff --git a/Include/cpython/code.h b/Include/cpython/code.h
index 21f8fe7..f3e0761 100644
--- a/Include/cpython/code.h
+++ b/Include/cpython/code.h
@@ -105,7 +105,7 @@ struct PyCodeObject {
/* Quickened instructions and cache, or NULL
This should be treated as opaque by all code except the specializer and
interpreter. */
- union _cache_or_instruction *co_quickened;
+ _Py_CODEUNIT *co_quickened;
};
diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h
index 2e03358..21c657a 100644
--- a/Include/internal/pycore_code.h
+++ b/Include/internal/pycore_code.h
@@ -8,50 +8,10 @@ extern "C" {
* Specialization and quickening structs and helper functions
*/
-typedef struct {
- int32_t cache_count;
- int32_t _; /* Force 8 byte size */
-} _PyEntryZero;
-
-typedef struct {
- uint8_t original_oparg;
- uint8_t counter;
- uint16_t index;
- uint32_t version;
-} _PyAdaptiveEntry;
-typedef struct {
- /* Borrowed ref */
- PyObject *obj;
-} _PyObjectCache;
-
-typedef struct {
- uint32_t func_version;
- uint16_t min_args;
- uint16_t defaults_len;
-} _PyCallCache;
-
-
-/* Add specialized versions of entries to this union.
- *
- * Do not break the invariant: sizeof(SpecializedCacheEntry) == 8
- * Preserving this invariant is necessary because:
- - If any one form uses more space, then all must and on 64 bit machines
- this is likely to double the memory consumption of caches
- - The function for calculating the offset of caches assumes a 4:1
- cache:instruction size ratio. Changing that would need careful
- analysis to choose a new function.
- */
-typedef union {
- _PyEntryZero zero;
- _PyAdaptiveEntry adaptive;
- _PyObjectCache obj;
- _PyCallCache call;
-} SpecializedCacheEntry;
-
-#define INSTRUCTIONS_PER_ENTRY (sizeof(SpecializedCacheEntry)/sizeof(_Py_CODEUNIT))
-
-/* Inline caches */
+// Inline caches. If you change the number of cache entries for an instruction,
+// you must *also* update the number of cache entries in Lib/opcode.py and bump
+// the magic number in Lib/importlib/_bootstrap_external.py!
#define CACHE_ENTRIES(cache) (sizeof(cache)/sizeof(_Py_CODEUNIT))
@@ -112,73 +72,22 @@ typedef struct {
#define INLINE_CACHE_ENTRIES_LOAD_METHOD CACHE_ENTRIES(_PyLoadMethodCache)
-/* Maximum size of code to quicken, in code units. */
-#define MAX_SIZE_TO_QUICKEN 5000
-
-typedef union _cache_or_instruction {
- _Py_CODEUNIT code[1];
- SpecializedCacheEntry entry;
-} SpecializedCacheOrInstruction;
+typedef struct {
+ _Py_CODEUNIT counter;
+ _Py_CODEUNIT func_version[2];
+ _Py_CODEUNIT min_args;
+} _PyCallCache;
-/* Get pointer to the nth cache entry, from the first instruction and n.
- * Cache entries are indexed backwards, with [count-1] first in memory, and [0] last.
- * The zeroth entry immediately precedes the instructions.
- */
-static inline SpecializedCacheEntry *
-_GetSpecializedCacheEntry(const _Py_CODEUNIT *first_instr, Py_ssize_t n)
-{
- SpecializedCacheOrInstruction *last_cache_plus_one = (SpecializedCacheOrInstruction *)first_instr;
- assert(&last_cache_plus_one->code[0] == first_instr);
- return &last_cache_plus_one[-1-n].entry;
-}
+#define INLINE_CACHE_ENTRIES_CALL CACHE_ENTRIES(_PyCallCache)
-/* Following two functions form a pair.
- *
- * oparg_from_offset_and_index() is used to compute the oparg
- * when quickening, so that offset_from_oparg_and_nexti()
- * can be used at runtime to compute the offset.
- *
- * The relationship between the three values is currently
- * offset == (index>>1) + oparg
- * This relation is chosen based on the following observations:
- * 1. typically 1 in 4 instructions need a cache
- * 2. instructions that need a cache typically use 2 entries
- * These observations imply: offset ≈ index/2
- * We use the oparg to fine tune the relation to avoid wasting space
- * and allow consecutive instructions to use caches.
- *
- * If the number of cache entries < number of instructions/2 we will waste
- * some small amoount of space.
- * If the number of cache entries > (number of instructions/2) + 255, then
- * some instructions will not be able to use a cache.
- * In practice, we expect some small amount of wasted space in a shorter functions
- * and only functions exceeding a 1000 lines or more not to have enugh cache space.
- *
- */
-static inline int
-oparg_from_offset_and_nexti(int offset, int nexti)
-{
- return offset-(nexti>>1);
-}
+typedef struct {
+ _Py_CODEUNIT counter;
+} _PyPrecallCache;
-static inline int
-offset_from_oparg_and_nexti(int oparg, int nexti)
-{
- return (nexti>>1)+oparg;
-}
+#define INLINE_CACHE_ENTRIES_PRECALL CACHE_ENTRIES(_PyPrecallCache)
-/* Get pointer to the cache entry associated with an instruction.
- * nexti is the index of the instruction plus one.
- * nexti is used as it corresponds to the instruction pointer in the interpreter.
- * This doesn't check that an entry has been allocated for that instruction. */
-static inline SpecializedCacheEntry *
-_GetSpecializedCacheEntryForInstruction(const _Py_CODEUNIT *first_instr, int nexti, int oparg)
-{
- return _GetSpecializedCacheEntry(
- first_instr,
- offset_from_oparg_and_nexti(oparg, nexti)
- );
-}
+/* Maximum size of code to quicken, in code units. */
+#define MAX_SIZE_TO_QUICKEN 10000
#define QUICKENING_WARMUP_DELAY 8
@@ -205,6 +114,13 @@ _Py_IncrementCountAndMaybeQuicken(PyCodeObject *code)
extern Py_ssize_t _Py_QuickenedCount;
+// Borrowed references to common callables:
+struct callable_cache {
+ PyObject *isinstance;
+ PyObject *len;
+ PyObject *list_append;
+};
+
/* "Locals plus" for a code object is the set of locals + cell vars +
* free vars. This relates to variable names as well as offsets into
* the "fast locals" storage array of execution frames. The compiler
@@ -332,11 +248,6 @@ extern int _PyLineTable_PreviousAddressRange(PyCodeAddressRange *range);
#define ADAPTIVE_CACHE_BACKOFF 64
-static inline void
-cache_backoff(_PyAdaptiveEntry *entry) {
- entry->counter = ADAPTIVE_CACHE_BACKOFF;
-}
-
/* Specialization functions */
extern int _Py_Specialize_LoadAttr(PyObject *owner, _Py_CODEUNIT *instr,
@@ -348,10 +259,10 @@ extern int _Py_Specialize_LoadMethod(PyObject *owner, _Py_CODEUNIT *instr,
PyObject *name);
extern int _Py_Specialize_BinarySubscr(PyObject *sub, PyObject *container, _Py_CODEUNIT *instr);
extern int _Py_Specialize_StoreSubscr(PyObject *container, PyObject *sub, _Py_CODEUNIT *instr);
-extern int _Py_Specialize_Call(PyObject *callable, _Py_CODEUNIT *instr, int nargs,
- PyObject *kwnames, SpecializedCacheEntry *cache);
-extern int _Py_Specialize_Precall(PyObject *callable, _Py_CODEUNIT *instr, int nargs,
- PyObject *kwnames, SpecializedCacheEntry *cache, PyObject *builtins);
+extern int _Py_Specialize_Call(PyObject *callable, _Py_CODEUNIT *instr,
+ int nargs, PyObject *kwnames);
+extern int _Py_Specialize_Precall(PyObject *callable, _Py_CODEUNIT *instr,
+ int nargs, PyObject *kwnames, int oparg);
extern void _Py_Specialize_BinaryOp(PyObject *lhs, PyObject *rhs, _Py_CODEUNIT *instr,
int oparg);
extern void _Py_Specialize_CompareOp(PyObject *lhs, PyObject *rhs,
diff --git a/Include/internal/pycore_global_strings.h b/Include/internal/pycore_global_strings.h
index 2a42dc1..74ebc14 100644
--- a/Include/internal/pycore_global_strings.h
+++ b/Include/internal/pycore_global_strings.h
@@ -269,6 +269,7 @@ struct _Py_global_strings {
STRUCT_FOR_ID(inf)
STRUCT_FOR_ID(intersection)
STRUCT_FOR_ID(isatty)
+ STRUCT_FOR_ID(isinstance)
STRUCT_FOR_ID(items)
STRUCT_FOR_ID(iter)
STRUCT_FOR_ID(join)
@@ -278,6 +279,7 @@ struct _Py_global_strings {
STRUCT_FOR_ID(last_type)
STRUCT_FOR_ID(last_value)
STRUCT_FOR_ID(latin1)
+ STRUCT_FOR_ID(len)
STRUCT_FOR_ID(line)
STRUCT_FOR_ID(lineno)
STRUCT_FOR_ID(listcomp)
diff --git a/Include/internal/pycore_interp.h b/Include/internal/pycore_interp.h
index db8edff..d556279 100644
--- a/Include/internal/pycore_interp.h
+++ b/Include/internal/pycore_interp.h
@@ -12,6 +12,7 @@ extern "C" {
#include "pycore_atomic.h" // _Py_atomic_address
#include "pycore_ast_state.h" // struct ast_state
+#include "pycore_code.h" // struct callable_cache
#include "pycore_context.h" // struct _Py_context_state
#include "pycore_dict.h" // struct _Py_dict_state
#include "pycore_exceptions.h" // struct _Py_exc_state
@@ -176,6 +177,7 @@ struct _is {
struct ast_state ast;
struct type_cache type_cache;
+ struct callable_cache callable_cache;
/* The following fields are here to avoid allocation during init.
The data is exposed through PyInterpreterState pointer fields.
diff --git a/Include/internal/pycore_runtime_init.h b/Include/internal/pycore_runtime_init.h
index 2f2bc65..8b1abcd 100644
--- a/Include/internal/pycore_runtime_init.h
+++ b/Include/internal/pycore_runtime_init.h
@@ -884,6 +884,7 @@ extern "C" {
INIT_ID(inf), \
INIT_ID(intersection), \
INIT_ID(isatty), \
+ INIT_ID(isinstance), \
INIT_ID(items), \
INIT_ID(iter), \
INIT_ID(join), \
@@ -893,6 +894,7 @@ extern "C" {
INIT_ID(last_type), \
INIT_ID(last_value), \
INIT_ID(latin1), \
+ INIT_ID(len), \
INIT_ID(line), \
INIT_ID(lineno), \
INIT_ID(listcomp), \
diff --git a/Include/opcode.h b/Include/opcode.h
index 1b9eeac..930a975 100644
--- a/Include/opcode.h
+++ b/Include/opcode.h
@@ -7,9 +7,9 @@ extern "C" {
/* Instruction opcodes for compiled code */
+#define CACHE 0
#define POP_TOP 1
#define PUSH_NULL 2
-#define CACHE 3
#define NOP 9
#define UNARY_POSITIVE 10
#define UNARY_NEGATIVE 11
@@ -114,75 +114,75 @@ extern "C" {
#define PRECALL 166
#define CALL 171
#define KW_NAMES 172
-#define BINARY_OP_ADAPTIVE 4
-#define BINARY_OP_ADD_INT 5
-#define BINARY_OP_ADD_FLOAT 6
-#define BINARY_OP_ADD_UNICODE 7
-#define BINARY_OP_INPLACE_ADD_UNICODE 8
-#define BINARY_OP_MULTIPLY_INT 13
-#define BINARY_OP_MULTIPLY_FLOAT 14
-#define BINARY_OP_SUBTRACT_INT 16
-#define BINARY_OP_SUBTRACT_FLOAT 17
-#define COMPARE_OP_ADAPTIVE 18
-#define COMPARE_OP_FLOAT_JUMP 19
-#define COMPARE_OP_INT_JUMP 20
-#define COMPARE_OP_STR_JUMP 21
-#define BINARY_SUBSCR_ADAPTIVE 22
-#define BINARY_SUBSCR_GETITEM 23
-#define BINARY_SUBSCR_LIST_INT 24
-#define BINARY_SUBSCR_TUPLE_INT 26
-#define BINARY_SUBSCR_DICT 27
-#define STORE_SUBSCR_ADAPTIVE 28
-#define STORE_SUBSCR_LIST_INT 29
-#define STORE_SUBSCR_DICT 34
-#define CALL_ADAPTIVE 36
-#define CALL_PY_EXACT_ARGS 37
-#define CALL_PY_WITH_DEFAULTS 38
-#define JUMP_ABSOLUTE_QUICK 39
-#define LOAD_ATTR_ADAPTIVE 40
-#define LOAD_ATTR_INSTANCE_VALUE 41
-#define LOAD_ATTR_WITH_HINT 42
-#define LOAD_ATTR_SLOT 43
-#define LOAD_ATTR_MODULE 44
-#define LOAD_GLOBAL_ADAPTIVE 45
-#define LOAD_GLOBAL_MODULE 46
-#define LOAD_GLOBAL_BUILTIN 47
-#define LOAD_METHOD_ADAPTIVE 48
-#define LOAD_METHOD_CLASS 55
-#define LOAD_METHOD_MODULE 56
-#define LOAD_METHOD_NO_DICT 57
-#define LOAD_METHOD_WITH_DICT 58
-#define LOAD_METHOD_WITH_VALUES 59
-#define PRECALL_ADAPTIVE 62
-#define PRECALL_BUILTIN_CLASS 63
-#define PRECALL_NO_KW_BUILTIN_O 64
-#define PRECALL_NO_KW_BUILTIN_FAST 65
-#define PRECALL_BUILTIN_FAST_WITH_KEYWORDS 66
-#define PRECALL_NO_KW_LEN 67
-#define PRECALL_NO_KW_ISINSTANCE 72
-#define PRECALL_NO_KW_LIST_APPEND 73
-#define PRECALL_NO_KW_METHOD_DESCRIPTOR_O 76
-#define PRECALL_NO_KW_METHOD_DESCRIPTOR_NOARGS 77
-#define PRECALL_NO_KW_STR_1 78
-#define PRECALL_NO_KW_TUPLE_1 79
-#define PRECALL_NO_KW_TYPE_1 80
-#define PRECALL_NO_KW_METHOD_DESCRIPTOR_FAST 81
-#define PRECALL_BOUND_METHOD 140
-#define PRECALL_PYFUNC 141
-#define RESUME_QUICK 143
-#define STORE_ATTR_ADAPTIVE 150
-#define STORE_ATTR_INSTANCE_VALUE 153
-#define STORE_ATTR_SLOT 154
-#define STORE_ATTR_WITH_HINT 158
-#define UNPACK_SEQUENCE_ADAPTIVE 159
-#define UNPACK_SEQUENCE_LIST 161
-#define UNPACK_SEQUENCE_TUPLE 167
-#define UNPACK_SEQUENCE_TWO_TUPLE 168
-#define LOAD_FAST__LOAD_FAST 169
-#define STORE_FAST__LOAD_FAST 170
-#define LOAD_FAST__LOAD_CONST 173
-#define LOAD_CONST__LOAD_FAST 174
-#define STORE_FAST__STORE_FAST 175
+#define BINARY_OP_ADAPTIVE 3
+#define BINARY_OP_ADD_INT 4
+#define BINARY_OP_ADD_FLOAT 5
+#define BINARY_OP_ADD_UNICODE 6
+#define BINARY_OP_INPLACE_ADD_UNICODE 7
+#define BINARY_OP_MULTIPLY_INT 8
+#define BINARY_OP_MULTIPLY_FLOAT 13
+#define BINARY_OP_SUBTRACT_INT 14
+#define BINARY_OP_SUBTRACT_FLOAT 16
+#define COMPARE_OP_ADAPTIVE 17
+#define COMPARE_OP_FLOAT_JUMP 18
+#define COMPARE_OP_INT_JUMP 19
+#define COMPARE_OP_STR_JUMP 20
+#define BINARY_SUBSCR_ADAPTIVE 21
+#define BINARY_SUBSCR_GETITEM 22
+#define BINARY_SUBSCR_LIST_INT 23
+#define BINARY_SUBSCR_TUPLE_INT 24
+#define BINARY_SUBSCR_DICT 26
+#define STORE_SUBSCR_ADAPTIVE 27
+#define STORE_SUBSCR_LIST_INT 28
+#define STORE_SUBSCR_DICT 29
+#define CALL_ADAPTIVE 34
+#define CALL_PY_EXACT_ARGS 36
+#define CALL_PY_WITH_DEFAULTS 37
+#define JUMP_ABSOLUTE_QUICK 38
+#define LOAD_ATTR_ADAPTIVE 39
+#define LOAD_ATTR_INSTANCE_VALUE 40
+#define LOAD_ATTR_WITH_HINT 41
+#define LOAD_ATTR_SLOT 42
+#define LOAD_ATTR_MODULE 43
+#define LOAD_GLOBAL_ADAPTIVE 44
+#define LOAD_GLOBAL_MODULE 45
+#define LOAD_GLOBAL_BUILTIN 46
+#define LOAD_METHOD_ADAPTIVE 47
+#define LOAD_METHOD_CLASS 48
+#define LOAD_METHOD_MODULE 55
+#define LOAD_METHOD_NO_DICT 56
+#define LOAD_METHOD_WITH_DICT 57
+#define LOAD_METHOD_WITH_VALUES 58
+#define PRECALL_ADAPTIVE 59
+#define PRECALL_BUILTIN_CLASS 62
+#define PRECALL_NO_KW_BUILTIN_O 63
+#define PRECALL_NO_KW_BUILTIN_FAST 64
+#define PRECALL_BUILTIN_FAST_WITH_KEYWORDS 65
+#define PRECALL_NO_KW_LEN 66
+#define PRECALL_NO_KW_ISINSTANCE 67
+#define PRECALL_NO_KW_LIST_APPEND 72
+#define PRECALL_NO_KW_METHOD_DESCRIPTOR_O 73
+#define PRECALL_NO_KW_METHOD_DESCRIPTOR_NOARGS 76
+#define PRECALL_NO_KW_STR_1 77
+#define PRECALL_NO_KW_TUPLE_1 78
+#define PRECALL_NO_KW_TYPE_1 79
+#define PRECALL_NO_KW_METHOD_DESCRIPTOR_FAST 80
+#define PRECALL_BOUND_METHOD 81
+#define PRECALL_PYFUNC 140
+#define RESUME_QUICK 141
+#define STORE_ATTR_ADAPTIVE 143
+#define STORE_ATTR_INSTANCE_VALUE 150
+#define STORE_ATTR_SLOT 153
+#define STORE_ATTR_WITH_HINT 154
+#define UNPACK_SEQUENCE_ADAPTIVE 158
+#define UNPACK_SEQUENCE_LIST 159
+#define UNPACK_SEQUENCE_TUPLE 161
+#define UNPACK_SEQUENCE_TWO_TUPLE 167
+#define LOAD_FAST__LOAD_FAST 168
+#define STORE_FAST__LOAD_FAST 169
+#define LOAD_FAST__LOAD_CONST 170
+#define LOAD_CONST__LOAD_FAST 173
+#define STORE_FAST__STORE_FAST 174
#define DO_TRACING 255
extern const uint8_t _PyOpcode_InlineCacheEntries[256];
@@ -218,6 +218,8 @@ const uint8_t _PyOpcode_InlineCacheEntries[256] = {
[LOAD_GLOBAL] = 5,
[BINARY_OP] = 1,
[LOAD_METHOD] = 10,
+ [PRECALL] = 1,
+ [CALL] = 4,
};
#endif /* OPCODE_TABLES */