From 21fa7a3e8f99a1a32467f85c877e40cbdafa9da7 Mon Sep 17 00:00:00 2001
From: Mark Shannon
Date: Thu, 18 Nov 2021 11:02:14 +0000
Subject: bpo-45829: Specialize BINARY_SUBSCR for __getitem__ implemented in Python. (GH-29592)

---
Review notes (ignored by git-am; everything up to the first "diff --git" is
commentary):

 * Python/ceval.c, BINARY_SUBSCR_GETITEM: new_frame->depth is now computed
   before `frame` is rebound to new_frame.  As received, the assignment came
   after the rebind, so it read new_frame's own depth instead of the caller's.
 * Python/specialize.c, _Py_Specialize_BinarySubscr: the function version is
   stored in the 16-bit `index` field, so specialization is now refused when
   the version does not fit.  As received, a larger version was truncated and
   the BINARY_SUBSCR_GETITEM guard would then fail on every execution.
 * Include/internal/pycore_code.h: prototype parameter names now match the
   definition and the call site (container, sub).
 * None of these changes adds or removes a line, so hunk headers and the
   diffstat are unchanged.  The "index" blob ids are kept as received and no
   longer describe the modified content.
 * Line breaks and indentation were reconstructed from a whitespace-collapsed
   copy; column alignment inside lines is unknown.  Apply with
   whitespace-insensitive matching (e.g. `git apply --ignore-whitespace`).

 Include/internal/pycore_code.h | 3 +-
 Include/opcode.h | 65 ++++++++--------
 Lib/opcode.py | 1 +
 .../2021-11-17-10-14-35.bpo-45829.5Cf6fY.rst | 2 +
 Python/ceval.c | 61 +++++++++------
 Python/opcode_targets.h | 16 ++--
 Python/specialize.c | 86 +++++++++++++++-------
 7 files changed, 145 insertions(+), 89 deletions(-)
 create mode 100644 Misc/NEWS.d/next/Core and Builtins/2021-11-17-10-14-35.bpo-45829.5Cf6fY.rst

diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h
index 7fe9e74..6563f7b 100644
--- a/Include/internal/pycore_code.h
+++ b/Include/internal/pycore_code.h
@@ -17,6 +17,7 @@ typedef struct {
     uint8_t original_oparg;
     uint8_t counter;
     uint16_t index;
+    uint32_t version;
 } _PyAdaptiveEntry;
 
 
@@ -266,7 +267,7 @@ int _Py_Specialize_LoadAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name
 int _Py_Specialize_StoreAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, SpecializedCacheEntry *cache);
 int _Py_Specialize_LoadGlobal(PyObject *globals, PyObject *builtins, _Py_CODEUNIT *instr, PyObject *name, SpecializedCacheEntry *cache);
 int _Py_Specialize_LoadMethod(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, SpecializedCacheEntry *cache);
-int _Py_Specialize_BinarySubscr(PyObject *sub, PyObject *container, _Py_CODEUNIT *instr);
+int _Py_Specialize_BinarySubscr(PyObject *container, PyObject *sub, _Py_CODEUNIT *instr, SpecializedCacheEntry *cache);
 int _Py_Specialize_CallFunction(PyObject *callable, _Py_CODEUNIT *instr, int nargs, SpecializedCacheEntry *cache, PyObject *builtins);
 void _Py_Specialize_BinaryOp(PyObject *lhs, PyObject *rhs, _Py_CODEUNIT *instr, SpecializedCacheEntry *cache);
 
diff --git a/Include/opcode.h b/Include/opcode.h
index ca20ccd..2367064 100644
--- a/Include/opcode.h
+++ b/Include/opcode.h
@@ -121,38 +121,39 @@ extern "C" {
 #define BINARY_OP_SUBTRACT_INT 19
 #define BINARY_OP_SUBTRACT_FLOAT 20
 #define BINARY_SUBSCR_ADAPTIVE 21
-#define BINARY_SUBSCR_LIST_INT 22
-#define BINARY_SUBSCR_TUPLE_INT 23
-#define BINARY_SUBSCR_DICT 24
-#define CALL_FUNCTION_ADAPTIVE 26
-#define CALL_FUNCTION_BUILTIN_O 27
-#define CALL_FUNCTION_BUILTIN_FAST 28
-#define CALL_FUNCTION_LEN 29
-#define CALL_FUNCTION_ISINSTANCE 34
-#define CALL_FUNCTION_PY_SIMPLE 36
-#define JUMP_ABSOLUTE_QUICK 38
-#define LOAD_ATTR_ADAPTIVE 39
-#define LOAD_ATTR_INSTANCE_VALUE 40
-#define LOAD_ATTR_WITH_HINT 41
-#define LOAD_ATTR_SLOT 42
-#define LOAD_ATTR_MODULE 43
-#define LOAD_GLOBAL_ADAPTIVE 44
-#define LOAD_GLOBAL_MODULE 45
-#define LOAD_GLOBAL_BUILTIN 46
-#define LOAD_METHOD_ADAPTIVE 47
-#define LOAD_METHOD_CACHED 48
-#define LOAD_METHOD_CLASS 55
-#define LOAD_METHOD_MODULE 56
-#define LOAD_METHOD_NO_DICT 57
-#define STORE_ATTR_ADAPTIVE 58
-#define STORE_ATTR_INSTANCE_VALUE 59
-#define STORE_ATTR_SLOT 62
-#define STORE_ATTR_WITH_HINT 63
-#define LOAD_FAST__LOAD_FAST 64
-#define STORE_FAST__LOAD_FAST 65
-#define LOAD_FAST__LOAD_CONST 66
-#define LOAD_CONST__LOAD_FAST 67
-#define STORE_FAST__STORE_FAST 75
+#define BINARY_SUBSCR_GETITEM 22
+#define BINARY_SUBSCR_LIST_INT 23
+#define BINARY_SUBSCR_TUPLE_INT 24
+#define BINARY_SUBSCR_DICT 26
+#define CALL_FUNCTION_ADAPTIVE 27
+#define CALL_FUNCTION_BUILTIN_O 28
+#define CALL_FUNCTION_BUILTIN_FAST 29
+#define CALL_FUNCTION_LEN 34
+#define CALL_FUNCTION_ISINSTANCE 36
+#define CALL_FUNCTION_PY_SIMPLE 38
+#define JUMP_ABSOLUTE_QUICK 39
+#define LOAD_ATTR_ADAPTIVE 40
+#define LOAD_ATTR_INSTANCE_VALUE 41
+#define LOAD_ATTR_WITH_HINT 42
+#define LOAD_ATTR_SLOT 43
+#define LOAD_ATTR_MODULE 44
+#define LOAD_GLOBAL_ADAPTIVE 45
+#define LOAD_GLOBAL_MODULE 46
+#define LOAD_GLOBAL_BUILTIN 47
+#define LOAD_METHOD_ADAPTIVE 48
+#define LOAD_METHOD_CACHED 55
+#define LOAD_METHOD_CLASS 56
+#define LOAD_METHOD_MODULE 57
+#define LOAD_METHOD_NO_DICT 58
+#define STORE_ATTR_ADAPTIVE 59
+#define STORE_ATTR_INSTANCE_VALUE 62
+#define STORE_ATTR_SLOT 63
+#define STORE_ATTR_WITH_HINT 64
+#define LOAD_FAST__LOAD_FAST 65
+#define STORE_FAST__LOAD_FAST 66
+#define LOAD_FAST__LOAD_CONST 67
+#define LOAD_CONST__LOAD_FAST 75
+#define STORE_FAST__STORE_FAST 76
 #define DO_TRACING 255
 #ifdef NEED_OPCODE_JUMP_TABLES
 static uint32_t _PyOpcode_RelativeJump[8] = {
diff --git a/Lib/opcode.py b/Lib/opcode.py
index 4abe99f..1df192b 100644
--- a/Lib/opcode.py
+++ b/Lib/opcode.py
@@ -234,6 +234,7 @@ _specialized_instructions = [
     "BINARY_OP_SUBTRACT_INT",
     "BINARY_OP_SUBTRACT_FLOAT",
     "BINARY_SUBSCR_ADAPTIVE",
+    "BINARY_SUBSCR_GETITEM",
     "BINARY_SUBSCR_LIST_INT",
     "BINARY_SUBSCR_TUPLE_INT",
     "BINARY_SUBSCR_DICT",
diff --git a/Misc/NEWS.d/next/Core and Builtins/2021-11-17-10-14-35.bpo-45829.5Cf6fY.rst b/Misc/NEWS.d/next/Core and Builtins/2021-11-17-10-14-35.bpo-45829.5Cf6fY.rst
new file mode 100644
index 0000000..ed8bfb9
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2021-11-17-10-14-35.bpo-45829.5Cf6fY.rst
@@ -0,0 +1,2 @@
+Specialize :opcode:`BINARY_SUBSCR` for classes with a ``__getitem__`` method
+implemented in Python
diff --git a/Python/ceval.c b/Python/ceval.c
index e579ede..2b7b31c 100644
--- a/Python/ceval.c
+++ b/Python/ceval.c
@@ -2140,21 +2140,21 @@ check_eval_breaker:
         }
 
         TARGET(BINARY_SUBSCR_ADAPTIVE) {
-            if (oparg == 0) {
+            SpecializedCacheEntry *cache = GET_CACHE();
+            if (cache->adaptive.counter == 0) {
                 PyObject *sub = TOP();
                 PyObject *container = SECOND();
                 next_instr--;
-                if (_Py_Specialize_BinarySubscr(container, sub, next_instr) < 0) {
+                if (_Py_Specialize_BinarySubscr(container, sub, next_instr, cache) < 0) {
                     goto error;
                 }
                 DISPATCH();
             }
             else {
                 STAT_INC(BINARY_SUBSCR, deferred);
-                // oparg is the adaptive cache counter
-                UPDATE_PREV_INSTR_OPARG(next_instr, oparg - 1);
-                assert(_Py_OPCODE(next_instr[-1]) == BINARY_SUBSCR_ADAPTIVE);
-                assert(_Py_OPARG(next_instr[-1]) == oparg - 1);
+                cache->adaptive.counter--;
+                assert(cache->adaptive.original_oparg == 0);
+                /* No need to set oparg here; it isn't used by BINARY_SUBSCR */
                 STAT_DEC(BINARY_SUBSCR, unquickened);
                 JUMP_TO_INSTRUCTION(BINARY_SUBSCR);
             }
@@ -2223,6 +2223,37 @@ check_eval_breaker:
             DISPATCH();
         }
 
+        TARGET(BINARY_SUBSCR_GETITEM) {
+            PyObject *sub = TOP();
+            PyObject *container = SECOND();
+            SpecializedCacheEntry *caches = GET_CACHE();
+            _PyAdaptiveEntry *cache0 = &caches[0].adaptive;
+            _PyObjectCache *cache1 = &caches[-1].obj;
+            PyFunctionObject *getitem = (PyFunctionObject *)cache1->obj;
+            DEOPT_IF(Py_TYPE(container)->tp_version_tag != cache0->version, BINARY_SUBSCR);
+            DEOPT_IF(getitem->func_version != cache0->index, BINARY_SUBSCR);
+            PyCodeObject *code = (PyCodeObject *)getitem->func_code;
+            size_t size = code->co_nlocalsplus + code->co_stacksize + FRAME_SPECIALS_SIZE;
+            assert(code->co_argcount == 2);
+            InterpreterFrame *new_frame = _PyThreadState_BumpFramePointer(tstate, size);
+            if (new_frame == NULL) {
+                goto error;
+            }
+            _PyFrame_InitializeSpecials(new_frame, PyFunction_AS_FRAME_CONSTRUCTOR(getitem),
+                                        NULL, code->co_nlocalsplus);
+            STACK_SHRINK(2);
+            new_frame->localsplus[0] = container;
+            new_frame->localsplus[1] = sub;
+            for (int i = 2; i < code->co_nlocalsplus; i++) {
+                new_frame->localsplus[i] = NULL;
+            }
+            _PyFrame_SetStackPointer(frame, stack_pointer);
+            new_frame->previous = frame;
+            new_frame->depth = frame->depth + 1;  /* caller's depth: read before `frame` is rebound */
+            frame = cframe.current_frame = new_frame;
+            goto start_frame;
+        }
+
         TARGET(LIST_APPEND) {
             PyObject *v = POP();
             PyObject *list = PEEK(oparg);
@@ -4878,29 +4909,13 @@ opname ## _miss: \
         JUMP_TO_INSTRUCTION(opname); \
     }
 
-#define MISS_WITH_OPARG_COUNTER(opname) \
-opname ## _miss: \
-    { \
-        STAT_INC(opname, miss); \
-        uint8_t oparg = _Py_OPARG(next_instr[-1])-1; \
-        UPDATE_PREV_INSTR_OPARG(next_instr, oparg); \
-        assert(_Py_OPARG(next_instr[-1]) == oparg); \
-        if (oparg == 0) /* too many cache misses */ { \
-            oparg = ADAPTIVE_CACHE_BACKOFF; \
-            next_instr[-1] = _Py_MAKECODEUNIT(opname ## _ADAPTIVE, oparg); \
-            STAT_INC(opname, deopt); \
-        } \
-        STAT_DEC(opname, unquickened); \
-        JUMP_TO_INSTRUCTION(opname); \
-    }
-
 MISS_WITH_CACHE(LOAD_ATTR)
 MISS_WITH_CACHE(STORE_ATTR)
 MISS_WITH_CACHE(LOAD_GLOBAL)
 MISS_WITH_CACHE(LOAD_METHOD)
 MISS_WITH_CACHE(CALL_FUNCTION)
 MISS_WITH_CACHE(BINARY_OP)
-MISS_WITH_OPARG_COUNTER(BINARY_SUBSCR)
+MISS_WITH_CACHE(BINARY_SUBSCR)
 
 binary_subscr_dict_error:
         {
diff --git a/Python/opcode_targets.h b/Python/opcode_targets.h
index 07852d1..4703255 100644
--- a/Python/opcode_targets.h
+++ b/Python/opcode_targets.h
@@ -21,22 +21,23 @@ static void *opcode_targets[256] = {
     &&TARGET_BINARY_OP_SUBTRACT_INT,
     &&TARGET_BINARY_OP_SUBTRACT_FLOAT,
     &&TARGET_BINARY_SUBSCR_ADAPTIVE,
+    &&TARGET_BINARY_SUBSCR_GETITEM,
     &&TARGET_BINARY_SUBSCR_LIST_INT,
     &&TARGET_BINARY_SUBSCR_TUPLE_INT,
-    &&TARGET_BINARY_SUBSCR_DICT,
     &&TARGET_BINARY_SUBSCR,
+    &&TARGET_BINARY_SUBSCR_DICT,
     &&TARGET_CALL_FUNCTION_ADAPTIVE,
     &&TARGET_CALL_FUNCTION_BUILTIN_O,
     &&TARGET_CALL_FUNCTION_BUILTIN_FAST,
-    &&TARGET_CALL_FUNCTION_LEN,
     &&TARGET_GET_LEN,
     &&TARGET_MATCH_MAPPING,
     &&TARGET_MATCH_SEQUENCE,
     &&TARGET_MATCH_KEYS,
-    &&TARGET_CALL_FUNCTION_ISINSTANCE,
+    &&TARGET_CALL_FUNCTION_LEN,
     &&TARGET_PUSH_EXC_INFO,
-    &&TARGET_CALL_FUNCTION_PY_SIMPLE,
+    &&TARGET_CALL_FUNCTION_ISINSTANCE,
     &&TARGET_POP_EXCEPT_AND_RERAISE,
+    &&TARGET_CALL_FUNCTION_PY_SIMPLE,
     &&TARGET_JUMP_ABSOLUTE_QUICK,
     &&TARGET_LOAD_ATTR_ADAPTIVE,
     &&TARGET_LOAD_ATTR_INSTANCE_VALUE,
@@ -47,26 +48,25 @@ static void *opcode_targets[256] = {
     &&TARGET_LOAD_GLOBAL_MODULE,
     &&TARGET_LOAD_GLOBAL_BUILTIN,
     &&TARGET_LOAD_METHOD_ADAPTIVE,
-    &&TARGET_LOAD_METHOD_CACHED,
     &&TARGET_WITH_EXCEPT_START,
     &&TARGET_GET_AITER,
     &&TARGET_GET_ANEXT,
     &&TARGET_BEFORE_ASYNC_WITH,
     &&TARGET_BEFORE_WITH,
     &&TARGET_END_ASYNC_FOR,
+    &&TARGET_LOAD_METHOD_CACHED,
     &&TARGET_LOAD_METHOD_CLASS,
     &&TARGET_LOAD_METHOD_MODULE,
     &&TARGET_LOAD_METHOD_NO_DICT,
     &&TARGET_STORE_ATTR_ADAPTIVE,
-    &&TARGET_STORE_ATTR_INSTANCE_VALUE,
     &&TARGET_STORE_SUBSCR,
     &&TARGET_DELETE_SUBSCR,
+    &&TARGET_STORE_ATTR_INSTANCE_VALUE,
     &&TARGET_STORE_ATTR_SLOT,
     &&TARGET_STORE_ATTR_WITH_HINT,
     &&TARGET_LOAD_FAST__LOAD_FAST,
     &&TARGET_STORE_FAST__LOAD_FAST,
     &&TARGET_LOAD_FAST__LOAD_CONST,
-    &&TARGET_LOAD_CONST__LOAD_FAST,
     &&TARGET_GET_ITER,
     &&TARGET_GET_YIELD_FROM_ITER,
     &&TARGET_PRINT_EXPR,
@@ -74,13 +74,13 @@ static void *opcode_targets[256] = {
     &&TARGET_YIELD_FROM,
     &&TARGET_GET_AWAITABLE,
     &&TARGET_LOAD_ASSERTION_ERROR,
+    &&TARGET_LOAD_CONST__LOAD_FAST,
     &&TARGET_STORE_FAST__STORE_FAST,
     &&_unknown_opcode,
     &&_unknown_opcode,
     &&_unknown_opcode,
     &&_unknown_opcode,
     &&_unknown_opcode,
-    &&_unknown_opcode,
     &&TARGET_LIST_TO_TUPLE,
     &&TARGET_RETURN_VALUE,
     &&TARGET_IMPORT_STAR,
diff --git a/Python/specialize.c b/Python/specialize.c
index dd15de7..06b0764 100644
--- a/Python/specialize.c
+++ b/Python/specialize.c
@@ -243,7 +243,7 @@ static uint8_t cache_requirements[256] = {
     [LOAD_ATTR] = 2, /* _PyAdaptiveEntry and _PyAttrCache */
     [LOAD_GLOBAL] = 2, /* _PyAdaptiveEntry and _PyLoadGlobalCache */
     [LOAD_METHOD] = 3, /* _PyAdaptiveEntry, _PyAttrCache and _PyObjectCache */
-    [BINARY_SUBSCR] = 0,
+    [BINARY_SUBSCR] = 2, /* _PyAdaptiveEntry, _PyObjectCache */
     [CALL_FUNCTION] = 2, /* _PyAdaptiveEntry and _PyObjectCache/_PyCallCache */
     [STORE_ATTR] = 2, /* _PyAdaptiveEntry and _PyAttrCache */
     [BINARY_OP] = 1, // _PyAdaptiveEntry
@@ -1100,7 +1100,7 @@ success:
 
 #if COLLECT_SPECIALIZATION_STATS_DETAILED
 static int
-binary_subscr_faiL_kind(PyTypeObject *container_type, PyObject *sub)
+binary_subscr_fail_kind(PyTypeObject *container_type, PyObject *sub)
 {
     if (container_type == &PyUnicode_Type) {
         if (PyLong_CheckExact(sub)) {
@@ -1138,14 +1138,37 @@ binary_subscr_faiL_kind(PyTypeObject *container_type, PyObject *sub)
 }
 #endif
 
+_Py_IDENTIFIER(__getitem__);
+
+#define SIMPLE_FUNCTION 0
+
+static int
+function_kind(PyCodeObject *code) {
+    int flags = code->co_flags;
+    if (flags & (CO_GENERATOR | CO_COROUTINE | CO_ASYNC_GENERATOR)) {
+        return SPEC_FAIL_GENERATOR;
+    }
+    if ((flags & (CO_VARKEYWORDS | CO_VARARGS)) || code->co_kwonlyargcount) {
+        return SPEC_FAIL_COMPLEX_PARAMETERS;
+    }
+    if ((flags & CO_OPTIMIZED) == 0) {
+        return SPEC_FAIL_CO_NOT_OPTIMIZED;
+    }
+    if (code->co_nfreevars) {
+        return SPEC_FAIL_FREE_VARS;
+    }
+    return SIMPLE_FUNCTION;
+}
+
 int
 _Py_Specialize_BinarySubscr(
-     PyObject *container, PyObject *sub, _Py_CODEUNIT *instr)
+     PyObject *container, PyObject *sub, _Py_CODEUNIT *instr, SpecializedCacheEntry *cache)
 {
+    _PyAdaptiveEntry *cache0 = &cache->adaptive;
     PyTypeObject *container_type = Py_TYPE(container);
     if (container_type == &PyList_Type) {
         if (PyLong_CheckExact(sub)) {
-            *instr = _Py_MAKECODEUNIT(BINARY_SUBSCR_LIST_INT, initial_counter_value());
+            *instr = _Py_MAKECODEUNIT(BINARY_SUBSCR_LIST_INT, _Py_OPARG(*instr));
             goto success;
         }
         SPECIALIZATION_FAIL(BINARY_SUBSCR,
@@ -1154,7 +1177,7 @@ _Py_Specialize_BinarySubscr(
     }
     if (container_type == &PyTuple_Type) {
         if (PyLong_CheckExact(sub)) {
-            *instr = _Py_MAKECODEUNIT(BINARY_SUBSCR_TUPLE_INT, initial_counter_value());
+            *instr = _Py_MAKECODEUNIT(BINARY_SUBSCR_TUPLE_INT, _Py_OPARG(*instr));
             goto success;
         }
         SPECIALIZATION_FAIL(BINARY_SUBSCR,
@@ -1162,20 +1185,46 @@ _Py_Specialize_BinarySubscr(
         goto fail;
     }
     if (container_type == &PyDict_Type) {
-        *instr = _Py_MAKECODEUNIT(BINARY_SUBSCR_DICT, initial_counter_value());
+        *instr = _Py_MAKECODEUNIT(BINARY_SUBSCR_DICT, _Py_OPARG(*instr));
+        goto success;
+    }
+    PyTypeObject *cls = Py_TYPE(container);
+    PyObject *descriptor = _PyType_LookupId(cls, &PyId___getitem__);
+    if (descriptor && Py_TYPE(descriptor) == &PyFunction_Type) {
+        PyFunctionObject *func = (PyFunctionObject *)descriptor;
+        PyCodeObject *code = (PyCodeObject *)func->func_code;
+        int kind = function_kind(code);
+        if (kind != SIMPLE_FUNCTION) {
+            SPECIALIZATION_FAIL(BINARY_SUBSCR, kind);
+            goto fail;
+        }
+        if (code->co_argcount != 2) {
+            SPECIALIZATION_FAIL(BINARY_SUBSCR, SPEC_FAIL_WRONG_NUMBER_ARGUMENTS);
+            goto fail;
+        }
+        assert(cls->tp_version_tag != 0);
+        cache0->version = cls->tp_version_tag;
+        uint32_t version = _PyFunction_GetVersionForCurrentState(func);
+        if (version == 0 || version != (uint16_t)version) {  /* must fit the 16-bit index field */
+            SPECIALIZATION_FAIL(BINARY_SUBSCR, SPEC_FAIL_OUT_OF_VERSIONS);
+            goto fail;
+        }
+        cache0->index = (uint16_t)version;
+        cache[-1].obj.obj = descriptor;
+        *instr = _Py_MAKECODEUNIT(BINARY_SUBSCR_GETITEM, _Py_OPARG(*instr));
         goto success;
     }
     SPECIALIZATION_FAIL(BINARY_SUBSCR,
-                        binary_subscr_faiL_kind(container_type, sub));
-    goto fail;
+                        binary_subscr_fail_kind(container_type, sub));
 fail:
     STAT_INC(BINARY_SUBSCR, specialization_failure);
     assert(!PyErr_Occurred());
-    *instr = _Py_MAKECODEUNIT(_Py_OPCODE(*instr), ADAPTIVE_CACHE_BACKOFF);
+    cache_backoff(cache0);
     return 0;
 success:
     STAT_INC(BINARY_SUBSCR, specialization_success);
     assert(!PyErr_Occurred());
+    cache0->counter = initial_counter_value();
     return 0;
 }
 
@@ -1194,23 +1243,10 @@ specialize_py_call(
     int nargs, SpecializedCacheEntry *cache)
 {
     _PyCallCache *cache1 = &cache[-1].call;
-    /* Exclude generator or coroutines for now */
     PyCodeObject *code = (PyCodeObject *)func->func_code;
-    int flags = code->co_flags;
-    if (flags & (CO_GENERATOR | CO_COROUTINE | CO_ASYNC_GENERATOR)) {
-        SPECIALIZATION_FAIL(CALL_FUNCTION, SPEC_FAIL_GENERATOR);
-        return -1;
-    }
-    if ((flags & (CO_VARKEYWORDS | CO_VARARGS)) || code->co_kwonlyargcount) {
-        SPECIALIZATION_FAIL(CALL_FUNCTION, SPEC_FAIL_COMPLEX_PARAMETERS);
-        return -1;
-    }
-    if ((flags & CO_OPTIMIZED) == 0) {
-        SPECIALIZATION_FAIL(CALL_FUNCTION, SPEC_FAIL_CO_NOT_OPTIMIZED);
-        return -1;
-    }
-    if (code->co_nfreevars) {
-        SPECIALIZATION_FAIL(CALL_FUNCTION, SPEC_FAIL_FREE_VARS);
+    int kind = function_kind(code);
+    if (kind != SIMPLE_FUNCTION) {
+        SPECIALIZATION_FAIL(CALL_FUNCTION, kind);
         return -1;
     }
     int argcount = code->co_argcount;
-- 
cgit v0.12