diff options
author | Mark Shannon <mark@hotpy.org> | 2021-08-27 08:21:01 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-08-27 08:21:01 (GMT) |
commit | d3eaf0cc5b311ad023fd13e367f817d528403306 (patch) | |
tree | d21a76b0e3aa781e66d07442c75599e35c3d27bd /Python | |
parent | 245f1f260577a005fd631144b4377febef0b47ed (diff) | |
download | cpython-d3eaf0cc5b311ad023fd13e367f817d528403306.zip cpython-d3eaf0cc5b311ad023fd13e367f817d528403306.tar.gz cpython-d3eaf0cc5b311ad023fd13e367f817d528403306.tar.bz2 |
bpo-44945: Specialize BINARY_ADD (GH-27967)
Diffstat (limited to 'Python')
-rw-r--r-- | Python/ceval.c | 129 | ||||
-rw-r--r-- | Python/opcode_targets.h | 50 | ||||
-rw-r--r-- | Python/specialize.c | 62 |
3 files changed, 195 insertions, 46 deletions
diff --git a/Python/ceval.c b/Python/ceval.c index 5fec90b..8aaa83b 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -1435,6 +1435,12 @@ eval_frame_handle_pending(PyThreadState *tstate) #define UPDATE_PREV_INSTR_OPARG(instr, oparg) ((uint8_t*)(instr))[-1] = (oparg) +static inline void +record_hit_inline(_Py_CODEUNIT *next_instr, int oparg) +{ + UPDATE_PREV_INSTR_OPARG(next_instr, saturating_increment(oparg)); +} + #define GLOBALS() frame->f_globals #define BUILTINS() frame->f_builtins #define LOCALS() frame->f_locals @@ -1980,28 +1986,120 @@ check_eval_breaker: } TARGET(BINARY_ADD): { + PREDICTED(BINARY_ADD); + STAT_INC(BINARY_ADD, unquickened); PyObject *right = POP(); PyObject *left = TOP(); - PyObject *sum; - /* NOTE(vstinner): Please don't try to micro-optimize int+int on - CPython using bytecode, it is simply worthless. - See http://bugs.python.org/issue21955 and - http://bugs.python.org/issue10044 for the discussion. In short, - no patch shown any impact on a realistic benchmark, only a minor - speedup on microbenchmarks. */ - if (PyUnicode_CheckExact(left) && - PyUnicode_CheckExact(right)) { - sum = unicode_concatenate(tstate, left, right, frame, next_instr); - /* unicode_concatenate consumed the ref to left */ + PyObject *sum = PyNumber_Add(left, right); + SET_TOP(sum); + Py_DECREF(left); + Py_DECREF(right); + if (sum == NULL) { + goto error; + } + DISPATCH(); + } + + TARGET(BINARY_ADD_ADAPTIVE): { + if (oparg == 0) { + PyObject *left = SECOND(); + PyObject *right = TOP(); + next_instr--; + if (_Py_Specialize_BinaryAdd(left, right, next_instr) < 0) { + goto error; + } + DISPATCH(); } else { - sum = PyNumber_Add(left, right); - Py_DECREF(left); + STAT_INC(BINARY_ADD, deferred); + UPDATE_PREV_INSTR_OPARG(next_instr, oparg - 1); + STAT_DEC(BINARY_ADD, unquickened); + JUMP_TO_INSTRUCTION(BINARY_ADD); } + } + + TARGET(BINARY_ADD_UNICODE): { + PyObject *left = SECOND(); + PyObject *right = TOP(); + DEOPT_IF(!PyUnicode_CheckExact(left), BINARY_ADD); + DEOPT_IF(Py_TYPE(right) != Py_TYPE(left), BINARY_ADD); + STAT_INC(BINARY_ADD, hit); + record_hit_inline(next_instr, oparg); + PyObject *res = PyUnicode_Concat(left, right); + STACK_SHRINK(1); + SET_TOP(res); + Py_DECREF(left); Py_DECREF(right); - SET_TOP(sum); - if (sum == NULL) + if (TOP() == NULL) { + goto error; + } + DISPATCH(); + } + + TARGET(BINARY_ADD_UNICODE_INPLACE_FAST): { + PyObject *left = SECOND(); + PyObject *right = TOP(); + DEOPT_IF(!PyUnicode_CheckExact(left), BINARY_ADD); + DEOPT_IF(Py_TYPE(right) != Py_TYPE(left), BINARY_ADD); + DEOPT_IF(Py_REFCNT(left) != 2, BINARY_ADD); + int next_oparg = _Py_OPARG(*next_instr); + assert(_Py_OPCODE(*next_instr) == STORE_FAST); + /* In the common case, there are 2 references to the value + * stored in 'variable' when the v = v + ... is performed: one + * on the value stack (in 'v') and one still stored in the + * 'variable'. We try to delete the variable now to reduce + * the refcnt to 1. + */ + PyObject *var = GETLOCAL(next_oparg); + DEOPT_IF(var != left, BINARY_ADD); + STAT_INC(BINARY_ADD, hit); + record_hit_inline(next_instr, oparg); + GETLOCAL(next_oparg) = NULL; + Py_DECREF(left); + STACK_SHRINK(1); + PyUnicode_Append(&TOP(), right); + Py_DECREF(right); + if (TOP() == NULL) { goto error; + } + DISPATCH(); + } + + TARGET(BINARY_ADD_FLOAT): { + PyObject *left = SECOND(); + PyObject *right = TOP(); + DEOPT_IF(!PyFloat_CheckExact(left), BINARY_ADD); + DEOPT_IF(Py_TYPE(right) != Py_TYPE(left), BINARY_ADD); + STAT_INC(BINARY_ADD, hit); + record_hit_inline(next_instr, oparg); + double dsum = ((PyFloatObject *)left)->ob_fval + + ((PyFloatObject *)right)->ob_fval; + PyObject *sum = PyFloat_FromDouble(dsum); + SET_SECOND(sum); + Py_DECREF(right); + Py_DECREF(left); + STACK_SHRINK(1); + if (sum == NULL) { + goto error; + } + DISPATCH(); + } + + TARGET(BINARY_ADD_INT): { + PyObject *left = SECOND(); + PyObject *right = TOP(); + DEOPT_IF(!PyLong_CheckExact(left), BINARY_ADD); + DEOPT_IF(Py_TYPE(right) != Py_TYPE(left), BINARY_ADD); + STAT_INC(BINARY_ADD, hit); + record_hit_inline(next_instr, oparg); + PyObject *sum = _PyLong_Add((PyLongObject *)left, (PyLongObject *)right); + SET_SECOND(sum); + Py_DECREF(right); + Py_DECREF(left); + STACK_SHRINK(1); + if (sum == NULL) { + goto error; + } DISPATCH(); } @@ -4761,6 +4859,7 @@ MISS_WITH_CACHE(STORE_ATTR) MISS_WITH_CACHE(LOAD_GLOBAL) MISS_WITH_CACHE(LOAD_METHOD) MISS_WITH_OPARG_COUNTER(BINARY_SUBSCR) +MISS_WITH_OPARG_COUNTER(BINARY_ADD) binary_subscr_dict_error: { diff --git a/Python/opcode_targets.h b/Python/opcode_targets.h index f97eaf8..f3bfae5 100644 --- a/Python/opcode_targets.h +++ b/Python/opcode_targets.h @@ -6,21 +6,21 @@ static void *opcode_targets[256] = { &&TARGET_DUP_TOP, &&TARGET_DUP_TOP_TWO, &&TARGET_ROT_FOUR, - &&TARGET_BINARY_SUBSCR_ADAPTIVE, - &&TARGET_BINARY_SUBSCR_LIST_INT, + &&TARGET_BINARY_ADD_ADAPTIVE, + &&TARGET_BINARY_ADD_INT, &&TARGET_NOP, &&TARGET_UNARY_POSITIVE, &&TARGET_UNARY_NEGATIVE, &&TARGET_UNARY_NOT, - &&TARGET_BINARY_SUBSCR_TUPLE_INT, - &&TARGET_BINARY_SUBSCR_DICT, + &&TARGET_BINARY_ADD_FLOAT, + &&TARGET_BINARY_ADD_UNICODE, &&TARGET_UNARY_INVERT, &&TARGET_BINARY_MATRIX_MULTIPLY, &&TARGET_INPLACE_MATRIX_MULTIPLY, - &&TARGET_JUMP_ABSOLUTE_QUICK, + &&TARGET_BINARY_ADD_UNICODE_INPLACE_FAST, &&TARGET_BINARY_POWER, &&TARGET_BINARY_MULTIPLY, - &&TARGET_LOAD_ATTR_ADAPTIVE, + &&TARGET_BINARY_SUBSCR_ADAPTIVE, &&TARGET_BINARY_MODULO, &&TARGET_BINARY_ADD, &&TARGET_BINARY_SUBTRACT, @@ -35,19 +35,19 @@ static void *opcode_targets[256] = { &&TARGET_MATCH_KEYS, &&TARGET_COPY_DICT_WITHOUT_KEYS, &&TARGET_PUSH_EXC_INFO, - &&TARGET_LOAD_ATTR_SPLIT_KEYS, + &&TARGET_BINARY_SUBSCR_LIST_INT, &&TARGET_POP_EXCEPT_AND_RERAISE, + &&TARGET_BINARY_SUBSCR_TUPLE_INT, + &&TARGET_BINARY_SUBSCR_DICT, + &&TARGET_JUMP_ABSOLUTE_QUICK, + &&TARGET_LOAD_ATTR_ADAPTIVE, + &&TARGET_LOAD_ATTR_SPLIT_KEYS, &&TARGET_LOAD_ATTR_WITH_HINT, &&TARGET_LOAD_ATTR_SLOT, &&TARGET_LOAD_ATTR_MODULE, &&TARGET_LOAD_GLOBAL_ADAPTIVE, &&TARGET_LOAD_GLOBAL_MODULE, &&TARGET_LOAD_GLOBAL_BUILTIN, - &&TARGET_LOAD_METHOD_ADAPTIVE, - &&TARGET_LOAD_METHOD_CACHED, - &&TARGET_LOAD_METHOD_CLASS, - &&TARGET_LOAD_METHOD_MODULE, - &&TARGET_STORE_ATTR_ADAPTIVE, &&TARGET_WITH_EXCEPT_START, &&TARGET_GET_AITER, &&TARGET_GET_ANEXT, @@ -57,7 +57,7 @@ static void *opcode_targets[256] = { &&TARGET_INPLACE_ADD, &&TARGET_INPLACE_SUBTRACT, &&TARGET_INPLACE_MULTIPLY, - &&TARGET_STORE_ATTR_SPLIT_KEYS, + &&TARGET_LOAD_METHOD_ADAPTIVE, &&TARGET_INPLACE_MODULO, &&TARGET_STORE_SUBSCR, &&TARGET_DELETE_SUBSCR, @@ -79,15 +79,15 @@ static void *opcode_targets[256] = { &&TARGET_INPLACE_AND, &&TARGET_INPLACE_XOR, &&TARGET_INPLACE_OR, - &&TARGET_STORE_ATTR_SLOT, - &&TARGET_STORE_ATTR_WITH_HINT, + &&TARGET_LOAD_METHOD_CACHED, + &&TARGET_LOAD_METHOD_CLASS, &&TARGET_LIST_TO_TUPLE, &&TARGET_RETURN_VALUE, &&TARGET_IMPORT_STAR, &&TARGET_SETUP_ANNOTATIONS, &&TARGET_YIELD_VALUE, - &&TARGET_LOAD_FAST__LOAD_FAST, - &&TARGET_STORE_FAST__LOAD_FAST, + &&TARGET_LOAD_METHOD_MODULE, + &&TARGET_STORE_ATTR_ADAPTIVE, &&TARGET_POP_EXCEPT, &&TARGET_STORE_NAME, &&TARGET_DELETE_NAME, @@ -119,30 +119,30 @@ static void *opcode_targets[256] = { &&TARGET_IS_OP, &&TARGET_CONTAINS_OP, &&TARGET_RERAISE, - &&TARGET_LOAD_FAST__LOAD_CONST, + &&TARGET_STORE_ATTR_SPLIT_KEYS, &&TARGET_JUMP_IF_NOT_EXC_MATCH, - &&TARGET_LOAD_CONST__LOAD_FAST, - &&TARGET_STORE_FAST__STORE_FAST, + &&TARGET_STORE_ATTR_SLOT, + &&TARGET_STORE_ATTR_WITH_HINT, &&TARGET_LOAD_FAST, &&TARGET_STORE_FAST, &&TARGET_DELETE_FAST, - &&_unknown_opcode, - &&_unknown_opcode, + &&TARGET_LOAD_FAST__LOAD_FAST, + &&TARGET_STORE_FAST__LOAD_FAST, &&TARGET_GEN_START, &&TARGET_RAISE_VARARGS, &&TARGET_CALL_FUNCTION, &&TARGET_MAKE_FUNCTION, &&TARGET_BUILD_SLICE, - &&_unknown_opcode, + &&TARGET_LOAD_FAST__LOAD_CONST, &&TARGET_MAKE_CELL, &&TARGET_LOAD_CLOSURE, &&TARGET_LOAD_DEREF, &&TARGET_STORE_DEREF, &&TARGET_DELETE_DEREF, - &&_unknown_opcode, + &&TARGET_LOAD_CONST__LOAD_FAST, &&TARGET_CALL_FUNCTION_KW, &&TARGET_CALL_FUNCTION_EX, - &&_unknown_opcode, + &&TARGET_STORE_FAST__STORE_FAST, &&TARGET_EXTENDED_ARG, &&TARGET_LIST_APPEND, &&TARGET_SET_ADD, diff --git a/Python/specialize.c b/Python/specialize.c index 359bec5..b321368 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -121,7 +121,8 @@ _Py_GetSpecializationStats(void) { int err = 0; err += add_stat_dict(stats, LOAD_ATTR, "load_attr"); err += add_stat_dict(stats, LOAD_GLOBAL, "load_global"); - err += add_stat_dict(stats, LOAD_GLOBAL, "load_method"); + err += add_stat_dict(stats, LOAD_METHOD, "load_method"); + err += add_stat_dict(stats, BINARY_ADD, "binary_add"); err += add_stat_dict(stats, BINARY_SUBSCR, "binary_subscr"); err += add_stat_dict(stats, STORE_ATTR, "store_attr"); if (err < 0) { @@ -177,6 +178,7 @@ _Py_PrintSpecializationStats(void) print_stats(out, &_specialization_stats[LOAD_ATTR], "load_attr"); print_stats(out, &_specialization_stats[LOAD_GLOBAL], "load_global"); print_stats(out, &_specialization_stats[LOAD_METHOD], "load_method"); + print_stats(out, &_specialization_stats[BINARY_ADD], "binary_add"); print_stats(out, &_specialization_stats[BINARY_SUBSCR], "binary_subscr"); print_stats(out, &_specialization_stats[STORE_ATTR], "store_attr"); if (out != stderr) { @@ -226,6 +228,7 @@ static uint8_t adaptive_opcodes[256] = { [LOAD_ATTR] = LOAD_ATTR_ADAPTIVE, [LOAD_GLOBAL] = LOAD_GLOBAL_ADAPTIVE, [LOAD_METHOD] = LOAD_METHOD_ADAPTIVE, + [BINARY_ADD] = BINARY_ADD_ADAPTIVE, [BINARY_SUBSCR] = BINARY_SUBSCR_ADAPTIVE, [STORE_ATTR] = STORE_ATTR_ADAPTIVE, }; @@ -235,6 +238,7 @@ static uint8_t cache_requirements[256] = { [LOAD_ATTR] = 2, /* _PyAdaptiveEntry and _PyAttrCache */ [LOAD_GLOBAL] = 2, /* _PyAdaptiveEntry and _PyLoadGlobalCache */ [LOAD_METHOD] = 3, /* _PyAdaptiveEntry, _PyAttrCache and _PyObjectCache */ + [BINARY_ADD] = 0, [BINARY_SUBSCR] = 0, [STORE_ATTR] = 2, /* _PyAdaptiveEntry and _PyAttrCache */ }; @@ -436,6 +440,12 @@ _Py_Quicken(PyCodeObject *code) { #define SPEC_FAIL_TUPLE_NON_INT_SUBSCRIPT 9 #define SPEC_FAIL_NOT_TUPLE_LIST_OR_DICT 10 +/* Binary add */ + +#define SPEC_FAIL_NON_FUNCTION_SCOPE 11 +#define SPEC_FAIL_DIFFERENT_TYPES 12 +#define SPEC_FAIL_OTHER_TYPE 13 + static int specialize_module_load_attr( @@ -898,7 +908,7 @@ _Py_Specialize_LoadMethod(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, SPECIALIZATION_FAIL(LOAD_METHOD, SPEC_FAIL_NOT_METHOD); goto fail; } - + assert(kind == METHOD); // If o.__dict__ changes, the method might be found in o.__dict__ // instead of old type lookup. So record o.__dict__'s keys. @@ -933,15 +943,15 @@ _Py_Specialize_LoadMethod(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, } // Fall through. } // Else owner is maybe a builtin with no dict, or __slots__. Doesn't matter. - + /* `descr` is borrowed. Just check tp_version_tag before accessing in case * it's deleted. This is safe for methods (even inherited ones from super * classes!) as long as tp_version_tag is validated for two main reasons: - * + * * 1. The class will always hold a reference to the method so it will * usually not be GC-ed. Should it be deleted in Python, e.g. * `del obj.meth`, tp_version_tag will be invalidated, because of reason 2. - * + * * 2. The pre-existing type method cache (MCACHE) uses the same principles * of caching a borrowed descriptor. It does all the heavy lifting for us. * E.g. it invalidates on any MRO modification, on any type object @@ -968,6 +978,7 @@ fail: return 0; } + int _Py_Specialize_LoadGlobal( PyObject *globals, PyObject *builtins, @@ -1035,7 +1046,6 @@ success: return 0; } - int _Py_Specialize_BinarySubscr( PyObject *container, PyObject *sub, _Py_CODEUNIT *instr) @@ -1076,3 +1086,43 @@ success: return 0; } +int +_Py_Specialize_BinaryAdd(PyObject *left, PyObject *right, _Py_CODEUNIT *instr) +{ + PyTypeObject *left_type = Py_TYPE(left); + if (left_type != Py_TYPE(right)) { + SPECIALIZATION_FAIL(BINARY_ADD, SPEC_FAIL_DIFFERENT_TYPES); + goto fail; + } + if (left_type == &PyUnicode_Type) { + int next_opcode = _Py_OPCODE(instr[1]); + if (next_opcode == STORE_FAST) { + *instr = _Py_MAKECODEUNIT(BINARY_ADD_UNICODE_INPLACE_FAST, saturating_start()); + } + else { + *instr = _Py_MAKECODEUNIT(BINARY_ADD_UNICODE, saturating_start()); + } + goto success; + } + else if (left_type == &PyLong_Type) { + *instr = _Py_MAKECODEUNIT(BINARY_ADD_INT, saturating_start()); + goto success; + } + else if (left_type == &PyFloat_Type) { + *instr = _Py_MAKECODEUNIT(BINARY_ADD_FLOAT, saturating_start()); + goto success; + + } + else { + SPECIALIZATION_FAIL(BINARY_ADD, SPEC_FAIL_OTHER_TYPE); + } +fail: + STAT_INC(BINARY_ADD, specialization_failure); + assert(!PyErr_Occurred()); + *instr = _Py_MAKECODEUNIT(_Py_OPCODE(*instr), ADAPTIVE_CACHE_BACKOFF); + return 0; +success: + STAT_INC(BINARY_ADD, specialization_success); + assert(!PyErr_Occurred()); + return 0; +} |