diff options
author | Dennis Sweeney <36520290+sweeneyde@users.noreply.github.com> | 2021-12-03 11:29:12 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-12-03 11:29:12 (GMT) |
commit | 03768c4d139df46212a091ed931aad03bec18b57 (patch) | |
tree | 35ab5e03aacded970ee0b1d21077f18782176c08 | |
parent | 99fcf1505218464c489d419d4500f126b6d6dc28 (diff) | |
download | cpython-03768c4d139df46212a091ed931aad03bec18b57.zip cpython-03768c4d139df46212a091ed931aad03bec18b57.tar.gz cpython-03768c4d139df46212a091ed931aad03bec18b57.tar.bz2 |
bpo-45885: Specialize COMPARE_OP (GH-29734)
* Add COMPARE_OP_ADAPTIVE adaptive instruction.
* Add COMPARE_OP_FLOAT_JUMP, COMPARE_OP_INT_JUMP and COMPARE_OP_STR_JUMP specialized instructions.
* Introduce and use _PyUnicode_Equal
-rw-r--r-- | Include/cpython/unicodeobject.h | 3 | ||||
-rw-r--r-- | Include/internal/pycore_code.h | 2 | ||||
-rw-r--r-- | Include/opcode.h | 78 | ||||
-rw-r--r-- | Lib/opcode.py | 4 | ||||
-rw-r--r-- | Misc/NEWS.d/next/Core and Builtins/2021-11-23-21-01-56.bpo-45885.3IxeCX.rst | 1 | ||||
-rw-r--r-- | Objects/unicodeobject.c | 14 | ||||
-rw-r--r-- | Python/ceval.c | 122 | ||||
-rw-r--r-- | Python/opcode_targets.h | 46 | ||||
-rw-r--r-- | Python/specialize.c | 79 |
9 files changed, 289 insertions, 60 deletions
diff --git a/Include/cpython/unicodeobject.h b/Include/cpython/unicodeobject.h index ab4aebf..e02137c 100644 --- a/Include/cpython/unicodeobject.h +++ b/Include/cpython/unicodeobject.h @@ -1016,6 +1016,9 @@ PyAPI_FUNC(PyObject*) _PyUnicode_FromId(_Py_Identifier*); and where the hash values are equal (i.e. a very probable match) */ PyAPI_FUNC(int) _PyUnicode_EQ(PyObject *, PyObject *); +/* Equality check. Returns -1 on failure. */ +PyAPI_FUNC(int) _PyUnicode_Equal(PyObject *, PyObject *); + PyAPI_FUNC(int) _PyUnicode_WideCharString_Converter(PyObject *, void *); PyAPI_FUNC(int) _PyUnicode_WideCharString_Opt_Converter(PyObject *, void *); diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h index d4d1392..496d52f 100644 --- a/Include/internal/pycore_code.h +++ b/Include/internal/pycore_code.h @@ -42,6 +42,7 @@ typedef struct { uint16_t defaults_len; } _PyCallCache; + /* Add specialized versions of entries to this union. * * Do not break the invariant: sizeof(SpecializedCacheEntry) == 8 @@ -273,6 +274,7 @@ int _Py_Specialize_StoreSubscr(PyObject *container, PyObject *sub, _Py_CODEUNIT int _Py_Specialize_CallFunction(PyObject *callable, _Py_CODEUNIT *instr, int nargs, SpecializedCacheEntry *cache, PyObject *builtins); void _Py_Specialize_BinaryOp(PyObject *lhs, PyObject *rhs, _Py_CODEUNIT *instr, SpecializedCacheEntry *cache); +void _Py_Specialize_CompareOp(PyObject *lhs, PyObject *rhs, _Py_CODEUNIT *instr, SpecializedCacheEntry *cache); #define PRINT_SPECIALIZATION_STATS 0 #define PRINT_SPECIALIZATION_STATS_DETAILED 0 diff --git a/Include/opcode.h b/Include/opcode.h index 2c1a212..f22f7e9 100644 --- a/Include/opcode.h +++ b/Include/opcode.h @@ -121,43 +121,47 @@ extern "C" { #define BINARY_OP_MULTIPLY_FLOAT 18 #define BINARY_OP_SUBTRACT_INT 19 #define BINARY_OP_SUBTRACT_FLOAT 20 -#define BINARY_SUBSCR_ADAPTIVE 21 -#define BINARY_SUBSCR_GETITEM 22 -#define BINARY_SUBSCR_LIST_INT 23 -#define BINARY_SUBSCR_TUPLE_INT 24 -#define BINARY_SUBSCR_DICT 
26 -#define STORE_SUBSCR_ADAPTIVE 27 -#define STORE_SUBSCR_LIST_INT 28 -#define STORE_SUBSCR_DICT 29 -#define CALL_FUNCTION_ADAPTIVE 34 -#define CALL_FUNCTION_BUILTIN_O 36 -#define CALL_FUNCTION_BUILTIN_FAST 38 -#define CALL_FUNCTION_LEN 39 -#define CALL_FUNCTION_ISINSTANCE 40 -#define CALL_FUNCTION_PY_SIMPLE 41 -#define JUMP_ABSOLUTE_QUICK 42 -#define LOAD_ATTR_ADAPTIVE 43 -#define LOAD_ATTR_INSTANCE_VALUE 44 -#define LOAD_ATTR_WITH_HINT 45 -#define LOAD_ATTR_SLOT 46 -#define LOAD_ATTR_MODULE 47 -#define LOAD_GLOBAL_ADAPTIVE 48 -#define LOAD_GLOBAL_MODULE 55 -#define LOAD_GLOBAL_BUILTIN 56 -#define LOAD_METHOD_ADAPTIVE 57 -#define LOAD_METHOD_CACHED 58 -#define LOAD_METHOD_CLASS 59 -#define LOAD_METHOD_MODULE 62 -#define LOAD_METHOD_NO_DICT 63 -#define STORE_ATTR_ADAPTIVE 64 -#define STORE_ATTR_INSTANCE_VALUE 65 -#define STORE_ATTR_SLOT 66 -#define STORE_ATTR_WITH_HINT 67 -#define LOAD_FAST__LOAD_FAST 75 -#define STORE_FAST__LOAD_FAST 76 -#define LOAD_FAST__LOAD_CONST 77 -#define LOAD_CONST__LOAD_FAST 78 -#define STORE_FAST__STORE_FAST 79 +#define COMPARE_OP_ADAPTIVE 21 +#define COMPARE_OP_FLOAT_JUMP 22 +#define COMPARE_OP_INT_JUMP 23 +#define COMPARE_OP_STR_JUMP 24 +#define BINARY_SUBSCR_ADAPTIVE 26 +#define BINARY_SUBSCR_GETITEM 27 +#define BINARY_SUBSCR_LIST_INT 28 +#define BINARY_SUBSCR_TUPLE_INT 29 +#define BINARY_SUBSCR_DICT 34 +#define STORE_SUBSCR_ADAPTIVE 36 +#define STORE_SUBSCR_LIST_INT 38 +#define STORE_SUBSCR_DICT 39 +#define CALL_FUNCTION_ADAPTIVE 40 +#define CALL_FUNCTION_BUILTIN_O 41 +#define CALL_FUNCTION_BUILTIN_FAST 42 +#define CALL_FUNCTION_LEN 43 +#define CALL_FUNCTION_ISINSTANCE 44 +#define CALL_FUNCTION_PY_SIMPLE 45 +#define JUMP_ABSOLUTE_QUICK 46 +#define LOAD_ATTR_ADAPTIVE 47 +#define LOAD_ATTR_INSTANCE_VALUE 48 +#define LOAD_ATTR_WITH_HINT 55 +#define LOAD_ATTR_SLOT 56 +#define LOAD_ATTR_MODULE 57 +#define LOAD_GLOBAL_ADAPTIVE 58 +#define LOAD_GLOBAL_MODULE 59 +#define LOAD_GLOBAL_BUILTIN 62 +#define LOAD_METHOD_ADAPTIVE 63 +#define 
LOAD_METHOD_CACHED 64 +#define LOAD_METHOD_CLASS 65 +#define LOAD_METHOD_MODULE 66 +#define LOAD_METHOD_NO_DICT 67 +#define STORE_ATTR_ADAPTIVE 75 +#define STORE_ATTR_INSTANCE_VALUE 76 +#define STORE_ATTR_SLOT 77 +#define STORE_ATTR_WITH_HINT 78 +#define LOAD_FAST__LOAD_FAST 79 +#define STORE_FAST__LOAD_FAST 80 +#define LOAD_FAST__LOAD_CONST 81 +#define LOAD_CONST__LOAD_FAST 87 +#define STORE_FAST__STORE_FAST 88 #define DO_TRACING 255 #ifdef NEED_OPCODE_JUMP_TABLES static uint32_t _PyOpcode_RelativeJump[8] = { diff --git a/Lib/opcode.py b/Lib/opcode.py index 60805e9..e5889bc 100644 --- a/Lib/opcode.py +++ b/Lib/opcode.py @@ -234,6 +234,10 @@ _specialized_instructions = [ "BINARY_OP_MULTIPLY_FLOAT", "BINARY_OP_SUBTRACT_INT", "BINARY_OP_SUBTRACT_FLOAT", + "COMPARE_OP_ADAPTIVE", + "COMPARE_OP_FLOAT_JUMP", + "COMPARE_OP_INT_JUMP", + "COMPARE_OP_STR_JUMP", "BINARY_SUBSCR_ADAPTIVE", "BINARY_SUBSCR_GETITEM", "BINARY_SUBSCR_LIST_INT", diff --git a/Misc/NEWS.d/next/Core and Builtins/2021-11-23-21-01-56.bpo-45885.3IxeCX.rst b/Misc/NEWS.d/next/Core and Builtins/2021-11-23-21-01-56.bpo-45885.3IxeCX.rst new file mode 100644 index 0000000..316daf9 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2021-11-23-21-01-56.bpo-45885.3IxeCX.rst @@ -0,0 +1 @@ +Specialized the ``COMPARE_OP`` opcode using the PEP 659 machinery.
\ No newline at end of file diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 61fc34d..532c48a 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -11168,6 +11168,20 @@ unicode_compare_eq(PyObject *str1, PyObject *str2) return (cmp == 0); } +int +_PyUnicode_Equal(PyObject *str1, PyObject *str2) +{ + assert(PyUnicode_CheckExact(str1)); + assert(PyUnicode_CheckExact(str2)); + if (str1 == str2) { + return 1; + } + if (PyUnicode_READY(str1) || PyUnicode_READY(str2)) { + return -1; + } + return unicode_compare_eq(str1, str2); +} + int PyUnicode_Compare(PyObject *left, PyObject *right) diff --git a/Python/ceval.c b/Python/ceval.c index 97c6844..05897c5 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -3778,6 +3778,8 @@ check_eval_breaker: } TARGET(COMPARE_OP) { + PREDICTED(COMPARE_OP); + STAT_INC(COMPARE_OP, unquickened); assert(oparg <= Py_GE); PyObject *right = POP(); PyObject *left = TOP(); @@ -3792,6 +3794,125 @@ check_eval_breaker: DISPATCH(); } + TARGET(COMPARE_OP_ADAPTIVE) { + assert(cframe.use_tracing == 0); + SpecializedCacheEntry *cache = GET_CACHE(); + if (cache->adaptive.counter == 0) { + PyObject *right = TOP(); + PyObject *left = SECOND(); + next_instr--; + _Py_Specialize_CompareOp(left, right, next_instr, cache); + DISPATCH(); + } + else { + STAT_INC(COMPARE_OP, deferred); + cache->adaptive.counter--; + oparg = cache->adaptive.original_oparg; + STAT_DEC(COMPARE_OP, unquickened); + JUMP_TO_INSTRUCTION(COMPARE_OP); + } + } + + TARGET(COMPARE_OP_FLOAT_JUMP) { + assert(cframe.use_tracing == 0); + // Combined: COMPARE_OP (float ? 
float) + POP_JUMP_IF_(true/false) + SpecializedCacheEntry *caches = GET_CACHE(); + int when_to_jump_mask = caches[0].adaptive.index; + PyObject *right = TOP(); + PyObject *left = SECOND(); + DEOPT_IF(!PyFloat_CheckExact(left), COMPARE_OP); + DEOPT_IF(!PyFloat_CheckExact(right), COMPARE_OP); + double dleft = PyFloat_AS_DOUBLE(left); + double dright = PyFloat_AS_DOUBLE(right); + int sign = (dleft > dright) - (dleft < dright); + DEOPT_IF(isnan(dleft), COMPARE_OP); + DEOPT_IF(isnan(dright), COMPARE_OP); + STAT_INC(COMPARE_OP, hit); + NEXTOPARG(); + STACK_SHRINK(2); + Py_DECREF(left); + Py_DECREF(right); + assert(opcode == POP_JUMP_IF_TRUE || opcode == POP_JUMP_IF_FALSE); + int jump = (1 << (sign + 1)) & when_to_jump_mask; + if (!jump) { + next_instr++; + NOTRACE_DISPATCH(); + } + else { + JUMPTO(oparg); + CHECK_EVAL_BREAKER(); + NOTRACE_DISPATCH(); + } + } + + TARGET(COMPARE_OP_INT_JUMP) { + assert(cframe.use_tracing == 0); + // Combined: COMPARE_OP (int ? int) + POP_JUMP_IF_(true/false) + SpecializedCacheEntry *caches = GET_CACHE(); + int when_to_jump_mask = caches[0].adaptive.index; + PyObject *right = TOP(); + PyObject *left = SECOND(); + DEOPT_IF(!PyLong_CheckExact(left), COMPARE_OP); + DEOPT_IF(!PyLong_CheckExact(right), COMPARE_OP); + DEOPT_IF((size_t)(Py_SIZE(left) + 1) > 2, COMPARE_OP); + DEOPT_IF((size_t)(Py_SIZE(right) + 1) > 2, COMPARE_OP); + STAT_INC(COMPARE_OP, hit); + assert(Py_ABS(Py_SIZE(left)) <= 1 && Py_ABS(Py_SIZE(right)) <= 1); + Py_ssize_t ileft = Py_SIZE(left) * ((PyLongObject *)left)->ob_digit[0]; + Py_ssize_t iright = Py_SIZE(right) * ((PyLongObject *)right)->ob_digit[0]; + int sign = (ileft > iright) - (ileft < iright); + NEXTOPARG(); + STACK_SHRINK(2); + Py_DECREF(left); + Py_DECREF(right); + assert(opcode == POP_JUMP_IF_TRUE || opcode == POP_JUMP_IF_FALSE); + int jump = (1 << (sign + 1)) & when_to_jump_mask; + if (!jump) { + next_instr++; + NOTRACE_DISPATCH(); + } + else { + JUMPTO(oparg); + CHECK_EVAL_BREAKER(); + NOTRACE_DISPATCH(); + } + } 
+ + TARGET(COMPARE_OP_STR_JUMP) { + assert(cframe.use_tracing == 0); + // Combined: COMPARE_OP (str == str or str != str) + POP_JUMP_IF_(true/false) + SpecializedCacheEntry *caches = GET_CACHE(); + int invert = caches[0].adaptive.index; + PyObject *right = TOP(); + PyObject *left = SECOND(); + DEOPT_IF(!PyUnicode_CheckExact(left), COMPARE_OP); + DEOPT_IF(!PyUnicode_CheckExact(right), COMPARE_OP); + STAT_INC(COMPARE_OP, hit); + int res = _PyUnicode_Equal(left, right); + if (res < 0) { + goto error; + } + assert(caches[0].adaptive.original_oparg == Py_EQ || + caches[0].adaptive.original_oparg == Py_NE); + NEXTOPARG(); + assert(opcode == POP_JUMP_IF_TRUE || opcode == POP_JUMP_IF_FALSE); + STACK_SHRINK(2); + Py_DECREF(left); + Py_DECREF(right); + assert(res == 0 || res == 1); + assert(invert == 0 || invert == 1); + int jump = res ^ invert; + if (!jump) { + next_instr++; + NOTRACE_DISPATCH(); + } + else { + JUMPTO(oparg); + CHECK_EVAL_BREAKER(); + NOTRACE_DISPATCH(); + } + } + TARGET(IS_OP) { PyObject *right = POP(); PyObject *left = TOP(); @@ -5083,6 +5204,7 @@ MISS_WITH_CACHE(LOAD_GLOBAL) MISS_WITH_CACHE(LOAD_METHOD) MISS_WITH_CACHE(CALL_FUNCTION) MISS_WITH_CACHE(BINARY_OP) +MISS_WITH_CACHE(COMPARE_OP) MISS_WITH_CACHE(BINARY_SUBSCR) MISS_WITH_OPARG_COUNTER(STORE_SUBSCR) diff --git a/Python/opcode_targets.h b/Python/opcode_targets.h index c9d430d..872a688 100644 --- a/Python/opcode_targets.h +++ b/Python/opcode_targets.h @@ -20,23 +20,27 @@ static void *opcode_targets[256] = { &&TARGET_BINARY_OP_MULTIPLY_FLOAT, &&TARGET_BINARY_OP_SUBTRACT_INT, &&TARGET_BINARY_OP_SUBTRACT_FLOAT, + &&TARGET_COMPARE_OP_ADAPTIVE, + &&TARGET_COMPARE_OP_FLOAT_JUMP, + &&TARGET_COMPARE_OP_INT_JUMP, + &&TARGET_COMPARE_OP_STR_JUMP, + &&TARGET_BINARY_SUBSCR, &&TARGET_BINARY_SUBSCR_ADAPTIVE, &&TARGET_BINARY_SUBSCR_GETITEM, &&TARGET_BINARY_SUBSCR_LIST_INT, &&TARGET_BINARY_SUBSCR_TUPLE_INT, - &&TARGET_BINARY_SUBSCR, - &&TARGET_BINARY_SUBSCR_DICT, - &&TARGET_STORE_SUBSCR_ADAPTIVE, - 
&&TARGET_STORE_SUBSCR_LIST_INT, - &&TARGET_STORE_SUBSCR_DICT, &&TARGET_GET_LEN, &&TARGET_MATCH_MAPPING, &&TARGET_MATCH_SEQUENCE, &&TARGET_MATCH_KEYS, - &&TARGET_CALL_FUNCTION_ADAPTIVE, + &&TARGET_BINARY_SUBSCR_DICT, &&TARGET_PUSH_EXC_INFO, - &&TARGET_CALL_FUNCTION_BUILTIN_O, + &&TARGET_STORE_SUBSCR_ADAPTIVE, &&TARGET_POP_EXCEPT_AND_RERAISE, + &&TARGET_STORE_SUBSCR_LIST_INT, + &&TARGET_STORE_SUBSCR_DICT, + &&TARGET_CALL_FUNCTION_ADAPTIVE, + &&TARGET_CALL_FUNCTION_BUILTIN_O, &&TARGET_CALL_FUNCTION_BUILTIN_FAST, &&TARGET_CALL_FUNCTION_LEN, &&TARGET_CALL_FUNCTION_ISINSTANCE, @@ -44,29 +48,25 @@ static void *opcode_targets[256] = { &&TARGET_JUMP_ABSOLUTE_QUICK, &&TARGET_LOAD_ATTR_ADAPTIVE, &&TARGET_LOAD_ATTR_INSTANCE_VALUE, - &&TARGET_LOAD_ATTR_WITH_HINT, - &&TARGET_LOAD_ATTR_SLOT, - &&TARGET_LOAD_ATTR_MODULE, - &&TARGET_LOAD_GLOBAL_ADAPTIVE, &&TARGET_WITH_EXCEPT_START, &&TARGET_GET_AITER, &&TARGET_GET_ANEXT, &&TARGET_BEFORE_ASYNC_WITH, &&TARGET_BEFORE_WITH, &&TARGET_END_ASYNC_FOR, + &&TARGET_LOAD_ATTR_WITH_HINT, + &&TARGET_LOAD_ATTR_SLOT, + &&TARGET_LOAD_ATTR_MODULE, + &&TARGET_LOAD_GLOBAL_ADAPTIVE, &&TARGET_LOAD_GLOBAL_MODULE, + &&TARGET_STORE_SUBSCR, + &&TARGET_DELETE_SUBSCR, &&TARGET_LOAD_GLOBAL_BUILTIN, &&TARGET_LOAD_METHOD_ADAPTIVE, &&TARGET_LOAD_METHOD_CACHED, &&TARGET_LOAD_METHOD_CLASS, - &&TARGET_STORE_SUBSCR, - &&TARGET_DELETE_SUBSCR, &&TARGET_LOAD_METHOD_MODULE, &&TARGET_LOAD_METHOD_NO_DICT, - &&TARGET_STORE_ATTR_ADAPTIVE, - &&TARGET_STORE_ATTR_INSTANCE_VALUE, - &&TARGET_STORE_ATTR_SLOT, - &&TARGET_STORE_ATTR_WITH_HINT, &&TARGET_GET_ITER, &&TARGET_GET_YIELD_FROM_ITER, &&TARGET_PRINT_EXPR, @@ -74,20 +74,20 @@ static void *opcode_targets[256] = { &&TARGET_YIELD_FROM, &&TARGET_GET_AWAITABLE, &&TARGET_LOAD_ASSERTION_ERROR, + &&TARGET_STORE_ATTR_ADAPTIVE, + &&TARGET_STORE_ATTR_INSTANCE_VALUE, + &&TARGET_STORE_ATTR_SLOT, + &&TARGET_STORE_ATTR_WITH_HINT, &&TARGET_LOAD_FAST__LOAD_FAST, &&TARGET_STORE_FAST__LOAD_FAST, &&TARGET_LOAD_FAST__LOAD_CONST, - 
&&TARGET_LOAD_CONST__LOAD_FAST, - &&TARGET_STORE_FAST__STORE_FAST, - &&_unknown_opcode, - &&_unknown_opcode, &&TARGET_LIST_TO_TUPLE, &&TARGET_RETURN_VALUE, &&TARGET_IMPORT_STAR, &&TARGET_SETUP_ANNOTATIONS, &&TARGET_YIELD_VALUE, - &&_unknown_opcode, - &&_unknown_opcode, + &&TARGET_LOAD_CONST__LOAD_FAST, + &&TARGET_STORE_FAST__STORE_FAST, &&TARGET_POP_EXCEPT, &&TARGET_STORE_NAME, &&TARGET_DELETE_NAME, diff --git a/Python/specialize.c b/Python/specialize.c index f5f1213..b384675 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -129,6 +129,7 @@ _Py_GetSpecializationStats(void) { err += add_stat_dict(stats, STORE_ATTR, "store_attr"); err += add_stat_dict(stats, CALL_FUNCTION, "call_function"); err += add_stat_dict(stats, BINARY_OP, "binary_op"); + err += add_stat_dict(stats, COMPARE_OP, "compare_op"); if (err < 0) { Py_DECREF(stats); return NULL; @@ -187,6 +188,7 @@ _Py_PrintSpecializationStats(void) print_stats(out, &_specialization_stats[STORE_ATTR], "store_attr"); print_stats(out, &_specialization_stats[CALL_FUNCTION], "call_function"); print_stats(out, &_specialization_stats[BINARY_OP], "binary_op"); + print_stats(out, &_specialization_stats[COMPARE_OP], "compare_op"); if (out != stderr) { fclose(out); } @@ -239,6 +241,7 @@ static uint8_t adaptive_opcodes[256] = { [CALL_FUNCTION] = CALL_FUNCTION_ADAPTIVE, [STORE_ATTR] = STORE_ATTR_ADAPTIVE, [BINARY_OP] = BINARY_OP_ADAPTIVE, + [COMPARE_OP] = COMPARE_OP_ADAPTIVE, }; /* The number of cache entries required for a "family" of instructions. */ @@ -251,6 +254,7 @@ static uint8_t cache_requirements[256] = { [CALL_FUNCTION] = 2, /* _PyAdaptiveEntry and _PyObjectCache/_PyCallCache */ [STORE_ATTR] = 2, /* _PyAdaptiveEntry and _PyAttrCache */ [BINARY_OP] = 1, // _PyAdaptiveEntry + [COMPARE_OP] = 1, /* _PyAdaptiveEntry */ }; /* Return the oparg for the cache_offset and instruction index. 
@@ -487,6 +491,10 @@ initial_counter_value(void) { #define SPEC_FAIL_BAD_CALL_FLAGS 17 #define SPEC_FAIL_CLASS 18 +/* COMPARE_OP */ +#define SPEC_FAIL_STRING_COMPARE 13 +#define SPEC_FAIL_NOT_FOLLOWED_BY_COND_JUMP 14 +#define SPEC_FAIL_BIG_INT 15 static int specialize_module_load_attr( @@ -1536,3 +1544,74 @@ success: STAT_INC(BINARY_OP, specialization_success); adaptive->counter = initial_counter_value(); } + +static int compare_masks[] = { + // 1-bit: jump if less than + // 2-bit: jump if equal + // 4-bit: jump if greater + [Py_LT] = 1 | 0 | 0, + [Py_LE] = 1 | 2 | 0, + [Py_EQ] = 0 | 2 | 0, + [Py_NE] = 1 | 0 | 4, + [Py_GT] = 0 | 0 | 4, + [Py_GE] = 0 | 2 | 4, +}; + +void +_Py_Specialize_CompareOp(PyObject *lhs, PyObject *rhs, + _Py_CODEUNIT *instr, SpecializedCacheEntry *cache) +{ + _PyAdaptiveEntry *adaptive = &cache->adaptive; + int op = adaptive->original_oparg; + int next_opcode = _Py_OPCODE(instr[1]); + if (next_opcode != POP_JUMP_IF_FALSE && next_opcode != POP_JUMP_IF_TRUE) { + // Can't ever combine, so don't bother being adaptive. 
+ SPECIALIZATION_FAIL(COMPARE_OP, SPEC_FAIL_NOT_FOLLOWED_BY_COND_JUMP); + *instr = _Py_MAKECODEUNIT(COMPARE_OP, adaptive->original_oparg); + goto failure; + } + assert(op <= Py_GE); + int when_to_jump_mask = compare_masks[op]; + if (next_opcode == POP_JUMP_IF_FALSE) { + when_to_jump_mask = (1 | 2 | 4) & ~when_to_jump_mask; + } + if (Py_TYPE(lhs) != Py_TYPE(rhs)) { + SPECIALIZATION_FAIL(COMPARE_OP, SPEC_FAIL_DIFFERENT_TYPES); + goto failure; + } + if (PyFloat_CheckExact(lhs)) { + *instr = _Py_MAKECODEUNIT(COMPARE_OP_FLOAT_JUMP, _Py_OPARG(*instr)); + adaptive->index = when_to_jump_mask; + goto success; + } + if (PyLong_CheckExact(lhs)) { + if (Py_ABS(Py_SIZE(lhs)) <= 1 && Py_ABS(Py_SIZE(rhs)) <= 1) { + *instr = _Py_MAKECODEUNIT(COMPARE_OP_INT_JUMP, _Py_OPARG(*instr)); + adaptive->index = when_to_jump_mask; + goto success; + } + else { + SPECIALIZATION_FAIL(COMPARE_OP, SPEC_FAIL_BIG_INT); + goto failure; + } + } + if (PyUnicode_CheckExact(lhs)) { + if (op != Py_EQ && op != Py_NE) { + SPECIALIZATION_FAIL(COMPARE_OP, SPEC_FAIL_STRING_COMPARE); + goto failure; + } + else { + *instr = _Py_MAKECODEUNIT(COMPARE_OP_STR_JUMP, _Py_OPARG(*instr)); + adaptive->index = (when_to_jump_mask & 2) == 0; + goto success; + } + } + SPECIALIZATION_FAIL(COMPARE_OP, SPEC_FAIL_OTHER); +failure: + STAT_INC(COMPARE_OP, specialization_failure); + cache_backoff(adaptive); + return; +success: + STAT_INC(COMPARE_OP, specialization_success); + adaptive->counter = initial_counter_value(); +} |