From d54b8d8fbd76c05e9006175ab26d737c4b055dfb Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Tue, 7 Feb 2023 08:28:28 -0800 Subject: gh-98831: Modernize the FOR_ITER family of instructions (#101626) Co-authored-by: Irit Katriel <1055913+iritkatriel@users.noreply.github.com> --- Python/bytecodes.c | 83 ++++++++++++++++++++++++++-------------------- Python/generated_cases.c.h | 66 ++++++++++++++++++++++-------------- Python/opcode_metadata.h | 30 ++++++++--------- 3 files changed, 104 insertions(+), 75 deletions(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 0fc0b3b..ec0439a 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -2066,27 +2066,35 @@ dummy_func( PREDICT(LOAD_CONST); } - // stack effect: ( -- __0) - inst(FOR_ITER) { + // Most members of this family are "secretly" super-instructions. + // When the loop is exhausted, they jump, and the jump target is + // always END_FOR, which pops two values off the stack. + // This is optimized by skipping that instruction and combining + // its effect (popping 'iter' instead of pushing 'next'.) + + family(for_iter, INLINE_CACHE_ENTRIES_FOR_ITER) = { + FOR_ITER, + FOR_ITER_LIST, + FOR_ITER_TUPLE, + FOR_ITER_RANGE, + FOR_ITER_GEN, + }; + + inst(FOR_ITER, (unused/1, iter -- iter, next)) { #if ENABLE_SPECIALIZATION _PyForIterCache *cache = (_PyForIterCache *)next_instr; if (ADAPTIVE_COUNTER_IS_ZERO(cache->counter)) { assert(cframe.use_tracing == 0); next_instr--; - _Py_Specialize_ForIter(TOP(), next_instr, oparg); + _Py_Specialize_ForIter(iter, next_instr, oparg); DISPATCH_SAME_OPARG(); } STAT_INC(FOR_ITER, deferred); DECREMENT_ADAPTIVE_COUNTER(cache->counter); #endif /* ENABLE_SPECIALIZATION */ - /* before: [iter]; after: [iter, iter()] *or* [] */ - PyObject *iter = TOP(); - PyObject *next = (*Py_TYPE(iter)->tp_iternext)(iter); - if (next != NULL) { - PUSH(next); - JUMPBY(INLINE_CACHE_ENTRIES_FOR_ITER); - } - else { + /* before: [iter]; after: [iter, iter()] *or* [] (and jump over END_FOR.) */ + next = (*Py_TYPE(iter)->tp_iternext)(iter); + if (next == NULL) { if (_PyErr_Occurred(tstate)) { if (!_PyErr_ExceptionMatches(tstate, PyExc_StopIteration)) { goto error; @@ -2098,63 +2106,66 @@ dummy_func( } /* iterator ended normally */ assert(_Py_OPCODE(next_instr[INLINE_CACHE_ENTRIES_FOR_ITER + oparg]) == END_FOR); - STACK_SHRINK(1); Py_DECREF(iter); - /* Skip END_FOR */ + STACK_SHRINK(1); + /* Jump forward oparg, then skip following END_FOR instruction */ JUMPBY(INLINE_CACHE_ENTRIES_FOR_ITER + oparg + 1); + DISPATCH(); } + // Common case: no jump, leave it to the code generator } - // stack effect: ( -- __0) - inst(FOR_ITER_LIST) { + inst(FOR_ITER_LIST, (unused/1, iter -- iter, next)) { assert(cframe.use_tracing == 0); - _PyListIterObject *it = (_PyListIterObject *)TOP(); - DEOPT_IF(Py_TYPE(it) != &PyListIter_Type, FOR_ITER); + DEOPT_IF(Py_TYPE(iter) != &PyListIter_Type, FOR_ITER); + _PyListIterObject *it = (_PyListIterObject *)iter; STAT_INC(FOR_ITER, hit); PyListObject *seq = it->it_seq; if (seq) { if (it->it_index < PyList_GET_SIZE(seq)) { - PyObject *next = PyList_GET_ITEM(seq, it->it_index++); - PUSH(Py_NewRef(next)); - JUMPBY(INLINE_CACHE_ENTRIES_FOR_ITER); + next = Py_NewRef(PyList_GET_ITEM(seq, it->it_index++)); goto end_for_iter_list; // End of this instruction } it->it_seq = NULL; Py_DECREF(seq); } + Py_DECREF(iter); STACK_SHRINK(1); - Py_DECREF(it); + /* Jump forward oparg, then skip following END_FOR instruction */ JUMPBY(INLINE_CACHE_ENTRIES_FOR_ITER + oparg + 1); + DISPATCH(); end_for_iter_list: + // Common case: no jump, leave it to the code generator } - // stack effect: ( -- __0) - inst(FOR_ITER_TUPLE) { + inst(FOR_ITER_TUPLE, (unused/1, iter -- iter, next)) { assert(cframe.use_tracing == 0); - _PyTupleIterObject *it = (_PyTupleIterObject *)TOP(); + _PyTupleIterObject *it = (_PyTupleIterObject *)iter; DEOPT_IF(Py_TYPE(it) != &PyTupleIter_Type, FOR_ITER); STAT_INC(FOR_ITER, hit); PyTupleObject *seq = it->it_seq; if (seq) { if (it->it_index < PyTuple_GET_SIZE(seq)) { - PyObject *next = PyTuple_GET_ITEM(seq, it->it_index++); - PUSH(Py_NewRef(next)); - JUMPBY(INLINE_CACHE_ENTRIES_FOR_ITER); + next = Py_NewRef(PyTuple_GET_ITEM(seq, it->it_index++)); goto end_for_iter_tuple; // End of this instruction } it->it_seq = NULL; Py_DECREF(seq); } + Py_DECREF(iter); STACK_SHRINK(1); - Py_DECREF(it); + /* Jump forward oparg, then skip following END_FOR instruction */ JUMPBY(INLINE_CACHE_ENTRIES_FOR_ITER + oparg + 1); + DISPATCH(); end_for_iter_tuple: + // Common case: no jump, leave it to the code generator } - // stack effect: ( -- __0) - inst(FOR_ITER_RANGE) { + // This is slightly different, when the loop isn't terminated we + // jump over the immediately following STORE_FAST instruction. + inst(FOR_ITER_RANGE, (unused/1, iter -- iter, unused)) { assert(cframe.use_tracing == 0); - _PyRangeIterObject *r = (_PyRangeIterObject *)TOP(); + _PyRangeIterObject *r = (_PyRangeIterObject *)iter; DEOPT_IF(Py_TYPE(r) != &PyRangeIter_Type, FOR_ITER); STAT_INC(FOR_ITER, hit); _Py_CODEUNIT next = next_instr[INLINE_CACHE_ENTRIES_FOR_ITER]; @@ -2162,6 +2173,7 @@ dummy_func( if (r->len <= 0) { STACK_SHRINK(1); Py_DECREF(r); + // Jump over END_FOR instruction. JUMPBY(INLINE_CACHE_ENTRIES_FOR_ITER + oparg + 1); } else { @@ -2174,11 +2186,13 @@ dummy_func( // The STORE_FAST is already done. JUMPBY(INLINE_CACHE_ENTRIES_FOR_ITER + 1); } + DISPATCH(); } - inst(FOR_ITER_GEN) { + // This is *not* a super-instruction, unique in the family. + inst(FOR_ITER_GEN, (unused/1, iter -- iter, unused)) { assert(cframe.use_tracing == 0); - PyGenObject *gen = (PyGenObject *)TOP(); + PyGenObject *gen = (PyGenObject *)iter; DEOPT_IF(Py_TYPE(gen) != &PyGen_Type, FOR_ITER); DEOPT_IF(gen->gi_frame_state >= FRAME_EXECUTING, FOR_ITER); STAT_INC(FOR_ITER, hit); @@ -3168,9 +3182,6 @@ family(call, INLINE_CACHE_ENTRIES_CALL) = { CALL_NO_KW_LIST_APPEND, CALL_NO_KW_METHOD_DESCRIPTOR_FAST, CALL_NO_KW_METHOD_DESCRIPTOR_NOARGS, CALL_NO_KW_METHOD_DESCRIPTOR_O, CALL_NO_KW_STR_1, CALL_NO_KW_TUPLE_1, CALL_NO_KW_TYPE_1 }; -family(for_iter, INLINE_CACHE_ENTRIES_FOR_ITER) = { - FOR_ITER, FOR_ITER_LIST, - FOR_ITER_RANGE }; family(store_fast) = { STORE_FAST, STORE_FAST__LOAD_FAST, STORE_FAST__STORE_FAST }; family(unpack_sequence, INLINE_CACHE_ENTRIES_UNPACK_SEQUENCE) = { UNPACK_SEQUENCE, UNPACK_SEQUENCE_LIST, diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index f0f314a..4e511f4 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -2619,25 +2619,23 @@ TARGET(FOR_ITER) { PREDICTED(FOR_ITER); + static_assert(INLINE_CACHE_ENTRIES_FOR_ITER == 1, "incorrect cache size"); + PyObject *iter = PEEK(1); + PyObject *next; #if ENABLE_SPECIALIZATION _PyForIterCache *cache = (_PyForIterCache *)next_instr; if (ADAPTIVE_COUNTER_IS_ZERO(cache->counter)) { assert(cframe.use_tracing == 0); next_instr--; - _Py_Specialize_ForIter(TOP(), next_instr, oparg); + _Py_Specialize_ForIter(iter, next_instr, oparg); DISPATCH_SAME_OPARG(); } STAT_INC(FOR_ITER, deferred); DECREMENT_ADAPTIVE_COUNTER(cache->counter); #endif /* ENABLE_SPECIALIZATION */ - /* before: [iter]; after: [iter, iter()] *or* [] */ - PyObject *iter = TOP(); - PyObject *next = (*Py_TYPE(iter)->tp_iternext)(iter); - if (next != NULL) { - PUSH(next); - JUMPBY(INLINE_CACHE_ENTRIES_FOR_ITER); - } - else { + /* before: [iter]; after: [iter, iter()] *or* [] (and jump over END_FOR.) */ + next = (*Py_TYPE(iter)->tp_iternext)(iter); + if (next == NULL) { if (_PyErr_Occurred(tstate)) { if (!_PyErr_ExceptionMatches(tstate, PyExc_StopIteration)) { goto error; @@ -2649,63 +2647,81 @@ } /* iterator ended normally */ assert(_Py_OPCODE(next_instr[INLINE_CACHE_ENTRIES_FOR_ITER + oparg]) == END_FOR); - STACK_SHRINK(1); Py_DECREF(iter); - /* Skip END_FOR */ + STACK_SHRINK(1); + /* Jump forward oparg, then skip following END_FOR instruction */ JUMPBY(INLINE_CACHE_ENTRIES_FOR_ITER + oparg + 1); + DISPATCH(); } + // Common case: no jump, leave it to the code generator + STACK_GROW(1); + POKE(1, next); + JUMPBY(1); DISPATCH(); } TARGET(FOR_ITER_LIST) { + PyObject *iter = PEEK(1); + PyObject *next; assert(cframe.use_tracing == 0); - _PyListIterObject *it = (_PyListIterObject *)TOP(); - DEOPT_IF(Py_TYPE(it) != &PyListIter_Type, FOR_ITER); + DEOPT_IF(Py_TYPE(iter) != &PyListIter_Type, FOR_ITER); + _PyListIterObject *it = (_PyListIterObject *)iter; STAT_INC(FOR_ITER, hit); PyListObject *seq = it->it_seq; if (seq) { if (it->it_index < PyList_GET_SIZE(seq)) { - PyObject *next = PyList_GET_ITEM(seq, it->it_index++); - PUSH(Py_NewRef(next)); - JUMPBY(INLINE_CACHE_ENTRIES_FOR_ITER); + next = Py_NewRef(PyList_GET_ITEM(seq, it->it_index++)); goto end_for_iter_list; // End of this instruction } it->it_seq = NULL; Py_DECREF(seq); } + Py_DECREF(iter); STACK_SHRINK(1); - Py_DECREF(it); + /* Jump forward oparg, then skip following END_FOR instruction */ JUMPBY(INLINE_CACHE_ENTRIES_FOR_ITER + oparg + 1); + DISPATCH(); end_for_iter_list: + // Common case: no jump, leave it to the code generator + STACK_GROW(1); + POKE(1, next); + JUMPBY(1); DISPATCH(); } TARGET(FOR_ITER_TUPLE) { + PyObject *iter = PEEK(1); + PyObject *next; assert(cframe.use_tracing == 0); - _PyTupleIterObject *it = (_PyTupleIterObject *)TOP(); + _PyTupleIterObject *it = (_PyTupleIterObject *)iter; DEOPT_IF(Py_TYPE(it) != &PyTupleIter_Type, FOR_ITER); STAT_INC(FOR_ITER, hit); PyTupleObject *seq = it->it_seq; if (seq) { if (it->it_index < PyTuple_GET_SIZE(seq)) { - PyObject *next = PyTuple_GET_ITEM(seq, it->it_index++); - PUSH(Py_NewRef(next)); - JUMPBY(INLINE_CACHE_ENTRIES_FOR_ITER); + next = Py_NewRef(PyTuple_GET_ITEM(seq, it->it_index++)); goto end_for_iter_tuple; // End of this instruction } it->it_seq = NULL; Py_DECREF(seq); } + Py_DECREF(iter); STACK_SHRINK(1); - Py_DECREF(it); + /* Jump forward oparg, then skip following END_FOR instruction */ JUMPBY(INLINE_CACHE_ENTRIES_FOR_ITER + oparg + 1); + DISPATCH(); end_for_iter_tuple: + // Common case: no jump, leave it to the code generator + STACK_GROW(1); + POKE(1, next); + JUMPBY(1); DISPATCH(); } TARGET(FOR_ITER_RANGE) { + PyObject *iter = PEEK(1); assert(cframe.use_tracing == 0); - _PyRangeIterObject *r = (_PyRangeIterObject *)TOP(); + _PyRangeIterObject *r = (_PyRangeIterObject *)iter; DEOPT_IF(Py_TYPE(r) != &PyRangeIter_Type, FOR_ITER); STAT_INC(FOR_ITER, hit); _Py_CODEUNIT next = next_instr[INLINE_CACHE_ENTRIES_FOR_ITER]; @@ -2713,6 +2729,7 @@ if (r->len <= 0) { STACK_SHRINK(1); Py_DECREF(r); + // Jump over END_FOR instruction. JUMPBY(INLINE_CACHE_ENTRIES_FOR_ITER + oparg + 1); } else { @@ -2729,8 +2746,9 @@ } TARGET(FOR_ITER_GEN) { + PyObject *iter = PEEK(1); assert(cframe.use_tracing == 0); - PyGenObject *gen = (PyGenObject *)TOP(); + PyGenObject *gen = (PyGenObject *)iter; DEOPT_IF(Py_TYPE(gen) != &PyGen_Type, FOR_ITER); DEOPT_IF(gen->gi_frame_state >= FRAME_EXECUTING, FOR_ITER); STAT_INC(FOR_ITER, hit); diff --git a/Python/opcode_metadata.h b/Python/opcode_metadata.h index 948d175..ed26ff0 100644 --- a/Python/opcode_metadata.h +++ b/Python/opcode_metadata.h @@ -261,15 +261,15 @@ _PyOpcode_num_popped(int opcode, int oparg, bool jump) { case GET_YIELD_FROM_ITER: return 1; case FOR_ITER: - return -1; + return 1; case FOR_ITER_LIST: - return -1; + return 1; case FOR_ITER_TUPLE: - return -1; + return 1; case FOR_ITER_RANGE: - return -1; + return 1; case FOR_ITER_GEN: - return -1; + return 1; case BEFORE_ASYNC_WITH: return 1; case BEFORE_WITH: @@ -607,15 +607,15 @@ _PyOpcode_num_pushed(int opcode, int oparg, bool jump) { case GET_YIELD_FROM_ITER: return 1; case FOR_ITER: - return -1; + return 2; case FOR_ITER_LIST: - return -1; + return 2; case FOR_ITER_TUPLE: - return -1; + return 2; case FOR_ITER_RANGE: - return -1; + return 2; case FOR_ITER_GEN: - return -1; + return 2; case BEFORE_ASYNC_WITH: return 2; case BEFORE_WITH: @@ -829,11 +829,11 @@ struct opcode_metadata { [MATCH_KEYS] = { DIR_NONE, DIR_NONE, DIR_NONE, true, INSTR_FMT_IX }, [GET_ITER] = { DIR_NONE, DIR_NONE, DIR_NONE, true, INSTR_FMT_IX }, [GET_YIELD_FROM_ITER] = { DIR_NONE, DIR_NONE, DIR_NONE, true, INSTR_FMT_IX }, - [FOR_ITER] = { DIR_NONE, DIR_NONE, DIR_NONE, true, INSTR_FMT_IB }, - [FOR_ITER_LIST] = { DIR_NONE, DIR_NONE, DIR_NONE, true, INSTR_FMT_IB }, - [FOR_ITER_TUPLE] = { DIR_NONE, DIR_NONE, DIR_NONE, true, INSTR_FMT_IB }, - [FOR_ITER_RANGE] = { DIR_NONE, DIR_NONE, DIR_NONE, true, INSTR_FMT_IB }, - [FOR_ITER_GEN] = { DIR_NONE, DIR_NONE, DIR_NONE, true, INSTR_FMT_IB }, + [FOR_ITER] = { DIR_NONE, DIR_NONE, DIR_NONE, true, INSTR_FMT_IBC }, + [FOR_ITER_LIST] = { DIR_NONE, DIR_NONE, DIR_NONE, true, INSTR_FMT_IBC }, + [FOR_ITER_TUPLE] = { DIR_NONE, DIR_NONE, DIR_NONE, true, INSTR_FMT_IBC }, + [FOR_ITER_RANGE] = { DIR_NONE, DIR_NONE, DIR_NONE, true, INSTR_FMT_IBC }, + [FOR_ITER_GEN] = { DIR_NONE, DIR_NONE, DIR_NONE, true, INSTR_FMT_IBC }, [BEFORE_ASYNC_WITH] = { DIR_NONE, DIR_NONE, DIR_NONE, true, INSTR_FMT_IX }, [BEFORE_WITH] = { DIR_NONE, DIR_NONE, DIR_NONE, true, INSTR_FMT_IX }, [WITH_EXCEPT_START] = { DIR_NONE, DIR_NONE, DIR_NONE, true, INSTR_FMT_IX }, -- cgit v0.12