diff options
Diffstat (limited to 'Python')
-rw-r--r-- | Python/bytecodes.c | 60 | ||||
-rw-r--r-- | Python/ceval.c | 4 | ||||
-rw-r--r-- | Python/ceval_macros.h | 1 | ||||
-rw-r--r-- | Python/executor_cases.c.h | 61 | ||||
-rw-r--r-- | Python/generated_cases.c.h | 57 | ||||
-rw-r--r-- | Python/optimizer.c | 19 | ||||
-rw-r--r-- | Python/optimizer_cases.c.h | 13 |
7 files changed, 181 insertions, 34 deletions
diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 4855049..fe3d613 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -1109,6 +1109,10 @@ dummy_func( _PyFrame_StackPush(frame, retval); /* We don't know which of these is relevant here, so keep them equal */ assert(INLINE_CACHE_ENTRIES_SEND == INLINE_CACHE_ENTRIES_FOR_ITER); + assert(_PyOpcode_Deopt[frame->instr_ptr->op.code] == SEND || + _PyOpcode_Deopt[frame->instr_ptr->op.code] == FOR_ITER || + _PyOpcode_Deopt[frame->instr_ptr->op.code] == INTERPRETER_EXIT || + _PyOpcode_Deopt[frame->instr_ptr->op.code] == ENTER_EXECUTOR); LOAD_IP(1 + INLINE_CACHE_ENTRIES_SEND); goto resume_frame; } @@ -2759,24 +2763,26 @@ dummy_func( _ITER_JUMP_RANGE + _ITER_NEXT_RANGE; - inst(FOR_ITER_GEN, (unused/1, iter -- iter, unused)) { - DEOPT_IF(tstate->interp->eval_frame); + op(_FOR_ITER_GEN_FRAME, (iter -- iter, gen_frame: _PyInterpreterFrame*)) { PyGenObject *gen = (PyGenObject *)iter; DEOPT_IF(Py_TYPE(gen) != &PyGen_Type); DEOPT_IF(gen->gi_frame_state >= FRAME_EXECUTING); STAT_INC(FOR_ITER, hit); - _PyInterpreterFrame *gen_frame = (_PyInterpreterFrame *)gen->gi_iframe; + gen_frame = (_PyInterpreterFrame *)gen->gi_iframe; _PyFrame_StackPush(gen_frame, Py_None); gen->gi_frame_state = FRAME_EXECUTING; gen->gi_exc_state.previous_item = tstate->exc_info; tstate->exc_info = &gen->gi_exc_state; - assert(next_instr[oparg].op.code == END_FOR || - next_instr[oparg].op.code == INSTRUMENTED_END_FOR); - assert(next_instr - this_instr + oparg <= UINT16_MAX); - frame->return_offset = (uint16_t)(next_instr - this_instr + oparg); - DISPATCH_INLINED(gen_frame); + // oparg is the return offset from the next instruction. + frame->return_offset = (uint16_t)(1 + INLINE_CACHE_ENTRIES_FOR_ITER + oparg); } + macro(FOR_ITER_GEN) = + unused/1 + + _CHECK_PEP_523 + + _FOR_ITER_GEN_FRAME + + _PUSH_FRAME; + inst(BEFORE_ASYNC_WITH, (mgr -- exit, res)) { PyObject *enter = _PyObject_LookupSpecial(mgr, &_Py_ID(__aenter__)); if (enter == NULL) { @@ -3166,10 +3172,7 @@ dummy_func( } } - // The 'unused' output effect represents the return value - // (which will be pushed when the frame returns). - // It is needed so CALL_PY_EXACT_ARGS matches its family. - op(_PUSH_FRAME, (new_frame: _PyInterpreterFrame* -- unused if (0))) { + op(_PUSH_FRAME, (new_frame: _PyInterpreterFrame* -- )) { // Write it out explicitly because it's subtly different. // Eventually this should be the only occurrence of this code. assert(tstate->interp->eval_frame == NULL); @@ -4189,6 +4192,38 @@ dummy_func( GOTO_TIER_TWO(executor); } + tier2 op(_DYNAMIC_EXIT, (--)) { + tstate->previous_executor = (PyObject *)current_executor; + _PyExitData *exit = (_PyExitData *)¤t_executor->exits[oparg]; + _Py_CODEUNIT *target = frame->instr_ptr; + _PyExecutorObject *executor; + if (target->op.code == ENTER_EXECUTOR) { + PyCodeObject *code = (PyCodeObject *)frame->f_executable; + executor = code->co_executors->executors[target->op.arg]; + Py_INCREF(executor); + } + else { + if (!backoff_counter_triggers(exit->temperature)) { + exit->temperature = advance_backoff_counter(exit->temperature); + GOTO_TIER_ONE(target); + } + int optimized = _PyOptimizer_Optimize(frame, target, stack_pointer, &executor); + if (optimized <= 0) { + exit->temperature = restart_backoff_counter(exit->temperature); + if (optimized < 0) { + Py_DECREF(current_executor); + tstate->previous_executor = Py_None; + GOTO_UNWIND(); + } + GOTO_TIER_ONE(target); + } + else { + exit->temperature = initial_temperature_backoff_counter(); + } + } + GOTO_TIER_TWO(executor); + } + tier2 op(_START_EXECUTOR, (executor/4 --)) { Py_DECREF(tstate->previous_executor); tstate->previous_executor = NULL; @@ -4222,6 +4257,7 @@ dummy_func( GOTO_UNWIND(); } + // END BYTECODES // } diff --git a/Python/ceval.c b/Python/ceval.c index 2f217c5..d130c73 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -1072,9 +1072,13 @@ jump_to_jump_target: next_uop = current_executor->trace + target; goto tier2_dispatch; +exit_to_tier1_dynamic: + next_instr = frame->instr_ptr; + goto goto_to_tier1; exit_to_tier1: assert(next_uop[-1].format == UOP_FORMAT_TARGET); next_instr = next_uop[-1].target + _PyCode_CODE(_PyFrame_GetCode(frame)); +goto_to_tier1: #ifdef Py_DEBUG if (lltrace >= 2) { printf("DEOPT: [UOp "); diff --git a/Python/ceval_macros.h b/Python/ceval_macros.h index 871d174..1a8554a 100644 --- a/Python/ceval_macros.h +++ b/Python/ceval_macros.h @@ -442,3 +442,4 @@ do { \ #define GOTO_UNWIND() goto error_tier_two #define EXIT_TO_TRACE() goto exit_to_trace #define EXIT_TO_TIER1() goto exit_to_tier1 +#define EXIT_TO_TIER1_DYNAMIC() goto exit_to_tier1_dynamic; diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 1eb3da9..280cca1 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -2769,7 +2769,32 @@ break; } - /* _FOR_ITER_GEN is not a viable micro-op for tier 2 because it uses the 'this_instr' variable */ + case _FOR_ITER_GEN_FRAME: { + PyObject *iter; + _PyInterpreterFrame *gen_frame; + oparg = CURRENT_OPARG(); + iter = stack_pointer[-1]; + PyGenObject *gen = (PyGenObject *)iter; + if (Py_TYPE(gen) != &PyGen_Type) { + UOP_STAT_INC(uopcode, miss); + JUMP_TO_JUMP_TARGET(); + } + if (gen->gi_frame_state >= FRAME_EXECUTING) { + UOP_STAT_INC(uopcode, miss); + JUMP_TO_JUMP_TARGET(); + } + STAT_INC(FOR_ITER, hit); + gen_frame = (_PyInterpreterFrame *)gen->gi_iframe; + _PyFrame_StackPush(gen_frame, Py_None); + gen->gi_frame_state = FRAME_EXECUTING; + gen->gi_exc_state.previous_item = tstate->exc_info; + tstate->exc_info = &gen->gi_exc_state; + // oparg is the return offset from the next instruction. + frame->return_offset = (uint16_t)(1 + INLINE_CACHE_ENTRIES_FOR_ITER + oparg); + stack_pointer[0] = (PyObject *)gen_frame; + stack_pointer += 1; + break; + } /* _BEFORE_ASYNC_WITH is not a viable micro-op for tier 2 because it has both popping and not-popping errors */ @@ -4187,6 +4212,40 @@ break; } + case _DYNAMIC_EXIT: { + oparg = CURRENT_OPARG(); + tstate->previous_executor = (PyObject *)current_executor; + _PyExitData *exit = (_PyExitData *)¤t_executor->exits[oparg]; + _Py_CODEUNIT *target = frame->instr_ptr; + _PyExecutorObject *executor; + if (target->op.code == ENTER_EXECUTOR) { + PyCodeObject *code = (PyCodeObject *)frame->f_executable; + executor = code->co_executors->executors[target->op.arg]; + Py_INCREF(executor); + } + else { + if (!backoff_counter_triggers(exit->temperature)) { + exit->temperature = advance_backoff_counter(exit->temperature); + GOTO_TIER_ONE(target); + } + int optimized = _PyOptimizer_Optimize(frame, target, stack_pointer, &executor); + if (optimized <= 0) { + exit->temperature = restart_backoff_counter(exit->temperature); + if (optimized < 0) { + Py_DECREF(current_executor); + tstate->previous_executor = Py_None; + GOTO_UNWIND(); + } + GOTO_TIER_ONE(target); + } + else { + exit->temperature = initial_temperature_backoff_counter(); + } + } + GOTO_TIER_TWO(executor); + break; + } + case _START_EXECUTOR: { PyObject *executor = (PyObject *)CURRENT_OPERAND(); Py_DECREF(tstate->previous_executor); diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 0c58f3f..c27505f 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -2631,28 +2631,49 @@ } TARGET(FOR_ITER_GEN) { - _Py_CODEUNIT *this_instr = frame->instr_ptr = next_instr; + frame->instr_ptr = next_instr; next_instr += 2; INSTRUCTION_STATS(FOR_ITER_GEN); static_assert(INLINE_CACHE_ENTRIES_FOR_ITER == 1, "incorrect cache size"); PyObject *iter; + _PyInterpreterFrame *gen_frame; + _PyInterpreterFrame *new_frame; /* Skip 1 cache entry */ + // _CHECK_PEP_523 + { + DEOPT_IF(tstate->interp->eval_frame, FOR_ITER); + } + // _FOR_ITER_GEN_FRAME iter = stack_pointer[-1]; - DEOPT_IF(tstate->interp->eval_frame, FOR_ITER); - PyGenObject *gen = (PyGenObject *)iter; - DEOPT_IF(Py_TYPE(gen) != &PyGen_Type, FOR_ITER); - DEOPT_IF(gen->gi_frame_state >= FRAME_EXECUTING, FOR_ITER); - STAT_INC(FOR_ITER, hit); - _PyInterpreterFrame *gen_frame = (_PyInterpreterFrame *)gen->gi_iframe; - _PyFrame_StackPush(gen_frame, Py_None); - gen->gi_frame_state = FRAME_EXECUTING; - gen->gi_exc_state.previous_item = tstate->exc_info; - tstate->exc_info = &gen->gi_exc_state; - assert(next_instr[oparg].op.code == END_FOR || - next_instr[oparg].op.code == INSTRUMENTED_END_FOR); - assert(next_instr - this_instr + oparg <= UINT16_MAX); - frame->return_offset = (uint16_t)(next_instr - this_instr + oparg); - DISPATCH_INLINED(gen_frame); + { + PyGenObject *gen = (PyGenObject *)iter; + DEOPT_IF(Py_TYPE(gen) != &PyGen_Type, FOR_ITER); + DEOPT_IF(gen->gi_frame_state >= FRAME_EXECUTING, FOR_ITER); + STAT_INC(FOR_ITER, hit); + gen_frame = (_PyInterpreterFrame *)gen->gi_iframe; + _PyFrame_StackPush(gen_frame, Py_None); + gen->gi_frame_state = FRAME_EXECUTING; + gen->gi_exc_state.previous_item = tstate->exc_info; + tstate->exc_info = &gen->gi_exc_state; + // oparg is the return offset from the next instruction. + frame->return_offset = (uint16_t)(1 + INLINE_CACHE_ENTRIES_FOR_ITER + oparg); + } + // _PUSH_FRAME + new_frame = gen_frame; + { + // Write it out explicitly because it's subtly different. + // Eventually this should be the only occurrence of this code. + assert(tstate->interp->eval_frame == NULL); + _PyFrame_SetStackPointer(frame, stack_pointer); + new_frame->previous = frame; + CALL_STAT_INC(inlined_py_calls); + frame = tstate->current_frame = new_frame; + tstate->py_recursion_remaining--; + LOAD_SP(); + LOAD_IP(0); + LLTRACE_RESUME_FRAME(); + } + DISPATCH(); } TARGET(FOR_ITER_LIST) { @@ -6011,6 +6032,10 @@ _PyFrame_StackPush(frame, retval); /* We don't know which of these is relevant here, so keep them equal */ assert(INLINE_CACHE_ENTRIES_SEND == INLINE_CACHE_ENTRIES_FOR_ITER); + assert(_PyOpcode_Deopt[frame->instr_ptr->op.code] == SEND || + _PyOpcode_Deopt[frame->instr_ptr->op.code] == FOR_ITER || + _PyOpcode_Deopt[frame->instr_ptr->op.code] == INTERPRETER_EXIT || + _PyOpcode_Deopt[frame->instr_ptr->op.code] == ENTER_EXECUTOR); LOAD_IP(1 + INLINE_CACHE_ENTRIES_SEND); goto resume_frame; } diff --git a/Python/optimizer.c b/Python/optimizer.c index e5c70f7..02c9b39 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -567,8 +567,6 @@ translate_bytecode_to_trace( top: // Jump here after _PUSH_FRAME or likely branches for (;;) { target = INSTR_IP(instr, code); - RESERVE_RAW(2, "_CHECK_VALIDITY_AND_SET_IP"); - ADD_TO_TRACE(_CHECK_VALIDITY_AND_SET_IP, 0, (uintptr_t)instr, target); // Need space for _DEOPT max_length--; @@ -597,6 +595,8 @@ top: // Jump here after _PUSH_FRAME or likely branches } } assert(opcode != ENTER_EXECUTOR && opcode != EXTENDED_ARG); + RESERVE_RAW(2, "_CHECK_VALIDITY_AND_SET_IP"); + ADD_TO_TRACE(_CHECK_VALIDITY_AND_SET_IP, 0, (uintptr_t)instr, target); /* Special case the first instruction, * so that we can guarantee forward progress */ @@ -814,6 +814,12 @@ top: // Jump here after _PUSH_FRAME or likely branches ADD_TO_TRACE(_EXIT_TRACE, 0, 0, 0); goto done; } + if (opcode == FOR_ITER_GEN) { + DPRINTF(2, "Bailing due to dynamic target\n"); + ADD_TO_TRACE(uop, oparg, 0, target); + ADD_TO_TRACE(_DYNAMIC_EXIT, 0, 0, 0); + goto done; + } // Increment IP to the return address instr += _PyOpcode_Caches[_PyOpcode_Deopt[opcode]] + 1; TRACE_STACK_PUSH(); @@ -847,7 +853,7 @@ top: // Jump here after _PUSH_FRAME or likely branches } DPRINTF(2, "Bail, new_code == NULL\n"); ADD_TO_TRACE(uop, oparg, 0, target); - ADD_TO_TRACE(_EXIT_TRACE, 0, 0, 0); + ADD_TO_TRACE(_DYNAMIC_EXIT, 0, 0, 0); goto done; } @@ -917,7 +923,7 @@ count_exits(_PyUOpInstruction *buffer, int length) int exit_count = 0; for (int i = 0; i < length; i++) { int opcode = buffer[i].opcode; - if (opcode == _SIDE_EXIT) { + if (opcode == _SIDE_EXIT || opcode == _DYNAMIC_EXIT) { exit_count++; } } @@ -1114,6 +1120,11 @@ make_executor_from_uops(_PyUOpInstruction *buffer, int length, const _PyBloomFil dest->format = UOP_FORMAT_EXIT; next_exit--; } + if (opcode == _DYNAMIC_EXIT) { + executor->exits[next_exit].target = 0; + dest->oparg = next_exit; + next_exit--; + } } assert(next_exit == -1); assert(dest == executor->trace); diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index 4f0941a..b196568 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -1439,7 +1439,14 @@ break; } - /* _FOR_ITER_GEN is not a viable micro-op for tier 2 */ + case _FOR_ITER_GEN_FRAME: { + _PyInterpreterFrame *gen_frame; + gen_frame = sym_new_not_null(ctx); + if (gen_frame == NULL) goto out_of_space; + stack_pointer[0] = (_Py_UopsSymbol *)gen_frame; + stack_pointer += 1; + break; + } /* _BEFORE_ASYNC_WITH is not a viable micro-op for tier 2 */ @@ -2109,6 +2116,10 @@ break; } + case _DYNAMIC_EXIT: { + break; + } + case _START_EXECUTOR: { break; } |