diff options
author | Guido van Rossum <guido@python.org> | 2023-08-17 18:29:58 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-08-17 18:29:58 (GMT) |
commit | 61c7249759ce88465ea655d5c19d17d03ff3f74b (patch) | |
tree | d6dd9d45ecbfdb2436ca462517982b95491179af /Python | |
parent | 292a22bdc22f2aa70c96e9e53ca6d6b0c5f8d5bf (diff) | |
download | cpython-61c7249759ce88465ea655d5c19d17d03ff3f74b.zip cpython-61c7249759ce88465ea655d5c19d17d03ff3f74b.tar.gz cpython-61c7249759ce88465ea655d5c19d17d03ff3f74b.tar.bz2 |
gh-106581: Project through calls (#108067)
This finishes the work begun in gh-107760. While projecting a superblock, when we encounter a call to a short, simple function, the superblock now enters the function using `_PUSH_FRAME`, continues through it, leaves it using `_POP_FRAME`, and then continues through the calling code. Multiple frame pushes and pops are possible within a single superblock. It is also possible to stop appending to the superblock in the middle of a called function, when running out of space or encountering an unsupported bytecode.
Diffstat (limited to 'Python')
-rw-r--r-- | Python/abstract_interp_cases.c.h | 9 | ||||
-rw-r--r-- | Python/bytecodes.c | 56 | ||||
-rw-r--r-- | Python/ceval.c | 14 | ||||
-rw-r--r-- | Python/ceval_macros.h | 4 | ||||
-rw-r--r-- | Python/executor_cases.c.h | 46 | ||||
-rw-r--r-- | Python/generated_cases.c.h | 109 | ||||
-rw-r--r-- | Python/optimizer.c | 90 |
7 files changed, 264 insertions, 64 deletions
diff --git a/Python/abstract_interp_cases.c.h b/Python/abstract_interp_cases.c.h index eef0711..1b99b92 100644 --- a/Python/abstract_interp_cases.c.h +++ b/Python/abstract_interp_cases.c.h @@ -7,6 +7,10 @@ break; } + case RESUME: { + break; + } + case POP_TOP: { STACK_SHRINK(1); break; @@ -191,6 +195,11 @@ break; } + case _POP_FRAME: { + STACK_SHRINK(1); + break; + } + case GET_AITER: { PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); break; diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 6f17472..ae459ca 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -133,6 +133,7 @@ dummy_func( } inst(RESUME, (--)) { + #if TIER_ONE assert(frame == tstate->current_frame); /* Possibly combine this with eval breaker */ if (_PyFrame_GetCode(frame)->_co_instrumentation_version != tstate->interp->monitoring_version) { @@ -140,7 +141,9 @@ dummy_func( ERROR_IF(err, error); next_instr--; } - else if (oparg < 2) { + else + #endif + if (oparg < 2) { CHECK_EVAL_BREAKER(); } } @@ -757,21 +760,37 @@ dummy_func( return retval; } - inst(RETURN_VALUE, (retval --)) { - STACK_SHRINK(1); + // The stack effect here is ambiguous. + // We definitely pop the return value off the stack on entry. + // We also push it onto the stack on exit, but that's a + // different frame, and it's accounted for by _PUSH_FRAME. 
+ op(_POP_FRAME, (retval --)) { assert(EMPTY()); _PyFrame_SetStackPointer(frame, stack_pointer); _Py_LeaveRecursiveCallPy(tstate); - assert(frame != &entry_frame); // GH-99729: We need to unlink the frame *before* clearing it: _PyInterpreterFrame *dying = frame; + #if TIER_ONE + assert(frame != &entry_frame); + #endif frame = tstate->current_frame = dying->previous; - _PyEvalFrameClearAndPop(tstate, dying); + _PyEval_FrameClearAndPop(tstate, dying); frame->prev_instr += frame->return_offset; _PyFrame_StackPush(frame, retval); + #if TIER_ONE goto resume_frame; + #endif + #if TIER_TWO + stack_pointer = _PyFrame_GetStackPointer(frame); + ip_offset = (_Py_CODEUNIT *)_PyFrame_GetCode(frame)->co_code_adaptive; + #endif } + macro(RETURN_VALUE) = + SAVE_IP + // Tier 2 only; special-cased oparg + SAVE_CURRENT_IP + // Sets frame->prev_instr + _POP_FRAME; + inst(INSTRUMENTED_RETURN_VALUE, (retval --)) { int err = _Py_call_instrumentation_arg( tstate, PY_MONITORING_EVENT_PY_RETURN, @@ -785,27 +804,17 @@ dummy_func( // GH-99729: We need to unlink the frame *before* clearing it: _PyInterpreterFrame *dying = frame; frame = tstate->current_frame = dying->previous; - _PyEvalFrameClearAndPop(tstate, dying); + _PyEval_FrameClearAndPop(tstate, dying); frame->prev_instr += frame->return_offset; _PyFrame_StackPush(frame, retval); goto resume_frame; } - inst(RETURN_CONST, (--)) { - PyObject *retval = GETITEM(FRAME_CO_CONSTS, oparg); - Py_INCREF(retval); - assert(EMPTY()); - _PyFrame_SetStackPointer(frame, stack_pointer); - _Py_LeaveRecursiveCallPy(tstate); - assert(frame != &entry_frame); - // GH-99729: We need to unlink the frame *before* clearing it: - _PyInterpreterFrame *dying = frame; - frame = tstate->current_frame = dying->previous; - _PyEvalFrameClearAndPop(tstate, dying); - frame->prev_instr += frame->return_offset; - _PyFrame_StackPush(frame, retval); - goto resume_frame; - } + macro(RETURN_CONST) = + LOAD_CONST + + SAVE_IP + // Tier 2 only; special-cased oparg + 
SAVE_CURRENT_IP + // Sets frame->prev_instr + _POP_FRAME; inst(INSTRUMENTED_RETURN_CONST, (--)) { PyObject *retval = GETITEM(FRAME_CO_CONSTS, oparg); @@ -821,7 +830,7 @@ dummy_func( // GH-99729: We need to unlink the frame *before* clearing it: _PyInterpreterFrame *dying = frame; frame = tstate->current_frame = dying->previous; - _PyEvalFrameClearAndPop(tstate, dying); + _PyEval_FrameClearAndPop(tstate, dying); frame->prev_instr += frame->return_offset; _PyFrame_StackPush(frame, retval); goto resume_frame; @@ -3545,7 +3554,8 @@ dummy_func( goto error; } - func_obj->func_version = ((PyCodeObject *)codeobj)->co_version; + _PyFunction_SetVersion( + func_obj, ((PyCodeObject *)codeobj)->co_version); func = (PyObject *)func_obj; } diff --git a/Python/ceval.c b/Python/ceval.c index 1e2262c..329a1a1 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -222,8 +222,6 @@ _PyEvalFramePushAndInit(PyThreadState *tstate, PyFunctionObject *func, static _PyInterpreterFrame * _PyEvalFramePushAndInit_Ex(PyThreadState *tstate, PyFunctionObject *func, PyObject *locals, Py_ssize_t nargs, PyObject *callargs, PyObject *kwargs); -static void -_PyEvalFrameClearAndPop(PyThreadState *tstate, _PyInterpreterFrame *frame); #ifdef HAVE_ERRNO_H #include <errno.h> @@ -603,10 +601,6 @@ int _Py_CheckRecursiveCallPy( } -static inline void _Py_LeaveRecursiveCallPy(PyThreadState *tstate) { - tstate->py_recursion_remaining++; -} - static const _Py_CODEUNIT _Py_INTERPRETER_TRAMPOLINE_INSTRUCTIONS[] = { /* Put a NOP at the start, so that the IP points into * the code, rather than before it */ @@ -731,7 +725,7 @@ resume_frame: // When tracing executed uops, also trace bytecode char *uop_debug = Py_GETENV("PYTHONUOPSDEBUG"); if (uop_debug != NULL && *uop_debug >= '0') { - lltrace = (*uop_debug - '0') >= 4; // TODO: Parse an int and all that + lltrace = (*uop_debug - '0') >= 5; // TODO: Parse an int and all that } } } @@ -918,7 +912,7 @@ exit_unwind: // GH-99729: We need to unlink the frame *before* clearing 
it: _PyInterpreterFrame *dying = frame; frame = tstate->current_frame = dying->previous; - _PyEvalFrameClearAndPop(tstate, dying); + _PyEval_FrameClearAndPop(tstate, dying); frame->return_offset = 0; if (frame == &entry_frame) { /* Restore previous frame and exit */ @@ -1487,8 +1481,8 @@ clear_gen_frame(PyThreadState *tstate, _PyInterpreterFrame * frame) frame->previous = NULL; } -static void -_PyEvalFrameClearAndPop(PyThreadState *tstate, _PyInterpreterFrame * frame) +void +_PyEval_FrameClearAndPop(PyThreadState *tstate, _PyInterpreterFrame * frame) { if (frame->owner == FRAME_OWNED_BY_THREAD) { clear_thread_frame(tstate, frame); diff --git a/Python/ceval_macros.h b/Python/ceval_macros.h index 08f19cd..635b8e5 100644 --- a/Python/ceval_macros.h +++ b/Python/ceval_macros.h @@ -369,3 +369,7 @@ static inline int _Py_EnterRecursivePy(PyThreadState *tstate) { return (tstate->py_recursion_remaining-- <= 0) && _Py_CheckRecursiveCallPy(tstate); } + +static inline void _Py_LeaveRecursiveCallPy(PyThreadState *tstate) { + tstate->py_recursion_remaining++; +} diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 9fbf026..89a5bbf 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -7,6 +7,23 @@ break; } + case RESUME: { + #if TIER_ONE + assert(frame == tstate->current_frame); + /* Possibly combine this with eval breaker */ + if (_PyFrame_GetCode(frame)->_co_instrumentation_version != tstate->interp->monitoring_version) { + int err = _Py_Instrument(_PyFrame_GetCode(frame), tstate->interp); + if (err) goto error; + next_instr--; + } + else + #endif + if (oparg < 2) { + CHECK_EVAL_BREAKER(); + } + break; + } + case LOAD_FAST_CHECK: { PyObject *value; value = GETLOCAL(oparg); @@ -666,6 +683,32 @@ break; } + case _POP_FRAME: { + PyObject *retval; + retval = stack_pointer[-1]; + STACK_SHRINK(1); + assert(EMPTY()); + _PyFrame_SetStackPointer(frame, stack_pointer); + _Py_LeaveRecursiveCallPy(tstate); + // GH-99729: We need to unlink the frame 
*before* clearing it: + _PyInterpreterFrame *dying = frame; + #if TIER_ONE + assert(frame != &entry_frame); + #endif + frame = tstate->current_frame = dying->previous; + _PyEval_FrameClearAndPop(tstate, dying); + frame->prev_instr += frame->return_offset; + _PyFrame_StackPush(frame, retval); + #if TIER_ONE + goto resume_frame; + #endif + #if TIER_TWO + stack_pointer = _PyFrame_GetStackPointer(frame); + ip_offset = (_Py_CODEUNIT *)_PyFrame_GetCode(frame)->co_code_adaptive; + #endif + break; + } + case GET_AITER: { PyObject *obj; PyObject *iter; @@ -2607,7 +2650,8 @@ goto error; } - func_obj->func_version = ((PyCodeObject *)codeobj)->co_version; + _PyFunction_SetVersion( + func_obj, ((PyCodeObject *)codeobj)->co_version); func = (PyObject *)func_obj; stack_pointer[-1] = func; break; diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 80af8a7..f6322df 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -8,6 +8,7 @@ } TARGET(RESUME) { + #if TIER_ONE assert(frame == tstate->current_frame); /* Possibly combine this with eval breaker */ if (_PyFrame_GetCode(frame)->_co_instrumentation_version != tstate->interp->monitoring_version) { @@ -15,7 +16,9 @@ if (err) goto error; next_instr--; } - else if (oparg < 2) { + else + #endif + if (oparg < 2) { CHECK_EVAL_BREAKER(); } DISPATCH(); @@ -970,20 +973,40 @@ TARGET(RETURN_VALUE) { PyObject *retval; + // SAVE_CURRENT_IP + { + #if TIER_ONE + frame->prev_instr = next_instr - 1; + #endif + #if TIER_TWO + // Relies on a preceding SAVE_IP + frame->prev_instr--; + #endif + } + // _POP_FRAME retval = stack_pointer[-1]; STACK_SHRINK(1); - assert(EMPTY()); - _PyFrame_SetStackPointer(frame, stack_pointer); - _Py_LeaveRecursiveCallPy(tstate); - assert(frame != &entry_frame); - // GH-99729: We need to unlink the frame *before* clearing it: - _PyInterpreterFrame *dying = frame; - frame = tstate->current_frame = dying->previous; - _PyEvalFrameClearAndPop(tstate, dying); - frame->prev_instr += 
frame->return_offset; - _PyFrame_StackPush(frame, retval); - goto resume_frame; - STACK_SHRINK(1); + { + assert(EMPTY()); + _PyFrame_SetStackPointer(frame, stack_pointer); + _Py_LeaveRecursiveCallPy(tstate); + // GH-99729: We need to unlink the frame *before* clearing it: + _PyInterpreterFrame *dying = frame; + #if TIER_ONE + assert(frame != &entry_frame); + #endif + frame = tstate->current_frame = dying->previous; + _PyEval_FrameClearAndPop(tstate, dying); + frame->prev_instr += frame->return_offset; + _PyFrame_StackPush(frame, retval); + #if TIER_ONE + goto resume_frame; + #endif + #if TIER_TWO + stack_pointer = _PyFrame_GetStackPointer(frame); + ip_offset = (_Py_CODEUNIT *)_PyFrame_GetCode(frame)->co_code_adaptive; + #endif + } } TARGET(INSTRUMENTED_RETURN_VALUE) { @@ -1001,7 +1024,7 @@ // GH-99729: We need to unlink the frame *before* clearing it: _PyInterpreterFrame *dying = frame; frame = tstate->current_frame = dying->previous; - _PyEvalFrameClearAndPop(tstate, dying); + _PyEval_FrameClearAndPop(tstate, dying); frame->prev_instr += frame->return_offset; _PyFrame_StackPush(frame, retval); goto resume_frame; @@ -1009,19 +1032,46 @@ } TARGET(RETURN_CONST) { - PyObject *retval = GETITEM(FRAME_CO_CONSTS, oparg); - Py_INCREF(retval); - assert(EMPTY()); - _PyFrame_SetStackPointer(frame, stack_pointer); - _Py_LeaveRecursiveCallPy(tstate); - assert(frame != &entry_frame); - // GH-99729: We need to unlink the frame *before* clearing it: - _PyInterpreterFrame *dying = frame; - frame = tstate->current_frame = dying->previous; - _PyEvalFrameClearAndPop(tstate, dying); - frame->prev_instr += frame->return_offset; - _PyFrame_StackPush(frame, retval); - goto resume_frame; + PyObject *value; + PyObject *retval; + // LOAD_CONST + { + value = GETITEM(FRAME_CO_CONSTS, oparg); + Py_INCREF(value); + } + // SAVE_CURRENT_IP + { + #if TIER_ONE + frame->prev_instr = next_instr - 1; + #endif + #if TIER_TWO + // Relies on a preceding SAVE_IP + frame->prev_instr--; + #endif + } + // 
_POP_FRAME + retval = value; + { + assert(EMPTY()); + _PyFrame_SetStackPointer(frame, stack_pointer); + _Py_LeaveRecursiveCallPy(tstate); + // GH-99729: We need to unlink the frame *before* clearing it: + _PyInterpreterFrame *dying = frame; + #if TIER_ONE + assert(frame != &entry_frame); + #endif + frame = tstate->current_frame = dying->previous; + _PyEval_FrameClearAndPop(tstate, dying); + frame->prev_instr += frame->return_offset; + _PyFrame_StackPush(frame, retval); + #if TIER_ONE + goto resume_frame; + #endif + #if TIER_TWO + stack_pointer = _PyFrame_GetStackPointer(frame); + ip_offset = (_Py_CODEUNIT *)_PyFrame_GetCode(frame)->co_code_adaptive; + #endif + } } TARGET(INSTRUMENTED_RETURN_CONST) { @@ -1038,7 +1088,7 @@ // GH-99729: We need to unlink the frame *before* clearing it: _PyInterpreterFrame *dying = frame; frame = tstate->current_frame = dying->previous; - _PyEvalFrameClearAndPop(tstate, dying); + _PyEval_FrameClearAndPop(tstate, dying); frame->prev_instr += frame->return_offset; _PyFrame_StackPush(frame, retval); goto resume_frame; @@ -4575,7 +4625,8 @@ goto error; } - func_obj->func_version = ((PyCodeObject *)codeobj)->co_version; + _PyFunction_SetVersion( + func_obj, ((PyCodeObject *)codeobj)->co_version); func = (PyObject *)func_obj; stack_pointer[-1] = func; DISPATCH(); diff --git a/Python/optimizer.c b/Python/optimizer.c index 559c4ae..5751840 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -373,6 +373,8 @@ static PyTypeObject UOpExecutor_Type = { .tp_as_sequence = &uop_as_sequence, }; +#define TRACE_STACK_SIZE 5 + static int translate_bytecode_to_trace( PyCodeObject *code, @@ -380,10 +382,16 @@ translate_bytecode_to_trace( _PyUOpInstruction *trace, int buffer_size) { + PyCodeObject *initial_code = code; _Py_CODEUNIT *initial_instr = instr; int trace_length = 0; int max_length = buffer_size; int reserved = 0; + struct { + PyCodeObject *code; + _Py_CODEUNIT *instr; + } trace_stack[TRACE_STACK_SIZE]; + int trace_stack_depth = 0; #ifdef 
Py_DEBUG char *uop_debug = Py_GETENV("PYTHONUOPSDEBUG"); @@ -441,6 +449,24 @@ translate_bytecode_to_trace( // Reserve space for main+stub uops, plus 2 for SAVE_IP and EXIT_TRACE #define RESERVE(main, stub) RESERVE_RAW((main) + (stub) + 2, uop_name(opcode)) +// Trace stack operations (used by _PUSH_FRAME, _POP_FRAME) +#define TRACE_STACK_PUSH() \ + if (trace_stack_depth >= TRACE_STACK_SIZE) { \ + DPRINTF(2, "Trace stack overflow\n"); \ + ADD_TO_TRACE(SAVE_IP, 0, 0); \ + goto done; \ + } \ + trace_stack[trace_stack_depth].code = code; \ + trace_stack[trace_stack_depth].instr = instr; \ + trace_stack_depth++; +#define TRACE_STACK_POP() \ + if (trace_stack_depth <= 0) { \ + Py_FatalError("Trace stack underflow\n"); \ + } \ + trace_stack_depth--; \ + code = trace_stack[trace_stack_depth].code; \ + instr = trace_stack[trace_stack_depth].instr; + DPRINTF(4, "Optimizing %s (%s:%d) at byte offset %d\n", PyUnicode_AsUTF8(code->co_qualname), @@ -448,6 +474,7 @@ translate_bytecode_to_trace( code->co_firstlineno, 2 * INSTR_IP(initial_instr, code)); +top: // Jump here after _PUSH_FRAME for (;;) { RESERVE_RAW(2, "epilogue"); // Always need space for SAVE_IP and EXIT_TRACE ADD_TO_TRACE(SAVE_IP, INSTR_IP(instr, code), 0); @@ -508,7 +535,7 @@ pop_jump_if_bool: case JUMP_BACKWARD: { - if (instr + 2 - oparg == initial_instr) { + if (instr + 2 - oparg == initial_instr && code == initial_code) { RESERVE(1, 0); ADD_TO_TRACE(JUMP_TO_TOP, 0, 0); } @@ -573,6 +600,14 @@ pop_jump_if_bool: // Reserve space for nuops (+ SAVE_IP + EXIT_TRACE) int nuops = expansion->nuops; RESERVE(nuops, 0); + if (expansion->uops[nuops-1].uop == _POP_FRAME) { + // Check for trace stack underflow now: + // We can't bail e.g. in the middle of + // LOAD_CONST + _POP_FRAME. 
+ if (trace_stack_depth == 0) { + DPRINTF(2, "Trace stack underflow\n"); + goto done;} + } uint32_t orig_oparg = oparg; // For OPARG_TOP/BOTTOM for (int i = 0; i < nuops; i++) { oparg = orig_oparg; @@ -619,8 +654,57 @@ pop_jump_if_bool: Py_FatalError("garbled expansion"); } ADD_TO_TRACE(expansion->uops[i].uop, oparg, operand); + if (expansion->uops[i].uop == _POP_FRAME) { + TRACE_STACK_POP(); + DPRINTF(2, + "Returning to %s (%s:%d) at byte offset %d\n", + PyUnicode_AsUTF8(code->co_qualname), + PyUnicode_AsUTF8(code->co_filename), + code->co_firstlineno, + 2 * INSTR_IP(instr, code)); + goto top; + } if (expansion->uops[i].uop == _PUSH_FRAME) { assert(i + 1 == nuops); + int func_version_offset = + offsetof(_PyCallCache, func_version)/sizeof(_Py_CODEUNIT) + // Add one to account for the actual opcode/oparg pair: + + 1; + uint32_t func_version = read_u32(&instr[func_version_offset].cache); + PyFunctionObject *func = _PyFunction_LookupByVersion(func_version); + DPRINTF(3, "Function object: %p\n", func); + if (func != NULL) { + PyCodeObject *new_code = (PyCodeObject *)PyFunction_GET_CODE(func); + if (new_code == code) { + // Recursive call, bail (we could be here forever). + DPRINTF(2, "Bailing on recursive call to %s (%s:%d)\n", + PyUnicode_AsUTF8(new_code->co_qualname), + PyUnicode_AsUTF8(new_code->co_filename), + new_code->co_firstlineno); + ADD_TO_TRACE(SAVE_IP, 0, 0); + goto done; + } + if (new_code->co_version != func_version) { + // func.__code__ was updated. + // Perhaps it may happen again, so don't bother tracing. + // TODO: Reason about this -- is it better to bail or not? 
+ DPRINTF(2, "Bailing because co_version != func_version\n"); + ADD_TO_TRACE(SAVE_IP, 0, 0); + goto done; + } + // Increment IP to the return address + instr += _PyOpcode_Caches[_PyOpcode_Deopt[opcode]] + 1; + TRACE_STACK_PUSH(); + code = new_code; + instr = _PyCode_CODE(code); + DPRINTF(2, + "Continuing in %s (%s:%d) at byte offset %d\n", + PyUnicode_AsUTF8(code->co_qualname), + PyUnicode_AsUTF8(code->co_filename), + code->co_firstlineno, + 2 * INSTR_IP(instr, code)); + goto top; + } ADD_TO_TRACE(SAVE_IP, 0, 0); goto done; } @@ -639,6 +723,10 @@ pop_jump_if_bool: } // End for (;;) done: + while (trace_stack_depth > 0) { + TRACE_STACK_POP(); + } + assert(code == initial_code); // Skip short traces like SAVE_IP, LOAD_FAST, SAVE_IP, EXIT_TRACE if (trace_length > 3) { ADD_TO_TRACE(EXIT_TRACE, 0, 0); |