| author | Guido van Rossum <guido@python.org> | 2023-08-17 18:29:58 (GMT) |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2023-08-17 18:29:58 (GMT) |
| commit | 61c7249759ce88465ea655d5c19d17d03ff3f74b | |
| tree | d6dd9d45ecbfdb2436ca462517982b95491179af /Python/optimizer.c | |
| parent | 292a22bdc22f2aa70c96e9e53ca6d6b0c5f8d5bf | |
gh-106581: Project through calls (#108067)
This finishes the work begun in gh-107760. When, while projecting a superblock, we encounter a call to a short, simple function, the superblock will now enter the function using `_PUSH_FRAME`, continue through it, and leave it using `_POP_FRAME`, and then continue through the original code. Multiple frame pushes and pops are even possible. It is also possible to stop appending to the superblock in the middle of a called function, when running out of space or encountering an unsupported bytecode.
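To make the mechanism concrete, here is a minimal, compilable sketch of the trace-stack bookkeeping the commit message describes. Everything in it (`Frame`, `push_frame`, `pop_frame`, `code_id`, `return_ip`) is a simplified stand-in invented for illustration; the real implementation operates on `PyCodeObject *` and `_Py_CODEUNIT *` and appears in the diff below.

```c
/* Sketch only: simplified stand-ins for the trace stack added in this
 * commit. The real code lives in translate_bytecode_to_trace(). */
#include <stdio.h>

#define TRACE_STACK_SIZE 5  /* same bound the patch uses for projected call depth */

typedef struct {
    int code_id;    /* stands in for PyCodeObject *code */
    int return_ip;  /* stands in for _Py_CODEUNIT *instr (the return address) */
} Frame;

static Frame trace_stack[TRACE_STACK_SIZE];
static int trace_stack_depth = 0;

/* _PUSH_FRAME: remember where projection should resume in the caller.
 * Returns -1 on overflow, mirroring the patch's "stop the trace here" path. */
static int push_frame(int code_id, int return_ip)
{
    if (trace_stack_depth >= TRACE_STACK_SIZE) {
        return -1;
    }
    trace_stack[trace_stack_depth].code_id = code_id;
    trace_stack[trace_stack_depth].return_ip = return_ip;
    trace_stack_depth++;
    return 0;
}

/* _POP_FRAME: restore the caller's position so projection continues there. */
static int pop_frame(int *code_id, int *return_ip)
{
    if (trace_stack_depth <= 0) {
        return -1;  /* underflow: the caller was never part of this trace */
    }
    trace_stack_depth--;
    *code_id = trace_stack[trace_stack_depth].code_id;
    *return_ip = trace_stack[trace_stack_depth].return_ip;
    return 0;
}

int main(void)
{
    /* Caller (code 1) calls a short function (code 2); the return address
     * is ip 12. Projection enters code 2, then pops back out. */
    push_frame(1, 12);
    int code_id, ip;
    if (pop_frame(&code_id, &ip) == 0) {
        printf("resume code %d at ip %d\n", code_id, ip);  /* -> code 1, ip 12 */
    }
    return 0;
}
```

Note the asymmetry the patch builds in: overflowing the stack simply ends the superblock early (everything traced so far is still valid), while reaching `_POP_FRAME` with an empty stack means tracing began inside the callee, so the translator also just stops rather than guessing at a caller.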
Diffstat (limited to 'Python/optimizer.c')
-rw-r--r-- | Python/optimizer.c | 90
1 file changed, 89 insertions(+), 1 deletion(-)
```diff
diff --git a/Python/optimizer.c b/Python/optimizer.c
index 559c4ae..5751840 100644
--- a/Python/optimizer.c
+++ b/Python/optimizer.c
@@ -373,6 +373,8 @@ static PyTypeObject UOpExecutor_Type = {
     .tp_as_sequence = &uop_as_sequence,
 };
 
+#define TRACE_STACK_SIZE 5
+
 static int
 translate_bytecode_to_trace(
     PyCodeObject *code,
@@ -380,10 +382,16 @@ translate_bytecode_to_trace(
     _PyUOpInstruction *trace,
     int buffer_size)
 {
+    PyCodeObject *initial_code = code;
     _Py_CODEUNIT *initial_instr = instr;
     int trace_length = 0;
     int max_length = buffer_size;
     int reserved = 0;
+    struct {
+        PyCodeObject *code;
+        _Py_CODEUNIT *instr;
+    } trace_stack[TRACE_STACK_SIZE];
+    int trace_stack_depth = 0;
 
 #ifdef Py_DEBUG
     char *uop_debug = Py_GETENV("PYTHONUOPSDEBUG");
@@ -441,6 +449,24 @@ translate_bytecode_to_trace(
 // Reserve space for main+stub uops, plus 2 for SAVE_IP and EXIT_TRACE
 #define RESERVE(main, stub) RESERVE_RAW((main) + (stub) + 2, uop_name(opcode))
 
+// Trace stack operations (used by _PUSH_FRAME, _POP_FRAME)
+#define TRACE_STACK_PUSH() \
+    if (trace_stack_depth >= TRACE_STACK_SIZE) { \
+        DPRINTF(2, "Trace stack overflow\n"); \
+        ADD_TO_TRACE(SAVE_IP, 0, 0); \
+        goto done; \
+    } \
+    trace_stack[trace_stack_depth].code = code; \
+    trace_stack[trace_stack_depth].instr = instr; \
+    trace_stack_depth++;
+#define TRACE_STACK_POP() \
+    if (trace_stack_depth <= 0) { \
+        Py_FatalError("Trace stack underflow\n"); \
+    } \
+    trace_stack_depth--; \
+    code = trace_stack[trace_stack_depth].code; \
+    instr = trace_stack[trace_stack_depth].instr;
+
     DPRINTF(4,
             "Optimizing %s (%s:%d) at byte offset %d\n",
             PyUnicode_AsUTF8(code->co_qualname),
@@ -448,6 +474,7 @@ translate_bytecode_to_trace(
             code->co_firstlineno,
             2 * INSTR_IP(initial_instr, code));
 
+top:  // Jump here after _PUSH_FRAME
     for (;;) {
         RESERVE_RAW(2, "epilogue");  // Always need space for SAVE_IP and EXIT_TRACE
         ADD_TO_TRACE(SAVE_IP, INSTR_IP(instr, code), 0);
@@ -508,7 +535,7 @@ pop_jump_if_bool:
 
             case JUMP_BACKWARD:
             {
-                if (instr + 2 - oparg == initial_instr) {
+                if (instr + 2 - oparg == initial_instr && code == initial_code) {
                     RESERVE(1, 0);
                     ADD_TO_TRACE(JUMP_TO_TOP, 0, 0);
                 }
@@ -573,6 +600,14 @@ pop_jump_if_bool:
                     // Reserve space for nuops (+ SAVE_IP + EXIT_TRACE)
                     int nuops = expansion->nuops;
                     RESERVE(nuops, 0);
+                    if (expansion->uops[nuops-1].uop == _POP_FRAME) {
+                        // Check for trace stack underflow now:
+                        // We can't bail e.g. in the middle of
+                        // LOAD_CONST + _POP_FRAME.
+                        if (trace_stack_depth == 0) {
+                            DPRINTF(2, "Trace stack underflow\n");
+                            goto done;}
+                    }
                     uint32_t orig_oparg = oparg;  // For OPARG_TOP/BOTTOM
                     for (int i = 0; i < nuops; i++) {
                         oparg = orig_oparg;
@@ -619,8 +654,57 @@ pop_jump_if_bool:
                                 Py_FatalError("garbled expansion");
                         }
                         ADD_TO_TRACE(expansion->uops[i].uop, oparg, operand);
+                        if (expansion->uops[i].uop == _POP_FRAME) {
+                            TRACE_STACK_POP();
+                            DPRINTF(2,
+                                "Returning to %s (%s:%d) at byte offset %d\n",
+                                PyUnicode_AsUTF8(code->co_qualname),
+                                PyUnicode_AsUTF8(code->co_filename),
+                                code->co_firstlineno,
+                                2 * INSTR_IP(instr, code));
+                            goto top;
+                        }
                         if (expansion->uops[i].uop == _PUSH_FRAME) {
                             assert(i + 1 == nuops);
+                            int func_version_offset =
+                                offsetof(_PyCallCache, func_version)/sizeof(_Py_CODEUNIT)
+                                // Add one to account for the actual opcode/oparg pair:
+                                + 1;
+                            uint32_t func_version = read_u32(&instr[func_version_offset].cache);
+                            PyFunctionObject *func = _PyFunction_LookupByVersion(func_version);
+                            DPRINTF(3, "Function object: %p\n", func);
+                            if (func != NULL) {
+                                PyCodeObject *new_code = (PyCodeObject *)PyFunction_GET_CODE(func);
+                                if (new_code == code) {
+                                    // Recursive call, bail (we could be here forever).
+                                    DPRINTF(2, "Bailing on recursive call to %s (%s:%d)\n",
+                                            PyUnicode_AsUTF8(new_code->co_qualname),
+                                            PyUnicode_AsUTF8(new_code->co_filename),
+                                            new_code->co_firstlineno);
+                                    ADD_TO_TRACE(SAVE_IP, 0, 0);
+                                    goto done;
+                                }
+                                if (new_code->co_version != func_version) {
+                                    // func.__code__ was updated.
+                                    // Perhaps it may happen again, so don't bother tracing.
+                                    // TODO: Reason about this -- is it better to bail or not?
+                                    DPRINTF(2, "Bailing because co_version != func_version\n");
+                                    ADD_TO_TRACE(SAVE_IP, 0, 0);
+                                    goto done;
+                                }
+                                // Increment IP to the return address
+                                instr += _PyOpcode_Caches[_PyOpcode_Deopt[opcode]] + 1;
+                                TRACE_STACK_PUSH();
+                                code = new_code;
+                                instr = _PyCode_CODE(code);
+                                DPRINTF(2,
+                                    "Continuing in %s (%s:%d) at byte offset %d\n",
+                                    PyUnicode_AsUTF8(code->co_qualname),
+                                    PyUnicode_AsUTF8(code->co_filename),
+                                    code->co_firstlineno,
+                                    2 * INSTR_IP(instr, code));
+                                goto top;
+                            }
                             ADD_TO_TRACE(SAVE_IP, 0, 0);
                             goto done;
                         }
@@ -639,6 +723,10 @@ pop_jump_if_bool:
     }  // End for (;;)
 
 done:
+    while (trace_stack_depth > 0) {
+        TRACE_STACK_POP();
+    }
+    assert(code == initial_code);
     // Skip short traces like SAVE_IP, LOAD_FAST, SAVE_IP, EXIT_TRACE
     if (trace_length > 3) {
         ADD_TO_TRACE(EXIT_TRACE, 0, 0);
```
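One line in the `_PUSH_FRAME` hunk deserves unpacking: `func_version_offset` is computed with `offsetof` because the specialized `CALL` instruction stores a 32-bit function version in its inline cache, and cache offsets are measured in 16-bit code units rather than bytes. The sketch below redoes that arithmetic with stand-in types; `CallCache` and `CodeUnit` substitute for CPython's `_PyCallCache` and `_Py_CODEUNIT`, and the exact field layout shown is an assumption made for illustration.

```c
/* Sketch of the func_version_offset arithmetic from the _PUSH_FRAME hunk,
 * using stand-in types (assumed layout, not CPython's actual headers). */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

typedef uint16_t CodeUnit;        /* one opcode/oparg or cache slot */

typedef struct {
    uint16_t counter;             /* adaptive specialization counter */
    uint16_t func_version[2];     /* 32-bit version, split across two slots */
} CallCache;                      /* stand-in for _PyCallCache */

int main(void)
{
    /* Byte offset of the field, converted to code units, plus one more
     * unit to step over the CALL opcode/oparg pair itself. */
    int func_version_offset =
        (int)(offsetof(CallCache, func_version) / sizeof(CodeUnit)) + 1;

    /* With this layout: 2 bytes / 2 bytes-per-unit + 1 == 2, i.e. the
     * version lives two code units past the instruction. */
    printf("func_version_offset = %d\n", func_version_offset);
    return 0;
}
```

In the real code, `read_u32` then loads that version and `_PyFunction_LookupByVersion` maps it back to a function object; the translator bails if the callee is the current code object (direct recursion) or if `co_version` no longer matches the cached version (the function's `__code__` was replaced).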