diff options
author | Mark Shannon <mark@hotpy.org> | 2024-04-18 10:09:30 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-04-18 10:09:30 (GMT) |
commit | e32f6e9e4b202ce8759b201f7eafe4982c0eaa41 (patch) | |
tree | 3e6df2c609c618429c9601c09367564d9b580346 | |
parent | f70395786f6da2412d7406606feb64347fc71262 (diff) | |
download | cpython-e32f6e9e4b202ce8759b201f7eafe4982c0eaa41.zip cpython-e32f6e9e4b202ce8759b201f7eafe4982c0eaa41.tar.gz cpython-e32f6e9e4b202ce8759b201f7eafe4982c0eaa41.tar.bz2 |
GH-115419: Tidy up tier 2 optimizer. Merge peephole pass into main pass (GH-117997)
-rw-r--r-- | Python/optimizer_analysis.c | 163 | ||||
-rw-r--r-- | Python/optimizer_bytecodes.c | 83 | ||||
-rw-r--r-- | Python/optimizer_cases.c.h | 55 |
3 files changed, 169 insertions, 132 deletions
diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index a21679f..90444e3 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -362,6 +362,30 @@ eliminate_pop_guard(_PyUOpInstruction *this_instr, bool exit) } } +/* _PUSH_FRAME/_POP_FRAME's operand can be 0, a PyFunctionObject *, or a + * PyCodeObject *. Retrieve the code object if possible. + */ +static PyCodeObject * +get_code(_PyUOpInstruction *op) +{ + assert(op->opcode == _PUSH_FRAME || op->opcode == _POP_FRAME); + PyCodeObject *co = NULL; + uint64_t operand = op->operand; + if (operand == 0) { + return NULL; + } + if (operand & 1) { + co = (PyCodeObject *)(operand & ~1); + } + else { + PyFunctionObject *func = (PyFunctionObject *)operand; + assert(PyFunction_Check(func)); + co = (PyCodeObject *)func->func_code; + } + assert(PyCode_Check(co)); + return co; +} + /* 1 for success, 0 for not ready, cannot error at the moment. */ static int optimize_uops( @@ -376,6 +400,10 @@ optimize_uops( _Py_UOpsContext context; _Py_UOpsContext *ctx = &context; uint32_t opcode = UINT16_MAX; + int curr_space = 0; + int max_space = 0; + _PyUOpInstruction *first_valid_check_stack = NULL; + _PyUOpInstruction *corresponding_check_stack = NULL; if (_Py_uop_abstractcontext_init(ctx) < 0) { goto out_of_space; @@ -416,8 +444,7 @@ optimize_uops( ctx->frame->stack_pointer = stack_pointer; assert(STACK_LEVEL() >= 0); } - _Py_uop_abstractcontext_fini(ctx); - return trace_len; + Py_UNREACHABLE(); out_of_space: DPRINTF(3, "\n"); @@ -443,9 +470,17 @@ hit_bottom: _Py_uop_abstractcontext_fini(ctx); return 0; done: - /* Cannot optimize further, but there would be no benefit - * in retrying later */ + /* Either reached the end or cannot optimize further, but there + * would be no benefit in retrying later */ _Py_uop_abstractcontext_fini(ctx); + if (first_valid_check_stack != NULL) { + assert(first_valid_check_stack->opcode == _CHECK_STACK_SPACE); + assert(max_space > 0); + assert(max_space <= INT_MAX); + assert(max_space <= INT32_MAX); + first_valid_check_stack->opcode = _CHECK_STACK_SPACE_OPERAND; + first_valid_check_stack->operand = max_space; + } return trace_len; } @@ -532,124 +567,6 @@ remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size) Py_UNREACHABLE(); } -/* _PUSH_FRAME/_POP_FRAME's operand can be 0, a PyFunctionObject *, or a - * PyCodeObject *. Retrieve the code object if possible. - */ -static PyCodeObject * -get_co(_PyUOpInstruction *op) -{ - assert(op->opcode == _PUSH_FRAME || op->opcode == _POP_FRAME); - PyCodeObject *co = NULL; - uint64_t operand = op->operand; - if (operand == 0) { - return NULL; - } - if (operand & 1) { - co = (PyCodeObject *)(operand & ~1); - } - else { - PyFunctionObject *func = (PyFunctionObject *)operand; - assert(PyFunction_Check(func)); - co = (PyCodeObject *)func->func_code; - } - assert(PyCode_Check(co)); - return co; -} - -static void -peephole_opt(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer, int buffer_size) -{ - PyCodeObject *co = _PyFrame_GetCode(frame); - int curr_space = 0; - int max_space = 0; - _PyUOpInstruction *first_valid_check_stack = NULL; - _PyUOpInstruction *corresponding_check_stack = NULL; - for (int pc = 0; pc < buffer_size; pc++) { - int opcode = buffer[pc].opcode; - switch(opcode) { - case _LOAD_CONST: { - assert(co != NULL); - PyObject *val = PyTuple_GET_ITEM(co->co_consts, buffer[pc].oparg); - buffer[pc].opcode = _Py_IsImmortal(val) ? _LOAD_CONST_INLINE_BORROW : _LOAD_CONST_INLINE; - buffer[pc].operand = (uintptr_t)val; - break; - } - case _CHECK_PEP_523: { - /* Setting the eval frame function invalidates - * all executors, so no need to check dynamically */ - if (_PyInterpreterState_GET()->eval_frame == NULL) { - buffer[pc].opcode = _NOP; - } - break; - } - case _CHECK_STACK_SPACE: { - assert(corresponding_check_stack == NULL); - corresponding_check_stack = &buffer[pc]; - break; - } - case _PUSH_FRAME: { - assert(corresponding_check_stack != NULL); - co = get_co(&buffer[pc]); - if (co == NULL) { - // should be about to _EXIT_TRACE anyway - goto finish; - } - int framesize = co->co_framesize; - assert(framesize > 0); - curr_space += framesize; - if (curr_space < 0 || curr_space > INT32_MAX) { - // won't fit in signed 32-bit int - goto finish; - } - max_space = curr_space > max_space ? curr_space : max_space; - if (first_valid_check_stack == NULL) { - first_valid_check_stack = corresponding_check_stack; - } - else { - // delete all but the first valid _CHECK_STACK_SPACE - corresponding_check_stack->opcode = _NOP; - } - corresponding_check_stack = NULL; - break; - } - case _POP_FRAME: { - assert(corresponding_check_stack == NULL); - assert(co != NULL); - int framesize = co->co_framesize; - assert(framesize > 0); - assert(framesize <= curr_space); - curr_space -= framesize; - co = get_co(&buffer[pc]); - if (co == NULL) { - // might be impossible, but bailing is still safe - goto finish; - } - break; - } - case _JUMP_TO_TOP: - case _EXIT_TRACE: - goto finish; -#ifdef Py_DEBUG - case _CHECK_STACK_SPACE_OPERAND: { - /* We should never see _CHECK_STACK_SPACE_OPERANDs. - * They are only created at the end of this pass. */ - Py_UNREACHABLE(); - } -#endif - } - } - Py_UNREACHABLE(); -finish: - if (first_valid_check_stack != NULL) { - assert(first_valid_check_stack->opcode == _CHECK_STACK_SPACE); - assert(max_space > 0); - assert(max_space <= INT_MAX); - assert(max_space <= INT32_MAX); - first_valid_check_stack->opcode = _CHECK_STACK_SPACE_OPERAND; - first_valid_check_stack->operand = max_space; - } -} - // 0 - failure, no error raised, just fall back to Tier 1 // -1 - failure, and raise error // > 0 - length of optimized trace @@ -669,8 +586,6 @@ _Py_uop_analyze_and_optimize( return err; } - peephole_opt(frame, buffer, length); - length = optimize_uops( _PyFrame_GetCode(frame), buffer, length, curr_stacklen, dependencies); diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index e38428a..f119b8e 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -38,12 +38,14 @@ optimize_to_bool( _Py_UopsSymbol **result_ptr); extern void -eliminate_pop_guard(_PyUOpInstruction *this_instr, bool exit) +eliminate_pop_guard(_PyUOpInstruction *this_instr, bool exit); + +extern PyCodeObject *get_code(_PyUOpInstruction *op); static int dummy_func(void) { - PyCodeObject *code; + PyCodeObject *co; int oparg; _Py_UopsSymbol *flag; _Py_UopsSymbol *left; @@ -54,10 +56,15 @@ dummy_func(void) { _Py_UopsSymbol *top; _Py_UopsSymbol *bottom; _Py_UOpsAbstractFrame *frame; + _Py_UOpsAbstractFrame *new_frame; _Py_UOpsContext *ctx; _PyUOpInstruction *this_instr; _PyBloomFilter *dependencies; int modified; + int curr_space; + int max_space; + _PyUOpInstruction *first_valid_check_stack; + _PyUOpInstruction *corresponding_check_stack; // BEGIN BYTECODES // @@ -393,9 +400,10 @@ dummy_func(void) { } op(_LOAD_CONST, (-- value)) { - // There should be no LOAD_CONST. It should be all - // replaced by peephole_opt. - Py_UNREACHABLE(); + PyObject *val = PyTuple_GET_ITEM(co->co_consts, this_instr->oparg); + int opcode = _Py_IsImmortal(val) ? _LOAD_CONST_INLINE_BORROW : _LOAD_CONST_INLINE; + REPLACE_OP(this_instr, opcode, 0, (uintptr_t)val); + OUT_OF_SPACE_IF_NULL(value = sym_new_const(ctx, val)); } op(_LOAD_CONST_INLINE, (ptr/4 -- value)) { @@ -590,6 +598,32 @@ dummy_func(void) { frame_pop(ctx); stack_pointer = ctx->frame->stack_pointer; res = retval; + + /* Stack space handling */ + assert(corresponding_check_stack == NULL); + assert(co != NULL); + int framesize = co->co_framesize; + assert(framesize > 0); + assert(framesize <= curr_space); + curr_space -= framesize; + + co = get_code(this_instr); + if (co == NULL) { + // might be impossible, but bailing is still safe + goto done; + } + } + + op(_CHECK_STACK_SPACE, ( --)) { + assert(corresponding_check_stack == NULL); + corresponding_check_stack = this_instr; + } + + op (_CHECK_STACK_SPACE_OPERAND, ( -- )) { + (void)framesize; + /* We should never see _CHECK_STACK_SPACE_OPERANDs. + * They are only created at the end of this pass. */ + Py_UNREACHABLE(); } op(_PUSH_FRAME, (new_frame: _Py_UOpsAbstractFrame * -- unused if (0))) { @@ -598,6 +632,29 @@ dummy_func(void) { ctx->frame = new_frame; ctx->curr_frame_depth++; stack_pointer = new_frame->stack_pointer; + co = get_code(this_instr); + if (co == NULL) { + // should be about to _EXIT_TRACE anyway + goto done; + } + + /* Stack space handling */ + int framesize = co->co_framesize; + assert(framesize > 0); + curr_space += framesize; + if (curr_space < 0 || curr_space > INT32_MAX) { + // won't fit in signed 32-bit int + goto done; + } + max_space = curr_space > max_space ? curr_space : max_space; + if (first_valid_check_stack == NULL) { + first_valid_check_stack = corresponding_check_stack; + } + else { + // delete all but the first valid _CHECK_STACK_SPACE + corresponding_check_stack->opcode = _NOP; + } + corresponding_check_stack = NULL; } op(_UNPACK_SEQUENCE, (seq -- values[oparg])) { @@ -662,6 +719,22 @@ dummy_func(void) { } } + op(_CHECK_PEP_523, (--)) { + /* Setting the eval frame function invalidates + * all executors, so no need to check dynamically */ + if (_PyInterpreterState_GET()->eval_frame == NULL) { + REPLACE_OP(this_instr, _NOP, 0 ,0); + } + } + + op(_JUMP_TO_TOP, (--)) { + goto done; + } + + op(_EXIT_TRACE, (--)) { + goto done; + } + // END BYTECODES // diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index 209be37..50f335e 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -46,9 +46,10 @@ case _LOAD_CONST: { _Py_UopsSymbol *value; - // There should be no LOAD_CONST. It should be all - // replaced by peephole_opt. - Py_UNREACHABLE(); + PyObject *val = PyTuple_GET_ITEM(co->co_consts, this_instr->oparg); + int opcode = _Py_IsImmortal(val) ? _LOAD_CONST_INLINE_BORROW : _LOAD_CONST_INLINE; + REPLACE_OP(this_instr, opcode, 0, (uintptr_t)val); + OUT_OF_SPACE_IF_NULL(value = sym_new_const(ctx, val)); stack_pointer[0] = value; stack_pointer += 1; break; @@ -597,6 +598,18 @@ frame_pop(ctx); stack_pointer = ctx->frame->stack_pointer; res = retval; + /* Stack space handling */ + assert(corresponding_check_stack == NULL); + assert(co != NULL); + int framesize = co->co_framesize; + assert(framesize > 0); + assert(framesize <= curr_space); + curr_space -= framesize; + co = get_code(this_instr); + if (co == NULL) { + // might be impossible, but bailing is still safe + goto done; + } stack_pointer[0] = res; stack_pointer += 1; break; @@ -1529,6 +1542,11 @@ } case _CHECK_PEP_523: { + /* Setting the eval frame function invalidates + * all executors, so no need to check dynamically */ + if (_PyInterpreterState_GET()->eval_frame == NULL) { + REPLACE_OP(this_instr, _NOP, 0 ,0); + } break; } @@ -1547,6 +1565,8 @@ } case _CHECK_STACK_SPACE: { + assert(corresponding_check_stack == NULL); + corresponding_check_stack = this_instr; break; } @@ -1610,6 +1630,28 @@ ctx->frame = new_frame; ctx->curr_frame_depth++; stack_pointer = new_frame->stack_pointer; + co = get_code(this_instr); + if (co == NULL) { + // should be about to _EXIT_TRACE anyway + goto done; + } + /* Stack space handling */ + int framesize = co->co_framesize; + assert(framesize > 0); + curr_space += framesize; + if (curr_space < 0 || curr_space > INT32_MAX) { + // won't fit in signed 32-bit int + goto done; + } + max_space = curr_space > max_space ? curr_space : max_space; + if (first_valid_check_stack == NULL) { + first_valid_check_stack = corresponding_check_stack; + } + else { + // delete all but the first valid _CHECK_STACK_SPACE + corresponding_check_stack->opcode = _NOP; + } + corresponding_check_stack = NULL; break; } @@ -1899,6 +1941,7 @@ } case _JUMP_TO_TOP: { + goto done; break; } @@ -1907,6 +1950,11 @@ } case _CHECK_STACK_SPACE_OPERAND: { + uint32_t framesize = (uint32_t)this_instr->operand; + (void)framesize; + /* We should never see _CHECK_STACK_SPACE_OPERANDs. + * They are only created at the end of this pass. */ + Py_UNREACHABLE(); break; } @@ -1915,6 +1963,7 @@ } case _EXIT_TRACE: { + goto done; break; } |