diff options
author | Mark Shannon <mark@hotpy.org> | 2024-01-11 18:20:42 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-01-11 18:20:42 (GMT) |
commit | 55824d01f866d1fa0f21996d897fba0e07d09ac8 (patch) | |
tree | aeaec720d5d2df05fdad8389a542debfdb4b5032 /Python | |
parent | 0d8fec79ca30e67870752c6ad4e299f591271e69 (diff) | |
download | cpython-55824d01f866d1fa0f21996d897fba0e07d09ac8.zip cpython-55824d01f866d1fa0f21996d897fba0e07d09ac8.tar.gz cpython-55824d01f866d1fa0f21996d897fba0e07d09ac8.tar.bz2 |
GH-113853: Guarantee forward progress in executors (GH-113854)
Diffstat (limited to 'Python')
-rw-r--r-- | Python/bytecodes.c | 18 | ||||
-rw-r--r-- | Python/generated_cases.c.h | 18 | ||||
-rw-r--r-- | Python/optimizer.c | 155 |
3 files changed, 118 insertions, 73 deletions
diff --git a/Python/bytecodes.c b/Python/bytecodes.c index f53ddae..2011d96 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -2325,12 +2325,18 @@ dummy_func( // Double-check that the opcode isn't instrumented or something: if (ucounter > threshold && this_instr->op.code == JUMP_BACKWARD) { OPT_STAT_INC(attempts); - int optimized = _PyOptimizer_BackEdge(frame, this_instr, next_instr, stack_pointer); + _Py_CODEUNIT *start = this_instr; + /* Back up over EXTENDED_ARGs so optimizer sees the whole instruction */ + while (oparg > 255) { + oparg >>= 8; + start--; + } + int optimized = _PyOptimizer_Optimize(frame, start, stack_pointer); ERROR_IF(optimized < 0, error); if (optimized) { // Rewind and enter the executor: - assert(this_instr->op.code == ENTER_EXECUTOR); - next_instr = this_instr; + assert(start->op.code == ENTER_EXECUTOR); + next_instr = start; this_instr[1].cache &= ((1 << OPTIMIZER_BITS_IN_COUNTER) - 1); } else { @@ -2370,10 +2376,12 @@ dummy_func( GOTO_TIER_TWO(); } else { - code->co_executors->executors[oparg & 255] = NULL; + /* ENTER_EXECUTOR will be the first code unit of the instruction */ + assert(oparg < 256); + code->co_executors->executors[oparg] = NULL; opcode = this_instr->op.code = executor->vm_data.opcode; this_instr->op.arg = executor->vm_data.oparg; - oparg = (oparg & (~255)) | executor->vm_data.oparg; + oparg = executor->vm_data.oparg; Py_DECREF(executor); next_instr = this_instr; DISPATCH_GOTO(); diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index e693e3e..1e995b6 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -2384,10 +2384,12 @@ GOTO_TIER_TWO(); } else { - code->co_executors->executors[oparg & 255] = NULL; + /* ENTER_EXECUTOR will be the first code unit of the instruction */ + assert(oparg < 256); + code->co_executors->executors[oparg] = NULL; opcode = this_instr->op.code = executor->vm_data.opcode; this_instr->op.arg = executor->vm_data.oparg; - oparg = (oparg & (~255)) | executor->vm_data.oparg; + oparg = executor->vm_data.oparg; Py_DECREF(executor); next_instr = this_instr; DISPATCH_GOTO(); @@ -3289,12 +3291,18 @@ // Double-check that the opcode isn't instrumented or something: if (ucounter > threshold && this_instr->op.code == JUMP_BACKWARD) { OPT_STAT_INC(attempts); - int optimized = _PyOptimizer_BackEdge(frame, this_instr, next_instr, stack_pointer); + _Py_CODEUNIT *start = this_instr; + /* Back up over EXTENDED_ARGs so optimizer sees the whole instruction */ + while (oparg > 255) { + oparg >>= 8; + start--; + } + int optimized = _PyOptimizer_Optimize(frame, start, stack_pointer); if (optimized < 0) goto error; if (optimized) { // Rewind and enter the executor: - assert(this_instr->op.code == ENTER_EXECUTOR); - next_instr = this_instr; + assert(start->op.code == ENTER_EXECUTOR); + next_instr = start; this_instr[1].cache &= ((1 << OPTIMIZER_BITS_IN_COUNTER) - 1); } else { diff --git a/Python/optimizer.c b/Python/optimizer.c index 28e12db..227d6be 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -162,24 +162,22 @@ PyUnstable_SetOptimizer(_PyOptimizerObject *optimizer) } int -_PyOptimizer_BackEdge(_PyInterpreterFrame *frame, _Py_CODEUNIT *src, _Py_CODEUNIT *dest, PyObject **stack_pointer) +_PyOptimizer_Optimize(_PyInterpreterFrame *frame, _Py_CODEUNIT *start, PyObject **stack_pointer) { - assert(src->op.code == JUMP_BACKWARD); PyCodeObject *code = (PyCodeObject *)frame->f_executable; assert(PyCode_Check(code)); PyInterpreterState *interp = _PyInterpreterState_GET(); - if (!has_space_for_executor(code, src)) { + if (!has_space_for_executor(code, start)) { return 0; } _PyOptimizerObject *opt = interp->optimizer; _PyExecutorObject *executor = NULL; - /* Start optimizing at the destination to guarantee forward progress */ - int err = opt->optimize(opt, code, dest, &executor, (int)(stack_pointer - _PyFrame_Stackbase(frame))); + int err = opt->optimize(opt, code, start, &executor, (int)(stack_pointer - _PyFrame_Stackbase(frame))); if (err <= 0) { assert(executor == NULL); return err; } - int index = get_index_for_executor(code, src); + int index = get_index_for_executor(code, start); if (index < 0) { /* Out of memory. Don't raise and assume that the * error will show up elsewhere. @@ -190,7 +188,7 @@ _PyOptimizer_BackEdge(_PyInterpreterFrame *frame, _Py_CODEUNIT *src, _Py_CODEUNI Py_DECREF(executor); return 0; } - insert_executor(code, src, index, executor); + insert_executor(code, start, index, executor); Py_DECREF(executor); return 1; } @@ -316,38 +314,6 @@ BRANCH_TO_GUARD[4][2] = { #define CONFIDENCE_RANGE 1000 #define CONFIDENCE_CUTOFF 333 -/* Returns 1 on success, - * 0 if it failed to produce a worthwhile trace, - * and -1 on an error. - */ -static int -translate_bytecode_to_trace( - PyCodeObject *code, - _Py_CODEUNIT *instr, - _PyUOpInstruction *trace, - int buffer_size, - _PyBloomFilter *dependencies) -{ - PyCodeObject *initial_code = code; - _Py_BloomFilter_Add(dependencies, initial_code); - _Py_CODEUNIT *initial_instr = instr; - int trace_length = 0; - int max_length = buffer_size; - struct { - PyCodeObject *code; - _Py_CODEUNIT *instr; - } trace_stack[TRACE_STACK_SIZE]; - int trace_stack_depth = 0; - int confidence = CONFIDENCE_RANGE; // Adjusted by branch instructions - -#ifdef Py_DEBUG - char *python_lltrace = Py_GETENV("PYTHON_LLTRACE"); - int lltrace = 0; - if (python_lltrace != NULL && *python_lltrace >= '0') { - lltrace = *python_lltrace - '0'; // TODO: Parse an int and all that - } -#endif - #ifdef Py_DEBUG #define DPRINTF(level, ...) \ if (lltrace >= (level)) { printf(__VA_ARGS__); } @@ -403,6 +369,39 @@ translate_bytecode_to_trace( code = trace_stack[trace_stack_depth].code; \ instr = trace_stack[trace_stack_depth].instr; +/* Returns 1 on success, + * 0 if it failed to produce a worthwhile trace, + * and -1 on an error. + */ +static int +translate_bytecode_to_trace( + PyCodeObject *code, + _Py_CODEUNIT *instr, + _PyUOpInstruction *trace, + int buffer_size, + _PyBloomFilter *dependencies) +{ + bool progress_needed = true; + PyCodeObject *initial_code = code; + _Py_BloomFilter_Add(dependencies, initial_code); + _Py_CODEUNIT *initial_instr = instr; + int trace_length = 0; + int max_length = buffer_size; + struct { + PyCodeObject *code; + _Py_CODEUNIT *instr; + } trace_stack[TRACE_STACK_SIZE]; + int trace_stack_depth = 0; + int confidence = CONFIDENCE_RANGE; // Adjusted by branch instructions + +#ifdef Py_DEBUG + char *python_lltrace = Py_GETENV("PYTHON_LLTRACE"); + int lltrace = 0; + if (python_lltrace != NULL && *python_lltrace >= '0') { + lltrace = *python_lltrace - '0'; // TODO: Parse an int and all that + } +#endif + DPRINTF(4, "Optimizing %s (%s:%d) at byte offset %d\n", PyUnicode_AsUTF8(code->co_qualname), @@ -410,6 +409,7 @@ translate_bytecode_to_trace( code->co_firstlineno, 2 * INSTR_IP(initial_instr, code)); uint32_t target = 0; + top: // Jump here after _PUSH_FRAME or likely branches for (;;) { target = INSTR_IP(instr, code); @@ -421,6 +421,15 @@ top: // Jump here after _PUSH_FRAME or likely branches uint32_t oparg = instr->op.arg; uint32_t extended = 0; + if (opcode == ENTER_EXECUTOR) { + assert(oparg < 256); + _PyExecutorObject *executor = + (_PyExecutorObject *)code->co_executors->executors[oparg]; + opcode = executor->vm_data.opcode; + DPRINTF(2, " * ENTER_EXECUTOR -> %s\n", _PyOpcode_OpName[opcode]); + oparg = executor->vm_data.oparg; + } + if (opcode == EXTENDED_ARG) { instr++; extended = 1; @@ -431,13 +440,23 @@ top: // Jump here after _PUSH_FRAME or likely branches goto done; } } - - if (opcode == ENTER_EXECUTOR) { - _PyExecutorObject *executor = - (_PyExecutorObject *)code->co_executors->executors[oparg&255]; - opcode = executor->vm_data.opcode; - DPRINTF(2, " * ENTER_EXECUTOR -> %s\n", _PyOpcode_OpName[opcode]); - oparg = (oparg & 0xffffff00) | executor->vm_data.oparg; + assert(opcode != ENTER_EXECUTOR && opcode != EXTENDED_ARG); + + /* Special case the first instruction, + * so that we can guarantee forward progress */ + if (progress_needed) { + progress_needed = false; + if (opcode == JUMP_BACKWARD || opcode == JUMP_BACKWARD_NO_INTERRUPT) { + instr += 1 + _PyOpcode_Caches[opcode] - (int32_t)oparg; + initial_instr = instr; + continue; + } + else { + if (OPCODE_HAS_DEOPT(opcode)) { + opcode = _PyOpcode_Deopt[opcode]; + } + assert(!OPCODE_HAS_DEOPT(opcode)); + } } switch (opcode) { @@ -480,7 +499,9 @@ top: // Jump here after _PUSH_FRAME or likely branches case JUMP_BACKWARD: case JUMP_BACKWARD_NO_INTERRUPT: { - if (instr + 2 - oparg == initial_instr && code == initial_code) { + _Py_CODEUNIT *target = instr + 1 + _PyOpcode_Caches[opcode] - (int)oparg; + if (target == initial_instr) { + /* We have looped round to the start */ RESERVE(1); ADD_TO_TRACE(_JUMP_TO_TOP, 0, 0, 0); } @@ -641,19 +662,7 @@ done: } assert(code == initial_code); // Skip short traces like _SET_IP, LOAD_FAST, _SET_IP, _EXIT_TRACE - if (trace_length > 4) { - ADD_TO_TRACE(_EXIT_TRACE, 0, 0, target); - DPRINTF(1, - "Created a trace for %s (%s:%d) at byte offset %d -- length %d\n", - PyUnicode_AsUTF8(code->co_qualname), - PyUnicode_AsUTF8(code->co_filename), - code->co_firstlineno, - 2 * INSTR_IP(initial_instr, code), - trace_length); - OPT_HIST(trace_length + buffer_size - max_length, trace_length_hist); - return 1; - } - else { + if (progress_needed || trace_length < 5) { OPT_STAT_INC(trace_too_short); DPRINTF(4, "No trace for %s (%s:%d) at byte offset %d\n", @@ -661,15 +670,25 @@ done: PyUnicode_AsUTF8(code->co_filename), code->co_firstlineno, 2 * INSTR_IP(initial_instr, code)); + return 0; } - return 0; + ADD_TO_TRACE(_EXIT_TRACE, 0, 0, target); + DPRINTF(1, + "Created a trace for %s (%s:%d) at byte offset %d -- length %d\n", + PyUnicode_AsUTF8(code->co_qualname), + PyUnicode_AsUTF8(code->co_filename), + code->co_firstlineno, + 2 * INSTR_IP(initial_instr, code), + trace_length); + OPT_HIST(trace_length + buffer_size - max_length, trace_length_hist); + return 1; +} #undef RESERVE #undef RESERVE_RAW #undef INSTR_IP #undef ADD_TO_TRACE #undef DPRINTF -} #define UNSET_BIT(array, bit) (array[(bit)>>5] &= ~(1<<((bit)&31))) #define SET_BIT(array, bit) (array[(bit)>>5] |= (1<<((bit)&31))) @@ -854,10 +873,20 @@ counter_optimize( int Py_UNUSED(curr_stackentries) ) { + int oparg = instr->op.arg; + while (instr->op.code == EXTENDED_ARG) { + instr++; + oparg = (oparg << 8) | instr->op.arg; + } + if (instr->op.code != JUMP_BACKWARD) { + /* Counter optimizer can only handle backward edges */ + return 0; + } + _Py_CODEUNIT *target = instr + 1 + _PyOpcode_Caches[JUMP_BACKWARD] - oparg; _PyUOpInstruction buffer[3] = { { .opcode = _LOAD_CONST_INLINE_BORROW, .operand = (uintptr_t)self }, { .opcode = _INTERNAL_INCREMENT_OPT_COUNTER }, - { .opcode = _EXIT_TRACE, .target = (uint32_t)(instr - _PyCode_CODE(code)) } + { .opcode = _EXIT_TRACE, .target = (uint32_t)(target - _PyCode_CODE(code)) } }; _PyBloomFilter empty; _Py_BloomFilter_Init(&empty); |