commit 55824d01f866d1fa0f21996d897fba0e07d09ac8 (patch)
Author:    Mark Shannon <mark@hotpy.org>  2024-01-11 18:20:42 (GMT)
Committer: GitHub <noreply@github.com>    2024-01-11 18:20:42 (GMT)
Tree:      aeaec720d5d2df05fdad8389a542debfdb4b5032 /Python
Parent:    0d8fec79ca30e67870752c6ad4e299f591271e69 (diff)

GH-113853: Guarantee forward progress in executors (GH-113854)
Diffstat (limited to 'Python')
-rw-r--r--  Python/bytecodes.c            18
-rw-r--r--  Python/generated_cases.c.h    18
-rw-r--r--  Python/optimizer.c           155
3 files changed, 118 insertions, 73 deletions
diff --git a/Python/bytecodes.c b/Python/bytecodes.c
index f53ddae..2011d96 100644
--- a/Python/bytecodes.c
+++ b/Python/bytecodes.c
@@ -2325,12 +2325,18 @@ dummy_func(
// Double-check that the opcode isn't instrumented or something:
if (ucounter > threshold && this_instr->op.code == JUMP_BACKWARD) {
OPT_STAT_INC(attempts);
- int optimized = _PyOptimizer_BackEdge(frame, this_instr, next_instr, stack_pointer);
+ _Py_CODEUNIT *start = this_instr;
+ /* Back up over EXTENDED_ARGs so optimizer sees the whole instruction */
+ while (oparg > 255) {
+ oparg >>= 8;
+ start--;
+ }
+ int optimized = _PyOptimizer_Optimize(frame, start, stack_pointer);
ERROR_IF(optimized < 0, error);
if (optimized) {
// Rewind and enter the executor:
- assert(this_instr->op.code == ENTER_EXECUTOR);
- next_instr = this_instr;
+ assert(start->op.code == ENTER_EXECUTOR);
+ next_instr = start;
this_instr[1].cache &= ((1 << OPTIMIZER_BITS_IN_COUNTER) - 1);
}
else {
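
The new back-up loop relies on how EXTENDED_ARG encodes opargs wider than one byte: each prefix contributes the next-higher byte, so shifting the combined oparg right by 8 per step walks `start` back exactly one code unit per prefix, landing on the first unit of the instruction. A standalone sketch of that arithmetic, with made-up values rather than real code units:

#include <assert.h>
#include <stdint.h>

int main(void)
{
    /* Hypothetical encoding: EXTENDED_ARG 0x01, EXTENDED_ARG 0x02,
     * JUMP_BACKWARD 0x03, giving a combined oparg of 0x010203. */
    uint32_t oparg = (0x01 << 16) | (0x02 << 8) | 0x03;
    int start = 2;          /* index of the JUMP_BACKWARD unit (this_instr) */
    while (oparg > 255) {   /* one code unit of back-up per EXTENDED_ARG */
        oparg >>= 8;
        start--;
    }
    assert(start == 0);     /* now the first code unit of the instruction */
    return 0;
}
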
@@ -2370,10 +2376,12 @@ dummy_func(
GOTO_TIER_TWO();
}
else {
- code->co_executors->executors[oparg & 255] = NULL;
+ /* ENTER_EXECUTOR will be the first code unit of the instruction */
+ assert(oparg < 256);
+ code->co_executors->executors[oparg] = NULL;
opcode = this_instr->op.code = executor->vm_data.opcode;
this_instr->op.arg = executor->vm_data.oparg;
- oparg = (oparg & (~255)) | executor->vm_data.oparg;
+ oparg = executor->vm_data.oparg;
Py_DECREF(executor);
next_instr = this_instr;
DISPATCH_GOTO();
diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h
index e693e3e..1e995b6 100644
--- a/Python/generated_cases.c.h
+++ b/Python/generated_cases.c.h
@@ -2384,10 +2384,12 @@
GOTO_TIER_TWO();
}
else {
- code->co_executors->executors[oparg & 255] = NULL;
+ /* ENTER_EXECUTOR will be the first code unit of the instruction */
+ assert(oparg < 256);
+ code->co_executors->executors[oparg] = NULL;
opcode = this_instr->op.code = executor->vm_data.opcode;
this_instr->op.arg = executor->vm_data.oparg;
- oparg = (oparg & (~255)) | executor->vm_data.oparg;
+ oparg = executor->vm_data.oparg;
Py_DECREF(executor);
next_instr = this_instr;
DISPATCH_GOTO();
@@ -3289,12 +3291,18 @@
// Double-check that the opcode isn't instrumented or something:
if (ucounter > threshold && this_instr->op.code == JUMP_BACKWARD) {
OPT_STAT_INC(attempts);
- int optimized = _PyOptimizer_BackEdge(frame, this_instr, next_instr, stack_pointer);
+ _Py_CODEUNIT *start = this_instr;
+ /* Back up over EXTENDED_ARGs so optimizer sees the whole instruction */
+ while (oparg > 255) {
+ oparg >>= 8;
+ start--;
+ }
+ int optimized = _PyOptimizer_Optimize(frame, start, stack_pointer);
if (optimized < 0) goto error;
if (optimized) {
// Rewind and enter the executor:
- assert(this_instr->op.code == ENTER_EXECUTOR);
- next_instr = this_instr;
+ assert(start->op.code == ENTER_EXECUTOR);
+ next_instr = start;
this_instr[1].cache &= ((1 << OPTIMIZER_BITS_IN_COUNTER) - 1);
}
else {
diff --git a/Python/optimizer.c b/Python/optimizer.c
index 28e12db..227d6be 100644
--- a/Python/optimizer.c
+++ b/Python/optimizer.c
@@ -162,24 +162,22 @@ PyUnstable_SetOptimizer(_PyOptimizerObject *optimizer)
}
int
-_PyOptimizer_BackEdge(_PyInterpreterFrame *frame, _Py_CODEUNIT *src, _Py_CODEUNIT *dest, PyObject **stack_pointer)
+_PyOptimizer_Optimize(_PyInterpreterFrame *frame, _Py_CODEUNIT *start, PyObject **stack_pointer)
{
- assert(src->op.code == JUMP_BACKWARD);
PyCodeObject *code = (PyCodeObject *)frame->f_executable;
assert(PyCode_Check(code));
PyInterpreterState *interp = _PyInterpreterState_GET();
- if (!has_space_for_executor(code, src)) {
+ if (!has_space_for_executor(code, start)) {
return 0;
}
_PyOptimizerObject *opt = interp->optimizer;
_PyExecutorObject *executor = NULL;
- /* Start optimizing at the destination to guarantee forward progress */
- int err = opt->optimize(opt, code, dest, &executor, (int)(stack_pointer - _PyFrame_Stackbase(frame)));
+ int err = opt->optimize(opt, code, start, &executor, (int)(stack_pointer - _PyFrame_Stackbase(frame)));
if (err <= 0) {
assert(executor == NULL);
return err;
}
- int index = get_index_for_executor(code, src);
+ int index = get_index_for_executor(code, start);
if (index < 0) {
/* Out of memory. Don't raise and assume that the
* error will show up elsewhere.
@@ -190,7 +188,7 @@ _PyOptimizer_BackEdge(_PyInterpreterFrame *frame, _Py_CODEUNIT *src, _Py_CODEUNI
Py_DECREF(executor);
return 0;
}
- insert_executor(code, src, index, executor);
+ insert_executor(code, start, index, executor);
Py_DECREF(executor);
return 1;
}
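
_PyOptimizer_Optimize keeps the tri-state convention of the _PyOptimizer_BackEdge function it replaces: -1 for a hard error (with an exception set), 0 when no executor was produced (no free slot, or the trace was not worthwhile), and 1 when an executor was inserted at `start`. A toy model of that convention, with an invented helper and thresholds taken from the trace-length check later in this file:

#include <assert.h>

/* Toy stand-in for the optimize path: not the real signature. */
static int toy_optimize(int have_slot, int trace_length)
{
    if (trace_length < 0) return -1;  /* hard error, exception set */
    if (!have_slot)       return 0;   /* no executor slot: soft failure */
    if (trace_length < 5) return 0;   /* trace too short to be worthwhile */
    return 1;                         /* executor created and inserted */
}

int main(void)
{
    assert(toy_optimize(0, 10) == 0);
    assert(toy_optimize(1, 4) == 0);
    assert(toy_optimize(1, 10) == 1);
    return 0;
}
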
@@ -316,38 +314,6 @@ BRANCH_TO_GUARD[4][2] = {
#define CONFIDENCE_RANGE 1000
#define CONFIDENCE_CUTOFF 333
-/* Returns 1 on success,
- * 0 if it failed to produce a worthwhile trace,
- * and -1 on an error.
- */
-static int
-translate_bytecode_to_trace(
- PyCodeObject *code,
- _Py_CODEUNIT *instr,
- _PyUOpInstruction *trace,
- int buffer_size,
- _PyBloomFilter *dependencies)
-{
- PyCodeObject *initial_code = code;
- _Py_BloomFilter_Add(dependencies, initial_code);
- _Py_CODEUNIT *initial_instr = instr;
- int trace_length = 0;
- int max_length = buffer_size;
- struct {
- PyCodeObject *code;
- _Py_CODEUNIT *instr;
- } trace_stack[TRACE_STACK_SIZE];
- int trace_stack_depth = 0;
- int confidence = CONFIDENCE_RANGE; // Adjusted by branch instructions
-
-#ifdef Py_DEBUG
- char *python_lltrace = Py_GETENV("PYTHON_LLTRACE");
- int lltrace = 0;
- if (python_lltrace != NULL && *python_lltrace >= '0') {
- lltrace = *python_lltrace - '0'; // TODO: Parse an int and all that
- }
-#endif
-
#ifdef Py_DEBUG
#define DPRINTF(level, ...) \
if (lltrace >= (level)) { printf(__VA_ARGS__); }
@@ -403,6 +369,39 @@ translate_bytecode_to_trace(
code = trace_stack[trace_stack_depth].code; \
instr = trace_stack[trace_stack_depth].instr;
+/* Returns 1 on success,
+ * 0 if it failed to produce a worthwhile trace,
+ * and -1 on an error.
+ */
+static int
+translate_bytecode_to_trace(
+ PyCodeObject *code,
+ _Py_CODEUNIT *instr,
+ _PyUOpInstruction *trace,
+ int buffer_size,
+ _PyBloomFilter *dependencies)
+{
+ bool progress_needed = true;
+ PyCodeObject *initial_code = code;
+ _Py_BloomFilter_Add(dependencies, initial_code);
+ _Py_CODEUNIT *initial_instr = instr;
+ int trace_length = 0;
+ int max_length = buffer_size;
+ struct {
+ PyCodeObject *code;
+ _Py_CODEUNIT *instr;
+ } trace_stack[TRACE_STACK_SIZE];
+ int trace_stack_depth = 0;
+ int confidence = CONFIDENCE_RANGE; // Adjusted by branch instructions
+
+#ifdef Py_DEBUG
+ char *python_lltrace = Py_GETENV("PYTHON_LLTRACE");
+ int lltrace = 0;
+ if (python_lltrace != NULL && *python_lltrace >= '0') {
+ lltrace = *python_lltrace - '0'; // TODO: Parse an int and all that
+ }
+#endif
+
DPRINTF(4,
"Optimizing %s (%s:%d) at byte offset %d\n",
PyUnicode_AsUTF8(code->co_qualname),
@@ -410,6 +409,7 @@ translate_bytecode_to_trace(
code->co_firstlineno,
2 * INSTR_IP(initial_instr, code));
uint32_t target = 0;
+
top: // Jump here after _PUSH_FRAME or likely branches
for (;;) {
target = INSTR_IP(instr, code);
@@ -421,6 +421,15 @@ top: // Jump here after _PUSH_FRAME or likely branches
uint32_t oparg = instr->op.arg;
uint32_t extended = 0;
+ if (opcode == ENTER_EXECUTOR) {
+ assert(oparg < 256);
+ _PyExecutorObject *executor =
+ (_PyExecutorObject *)code->co_executors->executors[oparg];
+ opcode = executor->vm_data.opcode;
+ DPRINTF(2, " * ENTER_EXECUTOR -> %s\n", _PyOpcode_OpName[opcode]);
+ oparg = executor->vm_data.oparg;
+ }
+
if (opcode == EXTENDED_ARG) {
instr++;
extended = 1;
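
Hoisting the ENTER_EXECUTOR case above the EXTENDED_ARG check works because, after this commit, ENTER_EXECUTOR always occupies the first code unit of the instruction it replaced: its oparg is a plain executor index below 256, never itself prefixed, and the original opcode/oparg pair is recovered from the executor's vm_data. A sketch with illustrative types and values in place of the real ones:

#include <assert.h>
#include <stdint.h>

/* Illustrative stand-in for _PyExecutorObject's vm_data fields. */
struct toy_executor { uint8_t opcode; uint8_t oparg; };

int main(void)
{
    struct toy_executor ex = { .opcode = 77, .oparg = 3 };
    struct toy_executor *executors[256] = { [5] = &ex };

    uint32_t oparg = 5;                   /* ENTER_EXECUTOR's oparg */
    assert(oparg < 256);                  /* an index, never EXTENDED_ARG'd */
    struct toy_executor *e = executors[oparg];
    uint32_t opcode = e->opcode;          /* the instruction it replaced */
    oparg = e->oparg;
    assert(opcode == 77 && oparg == 3);
    return 0;
}
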
@@ -431,13 +440,23 @@ top: // Jump here after _PUSH_FRAME or likely branches
goto done;
}
}
-
- if (opcode == ENTER_EXECUTOR) {
- _PyExecutorObject *executor =
- (_PyExecutorObject *)code->co_executors->executors[oparg&255];
- opcode = executor->vm_data.opcode;
- DPRINTF(2, " * ENTER_EXECUTOR -> %s\n", _PyOpcode_OpName[opcode]);
- oparg = (oparg & 0xffffff00) | executor->vm_data.oparg;
+ assert(opcode != ENTER_EXECUTOR && opcode != EXTENDED_ARG);
+
+ /* Special case the first instruction,
+ * so that we can guarantee forward progress */
+ if (progress_needed) {
+ progress_needed = false;
+ if (opcode == JUMP_BACKWARD || opcode == JUMP_BACKWARD_NO_INTERRUPT) {
+ instr += 1 + _PyOpcode_Caches[opcode] - (int32_t)oparg;
+ initial_instr = instr;
+ continue;
+ }
+ else {
+ if (OPCODE_HAS_DEOPT(opcode)) {
+ opcode = _PyOpcode_Deopt[opcode];
+ }
+ assert(!OPCODE_HAS_DEOPT(opcode));
+ }
}
switch (opcode) {
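
The special case above is what the commit title promises: if the trace would begin at a JUMP_BACKWARD, the translator first follows the jump to the loop head and restarts from there, so an executor can never begin with an instruction that immediately loops back to itself. The target arithmetic accounts for the jump's own code unit plus its inline cache entries. A standalone check of that arithmetic, with a hypothetical cache count:

#include <assert.h>

int main(void)
{
    int jump = 10;     /* code-unit index of the JUMP_BACKWARD */
    int ncaches = 1;   /* stand-in for _PyOpcode_Caches[JUMP_BACKWARD] */
    int oparg = 8;     /* backward displacement, in code units */
    /* The displacement is relative to the unit after the instruction
     * and its caches, i.e. to next_instr: */
    int target = jump + 1 + ncaches - oparg;
    assert(target == 3);
    return 0;
}
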
@@ -480,7 +499,9 @@ top: // Jump here after _PUSH_FRAME or likely branches
case JUMP_BACKWARD:
case JUMP_BACKWARD_NO_INTERRUPT:
{
- if (instr + 2 - oparg == initial_instr && code == initial_code) {
+ _Py_CODEUNIT *target = instr + 1 + _PyOpcode_Caches[opcode] - (int)oparg;
+ if (target == initial_instr) {
+ /* We have looped round to the start */
RESERVE(1);
ADD_TO_TRACE(_JUMP_TO_TOP, 0, 0, 0);
}
@@ -641,19 +662,7 @@ done:
}
assert(code == initial_code);
// Skip short traces like _SET_IP, LOAD_FAST, _SET_IP, _EXIT_TRACE
- if (trace_length > 4) {
- ADD_TO_TRACE(_EXIT_TRACE, 0, 0, target);
- DPRINTF(1,
- "Created a trace for %s (%s:%d) at byte offset %d -- length %d\n",
- PyUnicode_AsUTF8(code->co_qualname),
- PyUnicode_AsUTF8(code->co_filename),
- code->co_firstlineno,
- 2 * INSTR_IP(initial_instr, code),
- trace_length);
- OPT_HIST(trace_length + buffer_size - max_length, trace_length_hist);
- return 1;
- }
- else {
+ if (progress_needed || trace_length < 5) {
OPT_STAT_INC(trace_too_short);
DPRINTF(4,
"No trace for %s (%s:%d) at byte offset %d\n",
@@ -661,15 +670,25 @@ done:
PyUnicode_AsUTF8(code->co_filename),
code->co_firstlineno,
2 * INSTR_IP(initial_instr, code));
+ return 0;
}
- return 0;
+ ADD_TO_TRACE(_EXIT_TRACE, 0, 0, target);
+ DPRINTF(1,
+ "Created a trace for %s (%s:%d) at byte offset %d -- length %d\n",
+ PyUnicode_AsUTF8(code->co_qualname),
+ PyUnicode_AsUTF8(code->co_filename),
+ code->co_firstlineno,
+ 2 * INSTR_IP(initial_instr, code),
+ trace_length);
+ OPT_HIST(trace_length + buffer_size - max_length, trace_length_hist);
+ return 1;
+}
#undef RESERVE
#undef RESERVE_RAW
#undef INSTR_IP
#undef ADD_TO_TRACE
#undef DPRINTF
-}
#define UNSET_BIT(array, bit) (array[(bit)>>5] &= ~(1<<((bit)&31)))
#define SET_BIT(array, bit) (array[(bit)>>5] |= (1<<((bit)&31)))
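
The restructured exit path folds both failure modes into one early return: a trace is discarded if progress_needed is still set (the translator never got past the initial instruction, so the executor could not make forward progress) or if it is shorter than five uops (the "_SET_IP, LOAD_FAST, _SET_IP, _EXIT_TRACE" pattern noted in the comment). The keep/discard predicate, isolated as a sketch:

#include <assert.h>
#include <stdbool.h>

/* The decision from the `done:` block, isolated. */
static bool keep_trace(bool progress_needed, int trace_length)
{
    return !(progress_needed || trace_length < 5);
}

int main(void)
{
    assert(!keep_trace(true, 100));  /* no forward progress: discard */
    assert(!keep_trace(false, 4));   /* too short: discard */
    assert(keep_trace(false, 5));    /* long enough: emit _EXIT_TRACE */
    return 0;
}
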
@@ -854,10 +873,20 @@ counter_optimize(
int Py_UNUSED(curr_stackentries)
)
{
+ int oparg = instr->op.arg;
+ while (instr->op.code == EXTENDED_ARG) {
+ instr++;
+ oparg = (oparg << 8) | instr->op.arg;
+ }
+ if (instr->op.code != JUMP_BACKWARD) {
+ /* Counter optimizer can only handle backward edges */
+ return 0;
+ }
+ _Py_CODEUNIT *target = instr + 1 + _PyOpcode_Caches[JUMP_BACKWARD] - oparg;
_PyUOpInstruction buffer[3] = {
{ .opcode = _LOAD_CONST_INLINE_BORROW, .operand = (uintptr_t)self },
{ .opcode = _INTERNAL_INCREMENT_OPT_COUNTER },
- { .opcode = _EXIT_TRACE, .target = (uint32_t)(instr - _PyCode_CODE(code)) }
+ { .opcode = _EXIT_TRACE, .target = (uint32_t)(target - _PyCode_CODE(code)) }
};
_PyBloomFilter empty;
_Py_BloomFilter_Init(&empty);
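
counter_optimize now performs the mirror image of the JUMP_BACKWARD handler's decode: it walks forward over EXTENDED_ARG prefixes, folding each one into the oparg, and returns 0 if the instruction turns out not to be a backward jump. The same forward fold as a standalone sketch, with invented opcode values:

#include <assert.h>
#include <stdint.h>

enum { TOY_EXTENDED_ARG = 0, TOY_JUMP_BACKWARD = 1 };  /* invented values */
struct unit { uint8_t code; uint8_t arg; };

int main(void)
{
    struct unit stream[] = {
        { TOY_EXTENDED_ARG, 0x01 },
        { TOY_EXTENDED_ARG, 0x02 },
        { TOY_JUMP_BACKWARD, 0x03 },
    };
    struct unit *instr = stream;
    uint32_t oparg = instr->arg;
    while (instr->code == TOY_EXTENDED_ARG) {  /* fold each prefix in */
        instr++;
        oparg = (oparg << 8) | instr->arg;
    }
    assert(instr->code == TOY_JUMP_BACKWARD && oparg == 0x010203);
    return 0;
}
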