author     Guido van Rossum <guido@python.org>    2023-08-17 18:29:58 (GMT)
committer  GitHub <noreply@github.com>            2023-08-17 18:29:58 (GMT)
commit     61c7249759ce88465ea655d5c19d17d03ff3f74b (patch)
tree       d6dd9d45ecbfdb2436ca462517982b95491179af /Python/optimizer.c
parent     292a22bdc22f2aa70c96e9e53ca6d6b0c5f8d5bf (diff)
gh-106581: Project through calls (#108067)
This finishes the work begun in gh-107760. When, while projecting a superblock, we encounter a call to a short, simple function, the superblock now enters the function using `_PUSH_FRAME`, continues through it, leaves it using `_POP_FRAME`, and then continues through the original code. Multiple frame pushes and pops are even possible. It is also possible to stop appending to the superblock in the middle of a called function, when running out of space or encountering an unsupported bytecode.
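The bookkeeping behind this is a small fixed-size stack of (code, instr) pairs, pushed on `_PUSH_FRAME` and popped on `_POP_FRAME`. Below is a minimal standalone sketch of that mechanism, using hypothetical stand-in types rather than CPython's real `PyCodeObject` and `_Py_CODEUNIT`; it mirrors the `TRACE_STACK_PUSH()`/`TRACE_STACK_POP()` macros in the diff but is not the actual implementation.

```c
#include <stdio.h>

#define TRACE_STACK_SIZE 5          /* same bound as the patch */

/* Hypothetical stand-ins for the translator's state; the real code
   tracks a PyCodeObject* and a _Py_CODEUNIT*. */
typedef struct {
    const char *code;               /* which function is being projected */
    int instr;                      /* position within that function */
} TraceFrame;

static TraceFrame trace_stack[TRACE_STACK_SIZE];
static int trace_stack_depth = 0;

/* Analogue of TRACE_STACK_PUSH(): remember the caller before
   projecting into a callee. Returns -1 on overflow, where the real
   translator emits SAVE_IP and stops growing the superblock. */
static int push_frame(const char *code, int instr)
{
    if (trace_stack_depth >= TRACE_STACK_SIZE) {
        return -1;
    }
    trace_stack[trace_stack_depth].code = code;
    trace_stack[trace_stack_depth].instr = instr;
    trace_stack_depth++;
    return 0;
}

/* Analogue of TRACE_STACK_POP(): restore the caller on _POP_FRAME.
   (The real macro treats underflow as a fatal error.) */
static void pop_frame(const char **code, int *instr)
{
    trace_stack_depth--;
    *code = trace_stack[trace_stack_depth].code;
    *instr = trace_stack[trace_stack_depth].instr;
}

int main(void)
{
    const char *code = "outer";
    int instr = 42;                 /* the call site's return address */

    push_frame(code, instr);        /* _PUSH_FRAME: enter the callee */
    code = "inner";
    instr = 0;

    pop_frame(&code, &instr);       /* _POP_FRAME: back to the caller */
    printf("resumed in %s at %d\n", code, instr);
    return 0;
}
```

Because the stack is a fixed-size local array, overflowing it is not an error: the translator simply stops appending to the superblock, just as it does for an unsupported bytecode.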
Diffstat (limited to 'Python/optimizer.c')
-rw-r--r--  Python/optimizer.c  90
1 file changed, 89 insertions(+), 1 deletion(-)
diff --git a/Python/optimizer.c b/Python/optimizer.c
index 559c4ae..5751840 100644
--- a/Python/optimizer.c
+++ b/Python/optimizer.c
@@ -373,6 +373,8 @@ static PyTypeObject UOpExecutor_Type = {
.tp_as_sequence = &uop_as_sequence,
};
+#define TRACE_STACK_SIZE 5
+
static int
translate_bytecode_to_trace(
PyCodeObject *code,
@@ -380,10 +382,16 @@ translate_bytecode_to_trace(
_PyUOpInstruction *trace,
int buffer_size)
{
+ PyCodeObject *initial_code = code;
_Py_CODEUNIT *initial_instr = instr;
int trace_length = 0;
int max_length = buffer_size;
int reserved = 0;
+ struct {
+ PyCodeObject *code;
+ _Py_CODEUNIT *instr;
+ } trace_stack[TRACE_STACK_SIZE];
+ int trace_stack_depth = 0;
#ifdef Py_DEBUG
char *uop_debug = Py_GETENV("PYTHONUOPSDEBUG");
@@ -441,6 +449,24 @@ translate_bytecode_to_trace(
// Reserve space for main+stub uops, plus 2 for SAVE_IP and EXIT_TRACE
#define RESERVE(main, stub) RESERVE_RAW((main) + (stub) + 2, uop_name(opcode))
+// Trace stack operations (used by _PUSH_FRAME, _POP_FRAME)
+#define TRACE_STACK_PUSH() \
+ if (trace_stack_depth >= TRACE_STACK_SIZE) { \
+ DPRINTF(2, "Trace stack overflow\n"); \
+ ADD_TO_TRACE(SAVE_IP, 0, 0); \
+ goto done; \
+ } \
+ trace_stack[trace_stack_depth].code = code; \
+ trace_stack[trace_stack_depth].instr = instr; \
+ trace_stack_depth++;
+#define TRACE_STACK_POP() \
+ if (trace_stack_depth <= 0) { \
+ Py_FatalError("Trace stack underflow\n"); \
+ } \
+ trace_stack_depth--; \
+ code = trace_stack[trace_stack_depth].code; \
+ instr = trace_stack[trace_stack_depth].instr;
+
DPRINTF(4,
"Optimizing %s (%s:%d) at byte offset %d\n",
PyUnicode_AsUTF8(code->co_qualname),
@@ -448,6 +474,7 @@ translate_bytecode_to_trace(
code->co_firstlineno,
2 * INSTR_IP(initial_instr, code));
+top: // Jump here after _PUSH_FRAME
for (;;) {
RESERVE_RAW(2, "epilogue"); // Always need space for SAVE_IP and EXIT_TRACE
ADD_TO_TRACE(SAVE_IP, INSTR_IP(instr, code), 0);
@@ -508,7 +535,7 @@ pop_jump_if_bool:
case JUMP_BACKWARD:
{
- if (instr + 2 - oparg == initial_instr) {
+ if (instr + 2 - oparg == initial_instr && code == initial_code) {
RESERVE(1, 0);
ADD_TO_TRACE(JUMP_TO_TOP, 0, 0);
}
@@ -573,6 +600,14 @@ pop_jump_if_bool:
// Reserve space for nuops (+ SAVE_IP + EXIT_TRACE)
int nuops = expansion->nuops;
RESERVE(nuops, 0);
+ if (expansion->uops[nuops-1].uop == _POP_FRAME) {
+ // Check for trace stack underflow now:
+ // We can't bail e.g. in the middle of
+ // LOAD_CONST + _POP_FRAME.
+ if (trace_stack_depth == 0) {
+ DPRINTF(2, "Trace stack underflow\n");
+ goto done;
+ }
+ }
uint32_t orig_oparg = oparg; // For OPARG_TOP/BOTTOM
for (int i = 0; i < nuops; i++) {
oparg = orig_oparg;
@@ -619,8 +654,57 @@ pop_jump_if_bool:
Py_FatalError("garbled expansion");
}
ADD_TO_TRACE(expansion->uops[i].uop, oparg, operand);
+ if (expansion->uops[i].uop == _POP_FRAME) {
+ TRACE_STACK_POP();
+ DPRINTF(2,
+ "Returning to %s (%s:%d) at byte offset %d\n",
+ PyUnicode_AsUTF8(code->co_qualname),
+ PyUnicode_AsUTF8(code->co_filename),
+ code->co_firstlineno,
+ 2 * INSTR_IP(instr, code));
+ goto top;
+ }
if (expansion->uops[i].uop == _PUSH_FRAME) {
assert(i + 1 == nuops);
+ int func_version_offset =
+ offsetof(_PyCallCache, func_version)/sizeof(_Py_CODEUNIT)
+ // Add one to account for the actual opcode/oparg pair:
+ + 1;
+ uint32_t func_version = read_u32(&instr[func_version_offset].cache);
+ PyFunctionObject *func = _PyFunction_LookupByVersion(func_version);
+ DPRINTF(3, "Function object: %p\n", func);
+ if (func != NULL) {
+ PyCodeObject *new_code = (PyCodeObject *)PyFunction_GET_CODE(func);
+ if (new_code == code) {
+ // Recursive call, bail (we could be here forever).
+ DPRINTF(2, "Bailing on recursive call to %s (%s:%d)\n",
+ PyUnicode_AsUTF8(new_code->co_qualname),
+ PyUnicode_AsUTF8(new_code->co_filename),
+ new_code->co_firstlineno);
+ ADD_TO_TRACE(SAVE_IP, 0, 0);
+ goto done;
+ }
+ if (new_code->co_version != func_version) {
+ // func.__code__ was updated.
+ // Perhaps it may happen again, so don't bother tracing.
+ // TODO: Reason about this -- is it better to bail or not?
+ DPRINTF(2, "Bailing because co_version != func_version\n");
+ ADD_TO_TRACE(SAVE_IP, 0, 0);
+ goto done;
+ }
+ // Increment IP to the return address
+ instr += _PyOpcode_Caches[_PyOpcode_Deopt[opcode]] + 1;
+ TRACE_STACK_PUSH();
+ code = new_code;
+ instr = _PyCode_CODE(code);
+ DPRINTF(2,
+ "Continuing in %s (%s:%d) at byte offset %d\n",
+ PyUnicode_AsUTF8(code->co_qualname),
+ PyUnicode_AsUTF8(code->co_filename),
+ code->co_firstlineno,
+ 2 * INSTR_IP(instr, code));
+ goto top;
+ }
ADD_TO_TRACE(SAVE_IP, 0, 0);
goto done;
}
@@ -639,6 +723,10 @@ pop_jump_if_bool:
} // End for (;;)
done:
+ while (trace_stack_depth > 0) {
+ TRACE_STACK_POP();
+ }
+ assert(code == initial_code);
// Skip short traces like SAVE_IP, LOAD_FAST, SAVE_IP, EXIT_TRACE
if (trace_length > 3) {
ADD_TO_TRACE(EXIT_TRACE, 0, 0);
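A side note on the `func_version_offset` arithmetic in the `_PUSH_FRAME` hunk above: the call's inline cache entries sit immediately after the opcode/oparg code unit, so the byte offset of `func_version` within the cache struct is converted to code units and then bumped by one to skip the instruction itself. Here is a hedged sketch of the same arithmetic with a stand-in struct; the real `_PyCallCache` layout lives in CPython's internal headers, and the field layout below is an assumption for illustration only.

```c
#include <stdio.h>
#include <stddef.h>
#include <stdint.h>

typedef uint16_t CodeUnit;          /* stand-in for _Py_CODEUNIT (2 bytes) */

/* Assumed mirror of _PyCallCache: a 16-bit adaptive counter followed
   by a 32-bit function version stored as two 16-bit code units. */
typedef struct {
    uint16_t counter;
    uint16_t func_version[2];
} CallCache;

int main(void)
{
    /* offsetof(...) is in bytes; dividing by sizeof(CodeUnit) turns it
       into code units, and +1 accounts for the opcode/oparg pair. */
    int func_version_offset =
        (int)(offsetof(CallCache, func_version) / sizeof(CodeUnit)) + 1;
    printf("func_version is read from instr[%d]\n", func_version_offset);
    return 0;
}
```

Under this assumed layout the offset evaluates to 2: one code unit for the instruction itself and one for the counter, after which `read_u32(&instr[func_version_offset].cache)` picks up the function version used by `_PyFunction_LookupByVersion`.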