author     Guido van Rossum <guido@python.org>    2023-08-29 16:51:51 (GMT)
committer  GitHub <noreply@github.com>            2023-08-29 16:51:51 (GMT)
commit     4f22152713d008cdd7c1d373a0f0c8dcf30e217e (patch)
tree       08f63460bafae50b2d93b5c14a1ab59a4ae82491 /Python/optimizer.c
parent     83e191ba7627cbcb06cebcc0a020b847f271ca59 (diff)
gh-107557: Remove unnecessary SAVE_IP instructions (#108583)
Also remove NOP instructions. The "stubs" are not optimized in this fashion (their SAVE_IP should always be preserved since it's where to jump next, and they don't contain NOPs by their nature).
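For intuition, the pass works in two stages: a scan that turns a SAVE_IP into a NOP whenever the next SAVE_IP arrives before any uop that can deopt or raise, followed by a compaction that drops every NOP. Below is a minimal, self-contained sketch of that idea; the opcode names, the toy_inst struct and the needs_ip() helper are hypothetical stand-ins, not CPython's definitions, and the sketch omits SAVE_CURRENT_IP and the stub handling. The real implementation is remove_unneeded_uops() in the diff further down.

/* Hypothetical opcodes and instruction layout, for illustration only. */
#include <stdbool.h>
#include <stdio.h>

enum { T_NOP, T_SAVE_IP, T_LOAD_FAST, T_BINARY_OP, T_EXIT_TRACE };

typedef struct { int opcode; int oparg; } toy_inst;

/* Assumption: in this toy model only T_BINARY_OP can deopt or raise,
 * so it is the only uop that really needs the most recent SAVE_IP. */
static bool
needs_ip(int opcode)
{
    return opcode == T_BINARY_OP;
}

static int
squash_trace(toy_inst *trace, int length)
{
    // Stage 1: turn SAVE_IPs that no later uop can depend on into NOPs.
    int last_save_ip = -1;
    bool need_ip = true;
    for (int pc = 0; pc < length; pc++) {
        int opcode = trace[pc].opcode;
        if (opcode == T_SAVE_IP) {
            if (!need_ip && last_save_ip >= 0) {
                trace[last_save_ip].opcode = T_NOP;
            }
            need_ip = false;
            last_save_ip = pc;
        }
        else if (opcode == T_EXIT_TRACE) {
            break;  // stop here; stubs (not modeled in this toy) would follow
        }
        else if (needs_ip(opcode)) {
            need_ip = true;
        }
    }
    // Stage 2: compact the buffer, dropping every NOP.
    int dest = 0;
    for (int pc = 0; pc < length; pc++) {
        if (trace[pc].opcode != T_NOP) {
            trace[dest++] = trace[pc];
        }
    }
    return dest;
}

int
main(void)
{
    toy_inst trace[] = {
        {T_SAVE_IP, 0}, {T_LOAD_FAST, 1},   // LOAD_FAST cannot deopt in this model...
        {T_SAVE_IP, 2}, {T_BINARY_OP, 0},   // ...so the first SAVE_IP becomes a NOP
        {T_SAVE_IP, 4}, {T_EXIT_TRACE, 0},
    };
    int n = (int)(sizeof(trace) / sizeof(trace[0]));
    int new_length = squash_trace(trace, n);
    printf("trace shrank from %d to %d uops\n", n, new_length);  // prints 6 -> 5
    return 0;
}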
Diffstat (limited to 'Python/optimizer.c')
-rw-r--r--  Python/optimizer.c  140
1 file changed, 116 insertions(+), 24 deletions(-)
diff --git a/Python/optimizer.c b/Python/optimizer.c
index bbc1259..c311a03 100644
--- a/Python/optimizer.c
+++ b/Python/optimizer.c
@@ -372,6 +372,32 @@ static PyTypeObject UOpExecutor_Type = {
.tp_as_sequence = &uop_as_sequence,
};
+static int
+move_stubs(
+ _PyUOpInstruction *trace,
+ int trace_length,
+ int stubs_start,
+ int stubs_end
+)
+{
+ memmove(trace + trace_length,
+ trace + stubs_start,
+ (stubs_end - stubs_start) * sizeof(_PyUOpInstruction));
+ // Patch up the jump targets
+ for (int i = 0; i < trace_length; i++) {
+ if (trace[i].opcode == _POP_JUMP_IF_FALSE ||
+ trace[i].opcode == _POP_JUMP_IF_TRUE)
+ {
+ int target = trace[i].oparg;
+ if (target >= stubs_start) {
+ target += trace_length - stubs_start;
+ trace[i].oparg = target;
+ }
+ }
+ }
+ return trace_length + stubs_end - stubs_start;
+}
+
#define TRACE_STACK_SIZE 5
static int
@@ -730,36 +756,31 @@ done:
if (trace_length > 3) {
ADD_TO_TRACE(EXIT_TRACE, 0, 0);
DPRINTF(1,
- "Created a trace for %s (%s:%d) at byte offset %d -- length %d\n",
+ "Created a trace for %s (%s:%d) at byte offset %d -- length %d+%d\n",
PyUnicode_AsUTF8(code->co_qualname),
PyUnicode_AsUTF8(code->co_filename),
code->co_firstlineno,
2 * INSTR_IP(initial_instr, code),
- trace_length);
- if (max_length < buffer_size && trace_length < max_length) {
- // Move the stubs back to be immediately after the main trace
- // (which ends at trace_length)
- DPRINTF(2,
- "Moving %d stub uops back by %d\n",
- buffer_size - max_length,
- max_length - trace_length);
- memmove(trace + trace_length,
- trace + max_length,
- (buffer_size - max_length) * sizeof(_PyUOpInstruction));
- // Patch up the jump targets
- for (int i = 0; i < trace_length; i++) {
- if (trace[i].opcode == _POP_JUMP_IF_FALSE ||
- trace[i].opcode == _POP_JUMP_IF_TRUE)
- {
- int target = trace[i].oparg;
- if (target >= max_length) {
- target += trace_length - max_length;
- trace[i].oparg = target;
- }
- }
+ trace_length,
+ buffer_size - max_length);
+ if (max_length < buffer_size) {
+ // There are stubs
+ if (trace_length < max_length) {
+ // There's a gap before the stubs
+ // Move the stubs back to be immediately after the main trace
+ // (which ends at trace_length)
+ DPRINTF(2,
+ "Moving %d stub uops back by %d\n",
+ buffer_size - max_length,
+ max_length - trace_length);
+ trace_length = move_stubs(trace, trace_length, max_length, buffer_size);
+ }
+ else {
+ assert(trace_length == max_length);
+ // There's no gap
+ trace_length = buffer_size;
}
}
- trace_length += buffer_size - max_length;
return trace_length;
}
else {
@@ -780,6 +801,76 @@ done:
}
static int
+remove_unneeded_uops(_PyUOpInstruction *trace, int trace_length)
+{
+ // Stage 1: Replace unneeded SAVE_IP uops with NOP.
+ // Note that we don't enter stubs, those SAVE_IPs are needed.
+ int last_save_ip = -1;
+ int last_instr = 0;
+ bool need_ip = true;
+ for (int pc = 0; pc < trace_length; pc++) {
+ int opcode = trace[pc].opcode;
+ if (opcode == SAVE_CURRENT_IP) {
+ // Special case: never remove preceding SAVE_IP
+ last_save_ip = -1;
+ }
+ else if (opcode == SAVE_IP) {
+ if (!need_ip && last_save_ip >= 0) {
+ trace[last_save_ip].opcode = NOP;
+ }
+ need_ip = false;
+ last_save_ip = pc;
+ }
+ else if (opcode == JUMP_TO_TOP || opcode == EXIT_TRACE) {
+ last_instr = pc + 1;
+ break;
+ }
+ else {
+        // If opcode has ERROR or DEOPT, set need_ip to true
+ if (_PyOpcode_opcode_metadata[opcode].flags & (HAS_ERROR_FLAG | HAS_DEOPT_FLAG)) {
+ need_ip = true;
+ }
+ }
+ }
+ // Stage 2: Squash NOP opcodes (pre-existing or set above).
+ int dest = 0;
+ for (int pc = 0; pc < last_instr; pc++) {
+ int opcode = trace[pc].opcode;
+ if (opcode != NOP) {
+ if (pc != dest) {
+ trace[dest] = trace[pc];
+ }
+ dest++;
+ }
+ }
+ // Stage 3: Move the stubs back.
+ if (dest < last_instr) {
+ int new_trace_length = move_stubs(trace, dest, last_instr, trace_length);
+#ifdef Py_DEBUG
+ char *uop_debug = Py_GETENV("PYTHONUOPSDEBUG");
+ int lltrace = 0;
+ if (uop_debug != NULL && *uop_debug >= '0') {
+ lltrace = *uop_debug - '0'; // TODO: Parse an int and all that
+ }
+ if (lltrace >= 2) {
+ printf("Optimized trace (length %d+%d = %d, saved %d):\n",
+ dest, trace_length - last_instr, new_trace_length,
+ trace_length - new_trace_length);
+ for (int pc = 0; pc < new_trace_length; pc++) {
+ printf("%4d: (%s, %d, %" PRIu64 ")\n",
+ pc,
+ uop_name(trace[pc].opcode),
+ (trace[pc].oparg),
+ (uint64_t)(trace[pc].operand));
+ }
+ }
+#endif
+ trace_length = new_trace_length;
+ }
+ return trace_length;
+}
+
+static int
uop_optimize(
_PyOptimizerObject *self,
PyCodeObject *code,
@@ -798,6 +889,7 @@ uop_optimize(
if (uop_optimize != NULL && *uop_optimize > '0') {
trace_length = _Py_uop_analyze_and_optimize(code, trace, trace_length, curr_stackentries);
}
+ trace_length = remove_unneeded_uops(trace, trace_length);
_PyUOpExecutorObject *executor = PyObject_NewVar(_PyUOpExecutorObject, &UOpExecutor_Type, trace_length);
if (executor == NULL) {
return -1;