From 529088100952b31797a29ef7e0f5716613b32d66 Mon Sep 17 00:00:00 2001
From: Irit Katriel <1055913+iritkatriel@users.noreply.github.com>
Date: Tue, 27 Jun 2023 20:24:58 +0100
Subject: gh-106149: move jump target resolution from optimizer to assembler (#106150)

---
 Include/internal/pycore_compile.h   |   8 ++-
 Include/internal/pycore_flowgraph.h |   2 -
 Python/assemble.c                   | 111 ++++++++++++++++++++++++++++++++----
 Python/compile.c                    |  17 ++----
 Python/flowgraph.c                  |  80 +-------------------------
 5 files changed, 113 insertions(+), 105 deletions(-)

diff --git a/Include/internal/pycore_compile.h b/Include/internal/pycore_compile.h
index e648173..e204d4d 100644
--- a/Include/internal/pycore_compile.h
+++ b/Include/internal/pycore_compile.h
@@ -28,7 +28,7 @@ extern int _PyAST_Optimize(
     int ff_features);

 typedef struct {
-    int h_offset;
+    int h_label;
     int h_startdepth;
     int h_preserve_lasti;
 } _PyCompile_ExceptHandlerInfo;
@@ -38,6 +38,10 @@ typedef struct {
     int i_oparg;
     _PyCompilerSrcLocation i_loc;
     _PyCompile_ExceptHandlerInfo i_except_handler_info;
+
+    /* Used by the assembler */
+    int i_target;
+    int i_offset;
 } _PyCompile_Instruction;

 typedef struct {
@@ -85,8 +89,6 @@ int _PyCompile_EnsureArrayLargeEnough(

 int _PyCompile_ConstCacheMergeOne(PyObject *const_cache, PyObject **obj);

-int _PyCompile_InstrSize(int opcode, int oparg);
-
 /* Access compiler internals for unit testing */

 PyAPI_FUNC(PyObject*) _PyCompile_CodeGen(
diff --git a/Include/internal/pycore_flowgraph.h b/Include/internal/pycore_flowgraph.h
index 720feb1..4a01574 100644
--- a/Include/internal/pycore_flowgraph.h
+++ b/Include/internal/pycore_flowgraph.h
@@ -55,8 +55,6 @@ typedef struct _PyCfgBasicblock_ {
     int b_predecessors;
     /* depth of stack upon entry of block, computed by stackdepth() */
     int b_startdepth;
-    /* instruction offset for block, computed by assemble_jump_offsets() */
-    int b_offset;
     /* Basic block is an exception handler that preserves lasti */
     unsigned b_preserve_lasti : 1;
     /* Used by compiler passes to mark whether they have visited a basic block. */
diff --git a/Python/assemble.c b/Python/assemble.c
index d6213f8..5d566a3 100644
--- a/Python/assemble.c
+++ b/Python/assemble.c
@@ -4,6 +4,7 @@
 #include "pycore_code.h"          // write_location_entry_start()
 #include "pycore_compile.h"
 #include "pycore_opcode.h"        // _PyOpcode_Caches[] and opcode category macros
+#include "pycore_opcode_utils.h"  // IS_BACKWARDS_JUMP_OPCODE
 #include "opcode_metadata.h"      // IS_PSEUDO_INSTR
@@ -34,6 +35,18 @@ same_location(location a, location b)
            a.end_col_offset == b.end_col_offset;
 }

+static int
+instr_size(instruction *instr)
+{
+    int opcode = instr->i_opcode;
+    int oparg = instr->i_oparg;
+    assert(!IS_PSEUDO_INSTR(opcode));
+    assert(OPCODE_HAS_ARG(opcode) || oparg == 0);
+    int extended_args = (0xFFFFFF < oparg) + (0xFFFF < oparg) + (0xFF < oparg);
+    int caches = _PyOpcode_Caches[opcode];
+    return extended_args + 1 + caches;
+}
+
 struct assembler {
     PyObject *a_bytecode;      /* bytes containing bytecode */
     int a_offset;              /* offset into bytecode */
@@ -118,6 +131,7 @@ assemble_emit_exception_table_item(struct assembler *a, int value, int msb)

 static int
 assemble_emit_exception_table_entry(struct assembler *a, int start, int end,
+                                    int handler_offset,
                                     _PyCompile_ExceptHandlerInfo *handler)
 {
     Py_ssize_t len = PyBytes_GET_SIZE(a->a_except_table);
@@ -126,7 +140,7 @@ assemble_emit_exception_table_entry(struct assembler *a, int start, int end,
     }
     int size = end-start;
     assert(end > start);
-    int target = handler->h_offset;
+    int target = handler_offset;
     int depth = handler->h_startdepth - 1;
     if (handler->h_preserve_lasti > 0) {
         depth -= 1;
@@ -145,24 +159,30 @@ assemble_exception_table(struct assembler *a, instr_sequence *instrs)
 {
     int ioffset = 0;
     _PyCompile_ExceptHandlerInfo handler;
-    handler.h_offset = -1;
+    handler.h_label = -1;
     handler.h_startdepth = -1;
     handler.h_preserve_lasti = -1;
     int start = -1;
     for (int i = 0; i < instrs->s_used; i++) {
         instruction *instr = &instrs->s_instrs[i];
-        if (instr->i_except_handler_info.h_offset != handler.h_offset) {
-            if (handler.h_offset >= 0) {
+        if (instr->i_except_handler_info.h_label != handler.h_label) {
+            if (handler.h_label >= 0) {
+                int handler_offset = instrs->s_instrs[handler.h_label].i_offset;
                 RETURN_IF_ERROR(
-                    assemble_emit_exception_table_entry(a, start, ioffset, &handler));
+                    assemble_emit_exception_table_entry(a, start, ioffset,
+                                                        handler_offset,
+                                                        &handler));
             }
             start = ioffset;
             handler = instr->i_except_handler_info;
         }
-        ioffset += _PyCompile_InstrSize(instr->i_opcode, instr->i_oparg);
+        ioffset += instr_size(instr);
     }
-    if (handler.h_offset >= 0) {
-        RETURN_IF_ERROR(assemble_emit_exception_table_entry(a, start, ioffset, &handler));
+    if (handler.h_label >= 0) {
+        int handler_offset = instrs->s_instrs[handler.h_label].i_offset;
+        RETURN_IF_ERROR(assemble_emit_exception_table_entry(a, start, ioffset,
+                                                            handler_offset,
+                                                            &handler));
     }
     return SUCCESS;
 }
@@ -329,7 +349,7 @@ assemble_location_info(struct assembler *a, instr_sequence *instrs,
             loc = instr->i_loc;
             size = 0;
         }
-        size += _PyCompile_InstrSize(instr->i_opcode, instr->i_oparg);
+        size += instr_size(instr);
     }
     RETURN_IF_ERROR(assemble_emit_location(a, loc, size));
     return SUCCESS;
@@ -385,7 +405,7 @@ assemble_emit_instr(struct assembler *a, instruction *instr)
     Py_ssize_t len = PyBytes_GET_SIZE(a->a_bytecode);
     _Py_CODEUNIT *code;
-    int size = _PyCompile_InstrSize(instr->i_opcode, instr->i_oparg);
+    int size = instr_size(instr);
     if (a->a_offset + size >= len / (int)sizeof(_Py_CODEUNIT)) {
         if (len > PY_SSIZE_T_MAX / 2) {
             return ERROR;
         }
@@ -585,12 +605,83 @@ error:
     return co;
 }

+static int
+resolve_jump_offsets(instr_sequence *instrs)
+{
+    /* Compute the size of each instruction and fixup jump args.
+     * Replace instruction index with position in bytecode.
+     */
+
+    for (int i = 0; i < instrs->s_used; i++) {
+        instruction *instr = &instrs->s_instrs[i];
+        if (OPCODE_HAS_JUMP(instr->i_opcode)) {
+            instr->i_target = instr->i_oparg;
+        }
+    }
+
+    int extended_arg_recompile;
+
+    do {
+        int totsize = 0;
+        for (int i = 0; i < instrs->s_used; i++) {
+            instruction *instr = &instrs->s_instrs[i];
+            instr->i_offset = totsize;
+            int isize = instr_size(instr);
+            totsize += isize;
+        }
+        extended_arg_recompile = 0;
+
+        int offset = 0;
+        for (int i = 0; i < instrs->s_used; i++) {
+            instruction *instr = &instrs->s_instrs[i];
+            int isize = instr_size(instr);
+            /* jump offsets are computed relative to
+             * the instruction pointer after fetching
+             * the jump instruction.
+             */
+            offset += isize;
+            if (OPCODE_HAS_JUMP(instr->i_opcode)) {
+                instruction *target = &instrs->s_instrs[instr->i_target];
+                instr->i_oparg = target->i_offset;
+                if (instr->i_oparg < offset) {
+                    assert(IS_BACKWARDS_JUMP_OPCODE(instr->i_opcode));
+                    instr->i_oparg = offset - instr->i_oparg;
+                }
+                else {
+                    assert(!IS_BACKWARDS_JUMP_OPCODE(instr->i_opcode));
+                    instr->i_oparg = instr->i_oparg - offset;
+                }
+                if (instr_size(instr) != isize) {
+                    extended_arg_recompile = 1;
+                }
+            }
+        }
+        /* XXX: This is an awful hack that could hurt performance, but
+           on the bright side it should work until we come up
+           with a better solution.
+
+           The issue is that in the first loop instr_size() is
+           called, and it requires i_oparg be set appropriately.
+           There is a bootstrap problem because i_oparg is
+           calculated in the second loop above.
+
+           So we loop until we stop seeing new EXTENDED_ARGs.
+           The only EXTENDED_ARGs that could be popping up are
+           ones in jump instructions. So this should converge
+           fairly quickly.
+        */
+    } while (extended_arg_recompile);
+    return SUCCESS;
+}

 PyCodeObject *
 _PyAssemble_MakeCodeObject(_PyCompile_CodeUnitMetadata *umd,
                            PyObject *const_cache, PyObject *consts,
                            int maxdepth, instr_sequence *instrs,
                            int nlocalsplus, int code_flags, PyObject *filename)
 {
+    if (resolve_jump_offsets(instrs) < 0) {
+        return NULL;
+    }
     PyCodeObject *co = NULL;

     struct assembler a;
diff --git a/Python/compile.c b/Python/compile.c
index 5a05605..d080144 100644
--- a/Python/compile.c
+++ b/Python/compile.c
@@ -131,16 +131,6 @@ enum {
 };

-int
-_PyCompile_InstrSize(int opcode, int oparg)
-{
-    assert(!IS_PSEUDO_INSTR(opcode));
-    assert(OPCODE_HAS_ARG(opcode) || oparg == 0);
-    int extended_args = (0xFFFFFF < oparg) + (0xFFFF < oparg) + (0xFF < oparg);
-    int caches = _PyOpcode_Caches[opcode];
-    return extended_args + 1 + caches;
-}
-
 typedef _PyCompile_Instruction instruction;
 typedef _PyCompile_InstructionSequence instr_sequence;

@@ -7717,17 +7707,20 @@ cfg_to_instr_sequence(cfg_builder *g, instr_sequence *seq)
         RETURN_IF_ERROR(instr_sequence_use_label(seq, b->b_label.id));
         for (int i = 0; i < b->b_iused; i++) {
             cfg_instr *instr = &b->b_instr[i];
+            if (OPCODE_HAS_JUMP(instr->i_opcode)) {
+                instr->i_oparg = instr->i_target->b_label.id;
+            }
             RETURN_IF_ERROR(
                 instr_sequence_addop(seq, instr->i_opcode, instr->i_oparg, instr->i_loc));
             _PyCompile_ExceptHandlerInfo *hi =
                 &seq->s_instrs[seq->s_used-1].i_except_handler_info;
             if (instr->i_except != NULL) {
-                hi->h_offset = instr->i_except->b_offset;
+                hi->h_label = instr->i_except->b_label.id;
                 hi->h_startdepth = instr->i_except->b_startdepth;
                 hi->h_preserve_lasti = instr->i_except->b_preserve_lasti;
             }
             else {
-                hi->h_offset = -1;
+                hi->h_label = -1;
             }
         }
     }
diff --git a/Python/flowgraph.c b/Python/flowgraph.c
index 39f780e..a6b2a9f 100644
--- a/Python/flowgraph.c
+++ b/Python/flowgraph.c
@@ -166,22 +166,6 @@ _PyBasicblock_InsertInstruction(basicblock *block, int pos, cfg_instr *instr) {
     return SUCCESS;
 }

-static int
-instr_size(cfg_instr *instruction)
-{
-    return _PyCompile_InstrSize(instruction->i_opcode, instruction->i_oparg);
-}
-
-static int
-blocksize(basicblock *b)
-{
-    int size = 0;
-    for (int i = 0; i < b->b_iused; i++) {
-        size += instr_size(&b->b_instr[i]);
-    }
-    return size;
-}
-
 /* For debugging purposes only */
 #if 0
 static void
@@ -212,9 +196,9 @@ static void
 dump_basicblock(const basicblock *b)
 {
     const char *b_return = basicblock_returns(b) ? "return " : "";
-    fprintf(stderr, "%d: [EH=%d CLD=%d WRM=%d NO_FT=%d %p] used: %d, depth: %d, offset: %d %s\n",
+    fprintf(stderr, "%d: [EH=%d CLD=%d WRM=%d NO_FT=%d %p] used: %d, depth: %d, %s\n",
             b->b_label.id, b->b_except_handler, b->b_cold, b->b_warm, BB_NO_FALLTHROUGH(b), b, b->b_iused,
-            b->b_startdepth, b->b_offset, b_return);
+            b->b_startdepth, b_return);
     if (b->b_instr) {
         int i;
         for (i = 0; i < b->b_iused; i++) {
@@ -480,71 +464,11 @@ normalize_jumps(_PyCfgBuilder *g)
     return SUCCESS;
 }

-static void
-resolve_jump_offsets(basicblock *entryblock)
-{
-    int bsize, totsize, extended_arg_recompile;
-
-    /* Compute the size of each block and fixup jump args.
-       Replace block pointer with position in bytecode. */
-    do {
-        totsize = 0;
-        for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
-            bsize = blocksize(b);
-            b->b_offset = totsize;
-            totsize += bsize;
-        }
-        extended_arg_recompile = 0;
-        for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
-            bsize = b->b_offset;
-            for (int i = 0; i < b->b_iused; i++) {
-                cfg_instr *instr = &b->b_instr[i];
-                int isize = instr_size(instr);
-                /* jump offsets are computed relative to
-                 * the instruction pointer after fetching
-                 * the jump instruction.
-                 */
-                bsize += isize;
-                if (is_jump(instr)) {
-                    instr->i_oparg = instr->i_target->b_offset;
-                    if (instr->i_oparg < bsize) {
-                        assert(IS_BACKWARDS_JUMP_OPCODE(instr->i_opcode));
-                        instr->i_oparg = bsize - instr->i_oparg;
-                    }
-                    else {
-                        assert(!IS_BACKWARDS_JUMP_OPCODE(instr->i_opcode));
-                        instr->i_oparg -= bsize;
-                    }
-                    if (instr_size(instr) != isize) {
-                        extended_arg_recompile = 1;
-                    }
-                }
-            }
-        }
-
-        /* XXX: This is an awful hack that could hurt performance, but
-           on the bright side it should work until we come up
-           with a better solution.
-
-           The issue is that in the first loop blocksize() is called
-           which calls instr_size() which requires i_oparg be set
-           appropriately. There is a bootstrap problem because
-           i_oparg is calculated in the second loop above.
-
-           So we loop until we stop seeing new EXTENDED_ARGs.
-           The only EXTENDED_ARGs that could be popping up are
-           ones in jump instructions. So this should converge
-           fairly quickly.
-        */
-    } while (extended_arg_recompile);
-}
-
 int
 _PyCfg_ResolveJumps(_PyCfgBuilder *g)
 {
     RETURN_IF_ERROR(normalize_jumps(g));
     assert(no_redundant_jumps(g));
-    resolve_jump_offsets(g->g_entryblock);
     return SUCCESS;
 }
--
cgit v0.12
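
Note: the instr_size() helper added to Python/assemble.c above encodes the rule that an instruction occupies one code unit, plus one unit per EXTENDED_ARG prefix needed to express its oparg, plus the opcode's inline cache entries. A minimal standalone sketch of that size rule, using hypothetical names (sketch_instr_size, cache_entries) rather than the CPython internals:

#include <assert.h>

/* Hypothetical stand-in for _PyOpcode_Caches[]: the number of inline
   cache entries that follow each opcode (all zero in this sketch). */
static const int cache_entries[256] = {0};

/* One code unit for the opcode, one per EXTENDED_ARG prefix (the oparg
   is emitted 8 bits at a time), plus the opcode's inline cache units. */
static int
sketch_instr_size(int opcode, int oparg)
{
    assert(0 <= opcode && opcode < 256 && oparg >= 0);
    int extended_args = (0xFFFFFF < oparg) + (0xFFFF < oparg) + (0xFF < oparg);
    return extended_args + 1 + cache_entries[opcode];
}

For example, an oparg of 0x1ABCD exceeds 0xFF and 0xFFFF but not 0xFFFFFF, so it needs two EXTENDED_ARG prefixes and the instruction spans three code units, plus any caches.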
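
Note: the XXX comment in the new resolve_jump_offsets() describes a fixpoint: instruction sizes depend on opargs, while jump opargs depend on instruction offsets, so both are recomputed until no jump grows enough to need another EXTENDED_ARG. A self-contained sketch of that convergence loop over a flat instruction array, with hypothetical types and helpers (ins, ins_size, resolve_offsets) rather than the CPython API, and with inline caches ignored for brevity:

#include <stdbool.h>

typedef struct {
    int opcode;     /* opcode number (not consulted by this sketch's size rule) */
    int oparg;      /* argument; for jumps, rewritten to a relative delta */
    int target;     /* for jumps: index of the target instruction */
    int offset;     /* code-unit offset, filled in below */
    bool is_jump;   /* oparg names a jump target */
    bool backwards; /* jump goes to an earlier instruction */
} ins;

/* One unit for the opcode plus one per EXTENDED_ARG prefix. */
static int
ins_size(const ins *i)
{
    int oparg = i->oparg;
    return 1 + (0xFF < oparg) + (0xFFFF < oparg) + (0xFFFFFF < oparg);
}

/* Assign offsets and relative jump args, looping until sizes stabilize.
   Jump args are relative to the instruction pointer *after* the jump,
   which is why pc is advanced before the subtraction. */
static void
resolve_offsets(ins *code, int n)
{
    bool again;
    do {
        int total = 0;
        for (int i = 0; i < n; i++) {
            code[i].offset = total;
            total += ins_size(&code[i]);
        }
        again = false;
        int pc = 0;
        for (int i = 0; i < n; i++) {
            int size = ins_size(&code[i]);
            pc += size;
            if (code[i].is_jump) {
                int t = code[code[i].target].offset;
                code[i].oparg = code[i].backwards ? pc - t : t - pc;
                if (ins_size(&code[i]) != size) {
                    again = true;   /* a new EXTENDED_ARG appeared; redo offsets */
                }
            }
        }
    } while (again);
}

Because sizes only ever grow when a jump needs another prefix, the loop terminates quickly in practice, which is the convergence argument the patch's comment makes.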