author    Irit Katriel <1055913+iritkatriel@users.noreply.github.com>  2023-06-27 19:24:58 (GMT)
committer GitHub <noreply@github.com>  2023-06-27 19:24:58 (GMT)
commit    529088100952b31797a29ef7e0f5716613b32d66 (patch)
tree      ad6c08c0f5d4a3435b4be33eb44dd132a4f832cc
parent    eaa1eae55ea66d74c5303924320185dac74d4eb1 (diff)
gh-106149: move jump target resolution from optimizer to assembler (#106150)
-rw-r--r--  Include/internal/pycore_compile.h    |   8
-rw-r--r--  Include/internal/pycore_flowgraph.h  |   2
-rw-r--r--  Python/assemble.c                    | 111
-rw-r--r--  Python/compile.c                     |  17
-rw-r--r--  Python/flowgraph.c                   |  80
5 files changed, 113 insertions(+), 105 deletions(-)
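
This commit moves jump-target resolution out of the CFG stage (Python/flowgraph.c) and into the assembler (Python/assemble.c): basic blocks no longer carry a b_offset, and each instruction in the flat sequence instead carries i_target (an instruction index) and i_offset (its bytecode offset). A minimal sketch of the relative encoding the assembler produces follows; the helper name and variables are illustrative, not part of the patch:

    /* Sketch: a jump's oparg is the distance, in code units, between
     * the target's offset and the instruction pointer *after* the
     * jump; backward jumps store that distance as a positive value. */
    static int
    relative_oparg(int target_offset, int offset_after_jump)
    {
        return (target_offset < offset_after_jump)
                   ? offset_after_jump - target_offset   /* backward */
                   : target_offset - offset_after_jump;  /* forward  */
    }
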
diff --git a/Include/internal/pycore_compile.h b/Include/internal/pycore_compile.h
index e648173..e204d4d 100644
--- a/Include/internal/pycore_compile.h
+++ b/Include/internal/pycore_compile.h
@@ -28,7 +28,7 @@ extern int _PyAST_Optimize(
int ff_features);
typedef struct {
- int h_offset;
+ int h_label;
int h_startdepth;
int h_preserve_lasti;
} _PyCompile_ExceptHandlerInfo;
@@ -38,6 +38,10 @@ typedef struct {
int i_oparg;
_PyCompilerSrcLocation i_loc;
_PyCompile_ExceptHandlerInfo i_except_handler_info;
+
+ /* Used by the assembler */
+ int i_target;
+ int i_offset;
} _PyCompile_Instruction;
typedef struct {
@@ -85,8 +89,6 @@ int _PyCompile_EnsureArrayLargeEnough(
int _PyCompile_ConstCacheMergeOne(PyObject *const_cache, PyObject **obj);
-int _PyCompile_InstrSize(int opcode, int oparg);
-
/* Access compiler internals for unit testing */
PyAPI_FUNC(PyObject*) _PyCompile_CodeGen(
diff --git a/Include/internal/pycore_flowgraph.h b/Include/internal/pycore_flowgraph.h
index 720feb1..4a01574 100644
--- a/Include/internal/pycore_flowgraph.h
+++ b/Include/internal/pycore_flowgraph.h
@@ -55,8 +55,6 @@ typedef struct _PyCfgBasicblock_ {
int b_predecessors;
/* depth of stack upon entry of block, computed by stackdepth() */
int b_startdepth;
- /* instruction offset for block, computed by assemble_jump_offsets() */
- int b_offset;
/* Basic block is an exception handler that preserves lasti */
unsigned b_preserve_lasti : 1;
/* Used by compiler passes to mark whether they have visited a basic block. */
diff --git a/Python/assemble.c b/Python/assemble.c
index d6213f8..5d566a3 100644
--- a/Python/assemble.c
+++ b/Python/assemble.c
@@ -4,6 +4,7 @@
#include "pycore_code.h" // write_location_entry_start()
#include "pycore_compile.h"
#include "pycore_opcode.h" // _PyOpcode_Caches[] and opcode category macros
+#include "pycore_opcode_utils.h" // IS_BACKWARDS_JUMP_OPCODE
#include "opcode_metadata.h" // IS_PSEUDO_INSTR
@@ -34,6 +35,18 @@ same_location(location a, location b)
a.end_col_offset == b.end_col_offset;
}
+static int
+instr_size(instruction *instr)
+{
+ int opcode = instr->i_opcode;
+ int oparg = instr->i_oparg;
+ assert(!IS_PSEUDO_INSTR(opcode));
+ assert(OPCODE_HAS_ARG(opcode) || oparg == 0);
+ int extended_args = (0xFFFFFF < oparg) + (0xFFFF < oparg) + (0xFF < oparg);
+ int caches = _PyOpcode_Caches[opcode];
+ return extended_args + 1 + caches;
+}
+
struct assembler {
PyObject *a_bytecode; /* bytes containing bytecode */
int a_offset; /* offset into bytecode */
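
As a worked example of the instr_size() added above (hypothetical opargs): each oparg byte beyond the low 8 bits costs one EXTENDED_ARG prefix, and _PyOpcode_Caches[opcode] adds the opcode's inline cache units on top of the opcode itself:

    /* The comparison sum counts required EXTENDED_ARG prefixes: */
    assert((0xFFFFFF < 0x42)     + (0xFFFF < 0x42)     + (0xFF < 0x42)     == 0);
    assert((0xFFFFFF < 0x1234)   + (0xFFFF < 0x1234)   + (0xFF < 0x1234)   == 1);
    assert((0xFFFFFF < 0x123456) + (0xFFFF < 0x123456) + (0xFF < 0x123456) == 2);
    /* e.g. oparg 0x1234 -> 1 (EXTENDED_ARG) + 1 (opcode) + cache units */
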
@@ -118,6 +131,7 @@ assemble_emit_exception_table_item(struct assembler *a, int value, int msb)
static int
assemble_emit_exception_table_entry(struct assembler *a, int start, int end,
+ int handler_offset,
_PyCompile_ExceptHandlerInfo *handler)
{
Py_ssize_t len = PyBytes_GET_SIZE(a->a_except_table);
@@ -126,7 +140,7 @@ assemble_emit_exception_table_entry(struct assembler *a, int start, int end,
}
int size = end-start;
assert(end > start);
- int target = handler->h_offset;
+ int target = handler_offset;
int depth = handler->h_startdepth - 1;
if (handler->h_preserve_lasti > 0) {
depth -= 1;
@@ -145,24 +159,30 @@ assemble_exception_table(struct assembler *a, instr_sequence *instrs)
{
int ioffset = 0;
_PyCompile_ExceptHandlerInfo handler;
- handler.h_offset = -1;
+ handler.h_label = -1;
handler.h_startdepth = -1;
handler.h_preserve_lasti = -1;
int start = -1;
for (int i = 0; i < instrs->s_used; i++) {
instruction *instr = &instrs->s_instrs[i];
- if (instr->i_except_handler_info.h_offset != handler.h_offset) {
- if (handler.h_offset >= 0) {
+ if (instr->i_except_handler_info.h_label != handler.h_label) {
+ if (handler.h_label >= 0) {
+ int handler_offset = instrs->s_instrs[handler.h_label].i_offset;
RETURN_IF_ERROR(
- assemble_emit_exception_table_entry(a, start, ioffset, &handler));
+ assemble_emit_exception_table_entry(a, start, ioffset,
+ handler_offset,
+ &handler));
}
start = ioffset;
handler = instr->i_except_handler_info;
}
- ioffset += _PyCompile_InstrSize(instr->i_opcode, instr->i_oparg);
+ ioffset += instr_size(instr);
}
- if (handler.h_offset >= 0) {
- RETURN_IF_ERROR(assemble_emit_exception_table_entry(a, start, ioffset, &handler));
+ if (handler.h_label >= 0) {
+ int handler_offset = instrs->s_instrs[handler.h_label].i_offset;
+ RETURN_IF_ERROR(assemble_emit_exception_table_entry(a, start, ioffset,
+ handler_offset,
+ &handler));
}
return SUCCESS;
}
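
With h_offset gone, the handler's bytecode offset is recovered at assembly time: h_label indexes the handler's first instruction in the sequence, and that instruction's i_offset (filled in by resolve_jump_offsets, below) is the offset the exception table needs. The lookup, sketched as a standalone helper with a hypothetical name:

    static int
    handler_offset(instr_sequence *instrs, _PyCompile_ExceptHandlerInfo *handler)
    {
        assert(handler->h_label >= 0 && handler->h_label < instrs->s_used);
        return instrs->s_instrs[handler->h_label].i_offset;
    }
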
@@ -329,7 +349,7 @@ assemble_location_info(struct assembler *a, instr_sequence *instrs,
loc = instr->i_loc;
size = 0;
}
- size += _PyCompile_InstrSize(instr->i_opcode, instr->i_oparg);
+ size += instr_size(instr);
}
RETURN_IF_ERROR(assemble_emit_location(a, loc, size));
return SUCCESS;
@@ -385,7 +405,7 @@ assemble_emit_instr(struct assembler *a, instruction *instr)
Py_ssize_t len = PyBytes_GET_SIZE(a->a_bytecode);
_Py_CODEUNIT *code;
- int size = _PyCompile_InstrSize(instr->i_opcode, instr->i_oparg);
+ int size = instr_size(instr);
if (a->a_offset + size >= len / (int)sizeof(_Py_CODEUNIT)) {
if (len > PY_SSIZE_T_MAX / 2) {
return ERROR;
@@ -585,12 +605,83 @@ error:
return co;
}
+static int
+resolve_jump_offsets(instr_sequence *instrs)
+{
+ /* Compute the size of each instruction and fixup jump args.
+ * Replace instruction index with position in bytecode.
+ */
+
+ for (int i = 0; i < instrs->s_used; i++) {
+ instruction *instr = &instrs->s_instrs[i];
+ if (OPCODE_HAS_JUMP(instr->i_opcode)) {
+ instr->i_target = instr->i_oparg;
+ }
+ }
+
+ int extended_arg_recompile;
+
+ do {
+ int totsize = 0;
+ for (int i = 0; i < instrs->s_used; i++) {
+ instruction *instr = &instrs->s_instrs[i];
+ instr->i_offset = totsize;
+ int isize = instr_size(instr);
+ totsize += isize;
+ }
+ extended_arg_recompile = 0;
+
+ int offset = 0;
+ for (int i = 0; i < instrs->s_used; i++) {
+ instruction *instr = &instrs->s_instrs[i];
+ int isize = instr_size(instr);
+ /* jump offsets are computed relative to
+ * the instruction pointer after fetching
+ * the jump instruction.
+ */
+ offset += isize;
+ if (OPCODE_HAS_JUMP(instr->i_opcode)) {
+ instruction *target = &instrs->s_instrs[instr->i_target];
+ instr->i_oparg = target->i_offset;
+ if (instr->i_oparg < offset) {
+ assert(IS_BACKWARDS_JUMP_OPCODE(instr->i_opcode));
+ instr->i_oparg = offset - instr->i_oparg;
+ }
+ else {
+ assert(!IS_BACKWARDS_JUMP_OPCODE(instr->i_opcode));
+ instr->i_oparg = instr->i_oparg - offset;
+ }
+ if (instr_size(instr) != isize) {
+ extended_arg_recompile = 1;
+ }
+ }
+ }
+ /* XXX: This is an awful hack that could hurt performance, but
+ on the bright side it should work until we come up
+ with a better solution.
+
+ The issue is that in the first loop instr_size() is
+ called, and it requires i_oparg be set appropriately.
+ There is a bootstrap problem because i_oparg is
+ calculated in the second loop above.
+
+ So we loop until we stop seeing new EXTENDED_ARGs.
+ The only EXTENDED_ARGs that could be popping up are
+ ones in jump instructions. So this should converge
+ fairly quickly.
+ */
+ } while (extended_arg_recompile);
+ return SUCCESS;
+}
PyCodeObject *
_PyAssemble_MakeCodeObject(_PyCompile_CodeUnitMetadata *umd, PyObject *const_cache,
PyObject *consts, int maxdepth, instr_sequence *instrs,
int nlocalsplus, int code_flags, PyObject *filename)
{
+ if (resolve_jump_offsets(instrs) < 0) {
+ return NULL;
+ }
PyCodeObject *co = NULL;
struct assembler a;
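
Why the do/while in resolve_jump_offsets converges: across passes, relative opargs can only grow (code between a jump and its target never shrinks), so instruction sizes are monotonically nondecreasing and bounded, and recomputation reaches a fixed point. A hypothetical two-pass trace:

    /* pass 1: sizes assume small opargs; a jump's oparg comes out as
     *         0x100 (> 0xFF), so instr_size() grows by one unit and
     *         extended_arg_recompile is set;
     * pass 2: offsets shift to account for the extra EXTENDED_ARG,
     *         the oparg becomes 0x101 -- still one prefix, so sizes
     *         are unchanged and the loop exits. */
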
diff --git a/Python/compile.c b/Python/compile.c
index 5a05605..d080144 100644
--- a/Python/compile.c
+++ b/Python/compile.c
@@ -131,16 +131,6 @@ enum {
};
-int
-_PyCompile_InstrSize(int opcode, int oparg)
-{
- assert(!IS_PSEUDO_INSTR(opcode));
- assert(OPCODE_HAS_ARG(opcode) || oparg == 0);
- int extended_args = (0xFFFFFF < oparg) + (0xFFFF < oparg) + (0xFF < oparg);
- int caches = _PyOpcode_Caches[opcode];
- return extended_args + 1 + caches;
-}
-
typedef _PyCompile_Instruction instruction;
typedef _PyCompile_InstructionSequence instr_sequence;
@@ -7717,17 +7707,20 @@ cfg_to_instr_sequence(cfg_builder *g, instr_sequence *seq)
RETURN_IF_ERROR(instr_sequence_use_label(seq, b->b_label.id));
for (int i = 0; i < b->b_iused; i++) {
cfg_instr *instr = &b->b_instr[i];
+ if (OPCODE_HAS_JUMP(instr->i_opcode)) {
+ instr->i_oparg = instr->i_target->b_label.id;
+ }
RETURN_IF_ERROR(
instr_sequence_addop(seq, instr->i_opcode, instr->i_oparg, instr->i_loc));
_PyCompile_ExceptHandlerInfo *hi = &seq->s_instrs[seq->s_used-1].i_except_handler_info;
if (instr->i_except != NULL) {
- hi->h_offset = instr->i_except->b_offset;
+ hi->h_label = instr->i_except->b_label.id;
hi->h_startdepth = instr->i_except->b_startdepth;
hi->h_preserve_lasti = instr->i_except->b_preserve_lasti;
}
else {
- hi->h_offset = -1;
+ hi->h_label = -1;
}
}
}
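
On the compile.c side, a jump's i_oparg now temporarily carries the target block's label id, and exception-handler info records h_label rather than a precomputed b_offset. Assuming the instruction sequence's label bookkeeping maps label ids to instruction indices before assembly (as the assembler's direct indexing of s_instrs suggests), resolve_jump_offsets() can then turn both into bytecode offsets. Sketched as an invariant:

    /* Invariant after cfg_to_instr_sequence (sketch):
     *   jump instr:   i_oparg holds the target's label id
     *   handler info: h_label holds the handler block's label id
     * and after label resolution plus resolve_jump_offsets():
     *   jump instr:   i_oparg holds a relative bytecode offset
     *   handler ref:  s_instrs[h_label].i_offset is the handler's offset */
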
diff --git a/Python/flowgraph.c b/Python/flowgraph.c
index 39f780e..a6b2a9f 100644
--- a/Python/flowgraph.c
+++ b/Python/flowgraph.c
@@ -166,22 +166,6 @@ _PyBasicblock_InsertInstruction(basicblock *block, int pos, cfg_instr *instr) {
return SUCCESS;
}
-static int
-instr_size(cfg_instr *instruction)
-{
- return _PyCompile_InstrSize(instruction->i_opcode, instruction->i_oparg);
-}
-
-static int
-blocksize(basicblock *b)
-{
- int size = 0;
- for (int i = 0; i < b->b_iused; i++) {
- size += instr_size(&b->b_instr[i]);
- }
- return size;
-}
-
/* For debugging purposes only */
#if 0
static void
@@ -212,9 +196,9 @@ static void
dump_basicblock(const basicblock *b)
{
const char *b_return = basicblock_returns(b) ? "return " : "";
- fprintf(stderr, "%d: [EH=%d CLD=%d WRM=%d NO_FT=%d %p] used: %d, depth: %d, offset: %d %s\n",
+ fprintf(stderr, "%d: [EH=%d CLD=%d WRM=%d NO_FT=%d %p] used: %d, depth: %d, %s\n",
b->b_label.id, b->b_except_handler, b->b_cold, b->b_warm, BB_NO_FALLTHROUGH(b), b, b->b_iused,
- b->b_startdepth, b->b_offset, b_return);
+ b->b_startdepth, b_return);
if (b->b_instr) {
int i;
for (i = 0; i < b->b_iused; i++) {
@@ -480,71 +464,11 @@ normalize_jumps(_PyCfgBuilder *g)
return SUCCESS;
}
-static void
-resolve_jump_offsets(basicblock *entryblock)
-{
- int bsize, totsize, extended_arg_recompile;
-
- /* Compute the size of each block and fixup jump args.
- Replace block pointer with position in bytecode. */
- do {
- totsize = 0;
- for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
- bsize = blocksize(b);
- b->b_offset = totsize;
- totsize += bsize;
- }
- extended_arg_recompile = 0;
- for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
- bsize = b->b_offset;
- for (int i = 0; i < b->b_iused; i++) {
- cfg_instr *instr = &b->b_instr[i];
- int isize = instr_size(instr);
- /* jump offsets are computed relative to
- * the instruction pointer after fetching
- * the jump instruction.
- */
- bsize += isize;
- if (is_jump(instr)) {
- instr->i_oparg = instr->i_target->b_offset;
- if (instr->i_oparg < bsize) {
- assert(IS_BACKWARDS_JUMP_OPCODE(instr->i_opcode));
- instr->i_oparg = bsize - instr->i_oparg;
- }
- else {
- assert(!IS_BACKWARDS_JUMP_OPCODE(instr->i_opcode));
- instr->i_oparg -= bsize;
- }
- if (instr_size(instr) != isize) {
- extended_arg_recompile = 1;
- }
- }
- }
- }
-
- /* XXX: This is an awful hack that could hurt performance, but
- on the bright side it should work until we come up
- with a better solution.
-
- The issue is that in the first loop blocksize() is called
- which calls instr_size() which requires i_oparg be set
- appropriately. There is a bootstrap problem because
- i_oparg is calculated in the second loop above.
-
- So we loop until we stop seeing new EXTENDED_ARGs.
- The only EXTENDED_ARGs that could be popping up are
- ones in jump instructions. So this should converge
- fairly quickly.
- */
- } while (extended_arg_recompile);
-}
-
int
_PyCfg_ResolveJumps(_PyCfgBuilder *g)
{
RETURN_IF_ERROR(normalize_jumps(g));
assert(no_redundant_jumps(g));
- resolve_jump_offsets(g->g_entryblock);
return SUCCESS;
}