Diffstat (limited to 'Python/optimizer_analysis.c')
-rw-r--r-- | Python/optimizer_analysis.c | 555 |
1 file changed, 511 insertions, 44 deletions
diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c
index b14e695..e02ca4d 100644
--- a/Python/optimizer_analysis.c
+++ b/Python/optimizer_analysis.c
@@ -1,3 +1,14 @@
+/*
+ * This file contains the support code for CPython's uops redundancy eliminator.
+ * It also performs some simple optimizations.
+ * It performs a traditional data-flow analysis[1] over the trace of uops.
+ * Using the information gained, it chooses to emit, or skip certain instructions
+ * if possible.
+ *
+ * [1] For information on data-flow analysis, please see
+ * https://clang.llvm.org/docs/DataFlowAnalysisIntro.html
+ *
+ * */
 #include "Python.h"
 #include "opcode.h"
 #include "pycore_dict.h"
@@ -9,10 +20,355 @@
 #include "pycore_dict.h"
 #include "pycore_long.h"
 #include "cpython/optimizer.h"
+#include "pycore_optimizer.h"
+#include "pycore_object.h"
+#include "pycore_dict.h"
+#include "pycore_function.h"
+#include "pycore_uop_metadata.h"
+#include "pycore_uop_ids.h"
+#include "pycore_range.h"
+
+#include <stdarg.h>
 #include <stdbool.h>
 #include <stdint.h>
 #include <stddef.h>
-#include "pycore_optimizer.h"
+
+// Holds locals, stack, locals, stack ... co_consts (in that order)
+#define MAX_ABSTRACT_INTERP_SIZE 4096
+
+#define OVERALLOCATE_FACTOR 5
+
+#define TY_ARENA_SIZE (UOP_MAX_TRACE_LENGTH * OVERALLOCATE_FACTOR)
+
+// Need extras for root frame and for overflow frame (see TRACE_STACK_PUSH())
+#define MAX_ABSTRACT_FRAME_DEPTH (TRACE_STACK_SIZE + 2)
+
+#ifdef Py_DEBUG
+    static const char *const DEBUG_ENV = "PYTHON_OPT_DEBUG";
+    static inline int get_lltrace(void) {
+        char *uop_debug = Py_GETENV(DEBUG_ENV);
+        int lltrace = 0;
+        if (uop_debug != NULL && *uop_debug >= '0') {
+            lltrace = *uop_debug - '0';  // TODO: Parse an int and all that
+        }
+        return lltrace;
+    }
+    #define DPRINTF(level, ...) \
+    if (get_lltrace() >= (level)) { printf(__VA_ARGS__); }
+#else
+    #define DPRINTF(level, ...)
+#endif
+
+
+// Flags for below.
+#define KNOWN 1 << 0
+#define TRUE_CONST 1 << 1
+#define IS_NULL 1 << 2
+#define NOT_NULL 1 << 3
+
+typedef struct {
+    int flags;
+    PyTypeObject *typ;
+    // constant propagated value (might be NULL)
+    PyObject *const_val;
+} _Py_UOpsSymType;
+
+
+typedef struct _Py_UOpsAbstractFrame {
+    // Max stacklen
+    int stack_len;
+    int locals_len;
+
+    _Py_UOpsSymType **stack_pointer;
+    _Py_UOpsSymType **stack;
+    _Py_UOpsSymType **locals;
+} _Py_UOpsAbstractFrame;
+
+
+typedef struct ty_arena {
+    int ty_curr_number;
+    int ty_max_number;
+    _Py_UOpsSymType arena[TY_ARENA_SIZE];
+} ty_arena;
+
+// Tier 2 types meta interpreter
+typedef struct _Py_UOpsAbstractInterpContext {
+    PyObject_HEAD
+    // The current "executing" frame.
+    _Py_UOpsAbstractFrame *frame;
+    _Py_UOpsAbstractFrame frames[MAX_ABSTRACT_FRAME_DEPTH];
+    int curr_frame_depth;
+
+    // Arena for the symbolic types.
+    ty_arena t_arena;
+
+    _Py_UOpsSymType **n_consumed;
+    _Py_UOpsSymType **limit;
+    _Py_UOpsSymType *locals_and_stack[MAX_ABSTRACT_INTERP_SIZE];
+} _Py_UOpsAbstractInterpContext;
+
+static inline _Py_UOpsSymType* sym_new_unknown(_Py_UOpsAbstractInterpContext *ctx);
+
+// 0 on success, -1 on error.
+static _Py_UOpsAbstractFrame *
+ctx_frame_new(
+    _Py_UOpsAbstractInterpContext *ctx,
+    PyCodeObject *co,
+    _Py_UOpsSymType **localsplus_start,
+    int n_locals_already_filled,
+    int curr_stackentries
+)
+{
+    assert(ctx->curr_frame_depth < MAX_ABSTRACT_FRAME_DEPTH);
+    _Py_UOpsAbstractFrame *frame = &ctx->frames[ctx->curr_frame_depth];
+
+    frame->stack_len = co->co_stacksize;
+    frame->locals_len = co->co_nlocalsplus;
+
+    frame->locals = localsplus_start;
+    frame->stack = frame->locals + co->co_nlocalsplus;
+    frame->stack_pointer = frame->stack + curr_stackentries;
+    ctx->n_consumed = localsplus_start + (co->co_nlocalsplus + co->co_stacksize);
+    if (ctx->n_consumed >= ctx->limit) {
+        return NULL;
+    }
+
+
+    // Initialize with the initial state of all local variables
+    for (int i = n_locals_already_filled; i < co->co_nlocalsplus; i++) {
+        _Py_UOpsSymType *local = sym_new_unknown(ctx);
+        if (local == NULL) {
+            return NULL;
+        }
+        frame->locals[i] = local;
+    }
+
+
+    // Initialize the stack as well
+    for (int i = 0; i < curr_stackentries; i++) {
+        _Py_UOpsSymType *stackvar = sym_new_unknown(ctx);
+        if (stackvar == NULL) {
+            return NULL;
+        }
+        frame->stack[i] = stackvar;
+    }
+
+    return frame;
+}
+
+static void
+abstractcontext_fini(_Py_UOpsAbstractInterpContext *ctx)
+{
+    if (ctx == NULL) {
+        return;
+    }
+    ctx->curr_frame_depth = 0;
+    int tys = ctx->t_arena.ty_curr_number;
+    for (int i = 0; i < tys; i++) {
+        Py_CLEAR(ctx->t_arena.arena[i].const_val);
+    }
+}
+
+static int
+abstractcontext_init(
+    _Py_UOpsAbstractInterpContext *ctx,
+    PyCodeObject *co,
+    int curr_stacklen,
+    int ir_entries
+)
+{
+    ctx->limit = ctx->locals_and_stack + MAX_ABSTRACT_INTERP_SIZE;
+    ctx->n_consumed = ctx->locals_and_stack;
+#ifdef Py_DEBUG // Aids debugging a little. There should never be NULL in the abstract interpreter.
+    for (int i = 0 ; i < MAX_ABSTRACT_INTERP_SIZE; i++) {
+        ctx->locals_and_stack[i] = NULL;
+    }
+#endif
+
+    // Setup the arena for sym expressions.
+    ctx->t_arena.ty_curr_number = 0;
+    ctx->t_arena.ty_max_number = TY_ARENA_SIZE;
+
+    // Frame setup
+    ctx->curr_frame_depth = 0;
+    _Py_UOpsAbstractFrame *frame = ctx_frame_new(ctx, co, ctx->n_consumed, 0, curr_stacklen);
+    if (frame == NULL) {
+        return -1;
+    }
+    ctx->curr_frame_depth++;
+    ctx->frame = frame;
+    return 0;
+}
+
+
+static int
+ctx_frame_pop(
+    _Py_UOpsAbstractInterpContext *ctx
+)
+{
+    _Py_UOpsAbstractFrame *frame = ctx->frame;
+
+    ctx->n_consumed = frame->locals;
+    ctx->curr_frame_depth--;
+    assert(ctx->curr_frame_depth >= 1);
+    ctx->frame = &ctx->frames[ctx->curr_frame_depth - 1];
+
+    return 0;
+}
+
+
+// Takes a borrowed reference to const_val, turns that into a strong reference.
+static _Py_UOpsSymType*
+sym_new(_Py_UOpsAbstractInterpContext *ctx,
+        PyObject *const_val)
+{
+    _Py_UOpsSymType *self = &ctx->t_arena.arena[ctx->t_arena.ty_curr_number];
+    if (ctx->t_arena.ty_curr_number >= ctx->t_arena.ty_max_number) {
+        OPT_STAT_INC(optimizer_failure_reason_no_memory);
+        DPRINTF(1, "out of space for symbolic expression type\n");
+        return NULL;
+    }
+    ctx->t_arena.ty_curr_number++;
+    self->const_val = NULL;
+    self->typ = NULL;
+    self->flags = 0;
+
+    if (const_val != NULL) {
+        self->const_val = Py_NewRef(const_val);
+    }
+
+    return self;
+}
+
+static inline void
+sym_set_flag(_Py_UOpsSymType *sym, int flag)
+{
+    sym->flags |= flag;
+}
+
+static inline void
+sym_clear_flag(_Py_UOpsSymType *sym, int flag)
+{
+    sym->flags &= (~flag);
+}
+
+static inline bool
+sym_has_flag(_Py_UOpsSymType *sym, int flag)
+{
+    return (sym->flags & flag) != 0;
+}
+
+static inline bool
+sym_is_known(_Py_UOpsSymType *sym)
+{
+    return sym_has_flag(sym, KNOWN);
+}
+
+static inline bool
+sym_is_not_null(_Py_UOpsSymType *sym)
+{
+    return (sym->flags & (IS_NULL | NOT_NULL)) == NOT_NULL;
+}
+
+static inline bool
+sym_is_null(_Py_UOpsSymType *sym)
+{
+    return (sym->flags & (IS_NULL | NOT_NULL)) == IS_NULL;
+}
+
+static inline void
+sym_set_type(_Py_UOpsSymType *sym, PyTypeObject *tp)
+{
+    assert(PyType_Check(tp));
+    sym->typ = tp;
+    sym_set_flag(sym, KNOWN);
+    sym_set_flag(sym, NOT_NULL);
+}
+
+static inline void
+sym_set_null(_Py_UOpsSymType *sym)
+{
+    sym_set_flag(sym, IS_NULL);
+    sym_set_flag(sym, KNOWN);
+}
+
+
+static inline _Py_UOpsSymType*
+sym_new_unknown(_Py_UOpsAbstractInterpContext *ctx)
+{
+    return sym_new(ctx,NULL);
+}
+
+static inline _Py_UOpsSymType*
+sym_new_known_notnull(_Py_UOpsAbstractInterpContext *ctx)
+{
+    _Py_UOpsSymType *res = sym_new_unknown(ctx);
+    if (res == NULL) {
+        return NULL;
+    }
+    sym_set_flag(res, NOT_NULL);
+    return res;
+}
+
+static inline _Py_UOpsSymType*
+sym_new_known_type(_Py_UOpsAbstractInterpContext *ctx,
+                   PyTypeObject *typ)
+{
+    _Py_UOpsSymType *res = sym_new(ctx,NULL);
+    if (res == NULL) {
+        return NULL;
+    }
+    sym_set_type(res, typ);
+    return res;
+}
+
+// Takes a borrowed reference to const_val.
+static inline _Py_UOpsSymType*
+sym_new_const(_Py_UOpsAbstractInterpContext *ctx, PyObject *const_val)
+{
+    assert(const_val != NULL);
+    _Py_UOpsSymType *temp = sym_new(
+        ctx,
+        const_val
+    );
+    if (temp == NULL) {
+        return NULL;
+    }
+    sym_set_type(temp, Py_TYPE(const_val));
+    sym_set_flag(temp, TRUE_CONST);
+    sym_set_flag(temp, KNOWN);
+    sym_set_flag(temp, NOT_NULL);
+    return temp;
+}
+
+static _Py_UOpsSymType*
+sym_new_null(_Py_UOpsAbstractInterpContext *ctx)
+{
+    _Py_UOpsSymType *null_sym = sym_new_unknown(ctx);
+    if (null_sym == NULL) {
+        return NULL;
+    }
+    sym_set_null(null_sym);
+    return null_sym;
+}
+
+
+static inline bool
+sym_matches_type(_Py_UOpsSymType *sym, PyTypeObject *typ)
+{
+    assert(typ == NULL || PyType_Check(typ));
+    if (!sym_has_flag(sym, KNOWN)) {
+        return false;
+    }
+    return sym->typ == typ;
+}
+
+
+static inline bool
+op_is_end(uint32_t opcode)
+{
+    return opcode == _EXIT_TRACE || opcode == _JUMP_TO_TOP;
+}
 
 static int
 get_mutations(PyObject* dict) {
@@ -199,14 +555,138 @@ remove_globals(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer,
                 builtins = func->func_builtins;
                 break;
             }
-            case _JUMP_TO_TOP:
-            case _EXIT_TRACE:
-                return 1;
+            default:
+                if (op_is_end(opcode)) {
+                    return 1;
+                }
+                break;
+        }
+    }
+    return 0;
+}
+
+
+
+#define STACK_LEVEL() ((int)(stack_pointer - ctx->frame->stack))
+
+#define GETLOCAL(idx) ((ctx->frame->locals[idx]))
+
+#define REPLACE_OP(INST, OP, ARG, OPERAND) \
+    INST->opcode = OP; \
+    INST->oparg = ARG; \
+    INST->operand = OPERAND;
+
+#define _LOAD_ATTR_NOT_NULL \
+    do { \
+    attr = sym_new_known_notnull(ctx); \
+    if (attr == NULL) { \
+        goto error; \
+    } \
+    null = sym_new_null(ctx); \
+    if (null == NULL) { \
+        goto error; \
+    } \
+    } while (0);
+
+
+/* 1 for success, 0 for not ready, cannot error at the moment. */
+static int
+uop_redundancy_eliminator(
+    PyCodeObject *co,
+    _PyUOpInstruction *trace,
+    int trace_len,
+    int curr_stacklen
+)
+{
+
+    _Py_UOpsAbstractInterpContext context;
+    _Py_UOpsAbstractInterpContext *ctx = &context;
+
+    if (abstractcontext_init(
+        ctx,
+        co, curr_stacklen,
+        trace_len) < 0) {
+        goto out_of_space;
+    }
+
+    for (_PyUOpInstruction *this_instr = trace;
+         this_instr < trace + trace_len && !op_is_end(this_instr->opcode);
+         this_instr++) {
+
+        int oparg = this_instr->oparg;
+        uint32_t opcode = this_instr->opcode;
+
+        _Py_UOpsSymType **stack_pointer = ctx->frame->stack_pointer;
+
+        DPRINTF(3, "Abstract interpreting %s:%d ",
+                _PyOpcode_uop_name[opcode],
+                oparg);
+        switch (opcode) {
+#include "tier2_redundancy_eliminator_cases.c.h"
+
+            default:
+                DPRINTF(1, "Unknown opcode in abstract interpreter\n");
+                Py_UNREACHABLE();
         }
+        assert(ctx->frame != NULL);
+        DPRINTF(3, " stack_level %d\n", STACK_LEVEL());
+        ctx->frame->stack_pointer = stack_pointer;
+        assert(STACK_LEVEL() >= 0);
     }
+
+    abstractcontext_fini(ctx);
+    return 1;
+
+out_of_space:
+    DPRINTF(1, "Out of space in abstract interpreter\n");
+    abstractcontext_fini(ctx);
+    return 0;
+
+error:
+    DPRINTF(1, "Encountered error in abstract interpreter\n");
+    abstractcontext_fini(ctx);
     return 0;
 }
+
+static void
+remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size)
+{
+    int last_set_ip = -1;
+    bool maybe_invalid = false;
+    for (int pc = 0; pc < buffer_size; pc++) {
+        int opcode = buffer[pc].opcode;
+        if (opcode == _SET_IP) {
+            buffer[pc].opcode = NOP;
+            last_set_ip = pc;
+        }
+        else if (opcode == _CHECK_VALIDITY) {
+            if (maybe_invalid) {
+                maybe_invalid = false;
+            }
+            else {
+                buffer[pc].opcode = NOP;
+            }
+        }
+        else if (op_is_end(opcode)) {
+            break;
+        }
+        else {
+            if (_PyUop_Flags[opcode] & HAS_ESCAPES_FLAG) {
+                maybe_invalid = true;
+                if (last_set_ip >= 0) {
+                    buffer[last_set_ip].opcode = _SET_IP;
+                }
+            }
+            if ((_PyUop_Flags[opcode] & HAS_ERROR_FLAG) || opcode == _PUSH_FRAME) {
+                if (last_set_ip >= 0) {
+                    buffer[last_set_ip].opcode = _SET_IP;
+                }
+            }
+        }
+    }
+}
+
 static void
 peephole_opt(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer, int buffer_size)
 {
@@ -250,44 +730,9 @@ peephole_opt(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer, int buffer_s
     }
 }
 
-static void
-remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size)
-{
-    int last_set_ip = -1;
-    bool maybe_invalid = false;
-    for (int pc = 0; pc < buffer_size; pc++) {
-        int opcode = buffer[pc].opcode;
-        if (opcode == _SET_IP) {
-            buffer[pc].opcode = NOP;
-            last_set_ip = pc;
-        }
-        else if (opcode == _CHECK_VALIDITY) {
-            if (maybe_invalid) {
-                maybe_invalid = false;
-            }
-            else {
-                buffer[pc].opcode = NOP;
-            }
-        }
-        else if (opcode == _JUMP_TO_TOP || opcode == _EXIT_TRACE) {
-            break;
-        }
-        else {
-            if (_PyUop_Flags[opcode] & HAS_ESCAPES_FLAG) {
-                maybe_invalid = true;
-                if (last_set_ip >= 0) {
-                    buffer[last_set_ip].opcode = _SET_IP;
-                }
-            }
-            if ((_PyUop_Flags[opcode] & HAS_ERROR_FLAG) || opcode == _PUSH_FRAME) {
-                if (last_set_ip >= 0) {
-                    buffer[last_set_ip].opcode = _SET_IP;
-                }
-            }
-        }
-    }
-}
-
+// 0 - failure, no error raised, just fall back to Tier 1
+// -1 - failure, and raise error
+// 1 - optimizer success
 int
 _Py_uop_analyze_and_optimize(
     _PyInterpreterFrame *frame,
@@ -297,11 +742,33 @@ _Py_uop_analyze_and_optimize(
     _PyBloomFilter *dependencies
 )
 {
+    OPT_STAT_INC(optimizer_attempts);
+
     int err = remove_globals(frame, buffer, buffer_size, dependencies);
-    if (err <= 0) {
-        return err;
+    if (err == 0) {
+        goto not_ready;
+    }
+    if (err < 0) {
+        goto error;
     }
+
     peephole_opt(frame, buffer, buffer_size);
+
+    err = uop_redundancy_eliminator(
+        (PyCodeObject *)frame->f_executable, buffer,
+        buffer_size, curr_stacklen);
+
+    if (err == 0) {
+        goto not_ready;
+    }
+    assert(err == 1);
+    remove_unneeded_uops(buffer, buffer_size);
+
+    OPT_STAT_INC(optimizer_successes);
     return 1;
+not_ready:
+    return 0;
+error:
+    return -1;
 }
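
The pass added above is easiest to see on a toy example. The sketch below is written for this page only and is not CPython code: the Inst/Sym structs, the opcode names and eliminate_redundant_guards() are invented for illustration. It shows the same data-flow idea that uop_redundancy_eliminator applies with _Py_UOpsSymType and the generated cases in tier2_redundancy_eliminator_cases.c.h: walk the straight-line trace once, keep one abstract value per stack slot, and rewrite instructions (here, type guards) whose outcome the analysis has already proven.

/* toy_guard_elim.c -- illustrative only; nothing here is a CPython API. */
#include <stdbool.h>
#include <stdio.h>

typedef enum { LOAD_CONST, ADD_INT, GUARD_INT, NOP, END } Op;
typedef struct { Op op; long arg; } Inst;

/* Abstract value for one stack slot: what the analysis knows so far. */
typedef struct { bool known_int; bool known_const; long value; } Sym;

static const char *op_name[] = {
    "LOAD_CONST", "ADD_INT", "GUARD_INT", "NOP", "END"
};

static void
eliminate_redundant_guards(Inst *trace)
{
    Sym stack[16];
    int sp = 0;                                   /* abstract stack pointer */

    for (int pc = 0; trace[pc].op != END; pc++) {
        switch (trace[pc].op) {
        case LOAD_CONST:                          /* push a fully known value */
            stack[sp++] = (Sym){ true, true, trace[pc].arg };
            break;
        case GUARD_INT:                           /* runtime type check of TOS */
            if (stack[sp - 1].known_int) {
                trace[pc].op = NOP;               /* provably redundant: drop it */
            }
            else {
                stack[sp - 1].known_int = true;   /* refined once the guard passes */
            }
            break;
        case ADD_INT: {                           /* pop two ints, push their sum */
            Sym r = stack[--sp];
            Sym l = stack[--sp];
            Sym out = { true, l.known_const && r.known_const, l.value + r.value };
            stack[sp++] = out;                    /* propagate the facts forward */
            break;
        }
        default:
            break;
        }
    }
}

int main(void)
{
    Inst trace[] = {
        { LOAD_CONST, 2 }, { GUARD_INT, 0 },      /* guard is redundant: 2 is an int */
        { LOAD_CONST, 3 }, { GUARD_INT, 0 },      /* likewise */
        { ADD_INT, 0 },    { GUARD_INT, 0 },      /* the sum is known to be an int too */
        { END, 0 },
    };
    eliminate_redundant_guards(trace);
    for (int pc = 0; trace[pc].op != END; pc++) {
        printf("%s\n", op_name[trace[pc].op]);
    }
    return 0;
}

Compiled with a plain C compiler, the program prints the trace with all three GUARD_INT uops replaced by NOP, which is the same kind of in-place rewrite the real pass performs on the uop buffer via REPLACE_OP.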