diff options
author | Saul Shanabrook <s.shanabrook@gmail.com> | 2024-06-08 09:41:45 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-06-08 09:41:45 (GMT) |
commit | 55402d3232ca400ebafe4fe3bd70f252304ebe07 (patch) | |
tree | ac32d1d0583c4a2e4f82a7352fdb836c49145d9a /Python | |
parent | 2080425154d235b4b7dcc9a8a2f58e71769125ca (diff) | |
download | cpython-55402d3232ca400ebafe4fe3bd70f252304ebe07.zip cpython-55402d3232ca400ebafe4fe3bd70f252304ebe07.tar.gz cpython-55402d3232ca400ebafe4fe3bd70f252304ebe07.tar.bz2 |
gh-119258: Eliminate Type Guards in Tier 2 Optimizer with Watcher (GH-119365)
Co-authored-by: parmeggiani <parmeggiani@spaziodati.eu>
Co-authored-by: dpdani <git@danieleparmeggiani.me>
Co-authored-by: blurb-it[bot] <43283697+blurb-it[bot]@users.noreply.github.com>
Co-authored-by: Brandt Bucher <brandtbucher@microsoft.com>
Co-authored-by: Ken Jin <kenjin@python.org>
Diffstat (limited to 'Python')
-rw-r--r-- | Python/optimizer_analysis.c | 16 | ||||
-rw-r--r-- | Python/optimizer_bytecodes.c | 37 | ||||
-rw-r--r-- | Python/optimizer_cases.c.h | 33 | ||||
-rw-r--r-- | Python/optimizer_symbols.c | 46 |
4 files changed, 107 insertions, 25 deletions
diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c
index e5d3793..75d1d9f 100644
--- a/Python/optimizer_analysis.c
+++ b/Python/optimizer_analysis.c
@@ -79,6 +79,7 @@ increment_mutations(PyObject* dict) {
  * so we don't need to check that they haven't been used */
 #define BUILTINS_WATCHER_ID 0
 #define GLOBALS_WATCHER_ID 1
+#define TYPE_WATCHER_ID 0
 
 static int
 globals_watcher_callback(PyDict_WatchEvent event, PyObject* dict,
@@ -92,6 +93,14 @@ globals_watcher_callback(PyDict_WatchEvent event, PyObject* dict,
     return 0;
 }
 
+static int
+type_watcher_callback(PyTypeObject* type)
+{
+    _Py_Executors_InvalidateDependency(_PyInterpreterState_GET(), type, 1);
+    PyType_Unwatch(TYPE_WATCHER_ID, (PyObject *)type);
+    return 0;
+}
+
 static PyObject *
 convert_global_to_const(_PyUOpInstruction *inst, PyObject *obj)
 {
@@ -167,6 +176,9 @@ remove_globals(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer,
     if (interp->dict_state.watchers[GLOBALS_WATCHER_ID] == NULL) {
         interp->dict_state.watchers[GLOBALS_WATCHER_ID] = globals_watcher_callback;
     }
+    if (interp->type_watchers[TYPE_WATCHER_ID] == NULL) {
+        interp->type_watchers[TYPE_WATCHER_ID] = type_watcher_callback;
+    }
     for (int pc = 0; pc < buffer_size; pc++) {
         _PyUOpInstruction *inst = &buffer[pc];
         int opcode = inst->opcode;
@@ -310,9 +322,11 @@ remove_globals(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer,
 #define sym_has_type _Py_uop_sym_has_type
 #define sym_get_type _Py_uop_sym_get_type
 #define sym_matches_type _Py_uop_sym_matches_type
+#define sym_matches_type_version _Py_uop_sym_matches_type_version
 #define sym_set_null(SYM) _Py_uop_sym_set_null(ctx, SYM)
 #define sym_set_non_null(SYM) _Py_uop_sym_set_non_null(ctx, SYM)
 #define sym_set_type(SYM, TYPE) _Py_uop_sym_set_type(ctx, SYM, TYPE)
+#define sym_set_type_version(SYM, VERSION) _Py_uop_sym_set_type_version(ctx, SYM, VERSION)
 #define sym_set_const(SYM, CNST) _Py_uop_sym_set_const(ctx, SYM, CNST)
 #define sym_is_bottom _Py_uop_sym_is_bottom
 #define sym_truthiness _Py_uop_sym_truthiness
@@ -395,7 +409,7 @@ optimize_uops(
     _PyUOpInstruction *corresponding_check_stack = NULL;
 
     _Py_uop_abstractcontext_init(ctx);
-    _Py_UOpsAbstractFrame *frame = _Py_uop_frame_new(ctx, co, ctx->n_consumed, 0, curr_stacklen);
+    _Py_UOpsAbstractFrame *frame = _Py_uop_frame_new(ctx, co, curr_stacklen, NULL, 0);
     if (frame == NULL) {
         return -1;
     }
diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c
index a2cb4c0..e6fb85a 100644
--- a/Python/optimizer_bytecodes.c
+++ b/Python/optimizer_bytecodes.c
@@ -21,11 +21,13 @@ typedef struct _Py_UOpsAbstractFrame _Py_UOpsAbstractFrame;
 #define sym_new_const _Py_uop_sym_new_const
 #define sym_new_null _Py_uop_sym_new_null
 #define sym_matches_type _Py_uop_sym_matches_type
+#define sym_matches_type_version _Py_uop_sym_matches_type_version
 #define sym_get_type _Py_uop_sym_get_type
 #define sym_has_type _Py_uop_sym_has_type
 #define sym_set_null(SYM) _Py_uop_sym_set_null(ctx, SYM)
 #define sym_set_non_null(SYM) _Py_uop_sym_set_non_null(ctx, SYM)
 #define sym_set_type(SYM, TYPE) _Py_uop_sym_set_type(ctx, SYM, TYPE)
+#define sym_set_type_version(SYM, VERSION) _Py_uop_sym_set_type_version(ctx, SYM, VERSION)
 #define sym_set_const(SYM, CNST) _Py_uop_sym_set_const(ctx, SYM, CNST)
 #define sym_is_bottom _Py_uop_sym_is_bottom
 #define frame_new _Py_uop_frame_new
@@ -113,6 +115,29 @@ dummy_func(void) {
         sym_set_type(right, &PyLong_Type);
     }
 
+    op(_GUARD_TYPE_VERSION, (type_version/2, owner -- owner)) {
+        assert(type_version);
+        if (sym_matches_type_version(owner, type_version)) {
+            REPLACE_OP(this_instr, _NOP, 0, 0);
+        } else {
+            // add watcher so that whenever the type changes we invalidate this
+            PyTypeObject *type = _PyType_LookupByVersion(type_version);
+            // if the type is null, it was not found in the cache (there was a conflict)
+            // with the key, in which case we can't trust the version
+            if (type) {
+                // if the type version was set properly, then add a watcher
+                // if it wasn't this means that the type version was previously set to something else
+                // and we set the owner to bottom, so we don't need to add a watcher because we must have
+                // already added one earlier.
+                if (sym_set_type_version(owner, type_version)) {
+                    PyType_Watch(TYPE_WATCHER_ID, (PyObject *)type);
+                    _Py_BloomFilter_Add(dependencies, type);
+                }
+            }
+
+        }
+    }
+
     op(_GUARD_BOTH_FLOAT, (left, right -- left, right)) {
         if (sym_matches_type(left, &PyFloat_Type)) {
             if (sym_matches_type(right, &PyFloat_Type)) {
@@ -563,16 +588,12 @@ dummy_func(void) {
             argcount++;
         }
 
-        _Py_UopsSymbol **localsplus_start = ctx->n_consumed;
-        int n_locals_already_filled = 0;
-        // Can determine statically, so we interleave the new locals
-        // and make the current stack the new locals.
-        // This also sets up for true call inlining.
         if (sym_is_null(self_or_null) || sym_is_not_null(self_or_null)) {
-            localsplus_start = args;
-            n_locals_already_filled = argcount;
+            new_frame = frame_new(ctx, co, 0, args, argcount);
+        } else {
+            new_frame = frame_new(ctx, co, 0, NULL, 0);
+
         }
-        new_frame = frame_new(ctx, co, localsplus_start, n_locals_already_filled, 0);
     }
 
     op(_PY_FRAME_GENERAL, (callable, self_or_null, args[oparg] -- new_frame: _Py_UOpsAbstractFrame *)) {
diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h
index b378734..18f3ca4 100644
--- a/Python/optimizer_cases.c.h
+++ b/Python/optimizer_cases.c.h
@@ -930,6 +930,28 @@
         }
 
         case _GUARD_TYPE_VERSION: {
+            _Py_UopsSymbol *owner;
+            owner = stack_pointer[-1];
+            uint32_t type_version = (uint32_t)this_instr->operand;
+            assert(type_version);
+            if (sym_matches_type_version(owner, type_version)) {
+                REPLACE_OP(this_instr, _NOP, 0, 0);
+            } else {
+                // add watcher so that whenever the type changes we invalidate this
+                PyTypeObject *type = _PyType_LookupByVersion(type_version);
+                // if the type is null, it was not found in the cache (there was a conflict)
+                // with the key, in which case we can't trust the version
+                if (type) {
+                    // if the type version was set properly, then add a watcher
+                    // if it wasn't this means that the type version was previously set to something else
+                    // and we set the owner to bottom, so we don't need to add a watcher because we must have
+                    // already added one earlier.
+                    if (sym_set_type_version(owner, type_version)) {
+                        PyType_Watch(TYPE_WATCHER_ID, (PyObject *)type);
+                        _Py_BloomFilter_Add(dependencies, type);
+                    }
+                }
+            }
             break;
         }
 
@@ -1583,16 +1605,11 @@
                 args--;
                 argcount++;
             }
-            _Py_UopsSymbol **localsplus_start = ctx->n_consumed;
-            int n_locals_already_filled = 0;
-            // Can determine statically, so we interleave the new locals
-            // and make the current stack the new locals.
-            // This also sets up for true call inlining.
             if (sym_is_null(self_or_null) || sym_is_not_null(self_or_null)) {
-                localsplus_start = args;
-                n_locals_already_filled = argcount;
+                new_frame = frame_new(ctx, co, 0, args, argcount);
+            } else {
+                new_frame = frame_new(ctx, co, 0, NULL, 0);
             }
-            new_frame = frame_new(ctx, co, localsplus_start, n_locals_already_filled, 0);
             stack_pointer[-2 - oparg] = (_Py_UopsSymbol *)new_frame;
             stack_pointer += -1 - oparg;
             break;
diff --git a/Python/optimizer_symbols.c b/Python/optimizer_symbols.c
index e546eef..f3d4078 100644
--- a/Python/optimizer_symbols.c
+++ b/Python/optimizer_symbols.c
@@ -52,7 +52,8 @@ static inline int get_lltrace(void) {
 static _Py_UopsSymbol NO_SPACE_SYMBOL = {
     .flags = IS_NULL | NOT_NULL | NO_SPACE,
     .typ = NULL,
-    .const_val = NULL
+    .const_val = NULL,
+    .type_version = 0,
 };
 
 _Py_UopsSymbol *
@@ -76,6 +77,7 @@ sym_new(_Py_UOpsContext *ctx)
     self->flags = 0;
     self->typ = NULL;
     self->const_val = NULL;
+    self->type_version = 0;
 
     return self;
 }
@@ -152,6 +154,18 @@ _Py_uop_sym_set_type(_Py_UOpsContext *ctx, _Py_UopsSymbol *sym, PyTypeObject *ty
     }
 }
 
+bool
+_Py_uop_sym_set_type_version(_Py_UOpsContext *ctx, _Py_UopsSymbol *sym, unsigned int version)
+{
+    // if the type version was already set, then it must be different and we should set it to bottom
+    if (sym->type_version) {
+        sym_set_bottom(ctx, sym);
+        return false;
+    }
+    sym->type_version = version;
+    return true;
+}
+
 void
 _Py_uop_sym_set_const(_Py_UOpsContext *ctx, _Py_UopsSymbol *sym, PyObject *const_val)
 {
@@ -256,6 +270,12 @@ _Py_uop_sym_get_type(_Py_UopsSymbol *sym)
     return sym->typ;
 }
 
+unsigned int
+_Py_uop_sym_get_type_version(_Py_UopsSymbol *sym)
+{
+    return sym->type_version;
+}
+
 bool
 _Py_uop_sym_has_type(_Py_UopsSymbol *sym)
 {
@@ -272,6 +292,13 @@ _Py_uop_sym_matches_type(_Py_UopsSymbol *sym, PyTypeObject *typ)
     return _Py_uop_sym_get_type(sym) == typ;
 }
 
+bool
+_Py_uop_sym_matches_type_version(_Py_UopsSymbol *sym, unsigned int version)
+{
+    return _Py_uop_sym_get_type_version(sym) == version;
+}
+
+
 int
 _Py_uop_sym_truthiness(_Py_UopsSymbol *sym)
 {
@@ -311,9 +338,9 @@ _Py_UOpsAbstractFrame *
 _Py_uop_frame_new(
     _Py_UOpsContext *ctx,
     PyCodeObject *co,
-    _Py_UopsSymbol **localsplus_start,
-    int n_locals_already_filled,
-    int curr_stackentries)
+    int curr_stackentries,
+    _Py_UopsSymbol **args,
+    int arg_len)
 {
     assert(ctx->curr_frame_depth < MAX_ABSTRACT_FRAME_DEPTH);
     _Py_UOpsAbstractFrame *frame = &ctx->frames[ctx->curr_frame_depth];
@@ -321,19 +348,22 @@ _Py_uop_frame_new(
     frame->stack_len = co->co_stacksize;
     frame->locals_len = co->co_nlocalsplus;
 
-    frame->locals = localsplus_start;
+    frame->locals = ctx->n_consumed;
     frame->stack = frame->locals + co->co_nlocalsplus;
     frame->stack_pointer = frame->stack + curr_stackentries;
-    ctx->n_consumed = localsplus_start + (co->co_nlocalsplus + co->co_stacksize);
+    ctx->n_consumed = ctx->n_consumed + (co->co_nlocalsplus + co->co_stacksize);
     if (ctx->n_consumed >= ctx->limit) {
         ctx->done = true;
         ctx->out_of_space = true;
         return NULL;
     }
 
 
-    // Initialize with the initial state of all local variables
-    for (int i = n_locals_already_filled; i < co->co_nlocalsplus; i++) {
+    for (int i = 0; i < arg_len; i++) {
+        frame->locals[i] = args[i];
+    }
+
+    for (int i = arg_len; i < co->co_nlocalsplus; i++) {
         _Py_UopsSymbol *local = _Py_uop_sym_new_unknown(ctx);
         frame->locals[i] = local;
     }