diff options
Diffstat (limited to 'Python/optimizer_analysis.c')
| -rw-r--r-- | Python/optimizer_analysis.c | 230 |
1 files changed, 223 insertions, 7 deletions
diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index d122599..2cfbf4b 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -1,10 +1,12 @@ #include "Python.h" #include "opcode.h" +#include "pycore_dict.h" #include "pycore_interp.h" #include "pycore_opcode_metadata.h" #include "pycore_opcode_utils.h" #include "pycore_pystate.h" // _PyInterpreterState_GET() #include "pycore_uop_metadata.h" +#include "pycore_dict.h" #include "pycore_long.h" #include "cpython/optimizer.h" #include <stdbool.h> @@ -12,9 +14,210 @@ #include <stddef.h> #include "pycore_optimizer.h" +static int +get_mutations(PyObject* dict) { + assert(PyDict_CheckExact(dict)); + PyDictObject *d = (PyDictObject *)dict; + return (d->ma_version_tag >> DICT_MAX_WATCHERS) & ((1 << DICT_WATCHED_MUTATION_BITS)-1); +} + static void -peephole_opt(PyCodeObject *co, _PyUOpInstruction *buffer, int buffer_size) +increment_mutations(PyObject* dict) { + assert(PyDict_CheckExact(dict)); + PyDictObject *d = (PyDictObject *)dict; + d->ma_version_tag += (1 << DICT_MAX_WATCHERS); +} + +static int +globals_watcher_callback(PyDict_WatchEvent event, PyObject* dict, + PyObject* key, PyObject* new_value) +{ + if (event == PyDict_EVENT_CLONED) { + return 0; + } + uint64_t watched_mutations = get_mutations(dict); + if (watched_mutations < _Py_MAX_ALLOWED_GLOBALS_MODIFICATIONS) { + _Py_Executors_InvalidateDependency(_PyInterpreterState_GET(), dict); + increment_mutations(dict); + } + else { + PyDict_Unwatch(1, dict); + } + return 0; +} + + +static void +global_to_const(_PyUOpInstruction *inst, PyObject *obj) +{ + assert(inst->opcode == _LOAD_GLOBAL_MODULE || inst->opcode == _LOAD_GLOBAL_BUILTINS); + assert(PyDict_CheckExact(obj)); + PyDictObject *dict = (PyDictObject *)obj; + assert(dict->ma_keys->dk_kind == DICT_KEYS_UNICODE); + PyDictUnicodeEntry *entries = DK_UNICODE_ENTRIES(dict->ma_keys); + assert(inst->operand <= UINT16_MAX); + PyObject *res = entries[inst->operand].me_value; + if (res == NULL) { + return; + } + if (_Py_IsImmortal(res)) { + inst->opcode = (inst->oparg & 1) ? _LOAD_CONST_INLINE_BORROW_WITH_NULL : _LOAD_CONST_INLINE_BORROW; + } + else { + inst->opcode = (inst->oparg & 1) ? _LOAD_CONST_INLINE_WITH_NULL : _LOAD_CONST_INLINE; + } + inst->operand = (uint64_t)res; +} + +static int +incorrect_keys(_PyUOpInstruction *inst, PyObject *obj) { + if (!PyDict_CheckExact(obj)) { + return 1; + } + PyDictObject *dict = (PyDictObject *)obj; + if (dict->ma_keys->dk_version != inst->operand) { + return 1; + } + return 0; +} + +/* The first two dict watcher IDs are reserved for CPython, + * so we don't need to check that they haven't been used */ +#define BUILTINS_WATCHER_ID 0 +#define GLOBALS_WATCHER_ID 1 + +/* Returns 1 if successfully optimized + * 0 if the trace is not suitable for optimization (yet) + * -1 if there was an error. */ +static int +remove_globals(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer, + int buffer_size, _PyBloomFilter *dependencies) +{ + PyInterpreterState *interp = _PyInterpreterState_GET(); + PyObject *builtins = frame->f_builtins; + if (builtins != interp->builtins) { + return 1; + } + PyObject *globals = frame->f_globals; + assert(PyFunction_Check(((PyFunctionObject *)frame->f_funcobj))); + assert(((PyFunctionObject *)frame->f_funcobj)->func_builtins == builtins); + assert(((PyFunctionObject *)frame->f_funcobj)->func_globals == globals); + /* In order to treat globals as constants, we need to + * know that the globals dict is the one we expected, and + * that it hasn't changed + * In order to treat builtins as constants, we need to + * know that the builtins dict is the one we expected, and + * that it hasn't changed and that the global dictionary's + * keys have not changed */ + + /* These values represent stacks of booleans (one bool per bit). + * Pushing a frame shifts left, popping a frame shifts right. */ + uint32_t builtins_checked = 0; + uint32_t builtins_watched = 0; + uint32_t globals_checked = 0; + uint32_t globals_watched = 0; + if (interp->dict_state.watchers[1] == NULL) { + interp->dict_state.watchers[1] = globals_watcher_callback; + } + for (int pc = 0; pc < buffer_size; pc++) { + _PyUOpInstruction *inst = &buffer[pc]; + int opcode = inst->opcode; + switch(opcode) { + case _GUARD_BUILTINS_VERSION: + if (incorrect_keys(inst, builtins)) { + return 0; + } + if (interp->rare_events.builtin_dict >= _Py_MAX_ALLOWED_BUILTINS_MODIFICATIONS) { + continue; + } + if ((builtins_watched & 1) == 0) { + PyDict_Watch(BUILTINS_WATCHER_ID, builtins); + builtins_watched |= 1; + } + if (builtins_checked & 1) { + buffer[pc].opcode = NOP; + } + else { + buffer[pc].opcode = _CHECK_BUILTINS; + buffer[pc].operand = (uintptr_t)builtins; + builtins_checked |= 1; + } + break; + case _GUARD_GLOBALS_VERSION: + if (incorrect_keys(inst, globals)) { + return 0; + } + uint64_t watched_mutations = get_mutations(globals); + if (watched_mutations >= _Py_MAX_ALLOWED_GLOBALS_MODIFICATIONS) { + continue; + } + if ((globals_watched & 1) == 0) { + PyDict_Watch(GLOBALS_WATCHER_ID, globals); + _Py_BloomFilter_Add(dependencies, globals); + globals_watched |= 1; + } + if (globals_checked & 1) { + buffer[pc].opcode = NOP; + } + else { + buffer[pc].opcode = _CHECK_GLOBALS; + buffer[pc].operand = (uintptr_t)globals; + globals_checked |= 1; + } + break; + case _LOAD_GLOBAL_BUILTINS: + if (globals_checked & builtins_checked & globals_watched & builtins_watched & 1) { + global_to_const(inst, builtins); + } + break; + case _LOAD_GLOBAL_MODULE: + if (globals_checked & globals_watched & 1) { + global_to_const(inst, globals); + } + break; + case _PUSH_FRAME: + { + globals_checked <<= 1; + globals_watched <<= 1; + builtins_checked <<= 1; + builtins_watched <<= 1; + PyFunctionObject *func = (PyFunctionObject *)buffer[pc].operand; + if (func == NULL) { + return 1; + } + assert(PyFunction_Check(func)); + globals = func->func_globals; + builtins = func->func_builtins; + if (builtins != interp->builtins) { + return 1; + } + break; + } + case _POP_FRAME: + { + globals_checked >>= 1; + globals_watched >>= 1; + builtins_checked >>= 1; + builtins_watched >>= 1; + PyFunctionObject *func = (PyFunctionObject *)buffer[pc].operand; + assert(PyFunction_Check(func)); + globals = func->func_globals; + builtins = func->func_builtins; + break; + } + case _JUMP_TO_TOP: + case _EXIT_TRACE: + return 1; + } + } + return 0; +} + +static void +peephole_opt(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer, int buffer_size) +{ + PyCodeObject *co = (PyCodeObject *)frame->f_executable; for (int pc = 0; pc < buffer_size; pc++) { int opcode = buffer[pc].opcode; switch(opcode) { @@ -36,8 +239,17 @@ peephole_opt(PyCodeObject *co, _PyUOpInstruction *buffer, int buffer_size) } case _PUSH_FRAME: case _POP_FRAME: - co = (PyCodeObject *)buffer[pc].operand; + { + PyFunctionObject *func = (PyFunctionObject *)buffer[pc].operand; + if (func == NULL) { + co = NULL; + } + else { + assert(PyFunction_Check(func)); + co = (PyCodeObject *)func->func_code; + } break; + } case _JUMP_TO_TOP: case _EXIT_TRACE: return; @@ -83,16 +295,20 @@ remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size) } } - int _Py_uop_analyze_and_optimize( - PyCodeObject *co, + _PyInterpreterFrame *frame, _PyUOpInstruction *buffer, int buffer_size, - int curr_stacklen + int curr_stacklen, + _PyBloomFilter *dependencies ) { - peephole_opt(co, buffer, buffer_size); + int err = remove_globals(frame, buffer, buffer_size, dependencies); + if (err <= 0) { + return err; + } + peephole_opt(frame, buffer, buffer_size); remove_unneeded_uops(buffer, buffer_size); - return 0; + return 1; } |
