#include "Python.h" #include "opcode.h" #include "pycore_dict.h" #include "pycore_interp.h" #include "pycore_opcode_metadata.h" #include "pycore_opcode_utils.h" #include "pycore_pystate.h" // _PyInterpreterState_GET() #include "pycore_uop_metadata.h" #include "pycore_dict.h" #include "pycore_long.h" #include "cpython/optimizer.h" #include #include #include #include "pycore_optimizer.h" static int get_mutations(PyObject* dict) { assert(PyDict_CheckExact(dict)); PyDictObject *d = (PyDictObject *)dict; return (d->ma_version_tag >> DICT_MAX_WATCHERS) & ((1 << DICT_WATCHED_MUTATION_BITS)-1); } static void increment_mutations(PyObject* dict) { assert(PyDict_CheckExact(dict)); PyDictObject *d = (PyDictObject *)dict; d->ma_version_tag += (1 << DICT_MAX_WATCHERS); } /* The first two dict watcher IDs are reserved for CPython, * so we don't need to check that they haven't been used */ #define BUILTINS_WATCHER_ID 0 #define GLOBALS_WATCHER_ID 1 static int globals_watcher_callback(PyDict_WatchEvent event, PyObject* dict, PyObject* key, PyObject* new_value) { RARE_EVENT_STAT_INC(watched_globals_modification); assert(get_mutations(dict) < _Py_MAX_ALLOWED_GLOBALS_MODIFICATIONS); _Py_Executors_InvalidateDependency(_PyInterpreterState_GET(), dict); increment_mutations(dict); PyDict_Unwatch(GLOBALS_WATCHER_ID, dict); return 0; } static void global_to_const(_PyUOpInstruction *inst, PyObject *obj) { assert(inst->opcode == _LOAD_GLOBAL_MODULE || inst->opcode == _LOAD_GLOBAL_BUILTINS); assert(PyDict_CheckExact(obj)); PyDictObject *dict = (PyDictObject *)obj; assert(dict->ma_keys->dk_kind == DICT_KEYS_UNICODE); PyDictUnicodeEntry *entries = DK_UNICODE_ENTRIES(dict->ma_keys); assert(inst->operand <= UINT16_MAX); PyObject *res = entries[inst->operand].me_value; if (res == NULL) { return; } if (_Py_IsImmortal(res)) { inst->opcode = (inst->oparg & 1) ? _LOAD_CONST_INLINE_BORROW_WITH_NULL : _LOAD_CONST_INLINE_BORROW; } else { inst->opcode = (inst->oparg & 1) ? _LOAD_CONST_INLINE_WITH_NULL : _LOAD_CONST_INLINE; } inst->operand = (uint64_t)res; } static int incorrect_keys(_PyUOpInstruction *inst, PyObject *obj) { if (!PyDict_CheckExact(obj)) { return 1; } PyDictObject *dict = (PyDictObject *)obj; if (dict->ma_keys->dk_version != inst->operand) { return 1; } return 0; } /* Returns 1 if successfully optimized * 0 if the trace is not suitable for optimization (yet) * -1 if there was an error. */ static int remove_globals(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer, int buffer_size, _PyBloomFilter *dependencies) { PyInterpreterState *interp = _PyInterpreterState_GET(); PyObject *builtins = frame->f_builtins; if (builtins != interp->builtins) { return 1; } PyObject *globals = frame->f_globals; assert(PyFunction_Check(((PyFunctionObject *)frame->f_funcobj))); assert(((PyFunctionObject *)frame->f_funcobj)->func_builtins == builtins); assert(((PyFunctionObject *)frame->f_funcobj)->func_globals == globals); /* In order to treat globals as constants, we need to * know that the globals dict is the one we expected, and * that it hasn't changed * In order to treat builtins as constants, we need to * know that the builtins dict is the one we expected, and * that it hasn't changed and that the global dictionary's * keys have not changed */ /* These values represent stacks of booleans (one bool per bit). * Pushing a frame shifts left, popping a frame shifts right. */ uint32_t builtins_checked = 0; uint32_t builtins_watched = 0; uint32_t globals_checked = 0; uint32_t globals_watched = 0; if (interp->dict_state.watchers[GLOBALS_WATCHER_ID] == NULL) { interp->dict_state.watchers[GLOBALS_WATCHER_ID] = globals_watcher_callback; } for (int pc = 0; pc < buffer_size; pc++) { _PyUOpInstruction *inst = &buffer[pc]; int opcode = inst->opcode; switch(opcode) { case _GUARD_BUILTINS_VERSION: if (incorrect_keys(inst, builtins)) { return 0; } if (interp->rare_events.builtin_dict >= _Py_MAX_ALLOWED_BUILTINS_MODIFICATIONS) { continue; } if ((builtins_watched & 1) == 0) { PyDict_Watch(BUILTINS_WATCHER_ID, builtins); builtins_watched |= 1; } if (builtins_checked & 1) { buffer[pc].opcode = NOP; } else { buffer[pc].opcode = _CHECK_BUILTINS; buffer[pc].operand = (uintptr_t)builtins; builtins_checked |= 1; } break; case _GUARD_GLOBALS_VERSION: if (incorrect_keys(inst, globals)) { return 0; } uint64_t watched_mutations = get_mutations(globals); if (watched_mutations >= _Py_MAX_ALLOWED_GLOBALS_MODIFICATIONS) { continue; } if ((globals_watched & 1) == 0) { PyDict_Watch(GLOBALS_WATCHER_ID, globals); _Py_BloomFilter_Add(dependencies, globals); globals_watched |= 1; } if (globals_checked & 1) { buffer[pc].opcode = NOP; } else { buffer[pc].opcode = _CHECK_GLOBALS; buffer[pc].operand = (uintptr_t)globals; globals_checked |= 1; } break; case _LOAD_GLOBAL_BUILTINS: if (globals_checked & builtins_checked & globals_watched & builtins_watched & 1) { global_to_const(inst, builtins); } break; case _LOAD_GLOBAL_MODULE: if (globals_checked & globals_watched & 1) { global_to_const(inst, globals); } break; case _PUSH_FRAME: { globals_checked <<= 1; globals_watched <<= 1; builtins_checked <<= 1; builtins_watched <<= 1; PyFunctionObject *func = (PyFunctionObject *)buffer[pc].operand; if (func == NULL) { return 1; } assert(PyFunction_Check(func)); globals = func->func_globals; builtins = func->func_builtins; if (builtins != interp->builtins) { return 1; } break; } case _POP_FRAME: { globals_checked >>= 1; globals_watched >>= 1; builtins_checked >>= 1; builtins_watched >>= 1; PyFunctionObject *func = (PyFunctionObject *)buffer[pc].operand; assert(PyFunction_Check(func)); globals = func->func_globals; builtins = func->func_builtins; break; } case _JUMP_TO_TOP: case _EXIT_TRACE: return 1; } } return 0; } static void peephole_opt(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer, int buffer_size) { PyCodeObject *co = (PyCodeObject *)frame->f_executable; for (int pc = 0; pc < buffer_size; pc++) { int opcode = buffer[pc].opcode; switch(opcode) { case _LOAD_CONST: { assert(co != NULL); PyObject *val = PyTuple_GET_ITEM(co->co_consts, buffer[pc].oparg); buffer[pc].opcode = _Py_IsImmortal(val) ? _LOAD_CONST_INLINE_BORROW : _LOAD_CONST_INLINE; buffer[pc].operand = (uintptr_t)val; break; } case _CHECK_PEP_523: { /* Setting the eval frame function invalidates * all executors, so no need to check dynamically */ if (_PyInterpreterState_GET()->eval_frame == NULL) { buffer[pc].opcode = _NOP; } break; } case _PUSH_FRAME: case _POP_FRAME: { PyFunctionObject *func = (PyFunctionObject *)buffer[pc].operand; if (func == NULL) { co = NULL; } else { assert(PyFunction_Check(func)); co = (PyCodeObject *)func->func_code; } break; } case _JUMP_TO_TOP: case _EXIT_TRACE: return; } } } static void remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size) { int last_set_ip = -1; bool maybe_invalid = false; for (int pc = 0; pc < buffer_size; pc++) { int opcode = buffer[pc].opcode; if (opcode == _SET_IP) { buffer[pc].opcode = NOP; last_set_ip = pc; } else if (opcode == _CHECK_VALIDITY) { if (maybe_invalid) { maybe_invalid = false; } else { buffer[pc].opcode = NOP; } } else if (opcode == _JUMP_TO_TOP || opcode == _EXIT_TRACE) { break; } else { if (_PyUop_Flags[opcode] & HAS_ESCAPES_FLAG) { maybe_invalid = true; if (last_set_ip >= 0) { buffer[last_set_ip].opcode = _SET_IP; } } if ((_PyUop_Flags[opcode] & HAS_ERROR_FLAG) || opcode == _PUSH_FRAME) { if (last_set_ip >= 0) { buffer[last_set_ip].opcode = _SET_IP; } } } } } int _Py_uop_analyze_and_optimize( _PyInterpreterFrame *frame, _PyUOpInstruction *buffer, int buffer_size, int curr_stacklen, _PyBloomFilter *dependencies ) { int err = remove_globals(frame, buffer, buffer_size, dependencies); if (err <= 0) { return err; } peephole_opt(frame, buffer, buffer_size); remove_unneeded_uops(buffer, buffer_size); return 1; }