summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Include/internal/pycore_code.h7
-rw-r--r--Include/opcode.h47
-rw-r--r--Lib/opcode.py1
-rw-r--r--Misc/NEWS.d/next/Core and Builtins/2021-10-20-11-57-31.bpo-44525.veL4lJ.rst1
-rw-r--r--Python/ceval.c45
-rw-r--r--Python/opcode_targets.h28
-rw-r--r--Python/specialize.c107
7 files changed, 176 insertions, 60 deletions
diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h
index d464f3d..482bd7e 100644
--- a/Include/internal/pycore_code.h
+++ b/Include/internal/pycore_code.h
@@ -35,6 +35,12 @@ typedef struct {
PyObject *obj;
} _PyObjectCache;
+typedef struct {
+ uint32_t func_version;
+ uint16_t defaults_start;
+ uint16_t defaults_len;
+} _PyCallCache;
+
/* Add specialized versions of entries to this union.
*
* Do not break the invariant: sizeof(SpecializedCacheEntry) == 8
@@ -51,6 +57,7 @@ typedef union {
_PyAttrCache attr;
_PyLoadGlobalCache load_global;
_PyObjectCache obj;
+ _PyCallCache call;
} SpecializedCacheEntry;
#define INSTRUCTIONS_PER_ENTRY (sizeof(SpecializedCacheEntry)/sizeof(_Py_CODEUNIT))
diff --git a/Include/opcode.h b/Include/opcode.h
index 22d968e..f8c02b8 100644
--- a/Include/opcode.h
+++ b/Include/opcode.h
@@ -153,29 +153,30 @@ extern "C" {
#define CALL_FUNCTION_BUILTIN_FAST 45
#define CALL_FUNCTION_LEN 46
#define CALL_FUNCTION_ISINSTANCE 47
-#define JUMP_ABSOLUTE_QUICK 48
-#define LOAD_ATTR_ADAPTIVE 58
-#define LOAD_ATTR_INSTANCE_VALUE 80
-#define LOAD_ATTR_WITH_HINT 81
-#define LOAD_ATTR_SLOT 87
-#define LOAD_ATTR_MODULE 88
-#define LOAD_GLOBAL_ADAPTIVE 120
-#define LOAD_GLOBAL_MODULE 122
-#define LOAD_GLOBAL_BUILTIN 123
-#define LOAD_METHOD_ADAPTIVE 127
-#define LOAD_METHOD_CACHED 128
-#define LOAD_METHOD_CLASS 134
-#define LOAD_METHOD_MODULE 140
-#define LOAD_METHOD_NO_DICT 143
-#define STORE_ATTR_ADAPTIVE 149
-#define STORE_ATTR_INSTANCE_VALUE 150
-#define STORE_ATTR_SLOT 151
-#define STORE_ATTR_WITH_HINT 153
-#define LOAD_FAST__LOAD_FAST 154
-#define STORE_FAST__LOAD_FAST 158
-#define LOAD_FAST__LOAD_CONST 159
-#define LOAD_CONST__LOAD_FAST 167
-#define STORE_FAST__STORE_FAST 168
+#define CALL_FUNCTION_PY_SIMPLE 48
+#define JUMP_ABSOLUTE_QUICK 58
+#define LOAD_ATTR_ADAPTIVE 80
+#define LOAD_ATTR_INSTANCE_VALUE 81
+#define LOAD_ATTR_WITH_HINT 87
+#define LOAD_ATTR_SLOT 88
+#define LOAD_ATTR_MODULE 120
+#define LOAD_GLOBAL_ADAPTIVE 122
+#define LOAD_GLOBAL_MODULE 123
+#define LOAD_GLOBAL_BUILTIN 127
+#define LOAD_METHOD_ADAPTIVE 128
+#define LOAD_METHOD_CACHED 134
+#define LOAD_METHOD_CLASS 140
+#define LOAD_METHOD_MODULE 143
+#define LOAD_METHOD_NO_DICT 149
+#define STORE_ATTR_ADAPTIVE 150
+#define STORE_ATTR_INSTANCE_VALUE 151
+#define STORE_ATTR_SLOT 153
+#define STORE_ATTR_WITH_HINT 154
+#define LOAD_FAST__LOAD_FAST 158
+#define STORE_FAST__LOAD_FAST 159
+#define LOAD_FAST__LOAD_CONST 167
+#define LOAD_CONST__LOAD_FAST 168
+#define STORE_FAST__STORE_FAST 169
#define DO_TRACING 255
#ifdef NEED_OPCODE_JUMP_TABLES
static uint32_t _PyOpcode_RelativeJump[8] = {
diff --git a/Lib/opcode.py b/Lib/opcode.py
index fe6066f..5377ec3 100644
--- a/Lib/opcode.py
+++ b/Lib/opcode.py
@@ -237,6 +237,7 @@ _specialized_instructions = [
"CALL_FUNCTION_BUILTIN_FAST",
"CALL_FUNCTION_LEN",
"CALL_FUNCTION_ISINSTANCE",
+ "CALL_FUNCTION_PY_SIMPLE",
"JUMP_ABSOLUTE_QUICK",
"LOAD_ATTR_ADAPTIVE",
"LOAD_ATTR_INSTANCE_VALUE",
diff --git a/Misc/NEWS.d/next/Core and Builtins/2021-10-20-11-57-31.bpo-44525.veL4lJ.rst b/Misc/NEWS.d/next/Core and Builtins/2021-10-20-11-57-31.bpo-44525.veL4lJ.rst
new file mode 100644
index 0000000..6ab1d05
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2021-10-20-11-57-31.bpo-44525.veL4lJ.rst
@@ -0,0 +1 @@
+Specialize simple calls to Python functions (no starargs, keyword dict, or closure)
diff --git a/Python/ceval.c b/Python/ceval.c
index 7632590..f4186da 100644
--- a/Python/ceval.c
+++ b/Python/ceval.c
@@ -4720,9 +4720,9 @@ check_eval_breaker:
TARGET(CALL_FUNCTION_ADAPTIVE) {
SpecializedCacheEntry *cache = GET_CACHE();
+ nargs = cache->adaptive.original_oparg;
if (cache->adaptive.counter == 0) {
next_instr--;
- int nargs = cache->adaptive.original_oparg;
if (_Py_Specialize_CallFunction(
PEEK(nargs + 1), next_instr, nargs, cache, BUILTINS()) < 0) {
goto error;
@@ -4732,9 +4732,48 @@ check_eval_breaker:
else {
STAT_INC(CALL_FUNCTION, deferred);
cache->adaptive.counter--;
- oparg = cache->adaptive.original_oparg;
- JUMP_TO_INSTRUCTION(CALL_FUNCTION);
+ oparg = nargs;
+ kwnames = NULL;
+ postcall_shrink = 1;
+ goto call_function;
+ }
+ }
+
+ TARGET(CALL_FUNCTION_PY_SIMPLE) {
+ SpecializedCacheEntry *caches = GET_CACHE();
+ _PyAdaptiveEntry *cache0 = &caches[0].adaptive;
+ int argcount = cache0->original_oparg;
+ _PyCallCache *cache1 = &caches[-1].call;
+ PyObject *callable = PEEK(argcount+1);
+ DEOPT_IF(!PyFunction_Check(callable), CALL_FUNCTION);
+ PyFunctionObject *func = (PyFunctionObject *)callable;
+ DEOPT_IF(func->func_version != cache1->func_version, CALL_FUNCTION);
+ /* PEP 523 */
+ DEOPT_IF(tstate->interp->eval_frame != NULL, CALL_FUNCTION);
+ STAT_INC(CALL_FUNCTION, hit);
+ record_cache_hit(cache0);
+ InterpreterFrame *new_frame = _PyThreadState_PushFrame(
+ tstate, PyFunction_AS_FRAME_CONSTRUCTOR(func), NULL);
+ if (new_frame == NULL) {
+ goto error;
+ }
+ STACK_SHRINK(argcount);
+ for (int i = 0; i < argcount; i++) {
+ new_frame->localsplus[i] = stack_pointer[i];
+ }
+ int deflen = cache1->defaults_len;
+ for (int i = 0; i < deflen; i++) {
+ PyObject *def = PyTuple_GET_ITEM(func->func_defaults, cache1->defaults_start+i);
+ Py_INCREF(def);
+ new_frame->localsplus[argcount+i] = def;
}
+ STACK_SHRINK(1);
+ Py_DECREF(func);
+ _PyFrame_SetStackPointer(frame, stack_pointer);
+ new_frame->previous = tstate->frame;
+ new_frame->depth = frame->depth + 1;
+ tstate->frame = frame = new_frame;
+ goto start_frame;
}
TARGET(CALL_FUNCTION_BUILTIN_O) {
diff --git a/Python/opcode_targets.h b/Python/opcode_targets.h
index 4179689..5c7d3ad 100644
--- a/Python/opcode_targets.h
+++ b/Python/opcode_targets.h
@@ -47,7 +47,7 @@ static void *opcode_targets[256] = {
&&TARGET_CALL_FUNCTION_BUILTIN_FAST,
&&TARGET_CALL_FUNCTION_LEN,
&&TARGET_CALL_FUNCTION_ISINSTANCE,
- &&TARGET_JUMP_ABSOLUTE_QUICK,
+ &&TARGET_CALL_FUNCTION_PY_SIMPLE,
&&TARGET_WITH_EXCEPT_START,
&&TARGET_GET_AITER,
&&TARGET_GET_ANEXT,
@@ -57,7 +57,7 @@ static void *opcode_targets[256] = {
&&TARGET_INPLACE_ADD,
&&TARGET_INPLACE_SUBTRACT,
&&TARGET_INPLACE_MULTIPLY,
- &&TARGET_LOAD_ATTR_ADAPTIVE,
+ &&TARGET_JUMP_ABSOLUTE_QUICK,
&&TARGET_INPLACE_MODULO,
&&TARGET_STORE_SUBSCR,
&&TARGET_DELETE_SUBSCR,
@@ -79,15 +79,15 @@ static void *opcode_targets[256] = {
&&TARGET_INPLACE_AND,
&&TARGET_INPLACE_XOR,
&&TARGET_INPLACE_OR,
+ &&TARGET_LOAD_ATTR_ADAPTIVE,
&&TARGET_LOAD_ATTR_INSTANCE_VALUE,
- &&TARGET_LOAD_ATTR_WITH_HINT,
&&TARGET_LIST_TO_TUPLE,
&&TARGET_RETURN_VALUE,
&&TARGET_IMPORT_STAR,
&&TARGET_SETUP_ANNOTATIONS,
&&TARGET_YIELD_VALUE,
+ &&TARGET_LOAD_ATTR_WITH_HINT,
&&TARGET_LOAD_ATTR_SLOT,
- &&TARGET_LOAD_ATTR_MODULE,
&&TARGET_POP_EXCEPT,
&&TARGET_STORE_NAME,
&&TARGET_DELETE_NAME,
@@ -119,46 +119,46 @@ static void *opcode_targets[256] = {
&&TARGET_IS_OP,
&&TARGET_CONTAINS_OP,
&&TARGET_RERAISE,
- &&TARGET_LOAD_GLOBAL_ADAPTIVE,
+ &&TARGET_LOAD_ATTR_MODULE,
&&TARGET_JUMP_IF_NOT_EXC_MATCH,
+ &&TARGET_LOAD_GLOBAL_ADAPTIVE,
&&TARGET_LOAD_GLOBAL_MODULE,
- &&TARGET_LOAD_GLOBAL_BUILTIN,
&&TARGET_LOAD_FAST,
&&TARGET_STORE_FAST,
&&TARGET_DELETE_FAST,
+ &&TARGET_LOAD_GLOBAL_BUILTIN,
&&TARGET_LOAD_METHOD_ADAPTIVE,
- &&TARGET_LOAD_METHOD_CACHED,
&&TARGET_GEN_START,
&&TARGET_RAISE_VARARGS,
&&TARGET_CALL_FUNCTION,
&&TARGET_MAKE_FUNCTION,
&&TARGET_BUILD_SLICE,
- &&TARGET_LOAD_METHOD_CLASS,
+ &&TARGET_LOAD_METHOD_CACHED,
&&TARGET_MAKE_CELL,
&&TARGET_LOAD_CLOSURE,
&&TARGET_LOAD_DEREF,
&&TARGET_STORE_DEREF,
&&TARGET_DELETE_DEREF,
- &&TARGET_LOAD_METHOD_MODULE,
+ &&TARGET_LOAD_METHOD_CLASS,
&&TARGET_CALL_FUNCTION_KW,
&&TARGET_CALL_FUNCTION_EX,
- &&TARGET_LOAD_METHOD_NO_DICT,
+ &&TARGET_LOAD_METHOD_MODULE,
&&TARGET_EXTENDED_ARG,
&&TARGET_LIST_APPEND,
&&TARGET_SET_ADD,
&&TARGET_MAP_ADD,
&&TARGET_LOAD_CLASSDEREF,
+ &&TARGET_LOAD_METHOD_NO_DICT,
&&TARGET_STORE_ATTR_ADAPTIVE,
&&TARGET_STORE_ATTR_INSTANCE_VALUE,
- &&TARGET_STORE_ATTR_SLOT,
&&TARGET_MATCH_CLASS,
+ &&TARGET_STORE_ATTR_SLOT,
&&TARGET_STORE_ATTR_WITH_HINT,
- &&TARGET_LOAD_FAST__LOAD_FAST,
&&TARGET_FORMAT_VALUE,
&&TARGET_BUILD_CONST_KEY_MAP,
&&TARGET_BUILD_STRING,
+ &&TARGET_LOAD_FAST__LOAD_FAST,
&&TARGET_STORE_FAST__LOAD_FAST,
- &&TARGET_LOAD_FAST__LOAD_CONST,
&&TARGET_LOAD_METHOD,
&&TARGET_CALL_METHOD,
&&TARGET_LIST_EXTEND,
@@ -166,6 +166,7 @@ static void *opcode_targets[256] = {
&&TARGET_DICT_MERGE,
&&TARGET_DICT_UPDATE,
&&TARGET_CALL_METHOD_KW,
+ &&TARGET_LOAD_FAST__LOAD_CONST,
&&TARGET_LOAD_CONST__LOAD_FAST,
&&TARGET_STORE_FAST__STORE_FAST,
&&_unknown_opcode,
@@ -253,6 +254,5 @@ static void *opcode_targets[256] = {
&&_unknown_opcode,
&&_unknown_opcode,
&&_unknown_opcode,
- &&_unknown_opcode,
&&TARGET_DO_TRACING
};
diff --git a/Python/specialize.c b/Python/specialize.c
index ee573d2..5cc7082 100644
--- a/Python/specialize.c
+++ b/Python/specialize.c
@@ -249,7 +249,7 @@ static uint8_t cache_requirements[256] = {
[BINARY_ADD] = 0,
[BINARY_MULTIPLY] = 0,
[BINARY_SUBSCR] = 0,
- [CALL_FUNCTION] = 2, /* _PyAdaptiveEntry and _PyObjectCache */
+ [CALL_FUNCTION] = 2, /* _PyAdaptiveEntry and _PyObjectCache/_PyCallCache */
[STORE_ATTR] = 2, /* _PyAdaptiveEntry and _PyAttrCache */
};
@@ -461,15 +461,20 @@ _Py_Quicken(PyCodeObject *code) {
#define SPEC_FAIL_NON_FUNCTION_SCOPE 11
#define SPEC_FAIL_DIFFERENT_TYPES 12
-/* Call function */
+/* Calls */
+#define SPEC_FAIL_GENERATOR 7
+#define SPEC_FAIL_COMPLEX_PARAMETERS 8
+#define SPEC_FAIL_WRONG_NUMBER_ARGUMENTS 9
+#define SPEC_FAIL_CO_NOT_OPTIMIZED 10
+/* SPEC_FAIL_METHOD defined as 11 above */
+#define SPEC_FAIL_FREE_VARS 12
+#define SPEC_FAIL_PYCFUNCTION 13
+#define SPEC_FAIL_PYCFUNCTION_WITH_KEYWORDS 14
+#define SPEC_FAIL_PYCFUNCTION_FAST_WITH_KEYWORDS 15
+#define SPEC_FAIL_PYCFUNCTION_NOARGS 16
+#define SPEC_FAIL_BAD_CALL_FLAGS 17
+#define SPEC_FAIL_CLASS 18
-#define SPEC_FAIL_PYCFUNCTION 10
-#define SPEC_FAIL_PYCFUNCTION_WITH_KEYWORDS 13
-#define SPEC_FAIL_PYCFUNCTION_FAST_WITH_KEYWORDS 14
-#define SPEC_FAIL_PYCFUNCTION_NOARGS 15
-#define SPEC_FAIL_BAD_CALL_FLAGS 16
-#define SPEC_FAIL_PYTHON_FUNCTION 17
-#define SPEC_FAIL_IMMUTABLE_CLASS 18
static int
specialize_module_load_attr(
@@ -1236,6 +1241,69 @@ success:
return 0;
}
+static int
+specialize_class_call(
+ PyObject *callable, _Py_CODEUNIT *instr,
+ int nargs, SpecializedCacheEntry *cache)
+{
+ SPECIALIZATION_FAIL(CALL_FUNCTION, SPEC_FAIL_CLASS);
+ return -1;
+}
+
+static int
+specialize_py_call(
+ PyFunctionObject *func, _Py_CODEUNIT *instr,
+ int nargs, SpecializedCacheEntry *cache)
+{
+ _PyCallCache *cache1 = &cache[-1].call;
+ /* Exclude generator or coroutines for now */
+ PyCodeObject *code = (PyCodeObject *)func->func_code;
+ int flags = code->co_flags;
+ if (flags & (CO_GENERATOR | CO_COROUTINE | CO_ASYNC_GENERATOR)) {
+ SPECIALIZATION_FAIL(CALL_FUNCTION, SPEC_FAIL_GENERATOR);
+ return -1;
+ }
+ if ((flags & (CO_VARKEYWORDS | CO_VARARGS)) || code->co_kwonlyargcount) {
+ SPECIALIZATION_FAIL(CALL_FUNCTION, SPEC_FAIL_COMPLEX_PARAMETERS);
+ return -1;
+ }
+ if ((flags & CO_OPTIMIZED) == 0) {
+ SPECIALIZATION_FAIL(CALL_FUNCTION, SPEC_FAIL_CO_NOT_OPTIMIZED);
+ return -1;
+ }
+ if (code->co_nfreevars) {
+ SPECIALIZATION_FAIL(CALL_FUNCTION, SPEC_FAIL_FREE_VARS);
+ return -1;
+ }
+ int argcount = code->co_argcount;
+ int defcount = func->func_defaults == NULL ? 0 : (int)PyTuple_GET_SIZE(func->func_defaults);
+ assert(defcount <= argcount);
+ int min_args = argcount-defcount;
+ if (nargs > argcount || nargs < min_args) {
+ SPECIALIZATION_FAIL(CALL_FUNCTION, SPEC_FAIL_WRONG_NUMBER_ARGUMENTS);
+ return -1;
+ }
+ assert(nargs <= argcount && nargs >= min_args);
+ int defstart = nargs - min_args;
+ int deflen = argcount - nargs;
+ assert(defstart >= 0 && deflen >= 0);
+ assert(deflen == 0 || func->func_defaults != NULL);
+ if (defstart > 0xffff || deflen > 0xffff) {
+ SPECIALIZATION_FAIL(CALL_FUNCTION, SPEC_FAIL_OUT_OF_RANGE);
+ return -1;
+ }
+ int version = _PyFunction_GetVersionForCurrentState(func);
+ if (version == 0) {
+ SPECIALIZATION_FAIL(CALL_FUNCTION, SPEC_FAIL_OUT_OF_VERSIONS);
+ return -1;
+ }
+ cache1->func_version = version;
+ cache1->defaults_start = defstart;
+ cache1->defaults_len = deflen;
+ *instr = _Py_MAKECODEUNIT(CALL_FUNCTION_PY_SIMPLE, _Py_OPARG(*instr));
+ return 0;
+}
+
#if COLLECT_SPECIALIZATION_STATS_DETAILED
static int
builtin_call_fail_kind(int ml_flags)
@@ -1315,11 +1383,7 @@ specialize_c_call(PyObject *callable, _Py_CODEUNIT *instr, int nargs,
static int
call_fail_kind(PyObject *callable)
{
- if (PyFunction_Check(callable)) {
- return SPEC_FAIL_PYTHON_FUNCTION;
- }
- // new-style bound methods
- else if (PyInstanceMethod_Check(callable)) {
+ if (PyInstanceMethod_Check(callable)) {
return SPEC_FAIL_METHOD;
}
else if (PyMethod_Check(callable)) {
@@ -1330,17 +1394,14 @@ call_fail_kind(PyObject *callable)
return SPEC_FAIL_METHOD;
}
else if (PyType_Check(callable)) {
- PyTypeObject *type = Py_TYPE(callable);
- return PyType_HasFeature(type, Py_TPFLAGS_IMMUTABLETYPE) ?
- SPEC_FAIL_IMMUTABLE_CLASS : SPEC_FAIL_MUTABLE_CLASS;
+ return SPEC_FAIL_CLASS;
}
return SPEC_FAIL_OTHER;
}
#endif
/* TODO:
- - Specialize calling types.
- - Specialize python function calls.
+ - Specialize calling classes.
*/
int
_Py_Specialize_CallFunction(
@@ -1352,9 +1413,15 @@ _Py_Specialize_CallFunction(
if (PyCFunction_CheckExact(callable)) {
fail = specialize_c_call(callable, instr, nargs, cache, builtins);
}
+ else if (PyFunction_Check(callable)) {
+ fail = specialize_py_call((PyFunctionObject *)callable, instr, nargs, cache);
+ }
+ else if (PyType_Check(callable)) {
+ fail = specialize_class_call(callable, instr, nargs, cache);
+ }
else {
SPECIALIZATION_FAIL(CALL_FUNCTION, call_fail_kind(callable));
- fail = 1;
+ fail = -1;
}
_PyAdaptiveEntry *cache0 = &cache->adaptive;
if (fail) {