summaryrefslogtreecommitdiffstats
path: root/Python
diff options
context:
space:
mode:
authorJeremy Hylton <jeremy@alum.mit.edu>2003-02-05 23:13:00 (GMT)
committerJeremy Hylton <jeremy@alum.mit.edu>2003-02-05 23:13:00 (GMT)
commit985eba53f5d1ac45037fbf19b6955c74823c1ded (patch)
treef847567e9468a46df6f0a55b7d437a3697e7f248 /Python
parentf3f4af55214370e5c6df928e8c8242d14060e204 (diff)
downloadcpython-985eba53f5d1ac45037fbf19b6955c74823c1ded.zip
cpython-985eba53f5d1ac45037fbf19b6955c74823c1ded.tar.gz
cpython-985eba53f5d1ac45037fbf19b6955c74823c1ded.tar.bz2
Small function call optimization and special build option for call stats.
-DCALL_PROFILE: Count the number of function calls executed. When this symbol is defined, the ceval mainloop and helper functions count the number of function calls made. It keeps detailed statistics about what kind of object was called and whether the call hit any of the special fast paths in the code. Optimization: When we take the fast_function() path, which seems to be taken for most function calls, and there is minimal frame setup to do, avoid call PyEval_EvalCodeEx(). The eval code ex function does a lot of work to handle keywords args and star args, free variables, generators, etc. The inlined version simply allocates the frame and copies the arguments values into the frame. The optimization gets a little help from compile.c which adds a CO_NOFREE flag to code objects that don't have free variables or cell variables. This change allows fast_function() to get into the fast path with fewer tests. I measure a couple of percent speedup in pystone with this change, but there's surely more that can be done.
Diffstat (limited to 'Python')
-rw-r--r--Python/ceval.c154
-rw-r--r--Python/compile.c3
-rw-r--r--Python/sysmodule.c28
3 files changed, 171 insertions, 14 deletions
diff --git a/Python/ceval.c b/Python/ceval.c
index 8547f85..0f52a0b 100644
--- a/Python/ceval.c
+++ b/Python/ceval.c
@@ -87,6 +87,62 @@ static long dxp[256];
#endif
#endif
+/* Function call profile */
+#ifdef CALL_PROFILE
+#define PCALL_NUM 11
+static int pcall[PCALL_NUM];
+
+#define PCALL_ALL 0
+#define PCALL_FUNCTION 1
+#define PCALL_FAST_FUNCTION 2
+#define PCALL_FASTER_FUNCTION 3
+#define PCALL_METHOD 4
+#define PCALL_BOUND_METHOD 5
+#define PCALL_CFUNCTION 6
+#define PCALL_TYPE 7
+#define PCALL_GENERATOR 8
+#define PCALL_OTHER 9
+#define PCALL_POP 10
+
+/* Notes about the statistics
+
+ PCALL_FAST stats
+
+ FAST_FUNCTION means no argument tuple needs to be created.
+ FASTER_FUNCTION means that the fast-path frame setup code is used.
+
+ If there is a method call where the call can be optimized by changing
+ the argument tuple and calling the function directly, it gets recorded
+ twice.
+
+ As a result, the relationship among the statistics appears to be
+ PCALL_ALL == PCALL_FUNCTION + PCALL_METHOD - PCALL_BOUND_METHOD +
+ PCALL_CFUNCTION + PCALL_TYPE + PCALL_GENERATOR + PCALL_OTHER
+ PCALL_FUNCTION > PCALL_FAST_FUNCTION > PCALL_FASTER_FUNCTION
+ PCALL_METHOD > PCALL_BOUND_METHOD
+*/
+
+#define PCALL(POS) pcall[POS]++
+
+PyObject *
+PyEval_GetCallStats(PyObject *self)
+{
+ return Py_BuildValue("iiiiiiiiii",
+ pcall[0], pcall[1], pcall[2], pcall[3],
+ pcall[4], pcall[5], pcall[6], pcall[7],
+ pcall[8], pcall[9]);
+}
+#else
+#define PCALL(O)
+
+PyObject *
+PyEval_GetCallStats(PyObject *self)
+{
+ Py_INCREF(Py_None);
+ return Py_None;
+}
+#endif
+
static PyTypeObject gentype;
typedef struct {
@@ -475,6 +531,7 @@ volatile int _Py_Ticker = 100;
PyObject *
PyEval_EvalCode(PyCodeObject *co, PyObject *globals, PyObject *locals)
{
+ /* XXX raise SystemError if globals is NULL */
return PyEval_EvalCodeEx(co,
globals, locals,
(PyObject **)NULL, 0,
@@ -1980,6 +2037,7 @@ eval_frame(PyFrameObject *f)
continue;
case CALL_FUNCTION:
+ PCALL(PCALL_ALL);
x = call_function(&stack_pointer, oparg);
PUSH(x);
if (x != NULL)
@@ -1995,6 +2053,7 @@ eval_frame(PyFrameObject *f)
int flags = (opcode - CALL_FUNCTION) & 3;
int n = na + 2 * nk;
PyObject **pfunc, *func;
+ PCALL(PCALL_ALL);
if (flags & CALL_FLAG_VAR)
n++;
if (flags & CALL_FLAG_KW)
@@ -2317,9 +2376,8 @@ PyEval_EvalCodeEx(PyCodeObject *co, PyObject *globals, PyObject *locals,
return NULL;
}
- f = PyFrame_New(tstate, /*back*/
- co, /*code*/
- globals, locals);
+ assert(globals != NULL);
+ f = PyFrame_New(tstate, co, globals, locals);
if (f == NULL)
return NULL;
@@ -2520,6 +2578,8 @@ PyEval_EvalCodeEx(PyCodeObject *co, PyObject *globals, PyObject *locals,
Py_XDECREF(f->f_back);
f->f_back = NULL;
+ PCALL(PCALL_GENERATOR);
+
/* Create a new generator that owns the ready to run frame
* and return that as the value. */
return gen_new(f);
@@ -3198,12 +3258,12 @@ call_function(PyObject ***pp_stack, int oparg)
PyObject *func = *pfunc;
PyObject *x, *w;
- /* Always dispatch PyCFunction first, because
- these are presumed to be the most frequent
- callable object.
+ /* Always dispatch PyCFunction first, because these are
+ presumed to be the most frequent callable object.
*/
if (PyCFunction_Check(func) && nk == 0) {
int flags = PyCFunction_GET_FLAGS(func);
+ PCALL(PCALL_CFUNCTION);
if (flags & (METH_NOARGS | METH_O)) {
PyCFunction meth = PyCFunction_GET_FUNCTION(func);
PyObject *self = PyCFunction_GET_SELF(func);
@@ -3229,6 +3289,8 @@ call_function(PyObject ***pp_stack, int oparg)
if (PyMethod_Check(func) && PyMethod_GET_SELF(func) != NULL) {
/* optimize access to bound methods */
PyObject *self = PyMethod_GET_SELF(func);
+ PCALL(PCALL_METHOD);
+ PCALL(PCALL_BOUND_METHOD);
Py_INCREF(self);
func = PyMethod_GET_FUNCTION(func);
Py_INCREF(func);
@@ -3245,35 +3307,75 @@ call_function(PyObject ***pp_stack, int oparg)
Py_DECREF(func);
}
+ /* What does this do? */
while ((*pp_stack) > pfunc) {
w = EXT_POP(*pp_stack);
Py_DECREF(w);
+ PCALL(PCALL_POP);
}
return x;
}
/* The fast_function() function optimize calls for which no argument
tuple is necessary; the objects are passed directly from the stack.
+ For the simplest case -- a function that takes only positional
+ arguments and is called with only positional arguments -- it
+ inlines the most primitive frame setup code from
+ PyEval_EvalCodeEx(), which vastly reduces the checks that must be
+ done before evaluating the frame.
*/
static PyObject *
fast_function(PyObject *func, PyObject ***pp_stack, int n, int na, int nk)
{
- PyObject *co = PyFunction_GET_CODE(func);
+ PyCodeObject *co = (PyCodeObject *)PyFunction_GET_CODE(func);
PyObject *globals = PyFunction_GET_GLOBALS(func);
PyObject *argdefs = PyFunction_GET_DEFAULTS(func);
- PyObject *closure = PyFunction_GET_CLOSURE(func);
PyObject **d = NULL;
int nd = 0;
+ PCALL(PCALL_FUNCTION);
+ PCALL(PCALL_FAST_FUNCTION);
+ if (argdefs == NULL && co->co_argcount == n &&
+ co->co_flags == (CO_OPTIMIZED | CO_NEWLOCALS | CO_NOFREE)) {
+ PyFrameObject *f;
+ PyObject *retval = NULL;
+ PyThreadState *tstate = PyThreadState_GET();
+ PyObject **fastlocals, **stack;
+ int i;
+
+ PCALL(PCALL_FASTER_FUNCTION);
+ assert(globals != NULL);
+ /* XXX Perhaps we should create a specialized
+ PyFrame_New() that doesn't take locals, but does
+ take builtins without sanity checking them.
+ */
+ f = PyFrame_New(tstate, co, globals, NULL);
+ if (f == NULL)
+ return NULL;
+
+ fastlocals = f->f_localsplus;
+ stack = (*pp_stack) - n;
+
+ for (i = 0; i < n; i++) {
+ Py_INCREF(*stack);
+ fastlocals[i] = *stack++;
+ }
+ retval = eval_frame(f);
+ assert(tstate != NULL);
+ ++tstate->recursion_depth;
+ Py_DECREF(f);
+ --tstate->recursion_depth;
+ return retval;
+ }
if (argdefs != NULL) {
d = &PyTuple_GET_ITEM(argdefs, 0);
nd = ((PyTupleObject *)argdefs)->ob_size;
}
- return PyEval_EvalCodeEx((PyCodeObject *)co, globals,
- (PyObject *)NULL, (*pp_stack)-n, na,
- (*pp_stack)-2*nk, nk, d, nd,
- closure);
+ return PyEval_EvalCodeEx(co, globals,
+ (PyObject *)NULL, (*pp_stack)-n, na,
+ (*pp_stack)-2*nk, nk, d, nd,
+ PyFunction_GET_CLOSURE(func));
}
static PyObject *
@@ -3371,6 +3473,20 @@ do_call(PyObject *func, PyObject ***pp_stack, int na, int nk)
callargs = load_args(pp_stack, na);
if (callargs == NULL)
goto call_fail;
+#ifdef CALL_PROFILE
+ /* At this point, we have to look at the type of func to
+ update the call stats properly. Do it here so as to avoid
+ exposing the call stats machinery outside ceval.c
+ */
+ if (PyFunction_Check(func))
+ PCALL(PCALL_FUNCTION);
+ else if (PyMethod_Check(func))
+ PCALL(PCALL_METHOD);
+ else if (PyType_Check(func))
+ PCALL(PCALL_TYPE);
+ else
+ PCALL(PCALL_OTHER);
+#endif
result = PyObject_Call(func, callargs, kwdict);
call_fail:
Py_XDECREF(callargs);
@@ -3426,6 +3542,20 @@ ext_do_call(PyObject *func, PyObject ***pp_stack, int flags, int na, int nk)
callargs = update_star_args(na, nstar, stararg, pp_stack);
if (callargs == NULL)
goto ext_call_fail;
+#ifdef CALL_PROFILE
+ /* At this point, we have to look at the type of func to
+ update the call stats properly. Do it here so as to avoid
+ exposing the call stats machinery outside ceval.c
+ */
+ if (PyFunction_Check(func))
+ PCALL(PCALL_FUNCTION);
+ else if (PyMethod_Check(func))
+ PCALL(PCALL_METHOD);
+ else if (PyType_Check(func))
+ PCALL(PCALL_TYPE);
+ else
+ PCALL(PCALL_OTHER);
+#endif
result = PyObject_Call(func, callargs, kwdict);
ext_call_fail:
Py_XDECREF(callargs);
diff --git a/Python/compile.c b/Python/compile.c
index 49e57d1..01e961b 100644
--- a/Python/compile.c
+++ b/Python/compile.c
@@ -385,6 +385,9 @@ PyCode_New(int argcount, int nlocals, int stacksize, int flags,
co->co_firstlineno = firstlineno;
Py_INCREF(lnotab);
co->co_lnotab = lnotab;
+ if (PyTuple_GET_SIZE(freevars) == 0 &&
+ PyTuple_GET_SIZE(cellvars) == 0)
+ co->co_flags |= CO_NOFREE;
}
return co;
}
diff --git a/Python/sysmodule.c b/Python/sysmodule.c
index 2b4c6b4..765621e 100644
--- a/Python/sysmodule.c
+++ b/Python/sysmodule.c
@@ -562,6 +562,28 @@ sys_getframe(PyObject *self, PyObject *args)
return (PyObject*)f;
}
+PyDoc_STRVAR(callstats_doc,
+"callstats() -> tuple of integers\n\
+\n\
+Return a tuple of function call statistics, if CALL_PROFILE was defined\n\
+when Python was built. Otherwise, return None.\n\
+\n\
+When enabled, this function returns detailed, implementation-specific\n\
+details about the number of function calls executed. The return value is\n\
+a 11-tuple where the entries in the tuple are counts of:\n\
+0. all function calls\n\
+1. calls to PyFunction_Type objects\n\
+2. PyFunction calls that do not create an argument tuple\n\
+3. PyFunction calls that do not create an argument tuple\n\
+ and bypass PyEval_EvalCodeEx()\n\
+4. PyMethod calls\n\
+5. PyMethod calls on bound methods\n\
+6. PyType calls\n\
+7. PyCFunction calls\n\
+8. generator calls\n\
+9. All other calls\n\
+10. Number of stack pops performed by call_function()"
+);
#ifdef Py_TRACE_REFS
/* Defined in objects.c because it uses static globals if that file */
@@ -575,13 +597,15 @@ extern PyObject *_Py_GetDXProfile(PyObject *, PyObject *);
static PyMethodDef sys_methods[] = {
/* Might as well keep this in alphabetic order */
+ {"callstats", (PyCFunction)PyEval_GetCallStats, METH_NOARGS,
+ callstats_doc},
{"displayhook", sys_displayhook, METH_O, displayhook_doc},
{"exc_info", (PyCFunction)sys_exc_info, METH_NOARGS, exc_info_doc},
{"excepthook", sys_excepthook, METH_VARARGS, excepthook_doc},
{"exit", sys_exit, METH_VARARGS, exit_doc},
#ifdef Py_USING_UNICODE
- {"getdefaultencoding", (PyCFunction)sys_getdefaultencoding, METH_NOARGS,
- getdefaultencoding_doc},
+ {"getdefaultencoding", (PyCFunction)sys_getdefaultencoding,
+ METH_NOARGS, getdefaultencoding_doc},
#endif
#ifdef HAVE_DLOPEN
{"getdlopenflags", (PyCFunction)sys_getdlopenflags, METH_NOARGS,