-rw-r--r--  Include/ceval.h           2
-rw-r--r--  Include/compile.h         6
-rw-r--r--  Misc/SpecialBuilds.txt   10
-rw-r--r--  Python/ceval.c          154
-rw-r--r--  Python/compile.c          3
-rw-r--r--  Python/sysmodule.c       28
6 files changed, 189 insertions, 14 deletions
diff --git a/Include/ceval.h b/Include/ceval.h
index e1af801..9bb145d 100644
--- a/Include/ceval.h
+++ b/Include/ceval.h
@@ -48,6 +48,8 @@ PyAPI_FUNC(int) Py_GetRecursionLimit(void);
PyAPI_FUNC(char *) PyEval_GetFuncName(PyObject *);
PyAPI_FUNC(char *) PyEval_GetFuncDesc(PyObject *);
+PyAPI_FUNC(PyObject *) PyEval_GetCallStats(PyObject *);
+
/* this used to be handled on a per-thread basis - now just two globals */
PyAPI_DATA(volatile int) _Py_Ticker;
PyAPI_DATA(int) _Py_CheckInterval;
diff --git a/Include/compile.h b/Include/compile.h
index a462d77..594d7df 100644
--- a/Include/compile.h
+++ b/Include/compile.h
@@ -34,6 +34,12 @@ typedef struct {
#define CO_VARKEYWORDS 0x0008
#define CO_NESTED 0x0010
#define CO_GENERATOR 0x0020
+/* The CO_NOFREE flag is set if there are no free or cell variables.
+ This information is redundant, but it allows a single flag test
+ to determine whether there is any extra work to be done when the
+ call frame is set up.
+*/
+#define CO_NOFREE 0x0040
/* XXX Temporary hack. Until generators are a permanent part of the
language, we need a way for a code object to record that generators
were *possible* when it was compiled. This is so code dynamically
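
The CO_NOFREE comment above says that a single flag test is enough to tell frame setup whether any cell- or free-variable work is needed. As a rough illustration (not part of the patch), the flag is visible from Python through a function's code object; the 0x0040 constant mirrors the #define, and has_no_free() is just a hypothetical helper name:

    CO_NOFREE = 0x0040   # mirrors the #define above

    def has_no_free(func):
        # True when the code object has neither cell nor free variables
        return bool(func.func_code.co_flags & CO_NOFREE)

    def plain(x):
        return x + 1

    def make_adder(n):
        def add(x):      # 'n' is a free variable here, so CO_NOFREE is clear
            return x + n
        return add

    print has_no_free(plain)          # expected: True
    print has_no_free(make_adder(1))  # expected: False
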
diff --git a/Misc/SpecialBuilds.txt b/Misc/SpecialBuilds.txt
index f48817f..a4226c9 100644
--- a/Misc/SpecialBuilds.txt
+++ b/Misc/SpecialBuilds.txt
@@ -199,3 +199,13 @@ sprayed to stdout, such as every opcode and opcode argument and values
pushed onto and popped off the value stack.
Not useful very often, but very useful when needed.
+
+---------------------------------------------------------------------------
+CALL_PROFILE introduced for Python 2.3
+
+Count the number of function calls executed.
+
+When this symbol is defined, the ceval mainloop and helper functions
+count the number of function calls made, keeping detailed statistics
+about what kind of object was called and whether the call hit any of
+the special fast paths in the code.
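
A quick way to confirm that a build actually has the symbol defined (the exact build mechanism, for example adding -DCALL_PROFILE to OPT in the Makefile or defining it in pyconfig.h, is an assumption here, not something this patch specifies): sys.callstats() returns None in an ordinary build and a tuple of counters in a CALL_PROFILE build.

    # Minimal sketch: check whether CALL_PROFILE is active in this build.
    import sys

    stats = sys.callstats()
    if stats is None:
        print "CALL_PROFILE not enabled in this build"
    else:
        print "function calls so far:", stats[0]   # index 0 is PCALL_ALL
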
diff --git a/Python/ceval.c b/Python/ceval.c
index 8547f85..0f52a0b 100644
--- a/Python/ceval.c
+++ b/Python/ceval.c
@@ -87,6 +87,62 @@ static long dxp[256];
#endif
#endif
+/* Function call profile */
+#ifdef CALL_PROFILE
+#define PCALL_NUM 11
+static int pcall[PCALL_NUM];
+
+#define PCALL_ALL 0
+#define PCALL_FUNCTION 1
+#define PCALL_FAST_FUNCTION 2
+#define PCALL_FASTER_FUNCTION 3
+#define PCALL_METHOD 4
+#define PCALL_BOUND_METHOD 5
+#define PCALL_CFUNCTION 6
+#define PCALL_TYPE 7
+#define PCALL_GENERATOR 8
+#define PCALL_OTHER 9
+#define PCALL_POP 10
+
+/* Notes about the statistics
+
+ PCALL_FAST stats
+
+ FAST_FUNCTION means no argument tuple needs to be created.
+ FASTER_FUNCTION means that the fast-path frame setup code is used.
+
+ If there is a method call where the call can be optimized by changing
+ the argument tuple and calling the function directly, it gets recorded
+ twice.
+
+ As a result, the relationships among the statistics should be roughly:
+ PCALL_ALL == PCALL_FUNCTION + PCALL_METHOD - PCALL_BOUND_METHOD +
+ PCALL_CFUNCTION + PCALL_TYPE + PCALL_GENERATOR + PCALL_OTHER
+ PCALL_FUNCTION > PCALL_FAST_FUNCTION > PCALL_FASTER_FUNCTION
+ PCALL_METHOD > PCALL_BOUND_METHOD
+*/
+
+#define PCALL(POS) pcall[POS]++
+
+PyObject *
+PyEval_GetCallStats(PyObject *self)
+{
+ return Py_BuildValue("iiiiiiiiii",
+ pcall[0], pcall[1], pcall[2], pcall[3],
+ pcall[4], pcall[5], pcall[6], pcall[7],
+ pcall[8], pcall[9]);
+}
+#else
+#define PCALL(O)
+
+PyObject *
+PyEval_GetCallStats(PyObject *self)
+{
+ Py_INCREF(Py_None);
+ return Py_None;
+}
+#endif
+
static PyTypeObject gentype;
typedef struct {
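
The Notes comment earlier in this hunk states the (approximate) relationships among the counters. A hedged sketch of how one might check them on a CALL_PROFILE build; the index names below simply mirror the PCALL_* constants and are not part of the patch:

    import sys

    (ALL, FUNCTION, FAST_FUNCTION, FASTER_FUNCTION, METHOD,
     BOUND_METHOD, CFUNCTION, TYPE, GENERATOR, OTHER, POP) = range(11)

    s = sys.callstats()
    if s is not None:
        reconstructed = (s[FUNCTION] + s[METHOD] - s[BOUND_METHOD] +
                         s[CFUNCTION] + s[TYPE] + s[GENERATOR] + s[OTHER])
        print "PCALL_ALL:", s[ALL], "reconstructed:", reconstructed
        ok = s[FUNCTION] >= s[FAST_FUNCTION] >= s[FASTER_FUNCTION]
        print "FUNCTION >= FAST >= FASTER:", ok
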
@@ -475,6 +531,7 @@ volatile int _Py_Ticker = 100;
PyObject *
PyEval_EvalCode(PyCodeObject *co, PyObject *globals, PyObject *locals)
{
+ /* XXX raise SystemError if globals is NULL */
return PyEval_EvalCodeEx(co,
globals, locals,
(PyObject **)NULL, 0,
@@ -1980,6 +2037,7 @@ eval_frame(PyFrameObject *f)
continue;
case CALL_FUNCTION:
+ PCALL(PCALL_ALL);
x = call_function(&stack_pointer, oparg);
PUSH(x);
if (x != NULL)
@@ -1995,6 +2053,7 @@ eval_frame(PyFrameObject *f)
int flags = (opcode - CALL_FUNCTION) & 3;
int n = na + 2 * nk;
PyObject **pfunc, *func;
+ PCALL(PCALL_ALL);
if (flags & CALL_FLAG_VAR)
n++;
if (flags & CALL_FLAG_KW)
@@ -2317,9 +2376,8 @@ PyEval_EvalCodeEx(PyCodeObject *co, PyObject *globals, PyObject *locals,
return NULL;
}
- f = PyFrame_New(tstate, /*back*/
- co, /*code*/
- globals, locals);
+ assert(globals != NULL);
+ f = PyFrame_New(tstate, co, globals, locals);
if (f == NULL)
return NULL;
@@ -2520,6 +2578,8 @@ PyEval_EvalCodeEx(PyCodeObject *co, PyObject *globals, PyObject *locals,
Py_XDECREF(f->f_back);
f->f_back = NULL;
+ PCALL(PCALL_GENERATOR);
+
/* Create a new generator that owns the ready to run frame
* and return that as the value. */
return gen_new(f);
@@ -3198,12 +3258,12 @@ call_function(PyObject ***pp_stack, int oparg)
PyObject *func = *pfunc;
PyObject *x, *w;
- /* Always dispatch PyCFunction first, because
- these are presumed to be the most frequent
- callable object.
+ /* Always dispatch PyCFunction first, because these are
+ presumed to be the most frequent callable object.
*/
if (PyCFunction_Check(func) && nk == 0) {
int flags = PyCFunction_GET_FLAGS(func);
+ PCALL(PCALL_CFUNCTION);
if (flags & (METH_NOARGS | METH_O)) {
PyCFunction meth = PyCFunction_GET_FUNCTION(func);
PyObject *self = PyCFunction_GET_SELF(func);
@@ -3229,6 +3289,8 @@ call_function(PyObject ***pp_stack, int oparg)
if (PyMethod_Check(func) && PyMethod_GET_SELF(func) != NULL) {
/* optimize access to bound methods */
PyObject *self = PyMethod_GET_SELF(func);
+ PCALL(PCALL_METHOD);
+ PCALL(PCALL_BOUND_METHOD);
Py_INCREF(self);
func = PyMethod_GET_FUNCTION(func);
Py_INCREF(func);
@@ -3245,35 +3307,75 @@ call_function(PyObject ***pp_stack, int oparg)
Py_DECREF(func);
}
+ /* Clear the stack of the function object and any arguments the
+    call did not consume (e.g. the fast paths read them in place
+    rather than popping them). */
while ((*pp_stack) > pfunc) {
w = EXT_POP(*pp_stack);
Py_DECREF(w);
+ PCALL(PCALL_POP);
}
return x;
}
/* The fast_function() function optimizes calls for which no argument
tuple is necessary; the objects are passed directly from the stack.
+ For the simplest case -- a function that takes only positional
+ arguments and is called with only positional arguments -- it
+ inlines the most primitive frame setup code from
+ PyEval_EvalCodeEx(), which vastly reduces the checks that must be
+ done before evaluating the frame.
*/
static PyObject *
fast_function(PyObject *func, PyObject ***pp_stack, int n, int na, int nk)
{
- PyObject *co = PyFunction_GET_CODE(func);
+ PyCodeObject *co = (PyCodeObject *)PyFunction_GET_CODE(func);
PyObject *globals = PyFunction_GET_GLOBALS(func);
PyObject *argdefs = PyFunction_GET_DEFAULTS(func);
- PyObject *closure = PyFunction_GET_CLOSURE(func);
PyObject **d = NULL;
int nd = 0;
+ PCALL(PCALL_FUNCTION);
+ PCALL(PCALL_FAST_FUNCTION);
+ if (argdefs == NULL && co->co_argcount == n &&
+ co->co_flags == (CO_OPTIMIZED | CO_NEWLOCALS | CO_NOFREE)) {
+ PyFrameObject *f;
+ PyObject *retval = NULL;
+ PyThreadState *tstate = PyThreadState_GET();
+ PyObject **fastlocals, **stack;
+ int i;
+
+ PCALL(PCALL_FASTER_FUNCTION);
+ assert(globals != NULL);
+ /* XXX Perhaps we should create a specialized
+ PyFrame_New() that doesn't take locals, but does
+ take builtins without sanity checking them.
+ */
+ f = PyFrame_New(tstate, co, globals, NULL);
+ if (f == NULL)
+ return NULL;
+
+ fastlocals = f->f_localsplus;
+ stack = (*pp_stack) - n;
+
+ for (i = 0; i < n; i++) {
+ Py_INCREF(*stack);
+ fastlocals[i] = *stack++;
+ }
+ retval = eval_frame(f);
+ assert(tstate != NULL);
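+ /* Decref'ing the frame can run __del__ methods that call back
+    into Python; boost the recursion depth while the C stack for
+    this call is still in use. */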
+ ++tstate->recursion_depth;
+ Py_DECREF(f);
+ --tstate->recursion_depth;
+ return retval;
+ }
if (argdefs != NULL) {
d = &PyTuple_GET_ITEM(argdefs, 0);
nd = ((PyTupleObject *)argdefs)->ob_size;
}
- return PyEval_EvalCodeEx((PyCodeObject *)co, globals,
- (PyObject *)NULL, (*pp_stack)-n, na,
- (*pp_stack)-2*nk, nk, d, nd,
- closure);
+ return PyEval_EvalCodeEx(co, globals,
+ (PyObject *)NULL, (*pp_stack)-n, na,
+ (*pp_stack)-2*nk, nk, d, nd,
+ PyFunction_GET_CLOSURE(func));
}
static PyObject *
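
Per the comment before fast_function() above, only plain positional calls to functions with no defaults, no closure, and no *args/**kwargs take the inlined PCALL_FASTER_FUNCTION path. A hedged sketch against a CALL_PROFILE build (index 3 of sys.callstats() is PCALL_FASTER_FUNCTION); the expected deltas assume nothing else runs between the samples:

    import sys

    def positional_only(a, b):
        return a + b

    def with_default(a, b=0):
        return a + b

    before = sys.callstats()[3]
    positional_only(1, 2)     # plain positional call: takes the inlined path
    print "faster-path calls:", sys.callstats()[3] - before   # expected: 1

    before = sys.callstats()[3]
    with_default(1, 2)        # has defaults: falls back to PyEval_EvalCodeEx()
    print "faster-path calls:", sys.callstats()[3] - before   # expected: 0
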
@@ -3371,6 +3473,20 @@ do_call(PyObject *func, PyObject ***pp_stack, int na, int nk)
callargs = load_args(pp_stack, na);
if (callargs == NULL)
goto call_fail;
+#ifdef CALL_PROFILE
+ /* At this point, we have to look at the type of func to
+ update the call stats properly. Do it here so as to avoid
+ exposing the call stats machinery outside ceval.c
+ */
+ if (PyFunction_Check(func))
+ PCALL(PCALL_FUNCTION);
+ else if (PyMethod_Check(func))
+ PCALL(PCALL_METHOD);
+ else if (PyType_Check(func))
+ PCALL(PCALL_TYPE);
+ else
+ PCALL(PCALL_OTHER);
+#endif
result = PyObject_Call(func, callargs, kwdict);
call_fail:
Py_XDECREF(callargs);
@@ -3426,6 +3542,20 @@ ext_do_call(PyObject *func, PyObject ***pp_stack, int flags, int na, int nk)
callargs = update_star_args(na, nstar, stararg, pp_stack);
if (callargs == NULL)
goto ext_call_fail;
+#ifdef CALL_PROFILE
+ /* At this point, we have to look at the type of func to
+ update the call stats properly. Do it here so as to avoid
+ exposing the call stats machinery outside ceval.c
+ */
+ if (PyFunction_Check(func))
+ PCALL(PCALL_FUNCTION);
+ else if (PyMethod_Check(func))
+ PCALL(PCALL_METHOD);
+ else if (PyType_Check(func))
+ PCALL(PCALL_TYPE);
+ else
+ PCALL(PCALL_OTHER);
+#endif
result = PyObject_Call(func, callargs, kwdict);
ext_call_fail:
Py_XDECREF(callargs);
diff --git a/Python/compile.c b/Python/compile.c
index 49e57d1..01e961b 100644
--- a/Python/compile.c
+++ b/Python/compile.c
@@ -385,6 +385,9 @@ PyCode_New(int argcount, int nlocals, int stacksize, int flags,
co->co_firstlineno = firstlineno;
Py_INCREF(lnotab);
co->co_lnotab = lnotab;
+ if (PyTuple_GET_SIZE(freevars) == 0 &&
+ PyTuple_GET_SIZE(cellvars) == 0)
+ co->co_flags |= CO_NOFREE;
}
return co;
}
diff --git a/Python/sysmodule.c b/Python/sysmodule.c
index 2b4c6b4..765621e 100644
--- a/Python/sysmodule.c
+++ b/Python/sysmodule.c
@@ -562,6 +562,28 @@ sys_getframe(PyObject *self, PyObject *args)
return (PyObject*)f;
}
+PyDoc_STRVAR(callstats_doc,
+"callstats() -> tuple of integers\n\
+\n\
+Return a tuple of function call statistics, if CALL_PROFILE was defined\n\
+when Python was built. Otherwise, return None.\n\
+\n\
+When enabled, this function returns implementation-specific details about\n\
+the number of function calls executed. The return value is an 11-tuple\n\
+whose entries are counts of:\n\
+0. all function calls\n\
+1. calls to PyFunction_Type objects\n\
+2. PyFunction calls that do not create an argument tuple\n\
+3. PyFunction calls that do not create an argument tuple\n\
+ and bypass PyEval_EvalCodeEx()\n\
+4. PyMethod calls\n\
+5. PyMethod calls on bound methods\n\
+6. PyCFunction calls\n\
+7. PyType calls\n\
+8. generator calls\n\
+9. all other calls\n\
+10. number of stack pops performed by call_function()"
+);
#ifdef Py_TRACE_REFS
/* Defined in objects.c because it uses static globals in that file */
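
For reference, a small sketch that prints the tuple documented above with human-readable labels; the label strings are mine, and the order follows the docstring (and the PCALL_* constants in Python/ceval.c). In a non-CALL_PROFILE build the function returns None:

    import sys

    LABELS = [
        "all calls", "PyFunction calls", "fast PyFunction calls",
        "faster PyFunction calls", "PyMethod calls", "bound PyMethod calls",
        "PyCFunction calls", "PyType calls", "generator calls",
        "other calls", "stack pops in call_function()",
    ]

    stats = sys.callstats()
    if stats is None:
        print "sys.callstats() returned None (CALL_PROFILE not defined)"
    else:
        for label, count in zip(LABELS, stats):
            print "%-30s %d" % (label, count)
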
@@ -575,13 +597,15 @@ extern PyObject *_Py_GetDXProfile(PyObject *, PyObject *);
static PyMethodDef sys_methods[] = {
/* Might as well keep this in alphabetic order */
+ {"callstats", (PyCFunction)PyEval_GetCallStats, METH_NOARGS,
+ callstats_doc},
{"displayhook", sys_displayhook, METH_O, displayhook_doc},
{"exc_info", (PyCFunction)sys_exc_info, METH_NOARGS, exc_info_doc},
{"excepthook", sys_excepthook, METH_VARARGS, excepthook_doc},
{"exit", sys_exit, METH_VARARGS, exit_doc},
#ifdef Py_USING_UNICODE
- {"getdefaultencoding", (PyCFunction)sys_getdefaultencoding, METH_NOARGS,
- getdefaultencoding_doc},
+ {"getdefaultencoding", (PyCFunction)sys_getdefaultencoding,
+ METH_NOARGS, getdefaultencoding_doc},
#endif
#ifdef HAVE_DLOPEN
{"getdlopenflags", (PyCFunction)sys_getdlopenflags, METH_NOARGS,