summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Doc/lib/libsys.tex10
-rw-r--r--Include/pystate.h3
-rw-r--r--Misc/NEWS3
-rw-r--r--Python/ceval.c152
-rw-r--r--Python/pystate.c3
-rw-r--r--Python/sysmodule.c30
-rw-r--r--README1
-rwxr-xr-xconfigure28
-rw-r--r--configure.in13
-rw-r--r--pyconfig.h.in3
10 files changed, 243 insertions, 3 deletions
diff --git a/Doc/lib/libsys.tex b/Doc/lib/libsys.tex
index 3b81384..ceced16 100644
--- a/Doc/lib/libsys.tex
+++ b/Doc/lib/libsys.tex
@@ -475,6 +475,16 @@ else:
debugged.
\end{funcdesc}
+\begin{funcdesc}{settscdump}{on_flag}
+ Activate dumping of VM measurements using the Pentium timestamp
+ counter, if \var{on_flag} is true. Deactivate these dumps if
+ \var{on_flag} is off. The function is available only if Python
+ was compiled with \code{--with-tsc}. To understand the output
+ of this dump, read \code{Python/ceval.c}.
+
+ \versionadded{2.4}
+\end{funcdesc}
+
\begin{datadesc}{stdin}
\dataline{stdout}
\dataline{stderr}
diff --git a/Include/pystate.h b/Include/pystate.h
index 4239108..1ff2626 100644
--- a/Include/pystate.h
+++ b/Include/pystate.h
@@ -29,6 +29,9 @@ typedef struct _is {
#ifdef HAVE_DLOPEN
int dlopenflags;
#endif
+#ifdef WITH_TSC
+ int tscdump;
+#endif
} PyInterpreterState;
diff --git a/Misc/NEWS b/Misc/NEWS
index cfd296e..a07d887 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -499,6 +499,9 @@ Tools/Demos
Build
-----
+- Profiling the VM using the Pentium TSC is now possible if Python
+ is configured --with-tsc.
+
- In order to find libraries, setup.py now also looks in /lib64, for use
on AMD64.
diff --git a/Python/ceval.c b/Python/ceval.c
index 2d60014..a52168c 100644
--- a/Python/ceval.c
+++ b/Python/ceval.c
@@ -17,6 +17,26 @@
#include <ctype.h>
+#ifdef WITH_TSC
+#include <asm/msr.h>
+
+typedef unsigned long long uint64;
+
+void dump_tsc(int opcode, int ticked, uint64 inst0, uint64 inst1,
+ uint64 loop0, uint64 loop1, uint64 intr0, uint64 intr1)
+{
+ uint64 intr, inst, loop;
+ PyThreadState *tstate = PyThreadState_Get();
+ if (!tstate->interp->tscdump)
+ return;
+ intr = intr1 - intr0;
+ inst = inst1 - inst0 - intr;
+ loop = loop1 - loop0 - intr;
+ fprintf(stderr, "opcode=%03d t=%d inst=%06lld loop=%06lld\n",
+ opcode, ticked, inst, loop);
+}
+#endif
+
/* Turn this on if your compiler chokes on the big switch: */
/* #define CASE_TOO_BIG 1 */
@@ -30,7 +50,11 @@ typedef PyObject *(*callproc)(PyObject *, PyObject *, PyObject *);
/* Forward declarations */
static PyObject *eval_frame(PyFrameObject *);
+#ifdef WITH_TSC
+static PyObject *call_function(PyObject ***, int, uint64*, uint64*);
+#else
static PyObject *call_function(PyObject ***, int);
+#endif
static PyObject *fast_function(PyObject *, PyObject ***, int, int, int);
static PyObject *do_call(PyObject *, PyObject ***, int, int);
static PyObject *ext_do_call(PyObject *, PyObject ***, int, int, int);
@@ -485,6 +509,44 @@ eval_frame(PyFrameObject *f)
#define GETITEM(v, i) PyTuple_GetItem((v), (i))
#endif
+#ifdef WITH_TSC
+/* Use Pentium timestamp counter to mark certain events:
+ inst0 -- beginning of switch statement for opcode dispatch
+ inst1 -- end of switch statement (may be skipped)
+ loop0 -- the top of the mainloop
+ loop1 -- place where control returns again to top of mainloop
+ (may be skipped)
+ intr1 -- beginning of long interruption
+ intr2 -- end of long interruption
+
+ Many opcodes call out to helper C functions. In some cases, the
+ time in those functions should be counted towards the time for the
+ opcode, but not in all cases. For example, a CALL_FUNCTION opcode
+ calls another Python function; there's no point in charge all the
+ bytecode executed by the called function to the caller.
+
+ It's hard to make a useful judgement statically. In the presence
+ of operator overloading, it's impossible to tell if a call will
+ execute new Python code or not.
+
+ It's a case-by-case judgement. I'll use intr1 for the following
+ cases:
+
+ EXEC_STMT
+ IMPORT_STAR
+ IMPORT_FROM
+ CALL_FUNCTION (and friends)
+
+ */
+ uint64 inst0, inst1, loop0, loop1, intr0 = 0, intr1 = 0;
+ int ticked = 0;
+
+ rdtscll(inst0);
+ rdtscll(inst1);
+ rdtscll(loop0);
+ rdtscll(loop1);
+#endif
+
/* Code access macros */
#define INSTR_OFFSET() (next_instr - first_instr)
@@ -643,6 +705,23 @@ eval_frame(PyFrameObject *f)
w = NULL;
for (;;) {
+#ifdef WITH_TSC
+ if (inst1 == 0) {
+ /* Almost surely, the opcode executed a break
+ or a continue, preventing inst1 from being set
+ on the way out of the loop.
+ */
+ rdtscll(inst1);
+ loop1 = inst1;
+ }
+ dump_tsc(opcode, ticked, inst0, inst1, loop0, loop1,
+ intr0, intr1);
+ ticked = 0;
+ inst1 = 0;
+ intr0 = 0;
+ intr1 = 0;
+ rdtscll(loop0);
+#endif
assert(stack_pointer >= f->f_valuestack); /* else underflow */
assert(STACK_LEVEL() <= f->f_stacksize); /* else overflow */
@@ -662,6 +741,9 @@ eval_frame(PyFrameObject *f)
}
_Py_Ticker = _Py_CheckInterval;
tstate->tick_counter++;
+#ifdef WITH_TSC
+ ticked = 1;
+#endif
if (things_to_do) {
if (Py_MakePendingCalls() < 0) {
why = WHY_EXCEPTION;
@@ -752,6 +834,9 @@ eval_frame(PyFrameObject *f)
#endif
/* Main switch on opcode */
+#ifdef WITH_TSC
+ rdtscll(inst0);
+#endif
switch (opcode) {
@@ -1493,7 +1578,13 @@ eval_frame(PyFrameObject *f)
v = SECOND();
u = THIRD();
STACKADJ(-3);
+#ifdef WITH_TSC
+ rdtscll(intr0);
+#endif
err = exec_statement(f, u, v, w);
+#ifdef WITH_TSC
+ rdtscll(intr1);
+#endif
Py_DECREF(u);
Py_DECREF(v);
Py_DECREF(w);
@@ -1855,7 +1946,13 @@ eval_frame(PyFrameObject *f)
x = NULL;
break;
}
+#ifdef WITH_TSC
+ rdtscll(intr0);
+#endif
x = PyEval_CallObject(x, w);
+#ifdef WITH_TSC
+ rdtscll(intr1);
+#endif
Py_DECREF(w);
SET_TOP(x);
if (x != NULL) continue;
@@ -1869,7 +1966,13 @@ eval_frame(PyFrameObject *f)
"no locals found during 'import *'");
break;
}
+#ifdef WITH_TSC
+ rdtscll(intr0);
+#endif
err = import_all_from(x, v);
+#ifdef WITH_TSC
+ rdtscll(intr1);
+#endif
PyFrame_LocalsToFast(f, 0);
Py_DECREF(v);
if (err == 0) continue;
@@ -1878,7 +1981,13 @@ eval_frame(PyFrameObject *f)
case IMPORT_FROM:
w = GETITEM(names, oparg);
v = TOP();
+#ifdef WITH_TSC
+ rdtscll(intr0);
+#endif
x = import_from(v, w);
+#ifdef WITH_TSC
+ rdtscll(intr1);
+#endif
PUSH(x);
if (x != NULL) continue;
break;
@@ -1987,7 +2096,11 @@ eval_frame(PyFrameObject *f)
case CALL_FUNCTION:
PCALL(PCALL_ALL);
+#ifdef WITH_TSC
+ x = call_function(&stack_pointer, oparg, &intr0, &intr1);
+#else
x = call_function(&stack_pointer, oparg);
+#endif
PUSH(x);
if (x != NULL)
continue;
@@ -2022,7 +2135,13 @@ eval_frame(PyFrameObject *f)
n++;
} else
Py_INCREF(func);
+#ifdef WITH_TSC
+ rdtscll(intr0);
+#endif
x = ext_do_call(func, &stack_pointer, flags, na, nk);
+#ifdef WITH_TSC
+ rdtscll(intr1);
+#endif
Py_DECREF(func);
while (stack_pointer > pfunc) {
@@ -2134,6 +2253,10 @@ eval_frame(PyFrameObject *f)
on_error:
+#ifdef WITH_TSC
+ rdtscll(inst1);
+#endif
+
/* Quickly continue if no error occurred */
if (why == WHY_NOT) {
@@ -2143,9 +2266,15 @@ eval_frame(PyFrameObject *f)
if (PyErr_Occurred())
fprintf(stderr,
"XXX undetected error\n");
- else
+ else {
+#endif
+#ifdef WITH_TSC
+ rdtscll(loop1);
#endif
continue; /* Normal, fast path */
+#ifdef CHECKEXC
+ }
+#endif
}
why = WHY_EXCEPTION;
x = Py_None;
@@ -2260,6 +2389,9 @@ fast_block_end:
if (why != WHY_NOT)
break;
+#ifdef WITH_TSC
+ rdtscll(loop1);
+#endif
} /* main loop */
@@ -3331,7 +3463,11 @@ if (tstate->use_tracing) { \
static PyObject *
-call_function(PyObject ***pp_stack, int oparg)
+call_function(PyObject ***pp_stack, int oparg
+#ifdef WITH_TSC
+ , uint64* pintr0, uint64* pintr1
+#endif
+ )
{
int na = oparg & 0xff;
int nk = (oparg>>8) & 0xff;
@@ -3374,7 +3510,13 @@ call_function(PyObject ***pp_stack, int oparg)
PyObject *callargs;
callargs = load_args(pp_stack, na);
BEGIN_C_TRACE
+#ifdef WITH_TSC
+ rdtscll(*pintr0);
+#endif
x = PyCFunction_Call(func, callargs, NULL);
+#ifdef WITH_TSC
+ rdtscll(*pintr1);
+#endif
END_C_TRACE
Py_XDECREF(callargs);
}
@@ -3393,10 +3535,16 @@ call_function(PyObject ***pp_stack, int oparg)
n++;
} else
Py_INCREF(func);
+#ifdef WITH_TSC
+ rdtscll(*pintr0);
+#endif
if (PyFunction_Check(func))
x = fast_function(func, pp_stack, n, na, nk);
else
x = do_call(func, pp_stack, na, nk);
+#ifdef WITH_TSC
+ rdtscll(*pintr1);
+#endif
Py_DECREF(func);
}
diff --git a/Python/pystate.c b/Python/pystate.c
index b0eb4c4..6a8beb9 100644
--- a/Python/pystate.c
+++ b/Python/pystate.c
@@ -59,6 +59,9 @@ PyInterpreterState_New(void)
interp->dlopenflags = RTLD_LAZY;
#endif
#endif
+#ifdef WITH_TSC
+ interp->tscdump = 0;
+#endif
HEAD_LOCK();
interp->next = interp_head;
diff --git a/Python/sysmodule.c b/Python/sysmodule.c
index 09e411e..55c0425 100644
--- a/Python/sysmodule.c
+++ b/Python/sysmodule.c
@@ -442,6 +442,33 @@ PyDoc_STRVAR(getcheckinterval_doc,
"getcheckinterval() -> current check interval; see setcheckinterval()."
);
+#ifdef WITH_TSC
+static PyObject *
+sys_settscdump(PyObject *self, PyObject *args)
+{
+ int bool;
+ PyThreadState *tstate = PyThreadState_Get();
+
+ if (!PyArg_ParseTuple(args, "i:settscdump", &bool))
+ return NULL;
+ if (bool)
+ tstate->interp->tscdump = 1;
+ else
+ tstate->interp->tscdump = 0;
+ Py_INCREF(Py_None);
+ return Py_None;
+
+}
+
+PyDoc_STRVAR(settscdump_doc,
+"settscdump(bool)\n\
+\n\
+If true, tell the Python interpreter to dump VM measurements to\n\
+stderr. If false, turn off dump. The measurements are based on the\n\
+Pentium time-stamp counter."
+);
+#endif TSC
+
static PyObject *
sys_setrecursionlimit(PyObject *self, PyObject *args)
{
@@ -743,6 +770,9 @@ static PyMethodDef sys_methods[] = {
{"setprofile", sys_setprofile, METH_O, setprofile_doc},
{"setrecursionlimit", sys_setrecursionlimit, METH_VARARGS,
setrecursionlimit_doc},
+#ifdef WITH_TSC
+ {"settscdump", sys_settscdump, METH_VARARGS, settscdump_doc},
+#endif
{"settrace", sys_settrace, METH_O, settrace_doc},
{"call_tracing", sys_call_tracing, METH_VARARGS, call_tracing_doc},
{NULL, NULL} /* sentinel */
diff --git a/README b/README
index 9997ea2..f020d3a 100644
--- a/README
+++ b/README
@@ -1026,6 +1026,7 @@ Modules/getpath.o.
in files. Python code can open a file with open(file, 'U') to
read it in universal newline mode. THIS OPTION IS UNSUPPORTED.
+--with-tsc: Profile using the Pentium timestamping counter (TSC).
Building for multiple architectures (using the VPATH feature)
-------------------------------------------------------------
diff --git a/configure b/configure
index 5619b2c..697f575 100755
--- a/configure
+++ b/configure
@@ -1,5 +1,5 @@
#! /bin/sh
-# From configure.in Revision: 1.455 .
+# From configure.in Revision: 1.456 .
# Guess values for system-dependent variables and create Makefiles.
# Generated by GNU Autoconf 2.57 for python 2.4.
#
@@ -867,6 +867,7 @@ Optional Packages:
deprecated; use --with(out)-threads
--with-pth use GNU pth threading libraries
--with(out)-doc-strings disable/enable documentation strings
+ --with(out)-tsc enable/disable timestamp counter profile
--with(out)-pymalloc disable/enable specialized mallocs
--with-wctype-functions use wctype.h functions
--with-fpectl enable SIGFPE catching
@@ -12006,6 +12007,31 @@ echo "$as_me:$LINENO: result: $with_doc_strings" >&5
echo "${ECHO_T}$with_doc_strings" >&6
# Check for Python-specific malloc support
+echo "$as_me:$LINENO: checking for --with-tsc" >&5
+echo $ECHO_N "checking for --with-tsc... $ECHO_C" >&6
+
+# Check whether --with-tsc or --without-tsc was given.
+if test "${with_tsc+set}" = set; then
+ withval="$with_tsc"
+
+if test "$withval" != no
+then
+
+cat >>confdefs.h <<\_ACEOF
+#define WITH_TSC 1
+_ACEOF
+
+ echo "$as_me:$LINENO: result: yes" >&5
+echo "${ECHO_T}yes" >&6
+else echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6
+fi
+else
+ echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6
+fi;
+
+# Check for Python-specific malloc support
echo "$as_me:$LINENO: checking for --with-pymalloc" >&5
echo $ECHO_N "checking for --with-pymalloc... $ECHO_C" >&6
diff --git a/configure.in b/configure.in
index cd84ea9..cff0ed3 100644
--- a/configure.in
+++ b/configure.in
@@ -1929,6 +1929,19 @@ fi
AC_MSG_RESULT($with_doc_strings)
# Check for Python-specific malloc support
+AC_MSG_CHECKING(for --with-tsc)
+AC_ARG_WITH(tsc,
+[ --with(out)-tsc enable/disable timestamp counter profile], [
+if test "$withval" != no
+then
+ AC_DEFINE(WITH_TSC, 1,
+ [Define to profile with the Pentium timestamp counter])
+ AC_MSG_RESULT(yes)
+else AC_MSG_RESULT(no)
+fi],
+[AC_MSG_RESULT(no)])
+
+# Check for Python-specific malloc support
AC_MSG_CHECKING(for --with-pymalloc)
AC_ARG_WITH(pymalloc,
AC_HELP_STRING(--with(out)-pymalloc, disable/enable specialized mallocs))
diff --git a/pyconfig.h.in b/pyconfig.h.in
index fd4a124..8cb0cff 100644
--- a/pyconfig.h.in
+++ b/pyconfig.h.in
@@ -780,6 +780,9 @@
/* Define if you want to compile in rudimentary thread support */
#undef WITH_THREAD
+/* Define to profile with the Pentium timestamp counter */
+#undef WITH_TSC
+
/* Define to 1 if your processor stores words with the most significant byte
first (like Motorola and SPARC, unlike Intel and VAX). */
#undef WORDS_BIGENDIAN