From f30d60edbc0d5047a2fdd01e25868e4b814107e2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20v=2E=20L=C3=B6wis?= Date: Tue, 8 Jun 2004 08:17:44 +0000 Subject: Patch #510695: Add TSC profiling for the VM. --- Doc/lib/libsys.tex | 10 ++++ Include/pystate.h | 3 ++ Misc/NEWS | 3 ++ Python/ceval.c | 152 ++++++++++++++++++++++++++++++++++++++++++++++++++++- Python/pystate.c | 3 ++ Python/sysmodule.c | 30 +++++++++++ README | 1 + configure | 28 +++++++++- configure.in | 13 +++++ pyconfig.h.in | 3 ++ 10 files changed, 243 insertions(+), 3 deletions(-) diff --git a/Doc/lib/libsys.tex b/Doc/lib/libsys.tex index 3b81384..ceced16 100644 --- a/Doc/lib/libsys.tex +++ b/Doc/lib/libsys.tex @@ -475,6 +475,16 @@ else: debugged. \end{funcdesc} +\begin{funcdesc}{settscdump}{on_flag} + Activate dumping of VM measurements using the Pentium timestamp + counter, if \var{on_flag} is true. Deactivate these dumps if + \var{on_flag} is off. The function is available only if Python + was compiled with \code{--with-tsc}. To understand the output + of this dump, read \code{Python/ceval.c}. + + \versionadded{2.4} +\end{funcdesc} + \begin{datadesc}{stdin} \dataline{stdout} \dataline{stderr} diff --git a/Include/pystate.h b/Include/pystate.h index 4239108..1ff2626 100644 --- a/Include/pystate.h +++ b/Include/pystate.h @@ -29,6 +29,9 @@ typedef struct _is { #ifdef HAVE_DLOPEN int dlopenflags; #endif +#ifdef WITH_TSC + int tscdump; +#endif } PyInterpreterState; diff --git a/Misc/NEWS b/Misc/NEWS index cfd296e..a07d887 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -499,6 +499,9 @@ Tools/Demos Build ----- +- Profiling the VM using the Pentium TSC is now possible if Python + is configured --with-tsc. + - In order to find libraries, setup.py now also looks in /lib64, for use on AMD64. diff --git a/Python/ceval.c b/Python/ceval.c index 2d60014..a52168c 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -17,6 +17,26 @@ #include +#ifdef WITH_TSC +#include + +typedef unsigned long long uint64; + +void dump_tsc(int opcode, int ticked, uint64 inst0, uint64 inst1, + uint64 loop0, uint64 loop1, uint64 intr0, uint64 intr1) +{ + uint64 intr, inst, loop; + PyThreadState *tstate = PyThreadState_Get(); + if (!tstate->interp->tscdump) + return; + intr = intr1 - intr0; + inst = inst1 - inst0 - intr; + loop = loop1 - loop0 - intr; + fprintf(stderr, "opcode=%03d t=%d inst=%06lld loop=%06lld\n", + opcode, ticked, inst, loop); +} +#endif + /* Turn this on if your compiler chokes on the big switch: */ /* #define CASE_TOO_BIG 1 */ @@ -30,7 +50,11 @@ typedef PyObject *(*callproc)(PyObject *, PyObject *, PyObject *); /* Forward declarations */ static PyObject *eval_frame(PyFrameObject *); +#ifdef WITH_TSC +static PyObject *call_function(PyObject ***, int, uint64*, uint64*); +#else static PyObject *call_function(PyObject ***, int); +#endif static PyObject *fast_function(PyObject *, PyObject ***, int, int, int); static PyObject *do_call(PyObject *, PyObject ***, int, int); static PyObject *ext_do_call(PyObject *, PyObject ***, int, int, int); @@ -485,6 +509,44 @@ eval_frame(PyFrameObject *f) #define GETITEM(v, i) PyTuple_GetItem((v), (i)) #endif +#ifdef WITH_TSC +/* Use Pentium timestamp counter to mark certain events: + inst0 -- beginning of switch statement for opcode dispatch + inst1 -- end of switch statement (may be skipped) + loop0 -- the top of the mainloop + loop1 -- place where control returns again to top of mainloop + (may be skipped) + intr1 -- beginning of long interruption + intr2 -- end of long interruption + + Many opcodes call out to helper C functions. In some cases, the + time in those functions should be counted towards the time for the + opcode, but not in all cases. For example, a CALL_FUNCTION opcode + calls another Python function; there's no point in charge all the + bytecode executed by the called function to the caller. + + It's hard to make a useful judgement statically. In the presence + of operator overloading, it's impossible to tell if a call will + execute new Python code or not. + + It's a case-by-case judgement. I'll use intr1 for the following + cases: + + EXEC_STMT + IMPORT_STAR + IMPORT_FROM + CALL_FUNCTION (and friends) + + */ + uint64 inst0, inst1, loop0, loop1, intr0 = 0, intr1 = 0; + int ticked = 0; + + rdtscll(inst0); + rdtscll(inst1); + rdtscll(loop0); + rdtscll(loop1); +#endif + /* Code access macros */ #define INSTR_OFFSET() (next_instr - first_instr) @@ -643,6 +705,23 @@ eval_frame(PyFrameObject *f) w = NULL; for (;;) { +#ifdef WITH_TSC + if (inst1 == 0) { + /* Almost surely, the opcode executed a break + or a continue, preventing inst1 from being set + on the way out of the loop. + */ + rdtscll(inst1); + loop1 = inst1; + } + dump_tsc(opcode, ticked, inst0, inst1, loop0, loop1, + intr0, intr1); + ticked = 0; + inst1 = 0; + intr0 = 0; + intr1 = 0; + rdtscll(loop0); +#endif assert(stack_pointer >= f->f_valuestack); /* else underflow */ assert(STACK_LEVEL() <= f->f_stacksize); /* else overflow */ @@ -662,6 +741,9 @@ eval_frame(PyFrameObject *f) } _Py_Ticker = _Py_CheckInterval; tstate->tick_counter++; +#ifdef WITH_TSC + ticked = 1; +#endif if (things_to_do) { if (Py_MakePendingCalls() < 0) { why = WHY_EXCEPTION; @@ -752,6 +834,9 @@ eval_frame(PyFrameObject *f) #endif /* Main switch on opcode */ +#ifdef WITH_TSC + rdtscll(inst0); +#endif switch (opcode) { @@ -1493,7 +1578,13 @@ eval_frame(PyFrameObject *f) v = SECOND(); u = THIRD(); STACKADJ(-3); +#ifdef WITH_TSC + rdtscll(intr0); +#endif err = exec_statement(f, u, v, w); +#ifdef WITH_TSC + rdtscll(intr1); +#endif Py_DECREF(u); Py_DECREF(v); Py_DECREF(w); @@ -1855,7 +1946,13 @@ eval_frame(PyFrameObject *f) x = NULL; break; } +#ifdef WITH_TSC + rdtscll(intr0); +#endif x = PyEval_CallObject(x, w); +#ifdef WITH_TSC + rdtscll(intr1); +#endif Py_DECREF(w); SET_TOP(x); if (x != NULL) continue; @@ -1869,7 +1966,13 @@ eval_frame(PyFrameObject *f) "no locals found during 'import *'"); break; } +#ifdef WITH_TSC + rdtscll(intr0); +#endif err = import_all_from(x, v); +#ifdef WITH_TSC + rdtscll(intr1); +#endif PyFrame_LocalsToFast(f, 0); Py_DECREF(v); if (err == 0) continue; @@ -1878,7 +1981,13 @@ eval_frame(PyFrameObject *f) case IMPORT_FROM: w = GETITEM(names, oparg); v = TOP(); +#ifdef WITH_TSC + rdtscll(intr0); +#endif x = import_from(v, w); +#ifdef WITH_TSC + rdtscll(intr1); +#endif PUSH(x); if (x != NULL) continue; break; @@ -1987,7 +2096,11 @@ eval_frame(PyFrameObject *f) case CALL_FUNCTION: PCALL(PCALL_ALL); +#ifdef WITH_TSC + x = call_function(&stack_pointer, oparg, &intr0, &intr1); +#else x = call_function(&stack_pointer, oparg); +#endif PUSH(x); if (x != NULL) continue; @@ -2022,7 +2135,13 @@ eval_frame(PyFrameObject *f) n++; } else Py_INCREF(func); +#ifdef WITH_TSC + rdtscll(intr0); +#endif x = ext_do_call(func, &stack_pointer, flags, na, nk); +#ifdef WITH_TSC + rdtscll(intr1); +#endif Py_DECREF(func); while (stack_pointer > pfunc) { @@ -2134,6 +2253,10 @@ eval_frame(PyFrameObject *f) on_error: +#ifdef WITH_TSC + rdtscll(inst1); +#endif + /* Quickly continue if no error occurred */ if (why == WHY_NOT) { @@ -2143,9 +2266,15 @@ eval_frame(PyFrameObject *f) if (PyErr_Occurred()) fprintf(stderr, "XXX undetected error\n"); - else + else { +#endif +#ifdef WITH_TSC + rdtscll(loop1); #endif continue; /* Normal, fast path */ +#ifdef CHECKEXC + } +#endif } why = WHY_EXCEPTION; x = Py_None; @@ -2260,6 +2389,9 @@ fast_block_end: if (why != WHY_NOT) break; +#ifdef WITH_TSC + rdtscll(loop1); +#endif } /* main loop */ @@ -3331,7 +3463,11 @@ if (tstate->use_tracing) { \ static PyObject * -call_function(PyObject ***pp_stack, int oparg) +call_function(PyObject ***pp_stack, int oparg +#ifdef WITH_TSC + , uint64* pintr0, uint64* pintr1 +#endif + ) { int na = oparg & 0xff; int nk = (oparg>>8) & 0xff; @@ -3374,7 +3510,13 @@ call_function(PyObject ***pp_stack, int oparg) PyObject *callargs; callargs = load_args(pp_stack, na); BEGIN_C_TRACE +#ifdef WITH_TSC + rdtscll(*pintr0); +#endif x = PyCFunction_Call(func, callargs, NULL); +#ifdef WITH_TSC + rdtscll(*pintr1); +#endif END_C_TRACE Py_XDECREF(callargs); } @@ -3393,10 +3535,16 @@ call_function(PyObject ***pp_stack, int oparg) n++; } else Py_INCREF(func); +#ifdef WITH_TSC + rdtscll(*pintr0); +#endif if (PyFunction_Check(func)) x = fast_function(func, pp_stack, n, na, nk); else x = do_call(func, pp_stack, na, nk); +#ifdef WITH_TSC + rdtscll(*pintr1); +#endif Py_DECREF(func); } diff --git a/Python/pystate.c b/Python/pystate.c index b0eb4c4..6a8beb9 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -59,6 +59,9 @@ PyInterpreterState_New(void) interp->dlopenflags = RTLD_LAZY; #endif #endif +#ifdef WITH_TSC + interp->tscdump = 0; +#endif HEAD_LOCK(); interp->next = interp_head; diff --git a/Python/sysmodule.c b/Python/sysmodule.c index 09e411e..55c0425 100644 --- a/Python/sysmodule.c +++ b/Python/sysmodule.c @@ -442,6 +442,33 @@ PyDoc_STRVAR(getcheckinterval_doc, "getcheckinterval() -> current check interval; see setcheckinterval()." ); +#ifdef WITH_TSC +static PyObject * +sys_settscdump(PyObject *self, PyObject *args) +{ + int bool; + PyThreadState *tstate = PyThreadState_Get(); + + if (!PyArg_ParseTuple(args, "i:settscdump", &bool)) + return NULL; + if (bool) + tstate->interp->tscdump = 1; + else + tstate->interp->tscdump = 0; + Py_INCREF(Py_None); + return Py_None; + +} + +PyDoc_STRVAR(settscdump_doc, +"settscdump(bool)\n\ +\n\ +If true, tell the Python interpreter to dump VM measurements to\n\ +stderr. If false, turn off dump. The measurements are based on the\n\ +Pentium time-stamp counter." +); +#endif TSC + static PyObject * sys_setrecursionlimit(PyObject *self, PyObject *args) { @@ -743,6 +770,9 @@ static PyMethodDef sys_methods[] = { {"setprofile", sys_setprofile, METH_O, setprofile_doc}, {"setrecursionlimit", sys_setrecursionlimit, METH_VARARGS, setrecursionlimit_doc}, +#ifdef WITH_TSC + {"settscdump", sys_settscdump, METH_VARARGS, settscdump_doc}, +#endif {"settrace", sys_settrace, METH_O, settrace_doc}, {"call_tracing", sys_call_tracing, METH_VARARGS, call_tracing_doc}, {NULL, NULL} /* sentinel */ diff --git a/README b/README index 9997ea2..f020d3a 100644 --- a/README +++ b/README @@ -1026,6 +1026,7 @@ Modules/getpath.o. in files. Python code can open a file with open(file, 'U') to read it in universal newline mode. THIS OPTION IS UNSUPPORTED. +--with-tsc: Profile using the Pentium timestamping counter (TSC). Building for multiple architectures (using the VPATH feature) ------------------------------------------------------------- diff --git a/configure b/configure index 5619b2c..697f575 100755 --- a/configure +++ b/configure @@ -1,5 +1,5 @@ #! /bin/sh -# From configure.in Revision: 1.455 . +# From configure.in Revision: 1.456 . # Guess values for system-dependent variables and create Makefiles. # Generated by GNU Autoconf 2.57 for python 2.4. # @@ -867,6 +867,7 @@ Optional Packages: deprecated; use --with(out)-threads --with-pth use GNU pth threading libraries --with(out)-doc-strings disable/enable documentation strings + --with(out)-tsc enable/disable timestamp counter profile --with(out)-pymalloc disable/enable specialized mallocs --with-wctype-functions use wctype.h functions --with-fpectl enable SIGFPE catching @@ -12006,6 +12007,31 @@ echo "$as_me:$LINENO: result: $with_doc_strings" >&5 echo "${ECHO_T}$with_doc_strings" >&6 # Check for Python-specific malloc support +echo "$as_me:$LINENO: checking for --with-tsc" >&5 +echo $ECHO_N "checking for --with-tsc... $ECHO_C" >&6 + +# Check whether --with-tsc or --without-tsc was given. +if test "${with_tsc+set}" = set; then + withval="$with_tsc" + +if test "$withval" != no +then + +cat >>confdefs.h <<\_ACEOF +#define WITH_TSC 1 +_ACEOF + + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 +else echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 +fi +else + echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 +fi; + +# Check for Python-specific malloc support echo "$as_me:$LINENO: checking for --with-pymalloc" >&5 echo $ECHO_N "checking for --with-pymalloc... $ECHO_C" >&6 diff --git a/configure.in b/configure.in index cd84ea9..cff0ed3 100644 --- a/configure.in +++ b/configure.in @@ -1929,6 +1929,19 @@ fi AC_MSG_RESULT($with_doc_strings) # Check for Python-specific malloc support +AC_MSG_CHECKING(for --with-tsc) +AC_ARG_WITH(tsc, +[ --with(out)-tsc enable/disable timestamp counter profile], [ +if test "$withval" != no +then + AC_DEFINE(WITH_TSC, 1, + [Define to profile with the Pentium timestamp counter]) + AC_MSG_RESULT(yes) +else AC_MSG_RESULT(no) +fi], +[AC_MSG_RESULT(no)]) + +# Check for Python-specific malloc support AC_MSG_CHECKING(for --with-pymalloc) AC_ARG_WITH(pymalloc, AC_HELP_STRING(--with(out)-pymalloc, disable/enable specialized mallocs)) diff --git a/pyconfig.h.in b/pyconfig.h.in index fd4a124..8cb0cff 100644 --- a/pyconfig.h.in +++ b/pyconfig.h.in @@ -780,6 +780,9 @@ /* Define if you want to compile in rudimentary thread support */ #undef WITH_THREAD +/* Define to profile with the Pentium timestamp counter */ +#undef WITH_TSC + /* Define to 1 if your processor stores words with the most significant byte first (like Motorola and SPARC, unlike Intel and VAX). */ #undef WORDS_BIGENDIAN -- cgit v0.12