From a871ef2b3e924f058ec1b0aed7d4c83a546414b7 Mon Sep 17 00:00:00 2001 From: Armin Rigo Date: Wed, 8 Feb 2006 12:53:56 +0000 Subject: Added the cProfile module. Based on lsprof (patch #1212837) by Brett Rosen and Ted Czotter. With further editing by Michael Hudson and myself. History in svn repo: http://codespeak.net/svn/user/arigo/hack/misc/lsprof * Module/_lsprof.c is the internal C module, Lib/cProfile.py a wrapper. * pstats.py updated to display cProfile's caller/callee timings if available. * setup.py and NEWS updated. * documentation updates in the profiler section: - explain the differences between the three profilers that we have now - profile and cProfile can use a unified documentation, like (c)Pickle - mention that hotshot is "for specialized usage" now - removed references to the "old profiler" that no longer exists * test updates: - extended test_profile to cover delicate cases like recursion - added tests for the caller/callee displays - added test_cProfile, performing the same tests for cProfile * TO-DO: - cProfile gives a nicer name to built-in, particularly built-in methods, which could be backported to profile. - not tested on Windows recently! --- Doc/lib/lib.tex | 2 +- Doc/lib/libhotshot.tex | 11 + Doc/lib/libprofile.tex | 153 ++++++-- Lib/cProfile.py | 190 +++++++++ Lib/pstats.py | 46 ++- Lib/test/output/test_cProfile | 79 ++++ Lib/test/output/test_profile | 78 +++- Lib/test/test_cProfile.py | 123 ++++++ Lib/test/test_profile.py | 104 ++--- Misc/NEWS | 5 + Modules/_lsprof.c | 867 ++++++++++++++++++++++++++++++++++++++++++ Modules/rotatingtree.c | 121 ++++++ Modules/rotatingtree.h | 27 ++ setup.py | 4 +- 14 files changed, 1701 insertions(+), 109 deletions(-) create mode 100755 Lib/cProfile.py create mode 100644 Lib/test/output/test_cProfile create mode 100644 Lib/test/test_cProfile.py create mode 100644 Modules/_lsprof.c create mode 100644 Modules/rotatingtree.c create mode 100644 Modules/rotatingtree.h diff --git a/Doc/lib/lib.tex b/Doc/lib/lib.tex index 9e4dcb5..44e7c96 100644 --- a/Doc/lib/lib.tex +++ b/Doc/lib/lib.tex @@ -358,7 +358,7 @@ and how to embed it in other applications. \input{libpdb} % The Python Debugger \input{libprofile} % The Python Profiler -\input{libhotshot} % New profiler +\input{libhotshot} % unmaintained C profiler \input{libtimeit} diff --git a/Doc/lib/libhotshot.tex b/Doc/lib/libhotshot.tex index 33d6a06..98e0b6d 100644 --- a/Doc/lib/libhotshot.tex +++ b/Doc/lib/libhotshot.tex @@ -14,6 +14,17 @@ Hotshot is a replacement for the existing \refmodule{profile} module. As it's written mostly in C, it should result in a much smaller performance impact than the existing \refmodule{profile} module. +\begin{notice}[note] + The \module{hotshot} module focuses on minimizing the overhead + while profiling, at the expense of long data post-processing times. + For common usages it is recommended to use \module{cProfile} instead. + \module{hotshot} is not maintained and might be removed from the + standard library in the future. +\end{notice} + +\versionchanged[the results should be more meaningful than in the +past: the timing core contained a critical bug]{2.5} + \begin{notice}[warning] The \module{hotshot} profiler does not yet work well with threads. It is useful to use an unthreaded script to run the profiler over diff --git a/Doc/lib/libprofile.tex b/Doc/lib/libprofile.tex index ddbae73..afc9694 100644 --- a/Doc/lib/libprofile.tex +++ b/Doc/lib/libprofile.tex @@ -1,4 +1,4 @@ -\chapter{The Python Profiler \label{profile}} +\chapter{The Python Profilers \label{profile}} \sectionauthor{James Roskind}{} @@ -6,8 +6,9 @@ Copyright \copyright{} 1994, by InfoSeek Corporation, all rights reserved. \index{InfoSeek Corporation} Written by James Roskind.\footnote{ - Updated and converted to \LaTeX\ by Guido van Rossum. The references to - the old profiler are left in the text, although it no longer exists.} + Updated and converted to \LaTeX\ by Guido van Rossum. + Further updated by Armin Rigo to integrate the documentation for the new + \module{cProfile} module of Python 2.5.} Permission to use, copy, modify, and distribute this Python software and its associated documentation for any purpose (subject to the @@ -41,7 +42,7 @@ ways at times. Please send suggestions for improvements to: I'd appreciate the feedback. -\section{Introduction to the profiler} +\section{Introduction to the profilers} \nodename{Profiler Introduction} A \dfn{profiler} is a program that describes the run time performance @@ -54,6 +55,31 @@ examine the results of a profile operation. \index{deterministic profiling} \index{profiling, deterministic} +The Python standard library provides three different profilers: + +\begin{enumerate} +\item \module{profile}, a pure Python module, described in the sequel. + Copyright \copyright{} 1994, by InfoSeek Corporation. + \versionchanged[also reports the time spent in calls to built-in + functions and methods]{2.4} + +\item \module{cProfile}, a module written in C, with a reasonable + overhead that makes it suitable for profiling long-running programs. + Based on \module{lsprof}, contributed by Brett Rosen and Ted Czotter. + \versionadded{2.5} + +\item \module{hotshot}, a C module focusing on minimizing the overhead + while profiling, at the expense of long data post-processing times. + \versionchanged[the results should be more meaningful than in the + past: the timing core contained a critical bug]{2.5} +\end{enumerate} + +The \module{profile} and \module{cProfile} modules export the same +interface, so they are mostly interchangeables; \module{cProfile} has a +much lower overhead but is not so far as well-tested and might not be +available on all systems. \module{cProfile} is really a compatibility +layer on top of the internal \module{_lsprof} module. The +\module{hotshot} module is reserved to specialized usages. %\section{How Is This Profiler Different From The Old Profiler?} %\nodename{Profiler Changes} @@ -108,10 +134,13 @@ To profile an application with a main entry point of \function{foo()}, you would add the following to your module: \begin{verbatim} -import profile -profile.run('foo()') +import cProfile +cProfile.run('foo()') \end{verbatim} +(Use \module{profile} instead of \module{cProfile} if the latter is not +available on your system.) + The above action would cause \function{foo()} to be run, and a series of informative lines (the profile) to be printed. The above approach is most useful when working with the interpreter. If you would like to @@ -120,21 +149,21 @@ can supply a file name as the second argument to the \function{run()} function: \begin{verbatim} -import profile -profile.run('foo()', 'fooprof') +import cProfile +cProfile.run('foo()', 'fooprof') \end{verbatim} -The file \file{profile.py} can also be invoked as +The file \file{cProfile.py} can also be invoked as a script to profile another script. For example: \begin{verbatim} -python -m profile myscript.py +python -m cProfile myscript.py \end{verbatim} -\file{profile.py} accepts two optional arguments on the command line: +\file{cProfile.py} accepts two optional arguments on the command line: \begin{verbatim} -profile.py [-o output_file] [-s sort_order] +cProfile.py [-o output_file] [-s sort_order] \end{verbatim} \programopt{-s} only applies to standard output (\programopt{-o} is @@ -153,7 +182,7 @@ p = pstats.Stats('fooprof') The class \class{Stats} (the above code just created an instance of this class) has a variety of methods for manipulating and printing the data that was just read into \code{p}. When you ran -\function{profile.run()} above, what was printed was the result of three +\function{cProfile.run()} above, what was printed was the result of three method calls: \begin{verbatim} @@ -162,8 +191,9 @@ p.strip_dirs().sort_stats(-1).print_stats() The first method removed the extraneous path from all the module names. The second method sorted all the entries according to the -standard module/line/name string that is printed (this is to comply -with the semantics of the old profiler). The third method printed out +standard module/line/name string that is printed. +%(this is to comply with the semantics of the old profiler). +The third method printed out all the statistics. You might try the following sort calls: \begin{verbatim} @@ -268,15 +298,17 @@ times in this profiler allows statistics for recursive implementations of algorithms to be directly compared to iterative implementations. -\section{Reference Manual} +\section{Reference Manual -- \module{profile} and \module{cProfile}} \declaremodule{standard}{profile} +\declaremodule{standard}{cProfile} \modulesynopsis{Python profiler} The primary entry point for the profiler is the global function -\function{profile.run()}. It is typically used to create any profile +\function{profile.run()} (resp. \function{cProfile.run()}). +It is typically used to create any profile information. The reports are formatted and printed using methods of the class \class{pstats.Stats}. The following is a description of all of these standard entry points and functions. For a more in-depth @@ -296,7 +328,6 @@ standard name string (file/line/function-name) that is presented in each line. The following is a typical output from such a call: \begin{verbatim} - main() 2706 function calls (2004 primitive calls) in 4.504 CPU seconds Ordered by: standard name @@ -307,9 +338,7 @@ ncalls tottime percall cumtime percall filename:lineno(function) ... \end{verbatim} -The first line indicates that this profile was generated by the call:\\ -\code{profile.run('main()')}, and hence the exec'ed string is -\code{'main()'}. The second line indicates that 2706 calls were +The first line indicates that 2706 calls were monitored. Of those calls, 2004 were \dfn{primitive}. We define \dfn{primitive} to mean that the call was not induced via recursion. The next line: \code{Ordered by:\ standard name}, indicates that @@ -350,7 +379,7 @@ figure is printed. \end{funcdesc} \begin{funcdesc}{runctx}{command, globals, locals\optional{, filename}} -This function is similar to \function{profile.run()}, with added +This function is similar to \function{run()}, with added arguments to supply the globals and locals dictionaries for the \var{command} string. \end{funcdesc} @@ -368,10 +397,12 @@ from a \var{filename} (or set of filenames). \class{Stats} objects are manipulated by methods, in order to print useful reports. The file selected by the above constructor must have been created by -the corresponding version of \module{profile}. To be specific, there is +the corresponding version of \module{profile} or \module{cProfile}. +To be specific, there is \emph{no} file compatibility guaranteed with future versions of this profiler, and there is no compatibility with files produced by other -profilers (such as the old system profiler). +profilers. +%(such as the old system profiler). If several files are provided, all the statistics for identical functions will be coalesced, so that an overall view of several @@ -403,7 +434,8 @@ statistics for these two entries are accumulated into a single entry. This method of the \class{Stats} class accumulates additional profiling information into the current profiling object. Its arguments should refer to filenames created by the corresponding -version of \function{profile.run()}. Statistics for identically named +version of \function{profile.run()} or \function{cProfile.run()}. +Statistics for identically named (re: file, line, name) functions are automatically accumulated into single function statistics. \end{methoddesc} @@ -412,7 +444,8 @@ single function statistics. Save the data loaded into the \class{Stats} object to a file named \var{filename}. The file is created if it does not exist, and is overwritten if it already exists. This is equivalent to the method of -the same name on the \class{profile.Profile} class. +the same name on the \class{profile.Profile} and +\class{cProfile.Profile} classes. \versionadded{2.3} \end{methoddesc} @@ -456,7 +489,8 @@ string order 20, 3 and 40. In contrast, \code{'nfl'} does a numeric compare of the line numbers. In fact, \code{sort_stats('nfl')} is the same as \code{sort_stats('name', 'file', 'line')}. -For compatibility with the old profiler, the numeric arguments +%For compatibility with the old profiler, +For backward-compatibility reasons, the numeric arguments \code{-1}, \code{0}, \code{1}, and \code{2} are permitted. They are interpreted as \code{'stdname'}, \code{'calls'}, \code{'time'}, and \code{'cumulative'} respectively. If this old style format (numeric) @@ -467,10 +501,10 @@ additional arguments will be silently ignored. \begin{methoddesc}[Stats]{reverse_order}{} This method for the \class{Stats} class reverses the ordering of the basic -list within the object. This method is provided primarily for -compatibility with the old profiler. Its utility is questionable -now that ascending vs descending order is properly selected based on -the sort key of choice. +list within the object. %This method is provided primarily for +%compatibility with the old profiler. +Note that by default ascending vs descending order is properly selected +based on the sort key of choice. \end{methoddesc} \begin{methoddesc}[Stats]{print_stats}{\optional{restriction, \moreargs}} @@ -512,10 +546,21 @@ and then proceed to only print the first 10\% of them. This method for the \class{Stats} class prints a list of all functions that called each function in the profiled database. The ordering is identical to that provided by \method{print_stats()}, and the definition -of the restricting argument is also identical. For convenience, a -number is shown in parentheses after each caller to show how many -times this specific call was made. A second non-parenthesized number -is the cumulative time spent in the function at the right. +of the restricting argument is also identical. Each caller is reported on +its own line. The format differs slightly depending on the profiler that +produced the stats: + +\begin{itemize} +\item With \module{profile}, a number is shown in parentheses after each + caller to show how many times this specific call was made. For + convenience, a second non-parenthesized number repeats the cumulative + time spent in the function at the right. + +\item With \module{cProfile}, each caller is preceeded by three numbers: + the number of times this specific call was made, and the total and + cumulative times spent in the current function while it was invoked by + this specific caller. +\end{itemize} \end{methoddesc} \begin{methoddesc}[Stats]{print_callees}{\optional{restriction, \moreargs}} @@ -546,7 +591,10 @@ is once again executing. As a result, functions that are called many times, or call many functions, will typically accumulate this error. The error that accumulates in this fashion is typically less than the accuracy of the clock (less than one clock tick), but it -\emph{can} accumulate and become very significant. This profiler +\emph{can} accumulate and become very significant. + +The problem is more important with \module{profile} than with the +lower-overhead \module{cProfile}. For this reason, \module{profile} provides a means of calibrating itself for a given platform so that this error can be probabilistically (on the average) removed. After the profiler is calibrated, it will be more accurate (in a least @@ -560,7 +608,7 @@ calibration. \section{Calibration \label{profile-calibration}} -The profiler subtracts a constant from each +The profiler of the \module{profile} module subtracts a constant from each event handling time to compensate for the overhead of calling the time function, and socking away the results. By default, the constant is 0. The following procedure can @@ -614,11 +662,12 @@ statistics. \section{Extensions --- Deriving Better Profilers} \nodename{Profiler Extensions} -The \class{Profile} class of module \module{profile} was written so that +The \class{Profile} class of both modules, \module{profile} and +\module{cProfile}, were written so that derived classes could be developed to extend the profiler. The details are not described here, as doing this successfully requires an expert understanding of how the \class{Profile} class works internally. Study -the source code of module \module{profile} carefully if you want to +the source code of the module carefully if you want to pursue this. If all you want to do is change how current time is determined (for @@ -630,8 +679,11 @@ constructor: pr = profile.Profile(your_time_func) \end{verbatim} -The resulting profiler will then call \code{your_time_func()}. -The function should return a single number, or a list of +The resulting profiler will then call \function{your_time_func()}. + +\begin{description} +\item[\class{profile.Profile}] +\function{your_time_func()} should return a single number, or a list of numbers whose sum is the current time (like what \function{os.times()} returns). If the function returns a single time number, or the list of returned numbers has length 2, then you will get an especially fast @@ -646,3 +698,22 @@ you want to substitute a better timer in the cleanest fashion, derive a class and hardwire a replacement dispatch method that best handles your timer call, along with the appropriate calibration constant. + +\item[\class{cProfile.Profile}] +\function{your_time_func()} should return a single number. If it returns +plain integers, you can also invoke the class constructor with a second +argument specifying the real duration of one unit of time. For example, +if \function{your_integer_time_func()} returns times measured in thousands +of seconds, you would constuct the \class{Profile} instance as follows: + +\begin{verbatim} +pr = profile.Profile(your_integer_time_func, 0.001) +\end{verbatim} + +As the \module{cProfile.Profile} class cannot be calibrated, custom +timer functions should be used with care and should be as fast as +possible. For the best results with a custom timer, it might be +necessary to hard-code it in the C source of the internal +\module{_lsprof} module. + +\end{description} diff --git a/Lib/cProfile.py b/Lib/cProfile.py new file mode 100755 index 0000000..19d5804 --- /dev/null +++ b/Lib/cProfile.py @@ -0,0 +1,190 @@ +#! /usr/bin/env python + +"""Python interface for the 'lsprof' profiler. + Compatible with the 'profile' module. +""" + +__all__ = ["run", "runctx", "help", "Profile"] + +import _lsprof + +# ____________________________________________________________ +# Simple interface + +def run(statement, filename=None, sort=-1): + """Run statement under profiler optionally saving results in filename + + This function takes a single argument that can be passed to the + "exec" statement, and an optional file name. In all cases this + routine attempts to "exec" its first argument and gather profiling + statistics from the execution. If no file name is present, then this + function automatically prints a simple profiling report, sorted by the + standard name string (file/line/function-name) that is presented in + each line. + """ + prof = Profile() + result = None + try: + try: + prof = prof.run(statement) + except SystemExit: + pass + finally: + if filename is not None: + prof.dump_stats(filename) + else: + result = prof.print_stats(sort) + return result + +def runctx(statement, globals, locals, filename=None): + """Run statement under profiler, supplying your own globals and locals, + optionally saving results in filename. + + statement and filename have the same semantics as profile.run + """ + prof = Profile() + result = None + try: + try: + prof = prof.runctx(statement, globals, locals) + except SystemExit: + pass + finally: + if filename is not None: + prof.dump_stats(filename) + else: + result = prof.print_stats() + return result + +# Backwards compatibility. +def help(): + print "Documentation for the profile/cProfile modules can be found " + print "in the Python Library Reference, section 'The Python Profiler'." + +# ____________________________________________________________ + +class Profile(_lsprof.Profiler): + """Profile(custom_timer=None, time_unit=None, subcalls=True, builtins=True) + + Builds a profiler object using the specified timer function. + The default timer is a fast built-in one based on real time. + For custom timer functions returning integers, time_unit can + be a float specifying a scale (i.e. how long each integer unit + is, in seconds). + """ + + # Most of the functionality is in the base class. + # This subclass only adds convenient and backward-compatible methods. + + def print_stats(self, sort=-1): + import pstats + pstats.Stats(self).strip_dirs().sort_stats(sort).print_stats() + + def dump_stats(self, file): + import marshal + f = open(file, 'wb') + self.create_stats() + marshal.dump(self.stats, f) + f.close() + + def create_stats(self): + self.disable() + self.snapshot_stats() + + def snapshot_stats(self): + entries = self.getstats() + self.stats = {} + callersdicts = {} + # call information + for entry in entries: + func = label(entry.code) + nc = entry.callcount # ncalls column of pstats (before '/') + cc = nc - entry.reccallcount # ncalls column of pstats (after '/') + tt = entry.inlinetime # tottime column of pstats + ct = entry.totaltime # cumtime column of pstats + callers = {} + callersdicts[id(entry.code)] = callers + self.stats[func] = cc, nc, tt, ct, callers + # subcall information + for entry in entries: + if entry.calls: + func = label(entry.code) + for subentry in entry.calls: + try: + callers = callersdicts[id(subentry.code)] + except KeyError: + continue + nc = subentry.callcount + cc = nc - subentry.reccallcount + tt = subentry.inlinetime + ct = subentry.totaltime + if func in callers: + prev = callers[func] + nc += prev[0] + cc += prev[1] + tt += prev[2] + ct += prev[3] + callers[func] = nc, cc, tt, ct + + # The following two methods can be called by clients to use + # a profiler to profile a statement, given as a string. + + def run(self, cmd): + import __main__ + dict = __main__.__dict__ + return self.runctx(cmd, dict, dict) + + def runctx(self, cmd, globals, locals): + self.enable() + try: + exec cmd in globals, locals + finally: + self.disable() + return self + + # This method is more useful to profile a single function call. + def runcall(self, func, *args, **kw): + self.enable() + try: + return func(*args, **kw) + finally: + self.disable() + +# ____________________________________________________________ + +def label(code): + if isinstance(code, str): + return ('~', 0, code) # built-in functions ('~' sorts at the end) + else: + return (code.co_filename, code.co_firstlineno, code.co_name) + +# ____________________________________________________________ + +def main(): + import os, sys + from optparse import OptionParser + usage = "cProfile.py [-o output_file_path] [-s sort] scriptfile [arg] ..." + parser = OptionParser(usage=usage) + parser.allow_interspersed_args = False + parser.add_option('-o', '--outfile', dest="outfile", + help="Save stats to ", default=None) + parser.add_option('-s', '--sort', dest="sort", + help="Sort order when printing to stdout, based on pstats.Stats class", default=-1) + + if not sys.argv[1:]: + parser.print_usage() + sys.exit(2) + + (options, args) = parser.parse_args() + sys.argv[:] = args + + if (len(sys.argv) > 0): + sys.path.insert(0, os.path.dirname(sys.argv[0])) + run('execfile(%r)' % (sys.argv[0],), options.outfile, options.sort) + else: + parser.print_usage() + return parser + +# When invoked as main program, invoke the profiler on a script +if __name__ == '__main__': + main() diff --git a/Lib/pstats.py b/Lib/pstats.py index 98f351f..930cc6d 100644 --- a/Lib/pstats.py +++ b/Lib/pstats.py @@ -371,27 +371,47 @@ class Stats: self.print_call_heading(width, "was called by...") for func in list: cc, nc, tt, ct, callers = self.stats[func] - self.print_call_line(width, func, callers) + self.print_call_line(width, func, callers, "<-") print print return self def print_call_heading(self, name_size, column_title): print "Function ".ljust(name_size) + column_title - - def print_call_line(self, name_size, source, call_dict): - print func_std_string(source).ljust(name_size), + # print sub-header only if we have new-style callers + subheader = False + for cc, nc, tt, ct, callers in self.stats.itervalues(): + if callers: + value = callers.itervalues().next() + subheader = isinstance(value, tuple) + break + if subheader: + print " "*name_size + " ncalls tottime cumtime" + + def print_call_line(self, name_size, source, call_dict, arrow="->"): + print func_std_string(source).ljust(name_size) + arrow, if not call_dict: - print "--" + print return clist = call_dict.keys() clist.sort() - name_size = name_size + 1 indent = "" for func in clist: name = func_std_string(func) - print indent*name_size + name + '(%r)' % (call_dict[func],), \ - f8(self.stats[func][3]) + value = call_dict[func] + if isinstance(value, tuple): + nc, cc, tt, ct = value + if nc != cc: + substats = '%d/%d' % (nc, cc) + else: + substats = '%d' % (nc,) + substats = '%s %s %s %s' % (substats.rjust(7+2*len(indent)), + f8(tt), f8(ct), name) + left_width = name_size + 1 + else: + substats = '%s(%r) %s' % (name, value, f8(self.stats[func][3])) + left_width = name_size + 3 + print indent*left_width + substats indent = " " def print_title(self): @@ -448,7 +468,15 @@ def func_get_function_name(func): return func[2] def func_std_string(func_name): # match what old profile produced - return "%s:%d(%s)" % func_name + if func_name[:2] == ('~', 0): + # special case for built-in functions + name = func_name[2] + if name.startswith('<') and name.endswith('>'): + return '{%s}' % name[1:-1] + else: + return name + else: + return "%s:%d(%s)" % func_name #************************************************************************** # The following functions combine statists for pairs functions. diff --git a/Lib/test/output/test_cProfile b/Lib/test/output/test_cProfile new file mode 100644 index 0000000..fff3568 --- /dev/null +++ b/Lib/test/output/test_cProfile @@ -0,0 +1,79 @@ +test_cProfile + 126 function calls (106 primitive calls) in 1.000 CPU seconds + + Ordered by: standard name + + ncalls tottime percall cumtime percall filename:lineno(function) + 1 0.000 0.000 1.000 1.000 :1() + 8 0.064 0.008 0.080 0.010 test_cProfile.py:103(subhelper) + 28 0.028 0.001 0.028 0.001 test_cProfile.py:115(__getattr__) + 1 0.270 0.270 1.000 1.000 test_cProfile.py:30(testfunc) + 23/3 0.150 0.007 0.170 0.057 test_cProfile.py:40(factorial) + 20 0.020 0.001 0.020 0.001 test_cProfile.py:53(mul) + 2 0.040 0.020 0.600 0.300 test_cProfile.py:60(helper) + 4 0.116 0.029 0.120 0.030 test_cProfile.py:78(helper1) + 2 0.000 0.000 0.140 0.070 test_cProfile.py:89(helper2_indirect) + 8 0.312 0.039 0.400 0.050 test_cProfile.py:93(helper2) + 12 0.000 0.000 0.012 0.001 {hasattr} + 4 0.000 0.000 0.000 0.000 {method 'append' of 'list' objects} + 1 0.000 0.000 0.000 0.000 {method 'disable' of '_lsprof.Profiler' objects} + 8 0.000 0.000 0.000 0.000 {range} + 4 0.000 0.000 0.000 0.000 {sys.exc_info} + + + Ordered by: standard name + +Function called... + ncalls tottime cumtime +:1() -> 1 0.270 1.000 test_cProfile.py:30(testfunc) +test_cProfile.py:103(subhelper) -> 16 0.016 0.016 test_cProfile.py:115(__getattr__) + 8 0.000 0.000 {range} +test_cProfile.py:115(__getattr__) -> +test_cProfile.py:30(testfunc) -> 1 0.014 0.130 test_cProfile.py:40(factorial) + 2 0.040 0.600 test_cProfile.py:60(helper) +test_cProfile.py:40(factorial) -> 20/3 0.130 0.147 test_cProfile.py:40(factorial) + 20 0.020 0.020 test_cProfile.py:53(mul) +test_cProfile.py:53(mul) -> +test_cProfile.py:60(helper) -> 4 0.116 0.120 test_cProfile.py:78(helper1) + 2 0.000 0.140 test_cProfile.py:89(helper2_indirect) + 6 0.234 0.300 test_cProfile.py:93(helper2) +test_cProfile.py:78(helper1) -> 4 0.000 0.004 {hasattr} + 4 0.000 0.000 {method 'append' of 'list' objects} + 4 0.000 0.000 {sys.exc_info} +test_cProfile.py:89(helper2_indirect) -> 2 0.006 0.040 test_cProfile.py:40(factorial) + 2 0.078 0.100 test_cProfile.py:93(helper2) +test_cProfile.py:93(helper2) -> 8 0.064 0.080 test_cProfile.py:103(subhelper) + 8 0.000 0.008 {hasattr} +{hasattr} -> 12 0.012 0.012 test_cProfile.py:115(__getattr__) +{method 'append' of 'list' objects} -> +{method 'disable' of '_lsprof.Profiler' objects} -> +{range} -> +{sys.exc_info} -> + + + Ordered by: standard name + +Function was called by... + ncalls tottime cumtime +:1() <- +test_cProfile.py:103(subhelper) <- 8 0.064 0.080 test_cProfile.py:93(helper2) +test_cProfile.py:115(__getattr__) <- 16 0.016 0.016 test_cProfile.py:103(subhelper) + 12 0.012 0.012 {hasattr} +test_cProfile.py:30(testfunc) <- 1 0.270 1.000 :1() +test_cProfile.py:40(factorial) <- 1 0.014 0.130 test_cProfile.py:30(testfunc) + 20/3 0.130 0.147 test_cProfile.py:40(factorial) + 2 0.006 0.040 test_cProfile.py:89(helper2_indirect) +test_cProfile.py:53(mul) <- 20 0.020 0.020 test_cProfile.py:40(factorial) +test_cProfile.py:60(helper) <- 2 0.040 0.600 test_cProfile.py:30(testfunc) +test_cProfile.py:78(helper1) <- 4 0.116 0.120 test_cProfile.py:60(helper) +test_cProfile.py:89(helper2_indirect) <- 2 0.000 0.140 test_cProfile.py:60(helper) +test_cProfile.py:93(helper2) <- 6 0.234 0.300 test_cProfile.py:60(helper) + 2 0.078 0.100 test_cProfile.py:89(helper2_indirect) +{hasattr} <- 4 0.000 0.004 test_cProfile.py:78(helper1) + 8 0.000 0.008 test_cProfile.py:93(helper2) +{method 'append' of 'list' objects} <- 4 0.000 0.000 test_cProfile.py:78(helper1) +{method 'disable' of '_lsprof.Profiler' objects} <- +{range} <- 8 0.000 0.000 test_cProfile.py:103(subhelper) +{sys.exc_info} <- 4 0.000 0.000 test_cProfile.py:78(helper1) + + diff --git a/Lib/test/output/test_profile b/Lib/test/output/test_profile index e745075..96bd77f 100644 --- a/Lib/test/output/test_profile +++ b/Lib/test/output/test_profile @@ -1,20 +1,84 @@ test_profile - 74 function calls in 1.000 CPU seconds + 127 function calls (107 primitive calls) in 1.000 CPU seconds Ordered by: standard name ncalls tottime percall cumtime percall filename:lineno(function) + 4 0.000 0.000 0.000 0.000 :0(append) + 4 0.000 0.000 0.000 0.000 :0(exc_info) 12 0.000 0.000 0.012 0.001 :0(hasattr) 8 0.000 0.000 0.000 0.000 :0(range) 1 0.000 0.000 0.000 0.000 :0(setprofile) 1 0.000 0.000 1.000 1.000 :1() 0 0.000 0.000 profile:0(profiler) 1 0.000 0.000 1.000 1.000 profile:0(testfunc()) - 1 0.400 0.400 1.000 1.000 test_profile.py:23(testfunc) - 2 0.080 0.040 0.600 0.300 test_profile.py:32(helper) - 4 0.116 0.029 0.120 0.030 test_profile.py:50(helper1) - 8 0.312 0.039 0.400 0.050 test_profile.py:58(helper2) - 8 0.064 0.008 0.080 0.010 test_profile.py:68(subhelper) - 28 0.028 0.001 0.028 0.001 test_profile.py:80(__getattr__) + 8 0.064 0.008 0.080 0.010 test_profile.py:103(subhelper) + 28 0.028 0.001 0.028 0.001 test_profile.py:115(__getattr__) + 1 0.270 0.270 1.000 1.000 test_profile.py:30(testfunc) + 23/3 0.150 0.007 0.170 0.057 test_profile.py:40(factorial) + 20 0.020 0.001 0.020 0.001 test_profile.py:53(mul) + 2 0.040 0.020 0.600 0.300 test_profile.py:60(helper) + 4 0.116 0.029 0.120 0.030 test_profile.py:78(helper1) + 2 0.000 0.000 0.140 0.070 test_profile.py:89(helper2_indirect) + 8 0.312 0.039 0.400 0.050 test_profile.py:93(helper2) + + + Ordered by: standard name + +Function called... +:0(append) -> +:0(exc_info) -> +:0(hasattr) -> test_profile.py:115(__getattr__)(12) 0.028 +:0(range) -> +:0(setprofile) -> +:1() -> test_profile.py:30(testfunc)(1) 1.000 +profile:0(profiler) -> profile:0(testfunc())(1) 1.000 +profile:0(testfunc()) -> :0(setprofile)(1) 0.000 + :1()(1) 1.000 +test_profile.py:103(subhelper) -> :0(range)(8) 0.000 + test_profile.py:115(__getattr__)(16) 0.028 +test_profile.py:115(__getattr__) -> +test_profile.py:30(testfunc) -> test_profile.py:40(factorial)(1) 0.170 + test_profile.py:60(helper)(2) 0.600 +test_profile.py:40(factorial) -> test_profile.py:40(factorial)(20) 0.170 + test_profile.py:53(mul)(20) 0.020 +test_profile.py:53(mul) -> +test_profile.py:60(helper) -> test_profile.py:78(helper1)(4) 0.120 + test_profile.py:89(helper2_indirect)(2) 0.140 + test_profile.py:93(helper2)(6) 0.400 +test_profile.py:78(helper1) -> :0(append)(4) 0.000 + :0(exc_info)(4) 0.000 + :0(hasattr)(4) 0.012 +test_profile.py:89(helper2_indirect) -> test_profile.py:40(factorial)(2) 0.170 + test_profile.py:93(helper2)(2) 0.400 +test_profile.py:93(helper2) -> :0(hasattr)(8) 0.012 + test_profile.py:103(subhelper)(8) 0.080 + + + Ordered by: standard name + +Function was called by... +:0(append) <- test_profile.py:78(helper1)(4) 0.120 +:0(exc_info) <- test_profile.py:78(helper1)(4) 0.120 +:0(hasattr) <- test_profile.py:78(helper1)(4) 0.120 + test_profile.py:93(helper2)(8) 0.400 +:0(range) <- test_profile.py:103(subhelper)(8) 0.080 +:0(setprofile) <- profile:0(testfunc())(1) 1.000 +:1() <- profile:0(testfunc())(1) 1.000 +profile:0(profiler) <- +profile:0(testfunc()) <- profile:0(profiler)(1) 0.000 +test_profile.py:103(subhelper) <- test_profile.py:93(helper2)(8) 0.400 +test_profile.py:115(__getattr__) <- :0(hasattr)(12) 0.012 + test_profile.py:103(subhelper)(16) 0.080 +test_profile.py:30(testfunc) <- :1()(1) 1.000 +test_profile.py:40(factorial) <- test_profile.py:30(testfunc)(1) 1.000 + test_profile.py:40(factorial)(20) 0.170 + test_profile.py:89(helper2_indirect)(2) 0.140 +test_profile.py:53(mul) <- test_profile.py:40(factorial)(20) 0.170 +test_profile.py:60(helper) <- test_profile.py:30(testfunc)(2) 1.000 +test_profile.py:78(helper1) <- test_profile.py:60(helper)(4) 0.600 +test_profile.py:89(helper2_indirect) <- test_profile.py:60(helper)(2) 0.600 +test_profile.py:93(helper2) <- test_profile.py:60(helper)(6) 0.600 + test_profile.py:89(helper2_indirect)(2) 0.140 diff --git a/Lib/test/test_cProfile.py b/Lib/test/test_cProfile.py new file mode 100644 index 0000000..07b2a9b --- /dev/null +++ b/Lib/test/test_cProfile.py @@ -0,0 +1,123 @@ +"""Test suite for the cProfile module.""" + +import cProfile, pstats, sys + +# In order to have reproducible time, we simulate a timer in the global +# variable 'ticks', which represents simulated time in milliseconds. +# (We can't use a helper function increment the timer since it would be +# included in the profile and would appear to consume all the time.) +ticks = 0 + +# IMPORTANT: this is an output test. *ALL* NUMBERS in the expected +# output are relevant. If you change the formatting of pstats, +# please don't just regenerate output/test_cProfile without checking +# very carefully that not a single number has changed. + +def test_main(): + global ticks + ticks = 42000 + prof = cProfile.Profile(timer, 0.001) + prof.runctx("testfunc()", globals(), locals()) + assert ticks == 43000, ticks + st = pstats.Stats(prof) + st.strip_dirs().sort_stats('stdname').print_stats() + st.print_callees() + st.print_callers() + +def timer(): + return ticks + +def testfunc(): + # 1 call + # 1000 ticks total: 270 ticks local, 730 ticks in subfunctions + global ticks + ticks += 99 + helper() # 300 + helper() # 300 + ticks += 171 + factorial(14) # 130 + +def factorial(n): + # 23 calls total + # 170 ticks total, 150 ticks local + # 3 primitive calls, 130, 20 and 20 ticks total + # including 116, 17, 17 ticks local + global ticks + if n > 0: + ticks += n + return mul(n, factorial(n-1)) + else: + ticks += 11 + return 1 + +def mul(a, b): + # 20 calls + # 1 tick, local + global ticks + ticks += 1 + return a * b + +def helper(): + # 2 calls + # 300 ticks total: 20 ticks local, 260 ticks in subfunctions + global ticks + ticks += 1 + helper1() # 30 + ticks += 2 + helper1() # 30 + ticks += 6 + helper2() # 50 + ticks += 3 + helper2() # 50 + ticks += 2 + helper2() # 50 + ticks += 5 + helper2_indirect() # 70 + ticks += 1 + +def helper1(): + # 4 calls + # 30 ticks total: 29 ticks local, 1 tick in subfunctions + global ticks + ticks += 10 + hasattr(C(), "foo") # 1 + ticks += 19 + lst = [] + lst.append(42) # 0 + sys.exc_info() # 0 + +def helper2_indirect(): + helper2() # 50 + factorial(3) # 20 + +def helper2(): + # 8 calls + # 50 ticks local: 39 ticks local, 11 ticks in subfunctions + global ticks + ticks += 11 + hasattr(C(), "bar") # 1 + ticks += 13 + subhelper() # 10 + ticks += 15 + +def subhelper(): + # 8 calls + # 10 ticks total: 8 ticks local, 2 ticks in subfunctions + global ticks + ticks += 2 + for i in range(2): # 0 + try: + C().foo # 1 x 2 + except AttributeError: + ticks += 3 # 3 x 2 + +class C: + def __getattr__(self, name): + # 28 calls + # 1 tick, local + global ticks + ticks += 1 + raise AttributeError + +if __name__ == "__main__": + test_main() diff --git a/Lib/test/test_profile.py b/Lib/test/test_profile.py index aa0f26c..95ad8d2 100644 --- a/Lib/test/test_profile.py +++ b/Lib/test/test_profile.py @@ -1,8 +1,6 @@ """Test suite for the profile module.""" -import profile -import os -from test.test_support import TESTFN, vereq +import profile, pstats, sys # In order to have reproducible time, we simulate a timer in the global # variable 'ticks', which represents simulated time in milliseconds. @@ -10,50 +8,87 @@ from test.test_support import TESTFN, vereq # included in the profile and would appear to consume all the time.) ticks = 0 -def test_1(): +# IMPORTANT: this is an output test. *ALL* NUMBERS in the expected +# output are relevant. If you change the formatting of pstats, +# please don't just regenerate output/test_profile without checking +# very carefully that not a single number has changed. + +def test_main(): global ticks - ticks = 0 + ticks = 42000 prof = profile.Profile(timer) - prof.runctx("testfunc()", globals(), globals()) - prof.print_stats() + prof.runctx("testfunc()", globals(), locals()) + assert ticks == 43000, ticks + st = pstats.Stats(prof) + st.strip_dirs().sort_stats('stdname').print_stats() + st.print_callees() + st.print_callers() def timer(): return ticks*0.001 def testfunc(): # 1 call - # 1000 ticks total: 400 ticks local, 600 ticks in subfunctions + # 1000 ticks total: 270 ticks local, 730 ticks in subfunctions global ticks - ticks += 199 + ticks += 99 helper() # 300 helper() # 300 - ticks += 201 + ticks += 171 + factorial(14) # 130 + +def factorial(n): + # 23 calls total + # 170 ticks total, 150 ticks local + # 3 primitive calls, 130, 20 and 20 ticks total + # including 116, 17, 17 ticks local + global ticks + if n > 0: + ticks += n + return mul(n, factorial(n-1)) + else: + ticks += 11 + return 1 + +def mul(a, b): + # 20 calls + # 1 tick, local + global ticks + ticks += 1 + return a * b def helper(): # 2 calls - # 300 ticks total: 40 ticks local, 260 ticks in subfunctions + # 300 ticks total: 20 ticks local, 260 ticks in subfunctions global ticks ticks += 1 helper1() # 30 - ticks += 3 + ticks += 2 helper1() # 30 ticks += 6 helper2() # 50 - ticks += 5 - helper2() # 50 - ticks += 4 + ticks += 3 helper2() # 50 - ticks += 7 + ticks += 2 helper2() # 50 - ticks += 14 + ticks += 5 + helper2_indirect() # 70 + ticks += 1 def helper1(): # 4 calls # 30 ticks total: 29 ticks local, 1 tick in subfunctions global ticks ticks += 10 - hasattr(C(), "foo") + hasattr(C(), "foo") # 1 ticks += 19 + lst = [] + lst.append(42) # 0 + sys.exc_info() # 0 + +def helper2_indirect(): + helper2() # 50 + factorial(3) # 20 def helper2(): # 8 calls @@ -70,7 +105,7 @@ def subhelper(): # 10 ticks total: 8 ticks local, 2 ticks in subfunctions global ticks ticks += 2 - for i in range(2): + for i in range(2): # 0 try: C().foo # 1 x 2 except AttributeError: @@ -84,36 +119,5 @@ class C: ticks += 1 raise AttributeError - -def test_2(): - d = globals().copy() - def testfunc(): - global x - x = 1 - d['testfunc'] = testfunc - profile.runctx("testfunc()", d, d, TESTFN) - vereq (x, 1) - os.unlink (TESTFN) - -def test_3(): - result = [] - def testfunc1(): - try: len(None) - except: pass - try: len(None) - except: pass - result.append(True) - def testfunc2(): - testfunc1() - testfunc1() - profile.runctx("testfunc2()", locals(), locals(), TESTFN) - vereq(result, [True, True]) - os.unlink(TESTFN) - -def test_main(): - test_1() - test_2() - test_3() - if __name__ == "__main__": test_main() diff --git a/Misc/NEWS b/Misc/NEWS index 1e4ad2a..8b38832 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -2019,6 +2019,11 @@ Extension modules Library ------- +- Added a new module: cProfile, a C profiler with the same interface as the + profile module. cProfile avoids some of the drawbacks of the hotshot + profiler and provides a bit more information than the other two profilers. + Based on "lsprof" (patch #1212837). + - Bug #1266283: The new function "lexists" is now in os.path.__all__. - Bug #981530: Fix UnboundLocalError in shutil.rmtree(). This affects diff --git a/Modules/_lsprof.c b/Modules/_lsprof.c new file mode 100644 index 0000000..81d88ea --- /dev/null +++ b/Modules/_lsprof.c @@ -0,0 +1,867 @@ +#include "Python.h" +#include "compile.h" +#include "frameobject.h" +#include "structseq.h" +#include "rotatingtree.h" + +#if !defined(HAVE_LONG_LONG) +#error "This module requires long longs!" +#endif + +/*** Selection of a high-precision timer ***/ + +#ifdef MS_WINDOWS + +#include + +static PY_LONG_LONG +hpTimer(void) +{ + LARGE_INTEGER li; + QueryPerformanceCounter(&li); + return li.QuadPart; +} + +static double +hpTimerUnit(void) +{ + LARGE_INTEGER li; + if (QueryPerformanceFrequency(&li)) + return 1000.0 / li.QuadPart; + else + return 0.001; /* unlikely */ +} + +#else /* !MS_WINDOWS */ + +#ifndef HAVE_GETTIMEOFDAY +#error "This module requires gettimeofday() on non-Windows platforms!" +#endif + +#if (defined(PYOS_OS2) && defined(PYCC_GCC)) +#include +#else +#include +#include +#endif + +static PY_LONG_LONG +hpTimer(void) +{ + struct timeval tv; + PY_LONG_LONG ret; +#ifdef GETTIMEOFDAY_NO_TZ + gettimeofday(&tv); +#else + gettimeofday(&tv, (struct timezone *)NULL); +#endif + ret = tv.tv_sec; + ret = ret * 1000000 + tv.tv_usec; + return ret; +} + +static double +hpTimerUnit(void) +{ + return 0.001; +} + +#endif /* MS_WINDOWS */ + +/************************************************************/ +/* Written by Brett Rosen and Ted Czotter */ + +struct _ProfilerEntry; + +/* represents a function called from another function */ +typedef struct _ProfilerSubEntry { + rotating_node_t header; + PY_LONG_LONG tt; + PY_LONG_LONG it; + long callcount; + long recursivecallcount; + long recursionLevel; +} ProfilerSubEntry; + +/* represents a function or user defined block */ +typedef struct _ProfilerEntry { + rotating_node_t header; + PyObject *userObj; /* PyCodeObject, or a descriptive str for builtins */ + PY_LONG_LONG tt; /* total time in this entry */ + PY_LONG_LONG it; /* inline time in this entry (not in subcalls) */ + long callcount; /* how many times this was called */ + long recursivecallcount; /* how many times called recursively */ + long recursionLevel; + rotating_node_t *calls; +} ProfilerEntry; + +typedef struct _ProfilerContext { + PY_LONG_LONG t0; + PY_LONG_LONG subt; + struct _ProfilerContext *previous; + ProfilerEntry *ctxEntry; +} ProfilerContext; + +typedef struct { + PyObject_HEAD + rotating_node_t *profilerEntries; + ProfilerContext *currentProfilerContext; + ProfilerContext *freelistProfilerContext; + int flags; + PyObject *externalTimer; + double externalTimerUnit; +} ProfilerObject; + +#define POF_ENABLED 0x001 +#define POF_SUBCALLS 0x002 +#define POF_BUILTINS 0x004 +#define POF_NOMEMORY 0x100 + +staticforward PyTypeObject PyProfiler_Type; + +#define PyProfiler_Check(op) PyObject_TypeCheck(op, &PyProfiler_Type) +#define PyProfiler_CheckExact(op) ((op)->ob_type == &PyProfiler_Type) + +/*** External Timers ***/ + +#define DOUBLE_TIMER_PRECISION 4294967296.0 +static PyObject *empty_tuple; + +static PY_LONG_LONG CallExternalTimer(ProfilerObject *pObj) +{ + PY_LONG_LONG result; + PyObject *o = PyObject_Call(pObj->externalTimer, empty_tuple, NULL); + if (o == NULL) { + PyErr_WriteUnraisable(pObj->externalTimer); + return 0; + } + if (pObj->externalTimerUnit > 0.0) { + /* interpret the result as an integer that will be scaled + in profiler_getstats() */ + result = PyLong_AsLongLong(o); + } + else { + /* interpret the result as a double measured in seconds. + As the profiler works with PY_LONG_LONG internally + we convert it to a large integer */ + double val = PyFloat_AsDouble(o); + /* error handling delayed to the code below */ + result = (PY_LONG_LONG) (val * DOUBLE_TIMER_PRECISION); + } + Py_DECREF(o); + if (PyErr_Occurred()) { + PyErr_WriteUnraisable((PyObject *) pObj); + return 0; + } + return result; +} + +#define CALL_TIMER(pObj) ((pObj)->externalTimer ? \ + CallExternalTimer(pObj) : \ + hpTimer()) + +/*** ProfilerObject ***/ + +static PyObject * +normalizeUserObj(PyObject *obj) +{ + PyCFunctionObject *fn; + if (!PyCFunction_Check(obj)) { + Py_INCREF(obj); + return obj; + } + /* Replace built-in function objects with a descriptive string + because of built-in methods -- keeping a reference to + __self__ is probably not a good idea. */ + fn = (PyCFunctionObject *)obj; + + if (fn->m_self == NULL) { + /* built-in function: look up the module name */ + PyObject *mod = fn->m_module; + char *modname; + if (mod && PyString_Check(mod)) { + modname = PyString_AS_STRING(mod); + } + else if (mod && PyModule_Check(mod)) { + modname = PyModule_GetName(mod); + if (modname == NULL) { + PyErr_Clear(); + modname = "__builtin__"; + } + } + else { + modname = "__builtin__"; + } + if (strcmp(modname, "__builtin__") != 0) + return PyString_FromFormat("<%s.%s>", + modname, + fn->m_ml->ml_name); + else + return PyString_FromFormat("<%s>", + fn->m_ml->ml_name); + } + else { + /* built-in method: try to return + repr(getattr(type(__self__), __name__)) + */ + PyObject *self = fn->m_self; + PyObject *name = PyString_FromString(fn->m_ml->ml_name); + if (name != NULL) { + PyObject *mo = _PyType_Lookup(self->ob_type, name); + Py_XINCREF(mo); + Py_DECREF(name); + if (mo != NULL) { + PyObject *res = PyObject_Repr(mo); + Py_DECREF(mo); + if (res != NULL) + return res; + } + } + PyErr_Clear(); + return PyString_FromFormat("", + fn->m_ml->ml_name); + } +} + +static ProfilerEntry* +newProfilerEntry(ProfilerObject *pObj, void *key, PyObject *userObj) +{ + ProfilerEntry *self; + self = (ProfilerEntry*) malloc(sizeof(ProfilerEntry)); + if (self == NULL) { + pObj->flags |= POF_NOMEMORY; + return NULL; + } + userObj = normalizeUserObj(userObj); + if (userObj == NULL) { + PyErr_Clear(); + free(self); + pObj->flags |= POF_NOMEMORY; + return NULL; + } + self->header.key = key; + self->userObj = userObj; + self->tt = 0; + self->it = 0; + self->callcount = 0; + self->recursivecallcount = 0; + self->recursionLevel = 0; + self->calls = EMPTY_ROTATING_TREE; + RotatingTree_Add(&pObj->profilerEntries, &self->header); + return self; +} + +static ProfilerEntry* +getEntry(ProfilerObject *pObj, void *key) +{ + return (ProfilerEntry*) RotatingTree_Get(&pObj->profilerEntries, key); +} + +static ProfilerSubEntry * +getSubEntry(ProfilerObject *pObj, ProfilerEntry *caller, ProfilerEntry* entry) +{ + return (ProfilerSubEntry*) RotatingTree_Get(&caller->calls, + (void *)entry); +} + +static ProfilerSubEntry * +newSubEntry(ProfilerObject *pObj, ProfilerEntry *caller, ProfilerEntry* entry) +{ + ProfilerSubEntry *self; + self = (ProfilerSubEntry*) malloc(sizeof(ProfilerSubEntry)); + if (self == NULL) { + pObj->flags |= POF_NOMEMORY; + return NULL; + } + self->header.key = (void *)entry; + self->tt = 0; + self->it = 0; + self->callcount = 0; + self->recursivecallcount = 0; + self->recursionLevel = 0; + RotatingTree_Add(&caller->calls, &self->header); + return self; +} + +static int freeSubEntry(rotating_node_t *header, void *arg) +{ + ProfilerSubEntry *subentry = (ProfilerSubEntry*) header; + free(subentry); + return 0; +} + +static int freeEntry(rotating_node_t *header, void *arg) +{ + ProfilerEntry *entry = (ProfilerEntry*) header; + RotatingTree_Enum(entry->calls, freeSubEntry, NULL); + Py_DECREF(entry->userObj); + free(entry); + return 0; +} + +static void clearEntries(ProfilerObject *pObj) +{ + RotatingTree_Enum(pObj->profilerEntries, freeEntry, NULL); + pObj->profilerEntries = EMPTY_ROTATING_TREE; + /* release the memory hold by the free list of ProfilerContexts */ + while (pObj->freelistProfilerContext) { + ProfilerContext *c = pObj->freelistProfilerContext; + pObj->freelistProfilerContext = c->previous; + free(c); + } +} + +static void +initContext(ProfilerObject *pObj, ProfilerContext *self, ProfilerEntry *entry) +{ + self->ctxEntry = entry; + self->subt = 0; + self->previous = pObj->currentProfilerContext; + pObj->currentProfilerContext = self; + ++entry->recursionLevel; + if ((pObj->flags & POF_SUBCALLS) && self->previous) { + /* find or create an entry for me in my caller's entry */ + ProfilerEntry *caller = self->previous->ctxEntry; + ProfilerSubEntry *subentry = getSubEntry(pObj, caller, entry); + if (subentry == NULL) + subentry = newSubEntry(pObj, caller, entry); + if (subentry) + ++subentry->recursionLevel; + } + self->t0 = CALL_TIMER(pObj); +} + +static void +Stop(ProfilerObject *pObj, ProfilerContext *self, ProfilerEntry *entry) +{ + PY_LONG_LONG tt = CALL_TIMER(pObj) - self->t0; + PY_LONG_LONG it = tt - self->subt; + if (self->previous) + self->previous->subt += tt; + pObj->currentProfilerContext = self->previous; + if (--entry->recursionLevel == 0) + entry->tt += tt; + else + ++entry->recursivecallcount; + entry->it += it; + entry->callcount++; + if ((pObj->flags & POF_SUBCALLS) && self->previous) { + /* find or create an entry for me in my caller's entry */ + ProfilerEntry *caller = self->previous->ctxEntry; + ProfilerSubEntry *subentry = getSubEntry(pObj, caller, entry); + if (subentry) { + if (--subentry->recursionLevel == 0) + subentry->tt += tt; + else + ++subentry->recursivecallcount; + subentry->it += it; + ++subentry->callcount; + } + } +} + +static void +ptrace_enter_call(PyObject *self, void *key, PyObject *userObj) +{ + /* entering a call to the function identified by 'key' + (which can be a PyCodeObject or a PyMethodDef pointer) */ + ProfilerObject *pObj = (ProfilerObject*)self; + ProfilerEntry *profEntry; + ProfilerContext *pContext; + + profEntry = getEntry(pObj, key); + if (profEntry == NULL) { + profEntry = newProfilerEntry(pObj, key, userObj); + if (profEntry == NULL) + return; + } + /* grab a ProfilerContext out of the free list */ + pContext = pObj->freelistProfilerContext; + if (pContext) { + pObj->freelistProfilerContext = pContext->previous; + } + else { + /* free list exhausted, allocate a new one */ + pContext = (ProfilerContext*) + malloc(sizeof(ProfilerContext)); + if (pContext == NULL) { + pObj->flags |= POF_NOMEMORY; + return; + } + } + initContext(pObj, pContext, profEntry); +} + +static void +ptrace_leave_call(PyObject *self, void *key) +{ + /* leaving a call to the function identified by 'key' */ + ProfilerObject *pObj = (ProfilerObject*)self; + ProfilerEntry *profEntry; + ProfilerContext *pContext; + + pContext = pObj->currentProfilerContext; + if (pContext == NULL) + return; + profEntry = getEntry(pObj, key); + if (profEntry) { + Stop(pObj, pContext, profEntry); + } + else { + pObj->currentProfilerContext = pContext->previous; + } + /* put pContext into the free list */ + pContext->previous = pObj->freelistProfilerContext; + pObj->freelistProfilerContext = pContext; +} + +static int +profiler_callback(PyObject *self, PyFrameObject *frame, int what, + PyObject *arg) +{ + switch (what) { + + /* the 'frame' of a called function is about to start its execution */ + case PyTrace_CALL: + ptrace_enter_call(self, (void *)frame->f_code, + (PyObject *)frame->f_code); + break; + + /* the 'frame' of a called function is about to finish + (either normally or with an exception) */ + case PyTrace_RETURN: + ptrace_leave_call(self, (void *)frame->f_code); + break; + + /* case PyTrace_EXCEPTION: + If the exception results in the function exiting, a + PyTrace_RETURN event will be generated, so we don't need to + handle it. */ + +#ifdef PyTrace_C_CALL /* not defined in Python <= 2.3 */ + /* the Python function 'frame' is issuing a call to the built-in + function 'arg' */ + case PyTrace_C_CALL: + if ((((ProfilerObject *)self)->flags & POF_BUILTINS) + && PyCFunction_Check(arg)) { + ptrace_enter_call(self, + ((PyCFunctionObject *)arg)->m_ml, + arg); + } + break; + + /* the call to the built-in function 'arg' is returning into its + caller 'frame' */ + case PyTrace_C_RETURN: /* ...normally */ + case PyTrace_C_EXCEPTION: /* ...with an exception set */ + if ((((ProfilerObject *)self)->flags & POF_BUILTINS) + && PyCFunction_Check(arg)) { + ptrace_leave_call(self, + ((PyCFunctionObject *)arg)->m_ml); + } + break; +#endif + + default: + break; + } + return 0; +} + +static int +pending_exception(ProfilerObject *pObj) +{ + if (pObj->flags & POF_NOMEMORY) { + pObj->flags -= POF_NOMEMORY; + PyErr_SetString(PyExc_MemoryError, + "memory was exhausted while profiling"); + return -1; + } + return 0; +} + +/************************************************************/ + +static PyStructSequence_Field profiler_entry_fields[] = { + {"code", "code object or built-in function name"}, + {"callcount", "how many times this was called"}, + {"reccallcount", "how many times called recursively"}, + {"totaltime", "total time in this entry"}, + {"inlinetime", "inline time in this entry (not in subcalls)"}, + {"calls", "details of the calls"}, + {0} +}; + +static PyStructSequence_Field profiler_subentry_fields[] = { + {"code", "called code object or built-in function name"}, + {"callcount", "how many times this is called"}, + {"reccallcount", "how many times this is called recursively"}, + {"totaltime", "total time spent in this call"}, + {"inlinetime", "inline time (not in further subcalls)"}, + {0} +}; + +static PyStructSequence_Desc profiler_entry_desc = { + "_lsprof.profiler_entry", /* name */ + NULL, /* doc */ + profiler_entry_fields, + 6 +}; + +static PyStructSequence_Desc profiler_subentry_desc = { + "_lsprof.profiler_subentry", /* name */ + NULL, /* doc */ + profiler_subentry_fields, + 5 +}; + +static PyTypeObject StatsEntryType; +static PyTypeObject StatsSubEntryType; + + +typedef struct { + PyObject *list; + PyObject *sublist; + double factor; +} statscollector_t; + +static int statsForSubEntry(rotating_node_t *node, void *arg) +{ + ProfilerSubEntry *sentry = (ProfilerSubEntry*) node; + statscollector_t *collect = (statscollector_t*) arg; + ProfilerEntry *entry = (ProfilerEntry*) sentry->header.key; + int err; + PyObject *sinfo; + sinfo = PyObject_CallFunction((PyObject*) &StatsSubEntryType, + "((Olldd))", + entry->userObj, + sentry->callcount, + sentry->recursivecallcount, + collect->factor * sentry->tt, + collect->factor * sentry->it); + if (sinfo == NULL) + return -1; + err = PyList_Append(collect->sublist, sinfo); + Py_DECREF(sinfo); + return err; +} + +static int statsForEntry(rotating_node_t *node, void *arg) +{ + ProfilerEntry *entry = (ProfilerEntry*) node; + statscollector_t *collect = (statscollector_t*) arg; + PyObject *info; + int err; + if (entry->callcount == 0) + return 0; /* skip */ + + if (entry->calls != EMPTY_ROTATING_TREE) { + collect->sublist = PyList_New(0); + if (collect->sublist == NULL) + return -1; + if (RotatingTree_Enum(entry->calls, + statsForSubEntry, collect) != 0) { + Py_DECREF(collect->sublist); + return -1; + } + } + else { + Py_INCREF(Py_None); + collect->sublist = Py_None; + } + + info = PyObject_CallFunction((PyObject*) &StatsEntryType, + "((OllddO))", + entry->userObj, + entry->callcount, + entry->recursivecallcount, + collect->factor * entry->tt, + collect->factor * entry->it, + collect->sublist); + Py_DECREF(collect->sublist); + if (info == NULL) + return -1; + err = PyList_Append(collect->list, info); + Py_DECREF(info); + return err; +} + +PyDoc_STRVAR(getstats_doc, "\ +getstats() -> list of profiler_entry objects\n\ +\n\ +Return all information collected by the profiler.\n\ +Each profiler_entry is a tuple-like object with the\n\ +following attributes:\n\ +\n\ + code code object\n\ + callcount how many times this was called\n\ + reccallcount how many times called recursively\n\ + totaltime total time in this entry\n\ + inlinetime inline time in this entry (not in subcalls)\n\ + calls details of the calls\n\ +\n\ +The calls attribute is either None or a list of\n\ +profiler_subentry objects:\n\ +\n\ + code called code object\n\ + callcount how many times this is called\n\ + reccallcount how many times this is called recursively\n\ + totaltime total time spent in this call\n\ + inlinetime inline time (not in further subcalls)\n\ +"); + +static PyObject* +profiler_getstats(ProfilerObject *pObj, PyObject* noarg) +{ + statscollector_t collect; + if (pending_exception(pObj)) + return NULL; + if (!pObj->externalTimer) + collect.factor = hpTimerUnit(); + else if (pObj->externalTimerUnit > 0.0) + collect.factor = pObj->externalTimerUnit; + else + collect.factor = 1.0 / DOUBLE_TIMER_PRECISION; + collect.list = PyList_New(0); + if (collect.list == NULL) + return NULL; + if (RotatingTree_Enum(pObj->profilerEntries, statsForEntry, &collect) + != 0) { + Py_DECREF(collect.list); + return NULL; + } + return collect.list; +} + +static int +setSubcalls(ProfilerObject *pObj, int nvalue) +{ + if (nvalue == 0) + pObj->flags &= ~POF_SUBCALLS; + else if (nvalue > 0) + pObj->flags |= POF_SUBCALLS; + return 0; +} + +static int +setBuiltins(ProfilerObject *pObj, int nvalue) +{ + if (nvalue == 0) + pObj->flags &= ~POF_BUILTINS; + else if (nvalue > 0) { +#ifndef PyTrace_C_CALL + PyErr_SetString(PyExc_ValueError, + "builtins=True requires Python >= 2.4"); + return -1; +#else + pObj->flags |= POF_BUILTINS; +#endif + } + return 0; +} + +PyDoc_STRVAR(enable_doc, "\ +enable(subcalls=True, builtins=True)\n\ +\n\ +Start collecting profiling information.\n\ +If 'subcalls' is True, also records for each function\n\ +statistics separated according to its current caller.\n\ +If 'builtins' is True, records the time spent in\n\ +built-in functions separately from their caller.\n\ +"); + +static PyObject* +profiler_enable(ProfilerObject *self, PyObject *args, PyObject *kwds) +{ + int subcalls = -1; + int builtins = -1; + static const char *kwlist[] = {"subcalls", "builtins", 0}; + if (!PyArg_ParseTupleAndKeywords(args, kwds, "|ii:enable", + kwlist, &subcalls, &builtins)) + return NULL; + if (setSubcalls(self, subcalls) < 0 || setBuiltins(self, builtins) < 0) + return NULL; + PyEval_SetProfile(profiler_callback, (PyObject*)self); + self->flags |= POF_ENABLED; + Py_INCREF(Py_None); + return Py_None; +} + +static void +flush_unmatched(ProfilerObject *pObj) +{ + while (pObj->currentProfilerContext) { + ProfilerContext *pContext = pObj->currentProfilerContext; + ProfilerEntry *profEntry= pContext->ctxEntry; + if (profEntry) + Stop(pObj, pContext, profEntry); + else + pObj->currentProfilerContext = pContext->previous; + if (pContext) + free(pContext); + } + +} + +PyDoc_STRVAR(disable_doc, "\ +disable()\n\ +\n\ +Stop collecting profiling information.\n\ +"); + +static PyObject* +profiler_disable(ProfilerObject *self, PyObject* noarg) +{ + self->flags &= ~POF_ENABLED; + PyEval_SetProfile(NULL, NULL); + flush_unmatched(self); + if (pending_exception(self)) + return NULL; + Py_INCREF(Py_None); + return Py_None; +} + +PyDoc_STRVAR(clear_doc, "\ +clear()\n\ +\n\ +Clear all profiling information collected so far.\n\ +"); + +static PyObject* +profiler_clear(ProfilerObject *pObj, PyObject* noarg) +{ + clearEntries(pObj); + Py_INCREF(Py_None); + return Py_None; +} + +static void +profiler_dealloc(ProfilerObject *op) +{ + if (op->flags & POF_ENABLED) + PyEval_SetProfile(NULL, NULL); + flush_unmatched(op); + clearEntries(op); + Py_XDECREF(op->externalTimer); + op->ob_type->tp_free(op); +} + +static int +profiler_init(ProfilerObject *pObj, PyObject *args, PyObject *kw) +{ + PyObject *o; + PyObject *timer = NULL; + double timeunit = 0.0; + int subcalls = 1; +#ifdef PyTrace_C_CALL + int builtins = 1; +#else + int builtins = 0; +#endif + static const char *kwlist[] = {"timer", "timeunit", + "subcalls", "builtins", 0}; + + if (!PyArg_ParseTupleAndKeywords(args, kw, "|Odii:Profiler", kwlist, + &timer, &timeunit, + &subcalls, &builtins)) + return -1; + + if (setSubcalls(pObj, subcalls) < 0 || setBuiltins(pObj, builtins) < 0) + return -1; + o = pObj->externalTimer; + pObj->externalTimer = timer; + Py_XINCREF(timer); + Py_XDECREF(o); + pObj->externalTimerUnit = timeunit; + return 0; +} + +static PyMethodDef profiler_methods[] = { + {"getstats", (PyCFunction)profiler_getstats, + METH_NOARGS, getstats_doc}, + {"enable", (PyCFunction)profiler_enable, + METH_VARARGS | METH_KEYWORDS, enable_doc}, + {"disable", (PyCFunction)profiler_disable, + METH_NOARGS, disable_doc}, + {"clear", (PyCFunction)profiler_clear, + METH_NOARGS, clear_doc}, + {NULL, NULL} +}; + +PyDoc_STRVAR(profiler_doc, "\ +Profiler(custom_timer=None, time_unit=None, subcalls=True, builtins=True)\n\ +\n\ + Builds a profiler object using the specified timer function.\n\ + The default timer is a fast built-in one based on real time.\n\ + For custom timer functions returning integers, time_unit can\n\ + be a float specifying a scale (i.e. how long each integer unit\n\ + is, in seconds).\n\ +"); + +statichere PyTypeObject PyProfiler_Type = { + PyObject_HEAD_INIT(NULL) + 0, /* ob_size */ + "_lsprof.Profiler", /* tp_name */ + sizeof(ProfilerObject), /* tp_basicsize */ + 0, /* tp_itemsize */ + (destructor)profiler_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_compare */ + 0, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + 0, /* tp_str */ + 0, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */ + profiler_doc, /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + profiler_methods, /* tp_methods */ + 0, /* tp_members */ + 0, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + (initproc)profiler_init, /* tp_init */ + PyType_GenericAlloc, /* tp_alloc */ + PyType_GenericNew, /* tp_new */ + PyObject_Del, /* tp_free */ +}; + +static PyMethodDef moduleMethods[] = { + {NULL, NULL} +}; + +PyMODINIT_FUNC +init_lsprof(void) +{ + PyObject *module, *d; + module = Py_InitModule3("_lsprof", moduleMethods, "Fast profiler"); + d = PyModule_GetDict(module); + if (PyType_Ready(&PyProfiler_Type) < 0) + return; + PyDict_SetItemString(d, "Profiler", (PyObject *)&PyProfiler_Type); + + PyStructSequence_InitType(&StatsEntryType, &profiler_entry_desc); + PyStructSequence_InitType(&StatsSubEntryType, &profiler_subentry_desc); + Py_INCREF((PyObject*) &StatsEntryType); + Py_INCREF((PyObject*) &StatsSubEntryType); + PyModule_AddObject(module, "profiler_entry", + (PyObject*) &StatsEntryType); + PyModule_AddObject(module, "profiler_subentry", + (PyObject*) &StatsSubEntryType); + empty_tuple = PyTuple_New(0); +} diff --git a/Modules/rotatingtree.c b/Modules/rotatingtree.c new file mode 100644 index 0000000..952725b --- /dev/null +++ b/Modules/rotatingtree.c @@ -0,0 +1,121 @@ +#include "rotatingtree.h" + +#define KEY_LOWER_THAN(key1, key2) ((char*)(key1) < (char*)(key2)) + +/* The randombits() function below is a fast-and-dirty generator that + * is probably irregular enough for our purposes. Note that it's biased: + * I think that ones are slightly more probable than zeroes. It's not + * important here, though. + */ + +static unsigned int random_value = 1; +static unsigned int random_stream = 0; + +static int +randombits(int bits) +{ + int result; + if (random_stream < (1<>= bits; + return result; +} + + +/* Insert a new node into the tree. + (*root) is modified to point to the new root. */ +void +RotatingTree_Add(rotating_node_t **root, rotating_node_t *node) +{ + while (*root != NULL) { + if (KEY_LOWER_THAN(node->key, (*root)->key)) + root = &((*root)->left); + else + root = &((*root)->right); + } + node->left = NULL; + node->right = NULL; + *root = node; +} + +/* Locate the node with the given key. This is the most complicated + function because it occasionally rebalances the tree to move the + resulting node closer to the root. */ +rotating_node_t * +RotatingTree_Get(rotating_node_t **root, void *key) +{ + if (randombits(3) != 4) { + /* Fast path, no rebalancing */ + rotating_node_t *node = *root; + while (node != NULL) { + if (node->key == key) + return node; + if (KEY_LOWER_THAN(key, node->key)) + node = node->left; + else + node = node->right; + } + return NULL; + } + else { + rotating_node_t **pnode = root; + rotating_node_t *node = *pnode; + rotating_node_t *next; + int rotate; + if (node == NULL) + return NULL; + while (1) { + if (node->key == key) + return node; + rotate = !randombits(1); + if (KEY_LOWER_THAN(key, node->key)) { + next = node->left; + if (next == NULL) + return NULL; + if (rotate) { + node->left = next->right; + next->right = node; + *pnode = next; + } + else + pnode = &(node->left); + } + else { + next = node->right; + if (next == NULL) + return NULL; + if (rotate) { + node->right = next->left; + next->left = node; + *pnode = next; + } + else + pnode = &(node->right); + } + node = next; + } + } +} + +/* Enumerate all nodes in the tree. The callback enumfn() should return + zero to continue the enumeration, or non-zero to interrupt it. + A non-zero value is directly returned by RotatingTree_Enum(). */ +int +RotatingTree_Enum(rotating_node_t *root, rotating_tree_enum_fn enumfn, + void *arg) +{ + int result; + rotating_node_t *node; + while (root != NULL) { + result = RotatingTree_Enum(root->left, enumfn, arg); + if (result != 0) return result; + node = root->right; + result = enumfn(root, arg); + if (result != 0) return result; + root = node; + } + return 0; +} diff --git a/Modules/rotatingtree.h b/Modules/rotatingtree.h new file mode 100644 index 0000000..97cc8e8 --- /dev/null +++ b/Modules/rotatingtree.h @@ -0,0 +1,27 @@ +/* "Rotating trees" (Armin Rigo) + * + * Google "splay trees" for the general idea. + * + * It's a dict-like data structure that works best when accesses are not + * random, but follow a strong pattern. The one implemented here is for + * accesses patterns where the same small set of keys is looked up over + * and over again, and this set of keys evolves slowly over time. + */ + +#include + +#define EMPTY_ROTATING_TREE ((rotating_node_t *)NULL) + +typedef struct rotating_node_s rotating_node_t; +typedef int (*rotating_tree_enum_fn) (rotating_node_t *node, void *arg); + +struct rotating_node_s { + void *key; + rotating_node_t *left; + rotating_node_t *right; +}; + +void RotatingTree_Add(rotating_node_t **root, rotating_node_t *node); +rotating_node_t* RotatingTree_Get(rotating_node_t **root, void *key); +int RotatingTree_Enum(rotating_node_t *root, rotating_tree_enum_fn enumfn, + void *arg); diff --git a/setup.py b/setup.py index 7481dd7..9762b9a 100644 --- a/setup.py +++ b/setup.py @@ -328,7 +328,6 @@ class PyBuildExt(build_ext): # Some modules that are normally always on: exts.append( Extension('regex', ['regexmodule.c', 'regexpr.c']) ) - exts.append( Extension('_hotshot', ['_hotshot.c']) ) exts.append( Extension('_weakref', ['_weakref.c']) ) # array objects @@ -363,6 +362,9 @@ class PyBuildExt(build_ext): exts.append( Extension("functional", ["functionalmodule.c"]) ) # Python C API test module exts.append( Extension('_testcapi', ['_testcapimodule.c']) ) + # profilers (_lsprof is for cProfile.py) + exts.append( Extension('_hotshot', ['_hotshot.c']) ) + exts.append( Extension('_lsprof', ['_lsprof.c', 'rotatingtree.c']) ) # static Unicode character database if have_unicode: exts.append( Extension('unicodedata', ['unicodedata.c']) ) -- cgit v0.12