1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
|
#
# Class for profiling python code. rev 1.0 6/2/94
#
# Based on prior profile module by Sjoerd Mullender...
# which was hacked somewhat by: Guido van Rossum
#
# See profile.doc for more information
# Copyright 1994, by InfoSeek Corporation, all rights reserved.
# Written by James Roskind
#
# Permission to use, copy, modify, and distribute this Python software
# and its associated documentation for any purpose (subject to the
# restriction in the following sentence) without fee is hereby granted,
# provided that the above copyright notice appears in all copies, and
# that both that copyright notice and this permission notice appear in
# supporting documentation, and that the name of InfoSeek not be used in
# advertising or publicity pertaining to distribution of the software
# without specific, written prior permission. This permission is
# explicitly restricted to the copying and modification of the software
# to remain in Python, compiled Python, or other languages (such as C)
# wherein the modified or derived code is exclusively imported into a
# Python module.
#
# INFOSEEK CORPORATION DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS
# SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
# FITNESS. IN NO EVENT SHALL INFOSEEK CORPORATION BE LIABLE FOR ANY
# SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER
# RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF
# CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
import sys
import os
import time
import string
import marshal
# Global variables
func_norm_dict = {}
func_norm_counter = 0
if hasattr(os, 'getpid'):
pid_string = `os.getpid()`
else:
pid_string = ''
# Sample timer for use with
#i_count = 0
#def integer_timer():
# global i_count
# i_count = i_count + 1
# return i_count
#itimes = integer_timer # replace with C coded timer returning integers
#**************************************************************************
# The following are the static member functions for the profiler class
# Note that an instance of Profile() is *not* needed to call them.
#**************************************************************************
# simplified user interface
def run(statement, *args):
prof = Profile()
try:
prof = prof.run(statement)
except SystemExit:
pass
if args:
prof.dump_stats(args[0])
else:
return prof.print_stats()
# print help
def help():
for dirname in sys.path:
fullname = os.path.join(dirname, 'profile.doc')
if os.path.exists(fullname):
sts = os.system('${PAGER-more} '+fullname)
if sts: print '*** Pager exit status:', sts
break
else:
print 'Sorry, can\'t find the help file "profile.doc"',
print 'along the Python search path'
#**************************************************************************
# class Profile documentation:
#**************************************************************************
# self.cur is always a tuple. Each such tuple corresponds to a stack
# frame that is currently active (self.cur[-2]). The following are the
# definitions of its members. We use this external "parallel stack" to
# avoid contaminating the program that we are profiling. (old profiler
# used to write into the frames local dictionary!!) Derived classes
# can change the definition of some entries, as long as they leave
# [-2:] intact.
#
# [ 0] = Time that needs to be charged to the parent frame's function. It is
# used so that a function call will not have to access the timing data
# for the parents frame.
# [ 1] = Total time spent in this frame's function, excluding time in
# subfunctions
# [ 2] = Cumulative time spent in this frame's function, including time in
# all subfunctions to this frame.
# [-3] = Name of the function that corresonds to this frame.
# [-2] = Actual frame that we correspond to (used to sync exception handling)
# [-1] = Our parent 6-tuple (corresonds to frame.f_back)
#**************************************************************************
# Timing data for each function is stored as a 5-tuple in the dictionary
# self.timings[]. The index is always the name stored in self.cur[4].
# The following are the definitions of the members:
#
# [0] = The number of times this function was called, not counting direct
# or indirect recursion,
# [1] = Number of times this function appears on the stack, minus one
# [2] = Total time spent internal to this function
# [3] = Cumulative time that this function was present on the stack. In
# non-recursive functions, this is the total execution time from start
# to finish of each invocation of a function, including time spent in
# all subfunctions.
# [5] = A dictionary indicating for each function name, the number of times
# it was called by us.
#**************************************************************************
# We produce function names via a repr() call on the f_code object during
# profiling. This save a *lot* of CPU time. This results in a string that
# always looks like:
# <code object main at 87090, file "/a/lib/python-local/myfib.py", line 76>
# After we "normalize it, it is a tuple of filename, line, function-name.
# We wait till we are done profiling to do the normalization.
# *IF* this repr format changes, then only the normalization routine should
# need to be fixed.
#**************************************************************************
class Profile:
def __init__(self, timer=None):
self.timings = {}
self.cur = None
self.cmd = ""
self.dispatch = { \
'call' : self.trace_dispatch_call, \
'return' : self.trace_dispatch_return, \
'exception': self.trace_dispatch_exception, \
}
if not timer:
if hasattr(os, 'times'):
self.timer = os.times
self.dispatcher = self.trace_dispatch
else:
self.timer = time.time
self.dispatcher = self.trace_dispatch_i
else:
self.timer = timer
t = self.timer() # test out timer function
try:
if len(t) == 2:
self.dispatcher = self.trace_dispatch
else:
self.dispatcher = self.trace_dispatch_l
except TypeError:
self.dispatcher = self.trace_dispatch_i
self.t = self.get_time()
self.simulate_call('profiler')
def get_time(self): # slow simulation of method to acquire time
t = self.timer()
if type(t) == type(()) or type(t) == type([]):
t = reduce(lambda x,y: x+y, t, 0)
return t
# Heavily optimized dispatch routine for os.times() timer
def trace_dispatch(self, frame, event, arg):
t = self.timer()
t = t[0] + t[1] - self.t # No Calibration constant
# t = t[0] + t[1] - self.t - .00053 # Calibration constant
if self.dispatch[event](frame,t):
t = self.timer()
self.t = t[0] + t[1]
else:
r = self.timer()
self.t = r[0] + r[1] - t # put back unrecorded delta
return
# Dispatch routine for best timer program (return = scalar integer)
def trace_dispatch_i(self, frame, event, arg):
t = self.timer() - self.t # - 1 # Integer calibration constant
if self.dispatch[event](frame,t):
self.t = self.timer()
else:
self.t = self.timer() - t # put back unrecorded delta
return
# SLOW generic dispatch rountine for timer returning lists of numbers
def trace_dispatch_l(self, frame, event, arg):
t = self.get_time() - self.t
if self.dispatch[event](frame,t):
self.t = self.get_time()
else:
self.t = self.get_time()-t # put back unrecorded delta
return
def trace_dispatch_exception(self, frame, t):
rt, rtt, rct, rfn, rframe, rcur = self.cur
if (not rframe is frame) and rcur:
return self.trace_dispatch_return(rframe, t)
return 0
def trace_dispatch_call(self, frame, t):
fn = `frame.f_code`
# The following should be about the best approach, but
# we would need a function that maps from id() back to
# the actual code object.
# fn = id(frame.f_code)
# Note we would really use our own function, which would
# return the code address, *and* bump the ref count. We
# would then fix up the normalize function to do the
# actualy repr(fn) call.
# The following is an interesting alternative
# It doesn't do as good a job, and it doesn't run as
# fast 'cause repr() is written in C, and this is Python.
#fcode = frame.f_code
#code = fcode.co_code
#if ord(code[0]) == 127: # == SET_LINENO
# # see "opcode.h" in the Python source
# fn = (fcode.co_filename, ord(code[1]) | \
# ord(code[2]) << 8, fcode.co_name)
#else:
# fn = (fcode.co_filename, 0, fcode.co_name)
self.cur = (t, 0, 0, fn, frame, self.cur)
if self.timings.has_key(fn):
cc, ns, tt, ct, callers = self.timings[fn]
self.timings[fn] = cc, ns + 1, tt, ct, callers
else:
self.timings[fn] = 0, 0, 0, 0, {}
return 1
def trace_dispatch_return(self, frame, t):
# if not frame is self.cur[-2]: raise "Bad return", self.cur[3]
# Prefix "r" means part of the Returning or exiting frame
# Prefix "p" means part of the Previous or older frame
rt, rtt, rct, rfn, frame, rcur = self.cur
rtt = rtt + t
sft = rtt + rct
pt, ptt, pct, pfn, pframe, pcur = rcur
self.cur = pt, ptt+rt, pct+sft, pfn, pframe, pcur
cc, ns, tt, ct, callers = self.timings[rfn]
if not ns:
ct = ct + sft
cc = cc + 1
if callers.has_key(pfn):
callers[pfn] = callers[pfn] + 1 # hack: gather more
# stats such as the amount of time added to ct courtesy
# of this specific call, and the contribution to cc
# courtesy of this call.
else:
callers[pfn] = 1
self.timings[rfn] = cc, ns - 1, tt+rtt, ct, callers
return 1
# The next few function play with self.cmd. By carefully preloading
# our paralell stack, we can force the profiled result to include
# an arbitrary string as the name of the calling function.
# We use self.cmd as that string, and the resulting stats look
# very nice :-).
def set_cmd(self, cmd):
if self.cur[-1]: return # already set
self.cmd = cmd
self.simulate_call(cmd)
class fake_code:
def __init__(self, filename, line, name):
self.co_filename = filename
self.co_line = line
self.co_name = name
self.co_code = '\0' # anything but 127
def __repr__(self):
return (self.co_filename, self.co_line, self.co_name)
class fake_frame:
def __init__(self, code, prior):
self.f_code = code
self.f_back = prior
def simulate_call(self, name):
code = self.fake_code('profile', 0, name)
if self.cur:
pframe = self.cur[-2]
else:
pframe = None
frame = self.fake_frame(code, pframe)
a = self.dispatch['call'](frame, 0)
return
# collect stats from pending stack, including getting final
# timings for self.cmd frame.
def simulate_cmd_complete(self):
t = self.get_time() - self.t
while self.cur[-1]:
# We *can* cause assertion errors here if
# dispatch_trace_return checks for a frame match!
a = self.dispatch['return'](self.cur[-2], t)
t = 0
self.t = self.get_time() - t
def print_stats(self):
import pstats
pstats.Stats(self).strip_dirs().sort_stats(-1). \
print_stats()
def dump_stats(self, file):
f = open(file, 'w')
self.create_stats()
marshal.dump(self.stats, f)
f.close()
def create_stats(self):
self.simulate_cmd_complete()
self.snapshot_stats()
def snapshot_stats(self):
self.stats = {}
for func in self.timings.keys():
cc, ns, tt, ct, callers = self.timings[func]
nor_func = self.func_normalize(func)
nor_callers = {}
nc = 0
for func_caller in callers.keys():
nor_callers[self.func_normalize(func_caller)]=\
callers[func_caller]
nc = nc + callers[func_caller]
self.stats[nor_func] = cc, nc, tt, ct, nor_callers
# Override the following function if you can figure out
# a better name for the binary f_code entries. I just normalize
# them sequentially in a dictionary. It would be nice if we could
# *really* see the name of the underlying C code :-). Sometimes
# you can figure out what-is-what by looking at caller and callee
# lists (and knowing what your python code does).
def func_normalize(self, func_name):
global func_norm_dict
global func_norm_counter
global func_sequence_num
if func_norm_dict.has_key(func_name):
return func_norm_dict[func_name]
if type(func_name) == type(""):
long_name = string.split(func_name)
file_name = long_name[-3][1:-2]
func = long_name[2]
lineno = long_name[-1][:-1]
if '?' == func: # Until I find out how to may 'em...
file_name = 'python'
func_norm_counter = func_norm_counter + 1
func = pid_string + ".C." + `func_norm_counter`
result = file_name , string.atoi(lineno) , func
else:
result = func_name
func_norm_dict[func_name] = result
return result
# The following two methods can be called by clients to use
# a profiler to profile a statement, given as a string.
def run(self, cmd):
import __main__
dict = __main__.__dict__
self.runctx(cmd, dict, dict)
return self
def runctx(self, cmd, globals, locals):
self.set_cmd(cmd)
sys.setprofile(self.dispatcher)
try:
exec cmd in globals, locals
finally:
sys.setprofile(None)
# This method is more useful to profile a single function call.
def runcall(self, func, *args):
self.set_cmd(`func`)
sys.setprofile(self.dispatcher)
try:
apply(func, args)
finally:
sys.setprofile(None)
return self
#******************************************************************
# The following calculates the overhead for using a profiler. The
# problem is that it takes a fair amount of time for the profiler
# to stop the stopwatch (from the time it recieves an event).
# Similarly, there is a delay from the time that the profiler
# re-starts the stopwatch before the user's code really gets to
# continue. The following code tries to measure the difference on
# a per-event basis. The result can the be placed in the
# Profile.dispatch_event() routine for the given platform. Note
# that this difference is only significant if there are a lot of
# events, and relatively little user code per event. For example,
# code with small functions will typically benefit from having the
# profiler calibrated for the current platform. This *could* be
# done on the fly during init() time, but it is not worth the
# effort. Also note that if too large a value specified, then
# execution time on some functions will actually appear as a
# negative number. It is *normal* for some functions (with very
# low call counts) to have such negative stats, even if the
# calibration figure is "correct."
#
# One alternative to profile-time calibration adjustments (i.e.,
# adding in the magic little delta during each event) is to track
# more carefully the number of events (and cumulatively, the number
# of events during sub functions) that are seen. If this were
# done, then the arithmetic could be done after the fact (i.e., at
# display time). Currintly, we track only call/return events.
# These values can be deduced by examining the callees and callers
# vectors for each functions. Hence we *can* almost correct the
# internal time figure at print time (note that we currently don't
# track exception event processing counts). Unfortunately, there
# is currently no similar information for cumulative sub-function
# time. It would not be hard to "get all this info" at profiler
# time. Specifically, we would have to extend the tuples to keep
# counts of this in each frame, and then extend the defs of timing
# tuples to include the significant two figures. I'm a bit fearful
# that this additional feature will slow the heavily optimized
# event/time ratio (i.e., the profiler would run slower, fur a very
# low "value added" feature.)
#
# Plugging in the calibration constant doesn't slow down the
# profiler very much, and the accuracy goes way up.
#**************************************************************
def calibrate(self, m):
n = m
s = self.timer()
while n:
self.simple()
n = n - 1
f = self.timer()
my_simple = f[0]+f[1]-s[0]-s[1]
#print "Simple =", my_simple,
n = m
s = self.timer()
while n:
self.instrumented()
n = n - 1
f = self.timer()
my_inst = f[0]+f[1]-s[0]-s[1]
# print "Instrumented =", my_inst
avg_cost = (my_inst - my_simple)/m
#print "Delta/call =", avg_cost, "(profiler fixup constant)"
return avg_cost
# simulate a program with no profiler activity
def simple(self):
a = 1
pass
# simulate a program with call/return event processing
def instrumented(self):
a = 1
self.profiler_simulation(a, a, a)
# simulate an event processing activity (from user's perspective)
def profiler_simulation(self, x, y, z):
t = self.timer()
t = t[0] + t[1]
self.ut = t
#****************************************************************************
# OldProfile class documentation
#****************************************************************************
#
# The following derived profiler simulates the old style profile, providing
# errant results on recursive functions. The reason for the usefulnes of this
# profiler is that it runs faster (i.e., less overhead). It still creates
# all the caller stats, and is quite useful when there is *no* recursion
# in the user's code.
#
# This code also shows how easy it is to create a modified profiler.
#****************************************************************************
class OldProfile(Profile):
def trace_dispatch_exception(self, frame, t):
rt, rtt, rct, rfn, rframe, rcur = self.cur
if rcur and not rframe is frame:
return self.trace_dispatch_return(rframe, t)
return 0
def trace_dispatch_call(self, frame, t):
fn = `frame.f_code`
self.cur = (t, 0, 0, fn, frame, self.cur)
if self.timings.has_key(fn):
tt, ct, callers = self.timings[fn]
self.timings[fn] = tt, ct, callers
else:
self.timings[fn] = 0, 0, {}
return 1
def trace_dispatch_return(self, frame, t):
rt, rtt, rct, rfn, frame, rcur = self.cur
rtt = rtt + t
sft = rtt + rct
pt, ptt, pct, pfn, pframe, pcur = rcur
self.cur = pt, ptt+rt, pct+sft, pfn, pframe, pcur
tt, ct, callers = self.timings[rfn]
if callers.has_key(pfn):
callers[pfn] = callers[pfn] + 1
else:
callers[pfn] = 1
self.timings[rfn] = tt+rtt, ct + sft, callers
return 1
def snapshot_stats(self):
self.stats = {}
for func in self.timings.keys():
tt, ct, callers = self.timings[func]
nor_func = self.func_normalize(func)
nor_callers = {}
nc = 0
for func_caller in callers.keys():
nor_callers[self.func_normalize(func_caller)]=\
callers[func_caller]
nc = nc + callers[func_caller]
self.stats[nor_func] = nc, nc, tt, ct, nor_callers
#****************************************************************************
# HotProfile class documentation
#****************************************************************************
#
# This profiler is the fastest derived profile example. It does not
# calculate caller-callee relationships, and does not calculate cumulative
# time under a function. It only calculates time spent in a function, so
# it runs very quickly (re: very low overhead)
#****************************************************************************
class HotProfile(Profile):
def trace_dispatch_exception(self, frame, t):
rt, rtt, rfn, rframe, rcur = self.cur
if rcur and not rframe is frame:
return self.trace_dispatch_return(rframe, t)
return 0
def trace_dispatch_call(self, frame, t):
self.cur = (t, 0, frame, self.cur)
return 1
def trace_dispatch_return(self, frame, t):
rt, rtt, frame, rcur = self.cur
rfn = `frame.f_code`
pt, ptt, pframe, pcur = rcur
self.cur = pt, ptt+rt, pframe, pcur
if self.timings.has_key(rfn):
nc, tt = self.timings[rfn]
self.timings[rfn] = nc + 1, rt + rtt + tt
else:
self.timings[rfn] = 1, rt + rtt
return 1
def snapshot_stats(self):
self.stats = {}
for func in self.timings.keys():
nc, tt = self.timings[func]
nor_func = self.func_normalize(func)
self.stats[nor_func] = nc, nc, tt, 0, {}
#****************************************************************************
def Stats(*args):
print 'Report generating functions are in the "pstats" module\a'
|