#! /usr/bin/env python
#
# Class for profiling python code. rev 1.0  6/2/94
#
# Based on prior profile module by Sjoerd Mullender...
#   which was hacked somewhat by: Guido van Rossum
#
# See profile.doc for more information

"""Class for profiling Python code."""

# Copyright 1994, by InfoSeek Corporation, all rights reserved.
# Written by James Roskind
#
# Permission to use, copy, modify, and distribute this Python software
# and its associated documentation for any purpose (subject to the
# restriction in the following sentence) without fee is hereby granted,
# provided that the above copyright notice appears in all copies, and
# that both that copyright notice and this permission notice appear in
# supporting documentation, and that the name of InfoSeek not be used in
# advertising or publicity pertaining to distribution of the software
# without specific, written prior permission.  This permission is
# explicitly restricted to the copying and modification of the software
# to remain in Python, compiled Python, or other languages (such as C)
# wherein the modified or derived code is exclusively imported into a
# Python module.
#
# INFOSEEK CORPORATION DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS
# SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
# FITNESS. IN NO EVENT SHALL INFOSEEK CORPORATION BE LIABLE FOR ANY
# SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER
# RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF
# CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.


import sys
39
import os
40
import time
41
import marshal
Guido van Rossum's avatar
Guido van Rossum committed
42

43
__all__ = ["run","help","Profile"]
44

# Sample timer for use with the Profile class (pass it as the "timer"
# argument); it simply returns an increasing integer on every call.
#i_count = 0
#def integer_timer():
#       global i_count
#       i_count = i_count + 1
#       return i_count
#itimes = integer_timer # replace with C coded timer returning integers

#**************************************************************************
# The following are the static member functions for the profiler class
# Note that an instance of Profile() is *not* needed to call them.
#**************************************************************************

def run(statement, filename=None):
    """Profile `statement` and either save or print the statistics.

    `statement` is anything that can be handed to "exec"; it is executed
    under a fresh Profile instance.  A SystemExit raised by the profiled
    code is swallowed so that the statistics gathered so far are still
    reported.

    When `filename` is given, the statistics are written to that file.
    Otherwise a simple report is printed, sorted by the standard name
    string (file/line/function-name) shown on each line.
    """
    profiler = Profile()
    try:
        profiler = profiler.run(statement)
    except SystemExit:
        # The profiled code asked to exit; report what was collected.
        pass

    if filename is None:
        return profiler.print_stats()
    profiler.dump_stats(filename)
78 79 80

# print help
def help():
    """Show the "profile.doc" help file through the user's pager.

    Every directory on sys.path is searched for "profile.doc"; the first
    match is displayed with $PAGER (falling back to "more").  A non-zero
    pager exit status is reported.  If no help file is found, a short
    apology is printed instead.
    """
    for dirname in sys.path:
        fullname = os.path.join(dirname, 'profile.doc')
        if os.path.exists(fullname):
            # Single-quote the path for the shell so that directories
            # containing spaces or shell metacharacters are passed to the
            # pager as one literal argument.
            quoted = "'" + fullname.replace("'", "'\\''") + "'"
            sts = os.system('${PAGER-more} ' + quoted)
            if sts:
                print('*** Pager exit status: %s' % (sts,))
            break
    else:
        print('Sorry, can\'t find the help file "profile.doc" '
              'along the Python search path.')
90 91


92
if os.name == "mac":
    import MacOS

    def _get_time_mac(timer=MacOS.GetTicks):
        """Return the timer reading divided by 60.0 (ticks -> seconds)."""
        ticks = timer()
        return ticks / 60.0

if hasattr(os, "times"):
    def _get_time_times(timer=os.times):
        """Return the sum of the first two fields of the timer reading."""
        usage = timer()
        first, second = usage[0], usage[1]
        return first + second


103
class Profile:
    """Profiler class.

    self.cur is always a tuple.  Each such tuple corresponds to a stack
    frame that is currently active (self.cur[-2]).  The following are the
    definitions of its members.  We use this external "parallel stack" to
    avoid contaminating the program that we are profiling. (old profiler
    used to write into the frames local dictionary!!) Derived classes
    can change the definition of some entries, as long as they leave
    [-2:] intact (frame and previous tuple).  In case an internal error is
    detected, the -3 element is used as the function name.

    [ 0] = Time that needs to be charged to the parent frame's function.
           It is used so that a function call will not have to access the
           timing data for the parent frame.
    [ 1] = Total time spent in this frame's function, excluding time in
           subfunctions (this latter is tallied in cur[2]).
    [ 2] = Total time spent in subfunctions, excluding time executing the
           frame's function (this latter is tallied in cur[1]).
    [-3] = Name of the function that corresponds to this frame.
    [-2] = Actual frame that we correspond to (used to sync exception handling).
    [-1] = Our parent 6-tuple (corresponds to frame.f_back).

    Timing data for each function is stored as a 5-tuple in the dictionary
    self.timings[].  The index is always the name stored in self.cur[-3].
    The following are the definitions of the members:

    [0] = The number of times this function was called, not counting direct
          or indirect recursion,
    [1] = Number of times this function appears on the stack, minus one
    [2] = Total time spent internal to this function
    [3] = Cumulative time that this function was present on the stack.  In
          non-recursive functions, this is the total execution time from start
          to finish of each invocation of a function, including time spent in
          all subfunctions.
    [4] = A dictionary indicating for each function name, the number of times
          it was called by us.

    Calls into C functions (the "c_call", "c_return" and "c_exception"
    profile events) are recorded under the name ("", 0, <function name>).
    """

    bias = 0  # calibration constant

    def __init__(self, timer=None, bias=None):
        """Set up the timer, the matching dispatcher and the root frame.

        timer -- optional callable returning either a number or a
                 sequence of numbers; when omitted a platform default
                 is selected.
        bias  -- optional per-event overhead subtracted from every
                 measurement; defaults to the class attribute `bias`.
        """
        self.timings = {}
        self.cur = None
        self.cmd = ""
        self.c_func_name = ""   # name of the C function of the last c_call

        if bias is None:
            bias = self.bias
        self.bias = bias     # Materialize in local dict for lookup speed.

        if timer is None:
            if os.name == 'mac':
                self.timer = MacOS.GetTicks
                self.dispatcher = self.trace_dispatch_mac
                self.get_time = _get_time_mac
            elif hasattr(time, 'clock'):
                self.timer = self.get_time = time.clock
                self.dispatcher = self.trace_dispatch_i
            elif hasattr(os, 'times'):
                self.timer = os.times
                self.dispatcher = self.trace_dispatch
                self.get_time = _get_time_times
            else:
                self.timer = self.get_time = time.time
                self.dispatcher = self.trace_dispatch_i
        else:
            self.timer = timer
            t = self.timer() # test out timer function
            try:
                length = len(t)
            except TypeError:
                # Scalar timer: use it directly.
                self.get_time = timer
                self.dispatcher = self.trace_dispatch_i
            else:
                if length == 2:
                    self.dispatcher = self.trace_dispatch
                else:
                    self.dispatcher = self.trace_dispatch_l
                # This get_time() implementation needs to be defined
                # here to capture the passed-in timer in the parameter
                # list (for performance).  Note that we can't assume
                # the timer() result contains two values in all
                # cases.
                def get_time_timer(timer=timer, sum=sum):
                    return sum(timer())
                self.get_time = get_time_timer
        self.t = self.get_time()
        self.simulate_call('profiler')

    # Heavily optimized dispatch routine for os.times() timer

    def trace_dispatch(self, frame, event, arg):
        """Profile hook for timers whose first two fields are summed."""
        timer = self.timer
        t = timer()
        t = t[0] + t[1] - self.t - self.bias

        if event == "c_call":
            self.c_func_name = arg.__name__

        if self.dispatch[event](self, frame, t):
            t = timer()
            self.t = t[0] + t[1]
        else:
            r = timer()
            self.t = r[0] + r[1] - t # put back unrecorded delta

    # Dispatch routine for best timer program (return = scalar, fastest if
    # an integer but float works too -- and time.clock() relies on that).

    def trace_dispatch_i(self, frame, event, arg):
        """Profile hook for timers that return a single number."""
        timer = self.timer
        t = timer() - self.t - self.bias

        if event == "c_call":
            self.c_func_name = arg.__name__

        if self.dispatch[event](self, frame, t):
            self.t = timer()
        else:
            self.t = timer() - t  # put back unrecorded delta

    # Dispatch routine for macintosh (timer returns time in ticks of
    # 1/60th second)

    def trace_dispatch_mac(self, frame, event, arg):
        """Profile hook for a timer that counts 1/60th second ticks."""
        timer = self.timer
        t = timer()/60.0 - self.t - self.bias

        if event == "c_call":
            self.c_func_name = arg.__name__

        if self.dispatch[event](self, frame, t):
            self.t = timer()/60.0
        else:
            self.t = timer()/60.0 - t  # put back unrecorded delta

    # SLOW generic dispatch routine for timer returning lists of numbers

    def trace_dispatch_l(self, frame, event, arg):
        """Profile hook for timers returning a sequence of any length."""
        get_time = self.get_time
        t = get_time() - self.t - self.bias

        if event == "c_call":
            self.c_func_name = arg.__name__

        if self.dispatch[event](self, frame, t):
            self.t = get_time()
        else:
            self.t = get_time() - t # put back unrecorded delta

    # In the event handlers, the first 3 elements of self.cur are unpacked
    # into vrbls w/ 3-letter names.  The last two characters are meant to be
    # mnemonic:
    #     _pt  self.cur[0] "parent time"   time to be charged to parent frame
    #     _it  self.cur[1] "internal time" time spent directly in the function
    #     _et  self.cur[2] "external time" time spent in subfunctions

    def trace_dispatch_exception(self, frame, t):
        """Handle an "exception" event; always returns 1 on the direct path.

        If the top of the parallel stack is not `frame` (and it has a
        parent entry), the top entry is treated as having returned.
        Otherwise `t` is added to the current entry's internal time.
        """
        rpt, rit, ret, rfn, rframe, rcur = self.cur
        if (rframe is not frame) and rcur:
            return self.trace_dispatch_return(rframe, t)
        self.cur = rpt, rit+t, ret, rfn, rframe, rcur
        return 1


    def trace_dispatch_call(self, frame, t):
        """Handle a "call" event: push `frame` on the parallel stack."""
        if self.cur and frame.f_back is not self.cur[-2]:
            # The stack top is not our caller: a return was missed, so
            # simulate it before pushing the new frame.
            rpt, rit, ret, rfn, rframe, rcur = self.cur
            if not isinstance(rframe, Profile.fake_frame):
                assert rframe.f_back is frame.f_back, ("Bad call", rfn,
                                                       rframe, rframe.f_back,
                                                       frame, frame.f_back)
                self.trace_dispatch_return(rframe, 0)
                assert (self.cur is None or \
                        frame.f_back is self.cur[-2]), ("Bad call",
                                                        self.cur[-3])
        fcode = frame.f_code
        fn = (fcode.co_filename, fcode.co_firstlineno, fcode.co_name)
        self.cur = (t, 0, 0, fn, frame, self.cur)
        timings = self.timings
        if fn in timings:
            cc, ns, tt, ct, callers = timings[fn]
            timings[fn] = cc, ns + 1, tt, ct, callers
        else:
            timings[fn] = 0, 0, 0, 0, {}
        return 1

    def trace_dispatch_c_call(self, frame, t):
        """Handle a "c_call" event: push an entry for the C function.

        The entry is keyed by ("", 0, self.c_func_name); the dispatcher
        stores the function name before calling this handler.  The Python
        frame making the call is kept as the entry's frame so that the
        matching "c_return"/"c_exception" event pops it.
        """
        fn = ("", 0, self.c_func_name)
        self.cur = (t, 0, 0, fn, frame, self.cur)
        timings = self.timings
        if fn in timings:
            cc, ns, tt, ct, callers = timings[fn]
            timings[fn] = cc, ns + 1, tt, ct, callers
        else:
            timings[fn] = 0, 0, 0, 0, {}
        return 1

    def trace_dispatch_return(self, frame, t):
        """Handle a "return" event: pop the stack and update the timings."""
        if frame is not self.cur[-2]:
            assert frame is self.cur[-2].f_back, ("Bad return", self.cur[-3])
            self.trace_dispatch_return(self.cur[-2], 0)

        # Prefix "r" means part of the Returning or exiting frame.
        # Prefix "p" means part of the Previous or Parent or older frame.

        rpt, rit, ret, rfn, frame, rcur = self.cur
        rit = rit + t
        frame_total = rit + ret

        ppt, pit, pet, pfn, pframe, pcur = rcur
        self.cur = ppt, pit + rpt, pet + frame_total, pfn, pframe, pcur

        timings = self.timings
        cc, ns, tt, ct, callers = timings[rfn]
        if not ns:
            # This is the only occurrence of the function on the stack.
            # Else this is a (directly or indirectly) recursive call, and
            # its cumulative time will get updated when the topmost call to
            # it returns.
            ct = ct + frame_total
            cc = cc + 1

        if pfn in callers:
            callers[pfn] = callers[pfn] + 1  # hack: gather more
            # stats such as the amount of time added to ct courtesy
            # of this specific call, and the contribution to cc
            # courtesy of this call.
        else:
            callers[pfn] = 1

        timings[rfn] = cc, ns - 1, tt + rit, ct, callers

        return 1


    # Maps profile event names to the (unbound) handler functions.
    dispatch = {
        "call": trace_dispatch_call,
        "exception": trace_dispatch_exception,
        "return": trace_dispatch_return,
        "c_call": trace_dispatch_c_call,
        "c_exception": trace_dispatch_return,  # the C function returned
        "c_return": trace_dispatch_return,
        }


    # The next few functions play with self.cmd. By carefully preloading
    # our parallel stack, we can force the profiled result to include
    # an arbitrary string as the name of the calling function.
    # We use self.cmd as that string, and the resulting stats look
    # very nice :-).

    def set_cmd(self, cmd):
        """Record `cmd` as the simulated top-level caller (only once)."""
        if self.cur[-1]: return   # already set
        self.cmd = cmd
        self.simulate_call(cmd)

    class fake_code:
        """Stand-in for a code object, used for simulated calls."""

        def __init__(self, filename, line, name):
            self.co_filename = filename
            self.co_line = line
            self.co_name = name
            self.co_firstlineno = 0

        def __repr__(self):
            return repr((self.co_filename, self.co_line, self.co_name))

    class fake_frame:
        """Stand-in for a frame object, used for simulated calls."""

        def __init__(self, code, prior):
            self.f_code = code
            self.f_back = prior

    def simulate_call(self, name):
        """Push a fake frame named `name` as if it had just been called."""
        code = self.fake_code('profile', 0, name)
        if self.cur:
            pframe = self.cur[-2]
        else:
            pframe = None
        frame = self.fake_frame(code, pframe)
        self.dispatch['call'](self, frame, 0)

    # collect stats from pending stack, including getting final
    # timings for self.cmd frame.

    def simulate_cmd_complete(self):
        """Unwind every pending entry except the root of the stack."""
        get_time = self.get_time
        t = get_time() - self.t
        while self.cur[-1]:
            # We *can* cause assertion errors here if
            # dispatch_trace_return checks for a frame match!
            self.dispatch['return'](self, self.cur[-2], t)
            t = 0
        self.t = get_time() - t


    def print_stats(self):
        """Print a report via pstats, directories stripped, sorted by -1."""
        import pstats
        pstats.Stats(self).strip_dirs().sort_stats(-1). \
                  print_stats()

    def dump_stats(self, file):
        """Marshal the collected statistics into the file named `file`."""
        f = open(file, 'wb')
        try:
            self.create_stats()
            marshal.dump(self.stats, f)
        finally:
            # Close the file even if building or dumping the stats fails.
            f.close()

    def create_stats(self):
        """Finish pending frames and build self.stats."""
        self.simulate_cmd_complete()
        self.snapshot_stats()

    def snapshot_stats(self):
        """Build self.stats from self.timings.

        Each entry becomes (cc, nc, tt, ct, callers) where nc is the sum
        of the per-caller call counts and callers is a copy of the
        callers dictionary.
        """
        self.stats = {}
        for func, (cc, ns, tt, ct, callers) in self.timings.items():
            callers = callers.copy()
            nc = 0
            for callcnt in callers.values():
                nc += callcnt
            self.stats[func] = cc, nc, tt, ct, callers


    # The following two methods can be called by clients to use
    # a profiler to profile a statement, given as a string.

    def run(self, cmd):
        """Profile `cmd` in the namespace of __main__; returns self."""
        import __main__
        dict = __main__.__dict__
        return self.runctx(cmd, dict, dict)

    def runctx(self, cmd, globals, locals):
        """Profile `cmd` executed in the given namespaces; returns self."""
        self.set_cmd(cmd)
        sys.setprofile(self.dispatcher)
        try:
            # Tuple form of exec: same meaning as "exec cmd in globals,
            # locals", and also valid where exec is a function.
            exec(cmd, globals, locals)
        finally:
            sys.setprofile(None)
        return self

    # This method is more useful to profile a single function call.
    def runcall(self, func, *args, **kw):
        """Profile one call func(*args, **kw) and return its result."""
        self.set_cmd(repr(func))
        sys.setprofile(self.dispatcher)
        try:
            return func(*args, **kw)
        finally:
            sys.setprofile(None)


    #******************************************************************
    # The following calculates the overhead for using a profiler.  The
    # problem is that it takes a fair amount of time for the profiler
    # to stop the stopwatch (from the time it receives an event).
    # Similarly, there is a delay from the time that the profiler
    # re-starts the stopwatch before the user's code really gets to
    # continue.  The following code tries to measure the difference on
    # a per-event basis.
    #
    # Note that this difference is only significant if there are a lot of
    # events, and relatively little user code per event.  For example,
    # code with small functions will typically benefit from having the
    # profiler calibrated for the current platform.  This *could* be
    # done on the fly during init() time, but it is not worth the
    # effort.  Also note that if too large a value specified, then
    # execution time on some functions will actually appear as a
    # negative number.  It is *normal* for some functions (with very
    # low call counts) to have such negative stats, even if the
    # calibration figure is "correct."
    #
    # One alternative to profile-time calibration adjustments (i.e.,
    # adding in the magic little delta during each event) is to track
    # more carefully the number of events (and cumulatively, the number
    # of events during sub functions) that are seen.  If this were
    # done, then the arithmetic could be done after the fact (i.e., at
    # display time).  Currently, we track only call/return events.
    # These values can be deduced by examining the callees and callers
    # vectors for each functions.  Hence we *can* almost correct the
    # internal time figure at print time (note that we currently don't
    # track exception event processing counts).  Unfortunately, there
    # is currently no similar information for cumulative sub-function
    # time.  It would not be hard to "get all this info" at profiler
    # time.  Specifically, we would have to extend the tuples to keep
    # counts of this in each frame, and then extend the defs of timing
    # tuples to include the significant two figures. I'm a bit fearful
    # that this additional feature will slow the heavily optimized
    # event/time ratio (i.e., the profiler would run slower, for a very
    # low "value added" feature.)
    #**************************************************************

    def calibrate(self, m, verbose=0):
        """Return the mean stopwatch overhead per profile event.

        m       -- number of calls made by the calibration workload.
        verbose -- when true, print the intermediate measurements.

        Raises TypeError when called on an instance of a subclass.
        self.bias is forced to 0 for the measurement and restored after.
        """
        if self.__class__ is not Profile:
            raise TypeError("Subclasses must override .calibrate().")

        saved_bias = self.bias
        self.bias = 0
        try:
            return self._calibrate_inner(m, verbose)
        finally:
            self.bias = saved_bias

    def _calibrate_inner(self, m, verbose):
        """Run the calibration workload; see calibrate().

        Raises ValueError if the profiler did not record exactly m + 1
        calls for the workload functions.
        """
        get_time = self.get_time

        # Set up a test case to be run with and without profiling.  Include
        # lots of calls, because we're trying to quantify stopwatch overhead.
        # Do not raise any exceptions, though, because we want to know
        # exactly how many profile events are generated (one call event, +
        # one return event, per Python-level call).

        def f1(n):
            for i in range(n):
                x = 1

        def f(m, f1=f1):
            for i in range(m):
                f1(100)

        f(m)    # warm up the cache

        # elapsed_noprofile <- time f(m) takes without profiling.
        t0 = get_time()
        f(m)
        t1 = get_time()
        elapsed_noprofile = t1 - t0
        if verbose:
            print("elapsed time without profiling = %s"
                  % (elapsed_noprofile,))

        # elapsed_profile <- time f(m) takes with profiling.  The difference
        # is profiling overhead, only some of which the profiler subtracts
        # out on its own.
        p = Profile()
        t0 = get_time()
        p.runctx('f(m)', globals(), locals())
        t1 = get_time()
        elapsed_profile = t1 - t0
        if verbose:
            print("elapsed time with profiling = %s" % (elapsed_profile,))

        # reported_time <- "CPU seconds" the profiler charged to f and f1.
        total_calls = 0.0
        reported_time = 0.0
        for (filename, line, funcname), (cc, ns, tt, ct, callers) in \
                p.timings.items():
            if funcname in ("f", "f1"):
                total_calls += cc
                reported_time += tt

        if verbose:
            print("'CPU seconds' profiler reported = %s" % (reported_time,))
            print("total # calls = %s" % (total_calls,))
        if total_calls != m + 1:
            raise ValueError("internal error: total calls = %d" % total_calls)

        # reported_time - elapsed_noprofile = overhead the profiler wasn't
        # able to measure.  Divide by twice the number of calls (since there
        # are two profiler events per call in this test) to get the hidden
        # overhead per event.
        mean = (reported_time - elapsed_noprofile) / 2.0 / total_calls
        if verbose:
            print("mean stopwatch overhead per profile event = %s" % (mean,))
        return mean
536 537 538

#****************************************************************************
def Stats(*args):
    """Tell the caller that report generation lives in "pstats".

    Any arguments are accepted and ignored; nothing is returned.
    """
    notice = 'Report generating functions are in the "pstats" module\a'
    print(notice)
540 541 542 543


# When invoked as main program, invoke the profiler on a script
if __name__ == '__main__':
    # Script mode: profile the script named by the first argument.
    if len(sys.argv) < 2:
        print("usage: profile.py scriptfile [arg] ...")
        sys.exit(2)

    # Get script filename
    filename = sys.argv[1]

    # Hide "profile.py" from argument list
    del sys.argv[0]

    # Insert script directory in front of module search path
    script_dir = os.path.dirname(filename)
    sys.path.insert(0, script_dir)

    run('execfile(' + repr(filename) + ')')