#! /usr/bin/env python3
#
# Class for profiling python code. rev 1.0  6/2/94
#
# Written by James Roskind
# Based on prior profile module by Sjoerd Mullender...
#   which was hacked somewhat by: Guido van Rossum

"""Class for profiling Python code."""

# Copyright Disney Enterprises, Inc.  All Rights Reserved.
# Licensed to PSF under a Contributor Agreement
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
# either express or implied.  See the License for the specific language
# governing permissions and limitations under the License.


import sys
import time
import marshal

# Public API of this module: the two convenience functions and the
# profiler class itself.
__all__ = ["run", "runctx", "Profile"]

# Sample timer for use with Profile(timer=...)
#i_count = 0
#def integer_timer():
#       global i_count
#       i_count = i_count + 1
#       return i_count
#itimes = integer_timer # replace with C coded timer returning integers


class _Utils:
    """Support class for utility functions which are shared by
    profile.py and cProfile.py modules.
    Not supposed to be used directly.
    """

    def __init__(self, profiler):
        self.profiler = profiler

    def run(self, statement, filename, sort):
        prof = self.profiler()
        try:
            prof.run(statement)
        except SystemExit:
            pass
        finally:
            self._show(prof, filename, sort)

    def runctx(self, statement, globals, locals, filename, sort):
        prof = self.profiler()
        try:
            prof.runctx(statement, globals, locals)
        except SystemExit:
            pass
        finally:
            self._show(prof, filename, sort)

    def _show(self, prof, filename, sort):
        if filename is not None:
            prof.dump_stats(filename)
        else:
            prof.print_stats(sort)


#**************************************************************************
# The following are module-level convenience functions for the profiler.
# Note that an instance of Profile() is *not* needed to call them.
#**************************************************************************

def run(statement, filename=None, sort=-1):
    """Run statement under profiler optionally saving results in filename

    This function takes a single argument that can be passed to the
    exec() function, and an optional file name.  In all cases this
    routine attempts to exec() its first argument and gather profiling
    statistics from the execution.  If a file name is given, the
    statistics are dumped to that file.  If no file name is present, then
    this function automatically prints a simple profiling report, ordered
    according to `sort` (by default the standard name string,
    file/line/function-name, that is presented in each line).
    """
    return _Utils(Profile).run(statement, filename, sort)


def runctx(statement, globals, locals, filename=None, sort=-1):
    """Run statement under profiler, supplying your own globals and locals,
    optionally saving results in filename.

    statement, filename and sort have the same semantics as in
    profile.run; globals and locals are the namespaces the statement is
    executed in.
    """
    return _Utils(Profile).runctx(statement, globals, locals, filename, sort)

class Profile:
Tim Peters's avatar
Tim Peters committed
103 104 105 106 107 108 109 110
    """Profiler class.

    self.cur is always a tuple.  Each such tuple corresponds to a stack
    frame that is currently active (self.cur[-2]).  The following are the
    definitions of its members.  We use this external "parallel stack" to
    avoid contaminating the program that we are profiling. (old profiler
    used to write into the frames local dictionary!!) Derived classes
    can change the definition of some entries, as long as they leave
111 112 113 114 115 116 117
    [-2:] intact (frame and previous tuple).  In case an internal error is
    detected, the -3 element is used as the function name.

    [ 0] = Time that needs to be charged to the parent frame's function.
           It is used so that a function call will not have to access the
           timing data for the parent frame.
    [ 1] = Total time spent in this frame's function, excluding time in
118
           subfunctions (this latter is tallied in cur[2]).
Tim Peters's avatar
Tim Peters committed
119
    [ 2] = Total time spent in subfunctions, excluding time executing the
120
           frame's function (this latter is tallied in cur[1]).
121
    [-3] = Name of the function that corresponds to this frame.
122 123
    [-2] = Actual frame that we correspond to (used to sync exception handling).
    [-1] = Our parent 6-tuple (corresponds to frame.f_back).
Tim Peters's avatar
Tim Peters committed
124 125

    Timing data for each function is stored as a 5-tuple in the dictionary
126
    self.timings[].  The index is always the name stored in self.cur[-3].
Tim Peters's avatar
Tim Peters committed
127 128 129 130 131 132 133 134 135 136
    The following are the definitions of the members:

    [0] = The number of times this function was called, not counting direct
          or indirect recursion,
    [1] = Number of times this function appears on the stack, minus one
    [2] = Total time spent internal to this function
    [3] = Cumulative time that this function was present on the stack.  In
          non-recursive functions, this is the total execution time from start
          to finish of each invocation of a function, including time spent in
          all subfunctions.
137
    [4] = A dictionary indicating for each function name, the number of times
Tim Peters's avatar
Tim Peters committed
138 139 140
          it was called by us.
    """

141 142 143
    bias = 0  # calibration constant

    def __init__(self, timer=None, bias=None):
Tim Peters's avatar
Tim Peters committed
144 145 146
        self.timings = {}
        self.cur = None
        self.cmd = ""
147
        self.c_func_name = ""
Tim Peters's avatar
Tim Peters committed
148

149 150 151 152
        if bias is None:
            bias = self.bias
        self.bias = bias     # Materialize in local dict for lookup speed.

153
        if not timer:
154 155
            self.timer = self.get_time = time.process_time
            self.dispatcher = self.trace_dispatch_i
Tim Peters's avatar
Tim Peters committed
156 157 158 159
        else:
            self.timer = timer
            t = self.timer() # test out timer function
            try:
160 161 162 163 164 165
                length = len(t)
            except TypeError:
                self.get_time = timer
                self.dispatcher = self.trace_dispatch_i
            else:
                if length == 2:
Tim Peters's avatar
Tim Peters committed
166 167 168
                    self.dispatcher = self.trace_dispatch
                else:
                    self.dispatcher = self.trace_dispatch_l
169 170 171 172 173
                # This get_time() implementation needs to be defined
                # here to capture the passed-in timer in the parameter
                # list (for performance).  Note that we can't assume
                # the timer() result contains two values in all
                # cases.
174 175
                def get_time_timer(timer=timer, sum=sum):
                    return sum(timer())
176
                self.get_time = get_time_timer
Tim Peters's avatar
Tim Peters committed
177 178 179
        self.t = self.get_time()
        self.simulate_call('profiler')

180
    # Heavily optimized dispatch routine for time.process_time() timer
Tim Peters's avatar
Tim Peters committed
181 182

    def trace_dispatch(self, frame, event, arg):
183 184
        timer = self.timer
        t = timer()
185
        t = t[0] + t[1] - self.t - self.bias
Tim Peters's avatar
Tim Peters committed
186

187
        if event == "c_call":
188
            self.c_func_name = arg.__name__
189

190 191
        if self.dispatch[event](self, frame,t):
            t = timer()
Tim Peters's avatar
Tim Peters committed
192 193
            self.t = t[0] + t[1]
        else:
194
            r = timer()
Tim Peters's avatar
Tim Peters committed
195 196
            self.t = r[0] + r[1] - t # put back unrecorded delta

197 198
    # Dispatch routine for best timer program (return = scalar, fastest if
    # an integer but float works too -- and time.clock() relies on that).
Tim Peters's avatar
Tim Peters committed
199 200

    def trace_dispatch_i(self, frame, event, arg):
201
        timer = self.timer
202
        t = timer() - self.t - self.bias
203 204

        if event == "c_call":
205
            self.c_func_name = arg.__name__
206 207

        if self.dispatch[event](self, frame, t):
208
            self.t = timer()
Tim Peters's avatar
Tim Peters committed
209
        else:
210
            self.t = timer() - t  # put back unrecorded delta
Tim Peters's avatar
Tim Peters committed
211

212 213
    # Dispatch routine for macintosh (timer returns time in ticks of
    # 1/60th second)
Tim Peters's avatar
Tim Peters committed
214 215

    def trace_dispatch_mac(self, frame, event, arg):
216
        timer = self.timer
217
        t = timer()/60.0 - self.t - self.bias
218 219

        if event == "c_call":
220
            self.c_func_name = arg.__name__
221

222
        if self.dispatch[event](self, frame, t):
223
            self.t = timer()/60.0
Tim Peters's avatar
Tim Peters committed
224
        else:
225
            self.t = timer()/60.0 - t  # put back unrecorded delta
Tim Peters's avatar
Tim Peters committed
226 227 228 229

    # SLOW generic dispatch routine for timer returning lists of numbers

    def trace_dispatch_l(self, frame, event, arg):
230
        get_time = self.get_time
231
        t = get_time() - self.t - self.bias
Tim Peters's avatar
Tim Peters committed
232

233
        if event == "c_call":
234
            self.c_func_name = arg.__name__
235

236
        if self.dispatch[event](self, frame, t):
237
            self.t = get_time()
Tim Peters's avatar
Tim Peters committed
238
        else:
239
            self.t = get_time() - t # put back unrecorded delta
Tim Peters's avatar
Tim Peters committed
240

241 242 243 244 245 246
    # In the event handlers, the first 3 elements of self.cur are unpacked
    # into vrbls w/ 3-letter names.  The last two characters are meant to be
    # mnemonic:
    #     _pt  self.cur[0] "parent time"   time to be charged to parent frame
    #     _it  self.cur[1] "internal time" time spent directly in the function
    #     _et  self.cur[2] "external time" time spent in subfunctions
Tim Peters's avatar
Tim Peters committed
247 248

    def trace_dispatch_exception(self, frame, t):
249
        rpt, rit, ret, rfn, rframe, rcur = self.cur
250
        if (rframe is not frame) and rcur:
Tim Peters's avatar
Tim Peters committed
251
            return self.trace_dispatch_return(rframe, t)
252
        self.cur = rpt, rit+t, ret, rfn, rframe, rcur
253
        return 1
Tim Peters's avatar
Tim Peters committed
254 255 256


    def trace_dispatch_call(self, frame, t):
257
        if self.cur and frame.f_back is not self.cur[-2]:
258
            rpt, rit, ret, rfn, rframe, rcur = self.cur
259
            if not isinstance(rframe, Profile.fake_frame):
260 261 262
                assert rframe.f_back is frame.f_back, ("Bad call", rfn,
                                                       rframe, rframe.f_back,
                                                       frame, frame.f_back)
263
                self.trace_dispatch_return(rframe, 0)
264 265 266
                assert (self.cur is None or \
                        frame.f_back is self.cur[-2]), ("Bad call",
                                                        self.cur[-3])
Tim Peters's avatar
Tim Peters committed
267 268 269
        fcode = frame.f_code
        fn = (fcode.co_filename, fcode.co_firstlineno, fcode.co_name)
        self.cur = (t, 0, 0, fn, frame, self.cur)
270
        timings = self.timings
271
        if fn in timings:
272 273
            cc, ns, tt, ct, callers = timings[fn]
            timings[fn] = cc, ns + 1, tt, ct, callers
Tim Peters's avatar
Tim Peters committed
274
        else:
275
            timings[fn] = 0, 0, 0, 0, {}
Tim Peters's avatar
Tim Peters committed
276 277
        return 1

278 279 280 281
    def trace_dispatch_c_call (self, frame, t):
        fn = ("", 0, self.c_func_name)
        self.cur = (t, 0, 0, fn, frame, self.cur)
        timings = self.timings
282
        if fn in timings:
283 284 285 286 287 288
            cc, ns, tt, ct, callers = timings[fn]
            timings[fn] = cc, ns+1, tt, ct, callers
        else:
            timings[fn] = 0, 0, 0, 0, {}
        return 1

Tim Peters's avatar
Tim Peters committed
289
    def trace_dispatch_return(self, frame, t):
290
        if frame is not self.cur[-2]:
291 292
            assert frame is self.cur[-2].f_back, ("Bad return", self.cur[-3])
            self.trace_dispatch_return(self.cur[-2], 0)
Tim Peters's avatar
Tim Peters committed
293

294 295
        # Prefix "r" means part of the Returning or exiting frame.
        # Prefix "p" means part of the Previous or Parent or older frame.
Tim Peters's avatar
Tim Peters committed
296

297 298 299
        rpt, rit, ret, rfn, frame, rcur = self.cur
        rit = rit + t
        frame_total = rit + ret
Tim Peters's avatar
Tim Peters committed
300

301 302
        ppt, pit, pet, pfn, pframe, pcur = rcur
        self.cur = ppt, pit + rpt, pet + frame_total, pfn, pframe, pcur
Tim Peters's avatar
Tim Peters committed
303

304 305
        timings = self.timings
        cc, ns, tt, ct, callers = timings[rfn]
Tim Peters's avatar
Tim Peters committed
306
        if not ns:
307 308 309 310 311
            # This is the only occurrence of the function on the stack.
            # Else this is a (directly or indirectly) recursive call, and
            # its cumulative time will get updated when the topmost call to
            # it returns.
            ct = ct + frame_total
Tim Peters's avatar
Tim Peters committed
312
            cc = cc + 1
313

314
        if pfn in callers:
Tim Peters's avatar
Tim Peters committed
315 316 317 318 319 320
            callers[pfn] = callers[pfn] + 1  # hack: gather more
            # stats such as the amount of time added to ct courtesy
            # of this specific call, and the contribution to cc
            # courtesy of this call.
        else:
            callers[pfn] = 1
321 322

        timings[rfn] = cc, ns - 1, tt + rit, ct, callers
Tim Peters's avatar
Tim Peters committed
323 324 325

        return 1

326 327 328 329 330

    dispatch = {
        "call": trace_dispatch_call,
        "exception": trace_dispatch_exception,
        "return": trace_dispatch_return,
331
        "c_call": trace_dispatch_c_call,
332
        "c_exception": trace_dispatch_return,  # the C function returned
333
        "c_return": trace_dispatch_return,
334 335 336
        }


337
    # The next few functions play with self.cmd. By carefully preloading
Tim Peters's avatar
Tim Peters committed
338 339 340 341 342 343
    # our parallel stack, we can force the profiled result to include
    # an arbitrary string as the name of the calling function.
    # We use self.cmd as that string, and the resulting stats look
    # very nice :-).

    def set_cmd(self, cmd):
344
        if self.cur[-1]: return   # already set
Tim Peters's avatar
Tim Peters committed
345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365
        self.cmd = cmd
        self.simulate_call(cmd)

    class fake_code:
        def __init__(self, filename, line, name):
            self.co_filename = filename
            self.co_line = line
            self.co_name = name
            self.co_firstlineno = 0

        def __repr__(self):
            return repr((self.co_filename, self.co_line, self.co_name))

    class fake_frame:
        def __init__(self, code, prior):
            self.f_code = code
            self.f_back = prior

    def simulate_call(self, name):
        code = self.fake_code('profile', 0, name)
        if self.cur:
366
            pframe = self.cur[-2]
Tim Peters's avatar
Tim Peters committed
367 368 369
        else:
            pframe = None
        frame = self.fake_frame(code, pframe)
370
        self.dispatch['call'](self, frame, 0)
Tim Peters's avatar
Tim Peters committed
371 372 373 374 375

    # collect stats from pending stack, including getting final
    # timings for self.cmd frame.

    def simulate_cmd_complete(self):
376 377
        get_time = self.get_time
        t = get_time() - self.t
378
        while self.cur[-1]:
Tim Peters's avatar
Tim Peters committed
379 380
            # We *can* cause assertion errors here if
            # dispatch_trace_return checks for a frame match!
381
            self.dispatch['return'](self, self.cur[-2], t)
Tim Peters's avatar
Tim Peters committed
382
            t = 0
383
        self.t = get_time() - t
Tim Peters's avatar
Tim Peters committed
384 385


386
    def print_stats(self, sort=-1):
Tim Peters's avatar
Tim Peters committed
387
        import pstats
388
        pstats.Stats(self).strip_dirs().sort_stats(sort). \
Tim Peters's avatar
Tim Peters committed
389 390 391
                  print_stats()

    def dump_stats(self, file):
392 393 394
        with open(file, 'wb') as f:
            self.create_stats()
            marshal.dump(self.stats, f)
Tim Peters's avatar
Tim Peters committed
395 396 397 398 399 400 401

    def create_stats(self):
        self.simulate_cmd_complete()
        self.snapshot_stats()

    def snapshot_stats(self):
        self.stats = {}
402
        for func, (cc, ns, tt, ct, callers) in self.timings.items():
Tim Peters's avatar
Tim Peters committed
403 404
            callers = callers.copy()
            nc = 0
405
            for callcnt in callers.values():
406
                nc += callcnt
Tim Peters's avatar
Tim Peters committed
407 408 409 410 411 412 413 414 415 416 417 418 419 420 421
            self.stats[func] = cc, nc, tt, ct, callers


    # The following two methods can be called by clients to use
    # a profiler to profile a statement, given as a string.

    def run(self, cmd):
        import __main__
        dict = __main__.__dict__
        return self.runctx(cmd, dict, dict)

    def runctx(self, cmd, globals, locals):
        self.set_cmd(cmd)
        sys.setprofile(self.dispatcher)
        try:
422
            exec(cmd, globals, locals)
Tim Peters's avatar
Tim Peters committed
423 424 425 426 427
        finally:
            sys.setprofile(None)
        return self

    # This method is more useful to profile a single function call.
428
    def runcall(self, func, *args, **kw):
429
        self.set_cmd(repr(func))
Tim Peters's avatar
Tim Peters committed
430 431
        sys.setprofile(self.dispatcher)
        try:
432
            return func(*args, **kw)
Tim Peters's avatar
Tim Peters committed
433 434 435 436 437 438 439 440 441 442 443
        finally:
            sys.setprofile(None)


    #******************************************************************
    # The following calculates the overhead for using a profiler.  The
    # problem is that it takes a fair amount of time for the profiler
    # to stop the stopwatch (from the time it receives an event).
    # Similarly, there is a delay from the time that the profiler
    # re-starts the stopwatch before the user's code really gets to
    # continue.  The following code tries to measure the difference on
444 445 446
    # a per-event basis.
    #
    # Note that this difference is only significant if there are a lot of
Tim Peters's avatar
Tim Peters committed
447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476
    # events, and relatively little user code per event.  For example,
    # code with small functions will typically benefit from having the
    # profiler calibrated for the current platform.  This *could* be
    # done on the fly during init() time, but it is not worth the
    # effort.  Also note that if too large a value specified, then
    # execution time on some functions will actually appear as a
    # negative number.  It is *normal* for some functions (with very
    # low call counts) to have such negative stats, even if the
    # calibration figure is "correct."
    #
    # One alternative to profile-time calibration adjustments (i.e.,
    # adding in the magic little delta during each event) is to track
    # more carefully the number of events (and cumulatively, the number
    # of events during sub functions) that are seen.  If this were
    # done, then the arithmetic could be done after the fact (i.e., at
    # display time).  Currently, we track only call/return events.
    # These values can be deduced by examining the callees and callers
    # vectors for each functions.  Hence we *can* almost correct the
    # internal time figure at print time (note that we currently don't
    # track exception event processing counts).  Unfortunately, there
    # is currently no similar information for cumulative sub-function
    # time.  It would not be hard to "get all this info" at profiler
    # time.  Specifically, we would have to extend the tuples to keep
    # counts of this in each frame, and then extend the defs of timing
    # tuples to include the significant two figures. I'm a bit fearful
    # that this additional feature will slow the heavily optimized
    # event/time ratio (i.e., the profiler would run slower, fur a very
    # low "value added" feature.)
    #**************************************************************

477
    def calibrate(self, m, verbose=0):
478 479 480 481 482 483
        if self.__class__ is not Profile:
            raise TypeError("Subclasses must override .calibrate().")

        saved_bias = self.bias
        self.bias = 0
        try:
484
            return self._calibrate_inner(m, verbose)
485 486 487
        finally:
            self.bias = saved_bias

488
    def _calibrate_inner(self, m, verbose):
489
        get_time = self.get_time
Tim Peters's avatar
Tim Peters committed
490

491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512
        # Set up a test case to be run with and without profiling.  Include
        # lots of calls, because we're trying to quantify stopwatch overhead.
        # Do not raise any exceptions, though, because we want to know
        # exactly how many profile events are generated (one call event, +
        # one return event, per Python-level call).

        def f1(n):
            for i in range(n):
                x = 1

        def f(m, f1=f1):
            for i in range(m):
                f1(100)

        f(m)    # warm up the cache

        # elapsed_noprofile <- time f(m) takes without profiling.
        t0 = get_time()
        f(m)
        t1 = get_time()
        elapsed_noprofile = t1 - t0
        if verbose:
513
            print("elapsed time without profiling =", elapsed_noprofile)
514 515 516 517 518 519 520 521 522 523

        # elapsed_profile <- time f(m) takes with profiling.  The difference
        # is profiling overhead, only some of which the profiler subtracts
        # out on its own.
        p = Profile()
        t0 = get_time()
        p.runctx('f(m)', globals(), locals())
        t1 = get_time()
        elapsed_profile = t1 - t0
        if verbose:
524
            print("elapsed time with profiling =", elapsed_profile)
525 526 527 528 529 530 531 532 533 534 535

        # reported_time <- "CPU seconds" the profiler charged to f and f1.
        total_calls = 0.0
        reported_time = 0.0
        for (filename, line, funcname), (cc, ns, tt, ct, callers) in \
                p.timings.items():
            if funcname in ("f", "f1"):
                total_calls += cc
                reported_time += tt

        if verbose:
536 537
            print("'CPU seconds' profiler reported =", reported_time)
            print("total # calls =", total_calls)
538 539 540 541 542 543 544 545 546
        if total_calls != m + 1:
            raise ValueError("internal error: total calls = %d" % total_calls)

        # reported_time - elapsed_noprofile = overhead the profiler wasn't
        # able to measure.  Divide by twice the number of calls (since there
        # are two profiler events per call in this test) to get the hidden
        # overhead per event.
        mean = (reported_time - elapsed_noprofile) / 2.0 / total_calls
        if verbose:
547
            print("mean stopwatch overhead per profile event =", mean)
548
        return mean

#****************************************************************************

def main():
    """Command-line entry point: profile a script.

    Parses ``[-o output_file_path] [-s sort] scriptfile [arg] ...`` from
    sys.argv.  With no arguments at all, prints the usage line and exits
    with status 2.  Otherwise sys.argv is replaced by the script and its
    arguments, the script is compiled and run under the profiler as
    ``__main__``, and the statistics are either saved to the -o file or
    printed ordered by the -s value.  If only options were given (no
    script), the usage line is printed.

    Returns the OptionParser instance.
    """
    import os
    from optparse import OptionParser

    usage = "profile.py [-o output_file_path] [-s sort] scriptfile [arg] ..."
    parser = OptionParser(usage=usage)
    # Stop option parsing at the script name so that the script's own
    # options are passed through to it untouched.
    parser.allow_interspersed_args = False
    parser.add_option('-o', '--outfile', dest="outfile",
        help="Save stats to <outfile>", default=None)
    parser.add_option('-s', '--sort', dest="sort",
        help="Sort order when printing to stdout, based on pstats.Stats class",
        default=-1)

    if not sys.argv[1:]:
        parser.print_usage()
        sys.exit(2)

    (options, args) = parser.parse_args()
    # The profiled script should see itself as argv[0].
    sys.argv[:] = args

    if args:
        progname = args[0]
        # Let the script import modules that live next to it.
        sys.path.insert(0, os.path.dirname(progname))
        with open(progname, 'rb') as fp:
            code = compile(fp.read(), progname, 'exec')
        globs = {
            '__file__': progname,
            '__name__': '__main__',
            '__package__': None,
            '__cached__': None,
        }
        runctx(code, globs, None, options.outfile, options.sort)
    else:
        parser.print_usage()
    return parser

# When invoked as the main program (rather than imported), run the
# command-line driver, which profiles the script named on the command line.
if __name__ == '__main__':
    main()