dis.py 16.9 KB
Newer Older
1
"""Disassembler of Python byte code into mnemonics."""
Guido van Rossum's avatar
Guido van Rossum committed
2 3

import sys
4
import types
5
import collections
6
import io
Guido van Rossum's avatar
Guido van Rossum committed
7

8 9 10
from opcode import *
from opcode import __all__ as _opcodes_all

11
# Public API: the disassembler entry points defined in this module plus
# every name re-exported from the opcode module.
__all__ = ["code_info", "dis", "disassemble", "distb", "disco",
           "findlinestarts", "findlabels", "show_code",
           "get_instructions", "Instruction", "Bytecode"] + _opcodes_all
# The alias was only needed to build __all__; drop it from the namespace.
del _opcodes_all
15

Benjamin Peterson's avatar
Benjamin Peterson committed
16 17
# Types that dis() is willing to recurse into when it walks the attributes
# of an object that has a __dict__ (a class or module).
_have_code = (types.MethodType, types.FunctionType, types.CodeType, type)

18 19 20 21 22 23 24 25 26 27 28 29 30
def _try_compile(source, name):
    """Attempts to compile the given source, first as an expression and
       then as a statement if the first approach fails.

       Utility function to accept strings in functions that otherwise
       expect code objects
    """
    try:
        c = compile(source, name, 'eval')
    except SyntaxError:
        c = compile(source, name, 'exec')
    return c

31
def dis(x=None, *, file=None):
    """Disassemble classes, methods, functions, generators, or code.

    With no argument, disassemble the last traceback.

    *x* may also be a module, a raw bytecode sequence (bytes or bytearray)
    or a string of source code.  *file* is passed through to print().

    Raises TypeError if *x* is not one of the supported kinds of object.
    """
    if x is None:
        distb(file=file)
        return
    # Unwrap step by step until we reach the underlying code object.
    if hasattr(x, '__func__'):  # Method
        x = x.__func__
    if hasattr(x, '__code__'):  # Function
        x = x.__code__
    if hasattr(x, 'gi_code'):  # Generator
        x = x.gi_code
    if hasattr(x, '__dict__'):  # Class or module
        # Sort by attribute name so the output order is deterministic.
        items = sorted(x.__dict__.items())
        for name, x1 in items:
            if isinstance(x1, _have_code):
                print("Disassembly of %s:" % name, file=file)
                try:
                    dis(x1, file=file)
                except TypeError as msg:
                    # Report the member that could not be disassembled and
                    # carry on with the remaining ones.
                    print("Sorry:", msg, file=file)
                print(file=file)
    elif hasattr(x, 'co_code'): # Code object
        disassemble(x, file=file)
    elif isinstance(x, (bytes, bytearray)): # Raw bytecode
        _disassemble_bytes(x, file=file)
    elif isinstance(x, str):    # Source code
        _disassemble_str(x, file=file)
    else:
        raise TypeError("don't know how to disassemble %s objects" %
                        type(x).__name__)
Guido van Rossum's avatar
Guido van Rossum committed
65

66
def distb(tb=None, *, file=None):
    """Disassemble a traceback (default: last traceback).

    When *tb* is omitted, sys.last_traceback is used and followed to its
    innermost entry.  The instruction at the traceback's tb_lasti offset is
    passed to disassemble() as the current instruction.  *file* is passed
    through to disassemble().

    Raises RuntimeError if *tb* is omitted and sys.last_traceback is not set.
    """
    if tb is None:
        try:
            tb = sys.last_traceback
        except AttributeError:
            # The AttributeError is an implementation detail; do not chain
            # it onto the error reported to the caller.
            raise RuntimeError("no last traceback to disassemble") from None
        while tb.tb_next: tb = tb.tb_next
    disassemble(tb.tb_frame.f_code, tb.tb_lasti, file=file)
Guido van Rossum's avatar
Guido van Rossum committed
75

76 77 78 79
# The inspect module interrogates this dictionary to build its
# list of CO_* constants. It is also used by pretty_flags to
# turn the co_flags field into a human readable list.
# Keys are single-bit flag values; values are the display names.
COMPILER_FLAG_NAMES = {
     1: "OPTIMIZED",
     2: "NEWLOCALS",
     4: "VARARGS",
     8: "VARKEYWORDS",
    16: "NESTED",
    32: "GENERATOR",
    64: "NOFREE",
}

def pretty_flags(flags):
    """Return pretty representation of code flags.

    Each set bit among the 32 lowest is rendered with its name from
    COMPILER_FLAG_NAMES, or as a hex literal when it has no name.  The
    pieces are joined with ", ".

    Whatever is left once those 32 bits have been examined (which includes
    the case where *flags* is 0 to begin with) is appended as one hex
    literal.
    """
    names = []
    for i in range(32):
        flag = 1<<i
        if flags & flag:
            names.append(COMPILER_FLAG_NAMES.get(flag, hex(flag)))
            # Clear the bit just reported; stop once nothing is left.
            flags ^= flag
            if not flags:
                break
    else:
        # The loop never hit 'break', so flags was never reduced to zero
        # by it: report the remainder verbatim.
        names.append(hex(flags))
    return ", ".join(names)

103
def _get_code_object(x):
104
    """Helper to handle methods, functions, generators, strings and raw code objects"""
105 106 107 108
    if hasattr(x, '__func__'): # Method
        x = x.__func__
    if hasattr(x, '__code__'): # Function
        x = x.__code__
109 110
    if hasattr(x, 'gi_code'):  # Generator
        x = x.gi_code
111
    if isinstance(x, str):     # Source code
112
        x = _try_compile(x, "<disassembly>")
113
    if hasattr(x, 'co_code'):  # Code object
114 115 116 117 118 119 120
        return x
    raise TypeError("don't know how to disassemble %s objects" %
                    type(x).__name__)

def code_info(x):
    """Return a formatted, multi-line summary of the code behind *x*.

    *x* is resolved to a code object by _get_code_object().
    """
    code = _get_code_object(x)
    return _format_code_info(code)
121 122 123 124 125 126 127 128 129 130

def _format_code_info(co):
    """Return a multi-line, human readable description of code object *co*.

    The fixed header fields come first.  They are followed by one indexed
    listing for each of the constants, names, variable names, free
    variables and cell variables; a listing is omitted when it is empty.
    """
    lines = [
        "Name:              %s" % co.co_name,
        "Filename:          %s" % co.co_filename,
        "Argument count:    %s" % co.co_argcount,
        "Kw-only arguments: %s" % co.co_kwonlyargcount,
        "Number of locals:  %s" % co.co_nlocals,
        "Stack size:        %s" % co.co_stacksize,
        "Flags:             %s" % pretty_flags(co.co_flags),
    ]
    # (heading, values, per-entry format).  Constants are shown with
    # repr(); everything else is already a name and is shown as-is.
    sections = (
        ("Constants:", co.co_consts, "%4d: %r"),
        ("Names:", co.co_names, "%4d: %s"),
        ("Variable names:", co.co_varnames, "%4d: %s"),
        ("Free variables:", co.co_freevars, "%4d: %s"),
        ("Cell variables:", co.co_cellvars, "%4d: %s"),
    )
    for heading, values, entry_fmt in sections:
        if values:
            lines.append(heading)
            lines.extend(entry_fmt % pair for pair in enumerate(values))
    return "\n".join(lines)

153
def show_code(co, *, file=None):
    """Print details of methods, functions, or code to *file*.

    If *file* is not provided, the output is printed on stdout.
    """
    print(code_info(co), file=file)
159

160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210
_Instruction = collections.namedtuple("_Instruction",
     "opname opcode arg argval argrepr offset starts_line is_jump_target")

class Instruction(_Instruction):
    """Details for a bytecode operation

       Defined fields:
         opname - human readable name for operation
         opcode - numeric code for operation
         arg - numeric argument to operation (if any), otherwise None
         argval - resolved arg value (if known), otherwise same as arg
         argrepr - human readable description of operation argument
         offset - start index of operation within bytecode sequence
         starts_line - line started by this opcode (if any), otherwise None
         is_jump_target - True if other code jumps to here, otherwise False
    """

    def _disassemble(self, lineno_width=3, mark_as_current=False):
        """Format instruction details for inclusion in disassembly output

        *lineno_width* sets the width of the line number field (0 omits it)
        *mark_as_current* inserts a '-->' marker arrow as part of the line
        """
        fields = []
        # Column: Source code line number
        if lineno_width:
            if self.starts_line is not None:
                lineno_fmt = "%%%dd" % lineno_width
                fields.append(lineno_fmt % self.starts_line)
            else:
                fields.append(' ' * lineno_width)
        # Column: Current instruction indicator
        if mark_as_current:
            fields.append('-->')
        else:
            fields.append('   ')
        # Column: Jump target marker
        if self.is_jump_target:
            fields.append('>>')
        else:
            fields.append('  ')
        # Column: Instruction offset from start of code sequence
        fields.append(repr(self.offset).rjust(4))
        # Column: Opcode name
        fields.append(self.opname.ljust(20))
        # Column: Opcode argument
        if self.arg is not None:
            fields.append(repr(self.arg).rjust(5))
            # Column: Opcode argument details
            if self.argrepr:
                fields.append('(' + self.argrepr + ')')
211
        return ' '.join(fields).rstrip()
212 213


214
def get_instructions(x, *, first_line=None):
    """Iterator for the opcodes in methods, functions or code

    Generates a series of Instruction named tuples giving the details of
    each operation in the supplied code.

    If *first_line* is not None, it indicates the line number that should
    be reported for the first source line in the disassembled code.
    Otherwise, the source line information (if any) is taken directly from
    the disassembled code object.
    """
    co = _get_code_object(x)
    cell_names = co.co_cellvars + co.co_freevars
    linestarts = dict(findlinestarts(co))
    if first_line is not None:
        # Shift every reported line so the code appears to start at
        # *first_line* instead of co.co_firstlineno.
        line_offset = first_line - co.co_firstlineno
    else:
        line_offset = 0
    return _get_instructions_bytes(co.co_code, co.co_varnames, co.co_names,
                                   co.co_consts, cell_names, linestarts,
                                   line_offset)

def _get_const_info(const_index, const_list):
    """Helper to get optional details about const references

       Returns the dereferenced constant and its repr if the constant
       list is defined.
       Otherwise returns the constant index and its repr().
    """
    argval = const_index
    if const_list is not None:
        argval = const_list[const_index]
    return argval, repr(argval)

def _get_name_info(name_index, name_list):
    """Helper to get optional details about named references

       Returns the dereferenced name as both value and repr if the name
       list is defined.
       Otherwise returns the name index and its repr().
    """
    argval = name_index
    if name_list is not None:
        argval = name_list[name_index]
        argrepr = argval
    else:
        argrepr = repr(argval)
    return argval, argrepr


def _get_instructions_bytes(code, varnames=None, names=None, constants=None,
                      cells=None, linestarts=None, line_offset=0):
    """Iterate over the instructions in a bytecode string.

    Generates a sequence of Instruction namedtuples giving the details of each
    opcode.  Additional information about the code's runtime environment
    (e.g. variable names, constants) can be specified using optional
    arguments.

    *linestarts*, when given, maps instruction offsets to source line
    numbers; *line_offset* is added to every line number taken from it.
    """
    # A set keeps the per-instruction jump-target test O(1); findlabels()
    # itself returns a list.
    labels = set(findlabels(code))
    extended_arg = 0
    starts_line = None
    # enumerate() is not an option, since we sometimes process
    # multiple elements on a single pass through the loop
    n = len(code)
    i = 0
    while i < n:
        op = code[i]
        offset = i
        if linestarts is not None:
            starts_line = linestarts.get(i, None)
            if starts_line is not None:
                starts_line += line_offset
        is_jump_target = i in labels
        i = i+1
        arg = None
        argval = None
        argrepr = ''
        if op >= HAVE_ARGUMENT:
            # Two-byte little-endian argument, plus whatever a preceding
            # EXTENDED_ARG contributed as the high-order bits.
            arg = code[i] + code[i+1]*256 + extended_arg
            extended_arg = 0
            i = i+2
            if op == EXTENDED_ARG:
                extended_arg = arg*65536
            #  Set argval to the dereferenced value of the argument when
            #  available, and argrepr to the string representation of argval.
            #    _disassemble_bytes needs the string repr of the
            #    raw name index for LOAD_GLOBAL, LOAD_CONST, etc.
            argval = arg
            if op in hasconst:
                argval, argrepr = _get_const_info(arg, constants)
            elif op in hasname:
                argval, argrepr = _get_name_info(arg, names)
            elif op in hasjrel:
                # Relative jumps are measured from the next instruction,
                # which is where i points by now.
                argval = i + arg
                argrepr = "to " + repr(argval)
            elif op in haslocal:
                argval, argrepr = _get_name_info(arg, varnames)
            elif op in hascompare:
                argval = cmp_op[arg]
                argrepr = argval
            elif op in hasfree:
                argval, argrepr = _get_name_info(arg, cells)
            elif op in hasnargs:
                # The two argument bytes just consumed are shown
                # separately as the positional and keyword-pair counts.
                argrepr = "%d positional, %d keyword pair" % (code[i-2], code[i-1])
        yield Instruction(opname[op], op,
                          arg, argval, argrepr,
                          offset, starts_line, is_jump_target)

def disassemble(co, lasti=-1, *, file=None):
    """Disassemble the code object *co*, writing the listing to *file*.

    *lasti* is the offset of the instruction to flag as the current one;
    it is forwarded unchanged to _disassemble_bytes().
    """
    closure_names = co.co_cellvars + co.co_freevars
    line_map = dict(findlinestarts(co))
    _disassemble_bytes(
        co.co_code,
        lasti,
        co.co_varnames,
        co.co_names,
        co.co_consts,
        closure_names,
        line_map,
        file=file,
    )
Tim Peters's avatar
Tim Peters committed
331

332
def _disassemble_bytes(code, lasti=-1, varnames=None, names=None,
                       constants=None, cells=None, linestarts=None,
                       *, file=None, line_offset=0):
    """Print a disassembly listing of the raw bytecode *code* to *file*.

    *lasti* is the offset of the instruction to mark as current.  The
    remaining arguments are forwarded to _get_instructions_bytes().  A
    blank line is printed before every instruction that starts a new
    source line, except the one at offset 0.
    """
    # Omit the line number column entirely if we have no line number info
    show_lineno = linestarts is not None
    # TODO?: Adjust width upwards if max(linestarts.values()) >= 1000?
    lineno_width = 3 if show_lineno else 0
    for instr in _get_instructions_bytes(code, varnames, names,
                                         constants, cells, linestarts,
                                         line_offset=line_offset):
        new_source_line = (show_lineno and
                           instr.starts_line is not None and
                           instr.offset > 0)
        if new_source_line:
            print(file=file)
        is_current_instr = instr.offset == lasti
        print(instr._disassemble(lineno_width, is_current_instr), file=file)
349

350
def _disassemble_str(source, *, file=None):
    """Compile the source string, then disassemble the code object.

    The source is compiled by _try_compile() under the file name '<dis>'.
    """
    disassemble(_try_compile(source, '<dis>'), file=file)
353

Tim Peters's avatar
Tim Peters committed
354
# disco is a second name bound to the same function object as disassemble.
disco = disassemble                     # XXX For backwards compatibility
355

Guido van Rossum's avatar
Guido van Rossum committed
356
def findlabels(code):
    """Detect all offsets in a byte code which are jump targets.

    Return the list of offsets, each listed once, in the order in which
    they are first encountered.

    EXTENDED_ARG prefixes are folded into the argument of the instruction
    that follows them, in the same way as _get_instructions_bytes() does,
    so that jump targets needing more than 16 bits are reported correctly.
    """
    labels = []
    seen = set()        # same contents as labels, for O(1) duplicate checks
    extended_arg = 0
    # enumerate() is not an option, since we sometimes process
    # multiple elements on a single pass through the loop
    n = len(code)
    i = 0
    while i < n:
        op = code[i]
        i = i+1
        if op >= HAVE_ARGUMENT:
            arg = code[i] + code[i+1]*256 + extended_arg
            extended_arg = 0
            i = i+2
            if op == EXTENDED_ARG:
                # Supplies the high-order bits of the next instruction's arg.
                extended_arg = arg*65536
            label = -1
            if op in hasjrel:
                # Relative to the instruction following this one.
                label = i+arg
            elif op in hasjabs:
                label = arg
            if label >= 0 and label not in seen:
                seen.add(label)
                labels.append(label)
    return labels
Guido van Rossum's avatar
Guido van Rossum committed
382

383 384 385 386 387 388
def findlinestarts(code):
    """Find the offsets in a byte code which are start of lines in the source.

    Generate pairs (offset, lineno) as described in Python/compile.c.

    *code* must provide co_lnotab and co_firstlineno.  co_lnotab is read
    as alternating (bytecode increment, line increment) entries.
    """
    lnotab = code.co_lnotab
    byte_increments = lnotab[0::2]
    line_increments = lnotab[1::2]

    lastlineno = None
    lineno = code.co_firstlineno
    addr = 0
    for byte_incr, line_incr in zip(byte_increments, line_increments):
        if byte_incr:
            # The address is about to move on: report the line reached so
            # far for the current address, unless it was already reported.
            if lineno != lastlineno:
                yield (addr, lineno)
                lastlineno = lineno
            addr += byte_incr
        lineno += line_incr
    if lineno != lastlineno:
        yield (addr, lineno)
404

405 406 407 408 409 410 411 412
class Bytecode:
    """The bytecode operations of a piece of code

    Instantiate this with a function, method, string of code, or a code object
    (as returned by compile()).

    Iterating over this yields the bytecode operations as Instruction instances.
    """
    def __init__(self, x, *, first_line=None, current_offset=None):
        """Wrap the code object underlying *x*.

        *first_line*, if given, is the line number to report for the first
        source line; otherwise the code object's own co_firstlineno is used.
        *current_offset* is the offset of the instruction that dis() marks
        as the current one (None for no marker).
        """
        self.codeobj = co = _get_code_object(x)
        if first_line is None:
            self.first_line = co.co_firstlineno
            self._line_offset = 0
        else:
            self.first_line = first_line
            self._line_offset = first_line - co.co_firstlineno
        self._cell_names = co.co_cellvars + co.co_freevars
        self._linestarts = dict(findlinestarts(co))
        # Kept only so that __repr__ can show what the caller passed in.
        self._original_object = x
        self.current_offset = current_offset

    def __iter__(self):
        """Return an iterator over the Instruction instances of the code."""
        co = self.codeobj
        return _get_instructions_bytes(co.co_code, co.co_varnames, co.co_names,
                                       co.co_consts, self._cell_names,
                                       self._linestarts,
                                       line_offset=self._line_offset)

    def __repr__(self):
        """Return 'ClassName(<repr of the object given to the constructor>)'."""
        return "{}({!r})".format(self.__class__.__name__,
                                 self._original_object)

    @classmethod
    def from_traceback(cls, tb):
        """ Construct a Bytecode from the given traceback """
        # Walk to the innermost traceback entry.
        while tb.tb_next:
            tb = tb.tb_next
        return cls(tb.tb_frame.f_code, current_offset=tb.tb_lasti)

    def info(self):
        """Return formatted information about the code object."""
        return _format_code_info(self.codeobj)

    def dis(self):
        """Return a formatted view of the bytecode operations."""
        co = self.codeobj
        if self.current_offset is not None:
            offset = self.current_offset
        else:
            # -1 is the default 'lasti' of _disassemble_bytes().
            offset = -1
        # Capture what _disassemble_bytes() prints and return it as a string.
        with io.StringIO() as output:
            _disassemble_bytes(co.co_code, varnames=co.co_varnames,
                               names=co.co_names, constants=co.co_consts,
                               cells=self._cell_names,
                               linestarts=self._linestarts,
                               line_offset=self._line_offset,
                               file=output,
                               lasti=offset)
            return output.getvalue()
464 465


466
def _test():
    """Simple test program to disassemble a file.

    Takes one optional command-line argument, the file to read; the
    argument defaults to '-'.  The contents are compiled in 'exec' mode
    and the resulting code object is passed to dis().
    """
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('infile', type=argparse.FileType(), nargs='?', default='-')
    args = parser.parse_args()
    with args.infile as infile:
        source = infile.read()
    # The file object's name is used as the filename of the compiled code.
    code = compile(source, args.infile.name, "exec")
    dis(code)
477 478

# Run the small command-line test driver when executed as a script.
if __name__ == "__main__":
    _test()