Kaydet (Commit) bf0dfb3d authored tarafından Martin v. Löwis's avatar Martin v. Löwis

Issue #8032: For gdb7, a python-gdb.py file is added to the build,

allowing to use advanced gdb features when debugging Python.
üst a01da93d
......@@ -14,6 +14,7 @@ platform
pyconfig.h
libpython*.a
python.exe
python-gdb.py
reflog.txt
tags
TAGS
......
......@@ -30,6 +30,7 @@ Modules/Setup.local
Modules/config.c
Parser/pgen
core
python-gdb.py
syntax: glob
libpython*.a
......
# Verify that gdb can pretty-print the various PyObject* types
#
# The code for testing gdb was adapted from similar work in Unladen Swallow's
# Lib/test/test_jit_gdb.py
import os
import re
import subprocess
import sys
import unittest
from test.test_support import run_unittest
# Module-level gate: every check below skips the whole test module (via
# unittest.SkipTest) when a usable gdb is not available.
try:
    gdb_version, _ = subprocess.Popen(["gdb", "--version"],
                                      stdout=subprocess.PIPE).communicate()
except OSError:
    # This is what "no gdb" looks like.  There may, however, be other
    # errors that manifest this way too.
    raise unittest.SkipTest("Couldn't find gdb on the path")
gdb_version_number = re.search(r"^GNU gdb [^\d]*(\d+)\.", gdb_version)
if gdb_version_number is None:
    # An unrecognised version banner must skip the tests rather than crash
    # the import with an AttributeError on None.
    raise unittest.SkipTest("Unable to parse gdb version. Saw:\n" + gdb_version)
if int(gdb_version_number.group(1)) < 7:
    raise unittest.SkipTest("gdb versions before 7.0 didn't support python embedding"
                            " Saw:\n" + gdb_version)

# Verify that "gdb" was built with the embedded python support enabled:
cmd = "--eval-command=python import sys; print sys.version_info"
# stderr is captured as well so that gdb's complaint about a missing python
# command does not leak into the test run's own output.
p = subprocess.Popen(["gdb", "--batch", cmd],
                     stdout=subprocess.PIPE, stderr=subprocess.PIPE)
gdbpy_version, _ = p.communicate()
if gdbpy_version == '':
    raise unittest.SkipTest("gdb not built with embedded python support")
class DebuggerTests(unittest.TestCase):
    """Test that the debugger can debug Python.

    Base class providing helpers that launch gdb on a python process and
    scrape its output; the concrete test cases subclass it.
    """

    def run_gdb(self, *args):
        """Run the command line given by *args.

        Returns a (stdout, stderr) tuple holding the captured output.
        """
        out, err = subprocess.Popen(
            args, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
            ).communicate()
        return out, err

    def get_stack_trace(self, source=None, script=None,
                        breakpoint='PyObject_Print',
                        cmds_after_breakpoint=None,
                        import_site=False):
        '''
        Run 'python -c SOURCE' (or 'python SCRIPT') under gdb with a
        breakpoint.

        Support injecting commands after the breakpoint is reached.

        source: python source to run with "-c"; takes precedence over script
        script: path of a python script to run when no source is given
        breakpoint: name of the function to break on
        cmds_after_breakpoint: if provided, a list of strings: gdb commands
            to run once the breakpoint is hit (default: a single "backtrace")
        import_site: when false, python is started with -S

        Returns the stdout from gdb.  Fails the test if gdb wrote anything
        unexpected to stderr.
        '''
        # We use "set breakpoint pending yes" to avoid blocking with a:
        #   Function "foo" not defined.
        #   Make breakpoint pending on future shared library load? (y or [n])
        # error, which typically happens when python is dynamically linked
        # (the breakpoints of interest are to be found in the shared library).
        # When this happens, we still get:
        #   Function "PyObject_Print" not defined.
        # emitted to stderr each time, alas.

        # Initially I had "--eval-command=continue" here, but removed it to
        # avoid repeated print breakpoints when traversing hierarchical data
        # structures

        # Generate a list of commands in gdb's language:
        commands = ['set breakpoint pending yes',
                    'break %s' % breakpoint,
                    'run']
        if cmds_after_breakpoint:
            commands += cmds_after_breakpoint
        else:
            commands += ['backtrace']

        # Use "commands" to generate the arguments with which to invoke "gdb":
        args = ["gdb", "--batch"]
        args += ['--eval-command=%s' % cmd for cmd in commands]
        args += ["--args",
                 sys.executable]

        if not import_site:
            # -S suppresses the default 'import site'
            args += ["-S"]

        if source:
            args += ["-c", source]
        elif script:
            args += [script]

        # Use "args" to invoke gdb, capturing stdout, stderr:
        out, err = self.run_gdb(*args)

        # Ignore some noise on stderr due to the pending breakpoint:
        err = err.replace('Function "%s" not defined.\n' % breakpoint, '')

        # Ensure no unexpected error messages:
        self.assertEqual(err, '')

        return out

    def get_gdb_repr(self, source,
                     cmds_after_breakpoint=None,
                     import_site=False):
        """Return (repr_of_op, full_gdb_output) for 'python -c SOURCE'.

        Given an input python source representation of data, run it under
        gdb with a breakpoint on PyObject_Print and scrape out gdb's
        representation of the "op" parameter from frame #0.

        For a nested structure, the first time we hit the breakpoint will
        give us the top-level structure.

        Fails the test (with the gdb output in the message) when frame #0 of
        PyObject_Print cannot be found in the output.
        """
        gdb_output = self.get_stack_trace(source, breakpoint='PyObject_Print',
                                          cmds_after_breakpoint=cmds_after_breakpoint,
                                          import_site=import_site)
        # gdb pads the frame number ("#0  PyObject_Print (op=...") so the
        # amount of whitespace is matched loosely rather than literally.
        m = re.match(r'.*#0\s+PyObject_Print\s+\(\s*op=\s*(.*?),\s+fp=.*\).*',
                     gdb_output, re.DOTALL)
        if m is None:
            # Report what gdb actually printed instead of dying with an
            # AttributeError on None.
            self.fail('Unexpected gdb output: %r' % gdb_output)
        return m.group(1), gdb_output

    def assertEndsWith(self, actual, exp_end):
        '''Ensure that the given "actual" string ends with "exp_end"'''
        self.assertTrue(actual.endswith(exp_end),
                        msg='%r did not end with %r' % (actual, exp_end))

    def assertMultilineMatches(self, actual, pattern):
        '''Ensure that "actual" matches the regex "pattern" from its start,
        with "." also matching newlines (re.DOTALL)'''
        m = re.match(pattern, actual, re.DOTALL)
        self.assertTrue(m,
                        msg='%r did not match %r' % (actual, pattern))
class PrettyPrintTests(DebuggerTests):
def test_getting_backtrace(self):
gdb_output = self.get_stack_trace('print 42')
self.assertTrue('PyObject_Print' in gdb_output)
def assertGdbRepr(self, val, cmds_after_breakpoint=None):
# Ensure that gdb's rendering of the value in a debugged process
# matches repr(value) in this process:
gdb_repr, gdb_output = self.get_gdb_repr('print ' + repr(val),
cmds_after_breakpoint)
self.assertEquals(gdb_repr, repr(val), gdb_output)
def test_int(self):
'Verify the pretty-printing of various "int" values'
self.assertGdbRepr(42)
self.assertGdbRepr(0)
self.assertGdbRepr(-7)
self.assertGdbRepr(sys.maxint)
self.assertGdbRepr(-sys.maxint)
def test_long(self):
'Verify the pretty-printing of various "long" values'
self.assertGdbRepr(0L)
self.assertGdbRepr(1000000000000L)
self.assertGdbRepr(-1L)
self.assertGdbRepr(-1000000000000000L)
def test_singletons(self):
'Verify the pretty-printing of True, False and None'
self.assertGdbRepr(True)
self.assertGdbRepr(False)
self.assertGdbRepr(None)
def test_dicts(self):
'Verify the pretty-printing of dictionaries'
self.assertGdbRepr({})
self.assertGdbRepr({'foo': 'bar'})
self.assertGdbRepr({'foo': 'bar', 'douglas':42})
def test_lists(self):
'Verify the pretty-printing of lists'
self.assertGdbRepr([])
self.assertGdbRepr(range(5))
def test_strings(self):
'Verify the pretty-printing of strings'
self.assertGdbRepr('')
self.assertGdbRepr('And now for something hopefully the same')
self.assertGdbRepr('string with embedded NUL here \0 and then some more text')
self.assertGdbRepr('this is byte 255:\xff and byte 128:\x80')
def test_tuples(self):
'Verify the pretty-printing of tuples'
self.assertGdbRepr(tuple())
self.assertGdbRepr((1,))
self.assertGdbRepr(('foo', 'bar', 'baz'))
def test_unicode(self):
'Verify the pretty-printing of unicode values'
# Test the empty unicode string:
self.assertGdbRepr(u'')
self.assertGdbRepr(u'hello world')
# Test printing a single character:
# U+2620 SKULL AND CROSSBONES
self.assertGdbRepr(u'\u2620')
# Test printing a Japanese unicode string
# (I believe this reads "mojibake", using 3 characters from the CJK
# Unified Ideographs area, followed by U+3051 HIRAGANA LETTER KE)
self.assertGdbRepr(u'\u6587\u5b57\u5316\u3051')
# Test a character outside the BMP:
# U+1D121 MUSICAL SYMBOL C CLEF
# This is:
# UTF-8: 0xF0 0x9D 0x84 0xA1
# UTF-16: 0xD834 0xDD21
try:
# This will only work on wide-unicode builds:
self.assertGdbRepr(unichr(0x1D121))
except ValueError, e:
if e.message != 'unichr() arg not in range(0x10000) (narrow Python build)':
raise e
def test_sets(self):
'Verify the pretty-printing of sets'
self.assertGdbRepr(set())
self.assertGdbRepr(set(['a', 'b']))
self.assertGdbRepr(set([4, 5, 6]))
# Ensure that we handled sets containing the "dummy" key value,
# which happens on deletion:
gdb_repr, gdb_output = self.get_gdb_repr('''s = set(['a','b'])
s.pop()
print s''')
self.assertEquals(gdb_repr, "set(['b'])")
def test_frozensets(self):
'Verify the pretty-printing of frozensets'
self.assertGdbRepr(frozenset())
self.assertGdbRepr(frozenset(['a', 'b']))
self.assertGdbRepr(frozenset([4, 5, 6]))
def test_exceptions(self):
# Test a RuntimeError
gdb_repr, gdb_output = self.get_gdb_repr('''
try:
raise RuntimeError("I am an error")
except RuntimeError, e:
print e
''')
self.assertEquals(gdb_repr,
"exceptions.RuntimeError('I am an error',)")
# Test division by zero:
gdb_repr, gdb_output = self.get_gdb_repr('''
try:
a = 1 / 0
except ZeroDivisionError, e:
print e
''')
self.assertEquals(gdb_repr,
"exceptions.ZeroDivisionError('integer division or modulo by zero',)")
def test_classic_class(self):
'Verify the pretty-printing of classic class instances'
gdb_repr, gdb_output = self.get_gdb_repr('''
class Foo:
pass
foo = Foo()
foo.an_int = 42
print foo''')
m = re.match(r'<Foo\(an_int=42\) at remote 0x[0-9a-f]+>', gdb_repr)
self.assertTrue(m,
msg='Unexpected classic-class rendering %r' % gdb_repr)
def test_modern_class(self):
'Verify the pretty-printing of new-style class instances'
gdb_repr, gdb_output = self.get_gdb_repr('''
class Foo(object):
pass
foo = Foo()
foo.an_int = 42
print foo''')
m = re.match(r'<Foo\(an_int=42\) at remote 0x[0-9a-f]+>', gdb_repr)
self.assertTrue(m,
msg='Unexpected new-style class rendering %r' % gdb_repr)
def test_subclassing_list(self):
'Verify the pretty-printing of an instance of a list subclass'
gdb_repr, gdb_output = self.get_gdb_repr('''
class Foo(list):
pass
foo = Foo()
foo += [1, 2, 3]
foo.an_int = 42
print foo''')
m = re.match(r'<Foo\(an_int=42\) at remote 0x[0-9a-f]+>', gdb_repr)
self.assertTrue(m,
msg='Unexpected new-style class rendering %r' % gdb_repr)
def test_subclassing_tuple(self):
'Verify the pretty-printing of an instance of a tuple subclass'
# This should exercise the negative tp_dictoffset code in the
# new-style class support
gdb_repr, gdb_output = self.get_gdb_repr('''
class Foo(tuple):
pass
foo = Foo((1, 2, 3))
foo.an_int = 42
print foo''')
m = re.match(r'<Foo\(an_int=42\) at remote 0x[0-9a-f]+>', gdb_repr)
self.assertTrue(m,
msg='Unexpected new-style class rendering %r' % gdb_repr)
def assertSane(self, source, corruption, exp_type='unknown'):
'''Run Python under gdb, corrupting variables in the inferior process
immediately before taking a backtrace.
Verify that the variable's representation is the expected failsafe
representation'''
if corruption:
cmds_after_breakpoint=[corruption, 'backtrace']
else:
cmds_after_breakpoint=['backtrace']
gdb_repr, gdb_output = \
self.get_gdb_repr(source,
cmds_after_breakpoint=cmds_after_breakpoint)
self.assertTrue(re.match('<%s at remote 0x[0-9a-f]+>' % exp_type,
gdb_repr),
'Unexpected gdb representation: %r\n%s' % \
(gdb_repr, gdb_output))
def test_NULL_ptr(self):
'Ensure that a NULL PyObject* is handled gracefully'
gdb_repr, gdb_output = (
self.get_gdb_repr('print 42',
cmds_after_breakpoint=['set variable op=0',
'backtrace'])
)
self.assertEquals(gdb_repr, '0x0')
def test_NULL_ob_type(self):
'Ensure that a PyObject* with NULL ob_type is handled gracefully'
self.assertSane('print 42',
'set op->ob_type=0')
def test_corrupt_ob_type(self):
'Ensure that a PyObject* with a corrupt ob_type is handled gracefully'
self.assertSane('print 42',
'set op->ob_type=0xDEADBEEF')
def test_corrupt_tp_flags(self):
'Ensure that a PyObject* with a type with corrupt tp_flags is handled'
self.assertSane('print 42',
'set op->ob_type->tp_flags=0x0',
exp_type='int')
def test_corrupt_tp_name(self):
'Ensure that a PyObject* with a type with corrupt tp_name is handled'
self.assertSane('print 42',
'set op->ob_type->tp_name=0xDEADBEEF')
def test_NULL_instance_dict(self):
'Ensure that a PyInstanceObject with with a NULL in_dict is handled'
self.assertSane('''
class Foo:
pass
foo = Foo()
foo.an_int = 42
print foo''',
'set ((PyInstanceObject*)op)->in_dict = 0',
exp_type='Foo')
def test_builtins_help(self):
'Ensure that the new-style class _Helper in site.py can be handled'
# (this was the issue causing tracebacks in
# http://bugs.python.org/issue8032#msg100537 )
gdb_repr, gdb_output = self.get_gdb_repr('print __builtins__.help', import_site=True)
m = re.match(r'<_Helper at remote 0x[0-9a-f]+>', gdb_repr)
self.assertTrue(m,
msg='Unexpected rendering %r' % gdb_repr)
def test_selfreferential_list(self):
'''Ensure that a reference loop involving a list doesn't lead proxyval
into an infinite loop:'''
gdb_repr, gdb_output = \
self.get_gdb_repr("a = [3, 4, 5] ; a.append(a) ; print a")
self.assertEquals(gdb_repr, '[3, 4, 5, [...]]')
gdb_repr, gdb_output = \
self.get_gdb_repr("a = [3, 4, 5] ; b = [a] ; a.append(b) ; print a")
self.assertEquals(gdb_repr, '[3, 4, 5, [[...]]]')
def test_selfreferential_dict(self):
'''Ensure that a reference loop involving a dict doesn't lead proxyval
into an infinite loop:'''
gdb_repr, gdb_output = \
self.get_gdb_repr("a = {} ; b = {'bar':a} ; a['foo'] = b ; print a")
self.assertEquals(gdb_repr, "{'foo': {'bar': {...}}}")
def test_selfreferential_old_style_instance(self):
gdb_repr, gdb_output = \
self.get_gdb_repr('''
class Foo:
pass
foo = Foo()
foo.an_attr = foo
print foo''')
self.assertTrue(re.match('<Foo\(an_attr=<\.\.\.>\) at remote 0x[0-9a-f]+>',
gdb_repr),
'Unexpected gdb representation: %r\n%s' % \
(gdb_repr, gdb_output))
def test_selfreferential_new_style_instance(self):
gdb_repr, gdb_output = \
self.get_gdb_repr('''
class Foo(object):
pass
foo = Foo()
foo.an_attr = foo
print foo''')
self.assertTrue(re.match('<Foo\(an_attr=<\.\.\.>\) at remote 0x[0-9a-f]+>',
gdb_repr),
'Unexpected gdb representation: %r\n%s' % \
(gdb_repr, gdb_output))
gdb_repr, gdb_output = \
self.get_gdb_repr('''
class Foo(object):
pass
a = Foo()
b = Foo()
a.an_attr = b
b.an_attr = a
print a''')
self.assertTrue(re.match('<Foo\(an_attr=<Foo\(an_attr=<\.\.\.>\) at remote 0x[0-9a-f]+>\) at remote 0x[0-9a-f]+>',
gdb_repr),
'Unexpected gdb representation: %r\n%s' % \
(gdb_repr, gdb_output))
def test_truncation(self):
'Verify that very long output is truncated'
gdb_repr, gdb_output = self.get_gdb_repr('print range(1000)')
self.assertEquals(gdb_repr,
"\n [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, "
"14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, "
"27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, "
"40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, "
"53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, "
"66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, "
"79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, "
"92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, "
"104, 105, 106, 107, 108, 109, 110, 111, 112, 113, "
"114, 115, 116, 117, 118, 119, 120, 121, 122, 123, "
"124, 125, 126, 127, 128, 129, 130, 131, 132, 133, "
"134, 135, 136, 137, 138, 139, 140, 141, 142, 143, "
"144, 145, 146, 147, 148, 149, 150, 151, 152, 153, "
"154, 155, 156, 157, 158, 159, 160, 161, 162, 163, "
"164, 165, 166, 167, 168, 169, 170, 171, 172, 173, "
"174, 175, 176, 177, 178, 179, 180, 181, 182, 183, "
"184, 185, 186, 187, 188, 189, 190, 191, 192, 193, "
"194, 195, 196, 197, 198, 199, 200, 201, 202, 203, "
"204, 205, 206, 207, 208, 209, 210, 211, 212, 213, "
"214, 215, 216, 217, 218, 219, 220, 221, 222, 223, "
"224, 225, 226...(truncated)")
self.assertEquals(len(gdb_repr),
len('\n ') + 1024 + len('...(truncated)'))
def test_builtin_function(self):
gdb_repr, gdb_output = self.get_gdb_repr('print len')
self.assertEquals(gdb_repr, '<built-in function len>')
def test_builtin_method(self):
gdb_repr, gdb_output = self.get_gdb_repr('import sys; print sys.stdout.readlines')
self.assertTrue(re.match('<built-in method readlines of file object at remote 0x[0-9a-f]+>',
gdb_repr),
'Unexpected gdb representation: %r\n%s' % \
(gdb_repr, gdb_output))
def test_frames(self):
gdb_output = self.get_stack_trace('''
def foo(a, b, c):
pass
foo(3, 4, 5)
print foo.__code__''',
breakpoint='PyObject_Print',
cmds_after_breakpoint=['print (PyFrameObject*)(((PyCodeObject*)op)->co_zombieframe)']
)
for line in gdb_output.splitlines():
if line.startswith('$1'):
self.assertTrue(re.match(r'\$1 = Frame 0x[0-9a-f]+, for file <string>, line 3, in foo \(\)',
line),
'Unexpected gdb representation: %r\n%s' % (line, gdb_output))
return
self.fail('Did not find expected line beginning with $1')
class PyListTests(DebuggerTests):
    # Exercises the "py-list" gdb command against the sample script; the
    # expected output is a numbered source listing with the current line
    # flagged by ">".

    def assertListing(self, expected, actual):
        # The listing is the last thing gdb prints, so only the tail of the
        # captured output is compared.
        self.assertEndsWith(actual, expected)

    def test_basic_command(self):
        'Verify that the "py-list" command works'
        expected_listing = '''
5
6 def bar(a, b, c):
7 baz(a, b, c)
8
9 def baz(*args):
>10 print(42)
11
12 foo(1, 2, 3)
'''
        bt = self.get_stack_trace(script='Lib/test/test_gdb_sample.py',
                                  cmds_after_breakpoint=['py-list'])
        self.assertListing(expected_listing, bt)

    def test_one_abs_arg(self):
        'Verify the "py-list" command with one absolute argument'
        expected_listing = '''
9 def baz(*args):
>10 print(42)
11
12 foo(1, 2, 3)
'''
        bt = self.get_stack_trace(script='Lib/test/test_gdb_sample.py',
                                  cmds_after_breakpoint=['py-list 9'])
        self.assertListing(expected_listing, bt)

    def test_two_abs_args(self):
        'Verify the "py-list" command with two absolute arguments'
        expected_listing = '''
1 # Sample script for use by test_gdb.py
2
3 def foo(a, b, c):
'''
        bt = self.get_stack_trace(script='Lib/test/test_gdb_sample.py',
                                  cmds_after_breakpoint=['py-list 1,3'])
        self.assertListing(expected_listing, bt)
class StackNavigationTests(DebuggerTests):
    # Exercises the "py-up" / "py-down" gdb commands against the sample
    # script, checking which python frame gdb reports after each move.

    def test_pyup_command(self):
        'Verify that the "py-up" command works'
        expected = r'''^.*
#[0-9]+ Frame 0x[0-9a-f]+, for file Lib/test/test_gdb_sample.py, line 7, in bar \(a=1, b=2, c=3\)
baz\(a, b, c\)
$'''
        bt = self.get_stack_trace(script='Lib/test/test_gdb_sample.py',
                                  cmds_after_breakpoint=['py-up'])
        self.assertMultilineMatches(bt, expected)

    def test_down_at_bottom(self):
        'Verify handling of "py-down" at the bottom of the stack'
        bt = self.get_stack_trace(script='Lib/test/test_gdb_sample.py',
                                  cmds_after_breakpoint=['py-down'])
        self.assertEndsWith(bt,
                            'Unable to find a newer python frame\n')

    def test_up_at_top(self):
        'Verify handling of "py-up" at the top of the stack'
        # Four "py-up" steps: one more than the python frames above the
        # breakpoint can satisfy, so the last one must report the failure.
        too_many_ups = ['py-up'] * 4
        bt = self.get_stack_trace(script='Lib/test/test_gdb_sample.py',
                                  cmds_after_breakpoint=too_many_ups)
        self.assertEndsWith(bt,
                            'Unable to find an older python frame\n')

    def test_up_then_down(self):
        'Verify "py-up" followed by "py-down"'
        expected = r'''^.*
#[0-9]+ Frame 0x[0-9a-f]+, for file Lib/test/test_gdb_sample.py, line 7, in bar \(a=1, b=2, c=3\)
baz\(a, b, c\)
#[0-9]+ Frame 0x[0-9a-f]+, for file Lib/test/test_gdb_sample.py, line 10, in baz \(args=\(1, 2, 3\)\)
print\(42\)
$'''
        bt = self.get_stack_trace(script='Lib/test/test_gdb_sample.py',
                                  cmds_after_breakpoint=['py-up', 'py-down'])
        self.assertMultilineMatches(bt, expected)
class PyBtTests(DebuggerTests):
    # Exercises the "py-bt" gdb command: the python-level backtrace of the
    # sample script, innermost python caller first.

    def test_basic_command(self):
        'Verify that the "py-bt" command works'
        expected = r'''^.*
#[0-9]+ Frame 0x[0-9a-f]+, for file Lib/test/test_gdb_sample.py, line 7, in bar \(a=1, b=2, c=3\)
baz\(a, b, c\)
#[0-9]+ Frame 0x[0-9a-f]+, for file Lib/test/test_gdb_sample.py, line 4, in foo \(a=1, b=2, c=3\)
bar\(a, b, c\)
#[0-9]+ Frame 0x[0-9a-f]+, for file Lib/test/test_gdb_sample.py, line 12, in <module> \(\)
foo\(1, 2, 3\)
'''
        bt = self.get_stack_trace(script='Lib/test/test_gdb_sample.py',
                                  cmds_after_breakpoint=['py-bt'])
        self.assertMultilineMatches(bt, expected)
class PyPrintTests(DebuggerTests):
    # Exercises the "py-print NAME" gdb command, which reports whether the
    # name was found as a local, a global or a builtin, plus its value.

    def test_basic_command(self):
        'Verify that the "py-print" command works'
        gdb_cmds = ['py-print args']
        bt = self.get_stack_trace(script='Lib/test/test_gdb_sample.py',
                                  cmds_after_breakpoint=gdb_cmds)
        self.assertMultilineMatches(bt,
                                    r".*\nlocal 'args' = \(1, 2, 3\)\n.*")

    def test_print_after_up(self):
        # After moving up one python frame the caller's locals are visible.
        gdb_cmds = ['py-up', 'py-print c', 'py-print b', 'py-print a']
        bt = self.get_stack_trace(script='Lib/test/test_gdb_sample.py',
                                  cmds_after_breakpoint=gdb_cmds)
        self.assertMultilineMatches(bt,
                                    r".*\nlocal 'c' = 3\nlocal 'b' = 2\nlocal 'a' = 1\n.*")

    def test_printing_global(self):
        gdb_cmds = ['py-print __name__']
        bt = self.get_stack_trace(script='Lib/test/test_gdb_sample.py',
                                  cmds_after_breakpoint=gdb_cmds)
        self.assertMultilineMatches(bt,
                                    r".*\nglobal '__name__' = '__main__'\n.*")

    def test_printing_builtin(self):
        gdb_cmds = ['py-print len']
        bt = self.get_stack_trace(script='Lib/test/test_gdb_sample.py',
                                  cmds_after_breakpoint=gdb_cmds)
        self.assertMultilineMatches(bt,
                                    r".*\nbuiltin 'len' = <built-in function len>\n.*")
class PyLocalsTests(DebuggerTests):
    # Exercises the "py-locals" gdb command, which prints "name = value"
    # for the locals of the selected python frame.

    def test_basic_command(self):
        gdb_cmds = ['py-locals']
        bt = self.get_stack_trace(script='Lib/test/test_gdb_sample.py',
                                  cmds_after_breakpoint=gdb_cmds)
        self.assertMultilineMatches(bt,
                                    r".*\nargs = \(1, 2, 3\)\n.*")

    def test_locals_after_up(self):
        # One frame up, the caller's three arguments are the locals.
        gdb_cmds = ['py-up', 'py-locals']
        bt = self.get_stack_trace(script='Lib/test/test_gdb_sample.py',
                                  cmds_after_breakpoint=gdb_cmds)
        self.assertMultilineMatches(bt,
                                    r".*\na = 1\nb = 2\nc = 3\n.*")
def test_main():
    """Run the enabled gdb test cases via test_support.run_unittest."""
    # Only the pretty-printing tests are switched on; the gdb command test
    # cases are kept in the list, commented out.
    enabled_cases = (
        PrettyPrintTests,
        #PyListTests,
        #StackNavigationTests,
        #PyBtTests,
        #PyPrintTests,
        #PyLocalsTests
    )
    run_unittest(*enabled_cases)

if __name__ == "__main__":
    test_main()
# Sample script for use by test_gdb.py

def foo(a, b, c):
    bar(a, b, c)

def bar(a, b, c):
    baz(a, b, c)

def baz(*args):
    print(42)

foo(1, 2, 3)
......@@ -360,7 +360,7 @@ LIBRARY_OBJS= \
# Default target
all: build_all
build_all: $(BUILDPYTHON) oldsharedmods sharedmods
build_all: $(BUILDPYTHON) oldsharedmods sharedmods gdbhooks
# Compile a binary with gcc profile guided optimization.
profile-opt:
......@@ -433,6 +433,16 @@ libpython$(VERSION).dylib: $(LIBRARY_OBJS)
libpython$(VERSION).sl: $(LIBRARY_OBJS)
$(LDSHARED) $(LDFLAGS) -o $@ $(LIBRARY_OBJS) $(MODLIBS) $(SHLIBS) $(LIBC) $(LIBM) $(LDLAST)
# Copy up the gdb python hooks into a position where they can be automatically
# loaded by gdb during Lib/test/test_gdb.py
#
# Distributors are likely to want to install this somewhere else e.g. relative
# to the stripped DWARF data for the shared library.
gdbhooks: $(BUILDPYTHON)-gdb.py
$(BUILDPYTHON)-gdb.py: Tools/gdb/libpython.py
$(INSTALL_SCRIPT) $< $(BUILDPYTHON)-gdb.py
# This rule is here for OPENSTEP/Rhapsody/MacOSX. It builds a temporary
# minimal framework (not including the Lib directory and such) in the current
# directory.
......@@ -1238,5 +1248,6 @@ Python/thread.o: @THREADHEADERS@
.PHONY: frameworkinstallmaclib frameworkinstallapps frameworkinstallunixtools
.PHONY: frameworkaltinstallunixtools recheck autoconf clean clobber distclean
.PHONY: smelly funny patchcheck
.PHONY: gdbhooks
# IF YOU PUT ANYTHING HERE IT WILL GO AWAY
......@@ -481,6 +481,7 @@ Tim MacKenzie
Nick Maclaren
Steve Majewski
Grzegorz Makarewicz
David Malcolm
Ken Manheimer
Vladimir Marangozov
David Marek
......
......@@ -155,6 +155,9 @@ Tools/Demos
Build
-----
- Issue #8032: For gdb7, a python-gdb.py file is added to the build,
allowing the use of advanced gdb features when debugging Python.
- Issue #1628484: The Makefile doesn't ignore the CFLAGS environment
variable anymore. It also forwards the LDFLAGS settings to the linker
when building a shared library.
......
......@@ -20,6 +20,9 @@ faqwiz FAQ Wizard.
freeze Create a stand-alone executable from a Python program.
gdb Python code to be run inside gdb, to make it easier to
debug Python itself (by David Malcolm).
i18n Tools for internationalization. pygettext.py
parses Python source code and generates .pot files,
and msgfmt.py generates a binary message catalog
......
#!/usr/bin/python
'''
From gdb 7 onwards, gdb's build can be configured --with-python, allowing gdb
to be extended with Python code e.g. for library-specific data visualizations,
such as for the C++ STL types. Documentation on this API can be seen at:
http://sourceware.org/gdb/current/onlinedocs/gdb/Python-API.html
This python module deals with the case when the process being debugged (the
"inferior process" in gdb parlance) is itself python, or more specifically,
linked against libpython. In this situation, almost every item of data is a
(PyObject*), and having the debugger merely print their addresses is not very
enlightening.
This module embeds knowledge about the implementation details of libpython so
that we can emit useful visualizations e.g. a string, a list, a dict, a frame
giving file/line information and the state of local variables
In particular, given a gdb.Value corresponding to a PyObject* in the inferior
process, we can generate a "proxy value" within the gdb process. For example,
given a PyObject* in the inferior process that is in fact a PyListObject*
holding three PyObject* that turn out to be PyStringObject* instances, we can
generate a proxy value within the gdb process that is a list of strings:
["foo", "bar", "baz"]
Doing so can be expensive for complicated graphs of objects, and could take
some time, so we also have a "write_repr" method that writes a representation
of the data to a file-like object. This allows us to stop the traversal by
having the file-like object raise an exception if it gets too much data.
With both "proxyval" and "write_repr" we keep track of the set of all addresses
visited so far in the traversal, to avoid infinite recursion due to cycles in
the graph of object references.
We try to defer gdb.lookup_type() invocations for python types until as late as
possible: for a dynamically linked python binary, when the process starts in
the debugger, the libpython.so hasn't been dynamically loaded yet, so none of
the type names are known to the debugger
The module also extends gdb with some python-specific commands.
'''
from __future__ import with_statement
import gdb
# Look up the gdb.Type for some standard types:
# (done once, at import time, through gdb.lookup_type)
_type_char_ptr = gdb.lookup_type('char').pointer() # char*
_type_unsigned_char_ptr = gdb.lookup_type('unsigned char').pointer() # unsigned char*
_type_void_ptr = gdb.lookup_type('void').pointer() # void*
_type_size_t = gdb.lookup_type('size_t')
# Size of a void* as reported by gdb for the looked-up type:
SIZEOF_VOID_P = _type_void_ptr.sizeof
# Bit masks tested against a type object's tp_flags field (see
# PyObjectPtr.subclass_from_type) to pick the wrapper class for an object.
# NOTE(review): presumably these mirror the Py_TPFLAGS_* values in CPython's
# Include/object.h -- confirm they stay in sync.
Py_TPFLAGS_HEAPTYPE = (1L << 9)
Py_TPFLAGS_INT_SUBCLASS = (1L << 23)
Py_TPFLAGS_LONG_SUBCLASS = (1L << 24)
Py_TPFLAGS_LIST_SUBCLASS = (1L << 25)
Py_TPFLAGS_TUPLE_SUBCLASS = (1L << 26)
Py_TPFLAGS_STRING_SUBCLASS = (1L << 27)
Py_TPFLAGS_UNICODE_SUBCLASS = (1L << 28)
Py_TPFLAGS_DICT_SUBCLASS = (1L << 29)
Py_TPFLAGS_BASE_EXC_SUBCLASS = (1L << 30)
Py_TPFLAGS_TYPE_SUBCLASS = (1L << 31)
# NOTE(review): presumably the maxlen handed to get_truncated_repr() by the
# pretty-printers defined later in this file -- confirm against the callers.
MAX_OUTPUT_LEN=1024
class NullPyObjectPtr(RuntimeError):
    """Raised by PyObjectPtr.field() when the wrapped pointer is NULL."""
def safety_limit(val, limit=1000):
    """Return *val* capped at *limit* (default 1000).

    Given an integer value from the process being debugged, limit it to some
    safety threshold so that arbitrary breakage within said process doesn't
    break the gdb process too much (e.g. sizes of iterations, sizes of
    lists).

    val: the untrusted value read from the inferior process
    limit: the largest value that will be returned
    """
    return min(val, limit)
def safe_range(val):
    """Like xrange(val), but with the length capped by safety_limit().

    The value comes from the debugged process and may be corrupt, so it is
    not trusted as an iteration count.
    """
    capped = safety_limit(val)
    return xrange(capped)
class StringTruncated(RuntimeError):
    """Raised by TruncatedStringIO.write() once the length limit is hit."""


class TruncatedStringIO(object):
    '''Similar to cStringIO, but can truncate the output by raising a
    StringTruncated exception'''

    def __init__(self, maxlen=None):
        """maxlen: maximum number of characters to keep, or None for no
        limit."""
        self._val = ''
        self.maxlen = maxlen

    def write(self, data):
        """Append *data*.

        If that would exceed maxlen, keep only as much of *data* as fits and
        raise StringTruncated.
        """
        # Compare against None rather than relying on truthiness, so that a
        # limit of 0 is honoured instead of silently meaning "unlimited".
        if self.maxlen is not None:
            if len(data) + len(self._val) > self.maxlen:
                # Truncation:
                self._val += data[0:self.maxlen - len(self._val)]
                raise StringTruncated()

        self._val += data

    def getvalue(self):
        """Return everything written so far (truncated part excluded)."""
        return self._val
class PyObjectPtr(object):
"""
Class wrapping a gdb.Value that's a either a (PyObject*) within the
inferior process, or some subclass pointer e.g. (PyStringObject*)
There will be a subclass for every refined PyObject type that we care
about.
Note that at every stage the underlying pointer could be NULL, point
to corrupt data, etc; this is the debugger, after all.
"""
_typename = 'PyObject'
def __init__(self, gdbval, cast_to=None):
if cast_to:
self._gdbval = gdbval.cast(cast_to)
else:
self._gdbval = gdbval
def field(self, name):
'''
Get the gdb.Value for the given field within the PyObject, coping with
some python 2 versus python 3 differences.
Various libpython types are defined using the "PyObject_HEAD" and
"PyObject_VAR_HEAD" macros.
In Python 2, this these are defined so that "ob_type" and (for a var
object) "ob_size" are fields of the type in question.
In Python 3, this is defined as an embedded PyVarObject type thus:
PyVarObject ob_base;
so that the "ob_size" field is located insize the "ob_base" field, and
the "ob_type" is most easily accessed by casting back to a (PyObject*).
'''
if self.is_null():
raise NullPyObjectPtr(self)
if name == 'ob_type':
pyo_ptr = self._gdbval.cast(PyObjectPtr.get_gdb_type())
return pyo_ptr.dereference()[name]
if name == 'ob_size':
try:
# Python 2:
return self._gdbval.dereference()[name]
except RuntimeError:
# Python 3:
return self._gdbval.dereference()['ob_base'][name]
# General case: look it up inside the object:
return self._gdbval.dereference()[name]
def pyop_field(self, name):
'''
Get a PyObjectPtr for the given PyObject* field within this PyObject,
coping with some python 2 versus python 3 differences.
'''
return PyObjectPtr.from_pyobject_ptr(self.field(name))
def write_field_repr(self, name, out, visited):
'''
Extract the PyObject* field named "name", and write its representation
to file-like object "out"
'''
field_obj = self.pyop_field(name)
field_obj.write_repr(out, visited)
def get_truncated_repr(self, maxlen):
'''
Get a repr-like string for the data, but truncate it at "maxlen" bytes
(ending the object graph traversal as soon as you do)
'''
out = TruncatedStringIO(maxlen)
try:
self.write_repr(out, set())
except StringTruncated:
# Truncation occurred:
return out.getvalue() + '...(truncated)'
# No truncation occurred:
return out.getvalue()
def type(self):
return PyTypeObjectPtr(self.field('ob_type'))
def is_null(self):
return 0 == long(self._gdbval)
def is_optimized_out(self):
'''
Is the value of the underlying PyObject* visible to the debugger?
This can vary with the precise version of the compiler used to build
Python, and the precise version of gdb.
See e.g. https://bugzilla.redhat.com/show_bug.cgi?id=556975 with
PyEval_EvalFrameEx's "f"
'''
return self._gdbval.is_optimized_out
def safe_tp_name(self):
try:
return self.type().field('tp_name').string()
except NullPyObjectPtr:
# NULL tp_name?
return 'unknown'
except RuntimeError:
# Can't even read the object at all?
return 'unknown'
def proxyval(self, visited):
'''
Scrape a value from the inferior process, and try to represent it
within the gdb process, whilst (hopefully) avoiding crashes when
the remote data is corrupt.
Derived classes will override this.
For example, a PyIntObject* with ob_ival 42 in the inferior process
should result in an int(42) in this process.
visited: a set of all gdb.Value pyobject pointers already visited
whilst generating this value (to guard against infinite recursion when
visiting object graphs with loops). Analogous to Py_ReprEnter and
Py_ReprLeave
'''
class FakeRepr(object):
"""
Class representing a non-descript PyObject* value in the inferior
process for when we don't have a custom scraper, intended to have
a sane repr().
"""
def __init__(self, tp_name, address):
self.tp_name = tp_name
self.address = address
def __repr__(self):
# For the NULL pointer, we have no way of knowing a type, so
# special-case it as per
# http://bugs.python.org/issue8032#msg100882
if self.address == 0:
return '0x0'
return '<%s at remote 0x%x>' % (self.tp_name, self.address)
return FakeRepr(self.safe_tp_name(),
long(self._gdbval))
def write_repr(self, out, visited):
'''
Write a string representation of the value scraped from the inferior
process to "out", a file-like object.
'''
# Default implementation: generate a proxy value and write its repr
# However, this could involve a lot of work for complicated objects,
# so for derived classes we specialize this
return out.write(repr(self.proxyval(visited)))
@classmethod
def subclass_from_type(cls, t):
    '''
    Choose the PyObjectPtr subclass to use for instances of the type "t",
    where "t" is a PyTypeObjectPtr wrapping a (PyTypeObject*) gdb.Value.

    Looking up the symbols for the global types would be the ideal
    approach, but that isn't working yet:
      (gdb) python print gdb.lookup_symbol('PyList_Type')[0].value
      Traceback (most recent call last):
        File "<string>", line 1, in <module>
      NotImplementedError: Symbol type not yet supported in Python scripts.
      Error while executing Python code.

    So tp_name is first compared against a few special cases that don't
    seem to be visible through flags, and after that tp_flags decides.
    The base class (cls) is returned if nothing matches, or if the type
    fields cannot be read.
    '''
    try:
        type_name = t.field('tp_name').string()
        type_flags = int(t.field('tp_flags'))
    except RuntimeError:
        # Any kind of error (e.g. NULL ptrs): settle for the base class
        return cls

    # Special cases that are recognised purely by name:
    by_name = {
        'bool': PyBoolObjectPtr,
        'classobj': PyClassObjectPtr,
        'instance': PyInstanceObjectPtr,
        'NoneType': PyNoneStructPtr,
        'frame': PyFrameObjectPtr,
        'set': PySetObjectPtr,
        'frozenset': PySetObjectPtr,
        'builtin_function_or_method': PyCFunctionObjectPtr,
    }
    if type_name in by_name:
        return by_name[type_name]

    # Checked in this order; the first flag that is set wins.
    # Py_TPFLAGS_TYPE_SUBCLASS is not mapped (to PyTypeObjectPtr) here.
    by_flag = (
        (Py_TPFLAGS_HEAPTYPE, HeapTypeObjectPtr),
        (Py_TPFLAGS_INT_SUBCLASS, PyIntObjectPtr),
        (Py_TPFLAGS_LONG_SUBCLASS, PyLongObjectPtr),
        (Py_TPFLAGS_LIST_SUBCLASS, PyListObjectPtr),
        (Py_TPFLAGS_TUPLE_SUBCLASS, PyTupleObjectPtr),
        (Py_TPFLAGS_STRING_SUBCLASS, PyStringObjectPtr),
        (Py_TPFLAGS_UNICODE_SUBCLASS, PyUnicodeObjectPtr),
        (Py_TPFLAGS_DICT_SUBCLASS, PyDictObjectPtr),
        (Py_TPFLAGS_BASE_EXC_SUBCLASS, PyBaseExceptionObjectPtr),
    )
    for flag, wrapper_class in by_flag:
        if type_flags & flag:
            return wrapper_class

    # Use the base class:
    return cls
@classmethod
def from_pyobject_ptr(cls, gdbval):
    '''
    Build a wrapper for gdbval, trying to find the most specific derived
    class from the object's type and casting the pointer to match.

    If a RuntimeError occurs along the way (e.g. for a NULL pointer) the
    value is wrapped without a cast, using whichever class had been
    selected by the time the error happened.
    '''
    chosen = cls
    try:
        generic = PyObjectPtr(gdbval)
        chosen = cls.subclass_from_type(generic.type())
        return chosen(gdbval, cast_to=chosen.get_gdb_type())
    except RuntimeError:
        # Fall through to the uncast wrapper below
        pass
    return chosen(gdbval)
@classmethod
def get_gdb_type(cls):
    '''Return the gdb type "pointer to cls._typename".'''
    pointee = gdb.lookup_type(cls._typename)
    return pointee.pointer()
def as_address(self):
    '''Return the wrapped gdb.Value converted to a long.'''
    address = long(self._gdbval)
    return address
class ProxyAlreadyVisited(object):
    '''
    Stand-in proxy returned in place of an object that has already been
    visited, so that loops in the object graph do not cause infinite
    recursion.

    Its repr() is exactly the string it was constructed with.

    Analogous to the values emitted by the users of Py_ReprEnter and
    Py_ReprLeave
    '''
    def __init__(self, rep):
        # rep: the text to show instead of the real value, e.g. '[...]'
        self._rep = rep

    def __repr__(self):
        return self._rep
def _write_instance_repr(out, visited, name, pyop_attrdict, address):
    '''
    Write "<NAME(attr=value, ...) at remote 0xADDRESS>" to the file-like
    object "out".  Shared by old-style and new-style instances.

    The parenthesised attribute list is only written when pyop_attrdict
    is a PyDictObjectPtr; otherwise just the name and address appear.
    '''
    out.write('<')
    out.write(name)

    # Write dictionary of instance attributes:
    if isinstance(pyop_attrdict, PyDictObjectPtr):
        out.write('(')
        for position, (pyop_key, pyop_value) in enumerate(pyop_attrdict.iteritems()):
            if position:
                out.write(', ')
            # The attribute name is written as-is, the value as its repr
            out.write(pyop_key.proxyval(visited))
            out.write('=')
            pyop_value.write_repr(out, visited)
        out.write(')')

    out.write(' at remote 0x%x>' % address)
class InstanceProxy(object):
    '''
    Local stand-in for an instance living in the inferior process.

    cl_name: name of the instance's class
    attrdict: the instance attributes; shown in the repr only if a dict
    address: address of the instance in the inferior process
    '''
    def __init__(self, cl_name, attrdict, address):
        self.cl_name = cl_name
        self.attrdict = attrdict
        self.address = address

    def __repr__(self):
        if not isinstance(self.attrdict, dict):
            return '<%s at remote 0x%x>' % (self.cl_name,
                                            self.address)
        pairs = []
        for attr_name, attr_value in self.attrdict.iteritems():
            pairs.append("%s=%r" % (attr_name, attr_value))
        return '<%s(%s) at remote 0x%x>' % (self.cl_name,
                                            ', '.join(pairs),
                                            self.address)
def _PyObject_VAR_SIZE(typeobj, nitems):
    '''
    Compute tp_basicsize + nitems * tp_itemsize for the given type,
    rounded up to a multiple of SIZEOF_VOID_P, cast to _type_size_t.

    (Presumably a transliteration of the C macro of the same name.)
    '''
    align_mask = SIZEOF_VOID_P - 1
    unrounded = (typeobj.field('tp_basicsize') +
                 nitems * typeobj.field('tp_itemsize') +
                 align_mask)
    rounded = unrounded & ~align_mask
    return rounded.cast(_type_size_t)
class HeapTypeObjectPtr(PyObjectPtr):
    '''
    Wrapper for objects whose type has Py_TPFLAGS_HEAPTYPE set; used for
    instances of new-style classes.
    '''
    _typename = 'PyObject'

    def get_attr_dict(self):
        '''
        Locate the object's attribute dictionary via its type's
        tp_dictoffset and return a PyObjectPtr for it.

        Returns None if the type has no dictoffset, or if reading the
        inferior raises RuntimeError.
        '''
        try:
            typeobj = self.type()
            dictoffset = int_from_int(typeobj.field('tp_dictoffset'))
            if dictoffset == 0:
                # Not found
                return None

            if dictoffset < 0:
                # A negative offset is relative to the end of the object,
                # so the object's size is needed to make it absolute
                varobject_ptr_type = gdb.lookup_type('PyVarObject').pointer()
                as_varobject = self._gdbval.cast(varobject_ptr_type)
                nitems = abs(int_from_int(as_varobject['ob_size']))
                dictoffset += _PyObject_VAR_SIZE(typeobj, nitems)
                assert dictoffset > 0
                assert dictoffset % SIZEOF_VOID_P == 0

            # Byte-offset from the start of the object to the dict slot,
            # then read the (PyObject*) stored there
            slot_address = self._gdbval.cast(_type_char_ptr) + dictoffset
            slot_type = PyObjectPtr.get_gdb_type().pointer()
            slot = slot_address.cast(slot_type)
            return PyObjectPtr.from_pyobject_ptr(slot.dereference())
        except RuntimeError:
            # Corrupt data somewhere; fail safe
            pass

        # Some kind of error:
        return None

    def proxyval(self, visited):
        '''
        Support for new-style classes.

        Returns an InstanceProxy built from the type name, the proxied
        attribute dictionary ({} if there is none) and the address.  The
        dictionary is found using a transliteration to python of
        _PyObject_GetDictPtr, ignoring descriptors.
        '''
        # Guard against infinite loops:
        address = self.as_address()
        if address in visited:
            return ProxyAlreadyVisited('<...>')
        visited.add(address)

        pyop_attr_dict = self.get_attr_dict()
        attr_dict = pyop_attr_dict.proxyval(visited) if pyop_attr_dict else {}

        # New-style class:
        return InstanceProxy(self.safe_tp_name(), attr_dict,
                             long(self._gdbval))

    def write_repr(self, out, visited):
        '''Write "<TYPENAME(attr=value, ...) at remote 0xADDR>" to out.'''
        # Guard against infinite loops:
        address = self.as_address()
        if address in visited:
            out.write('<...>')
            return
        visited.add(address)

        pyop_attrdict = self.get_attr_dict()
        _write_instance_repr(out, visited,
                             self.safe_tp_name(), pyop_attrdict,
                             self.as_address())
class ProxyException(Exception):
    '''
    Local stand-in for an exception instance living in the inferior
    process; its repr() is the type name followed by the repr of its args.
    '''
    def __init__(self, tp_name, args):
        self.tp_name = tp_name
        self.args = args

    def __repr__(self):
        args_text = repr(self.args)
        return '%s%s' % (self.tp_name, args_text)
class PyBaseExceptionObjectPtr(PyObjectPtr):
    """
    Wrapper around a gdb.Value that is a PyBaseExceptionObject*, i.e. an
    exception instance within the process being debugged.
    """
    _typename = 'PyBaseExceptionObject'

    def proxyval(self, visited):
        """Return a ProxyException carrying the type name and proxied args."""
        # Guard against infinite loops:
        address = self.as_address()
        if address in visited:
            return ProxyAlreadyVisited('(...)')
        visited.add(address)

        args_proxy = self.pyop_field('args').proxyval(visited)
        return ProxyException(self.safe_tp_name(), args_proxy)

    def write_repr(self, out, visited):
        """Write the type name immediately followed by the repr of 'args'."""
        # Guard against infinite loops:
        address = self.as_address()
        if address in visited:
            out.write('(...)')
            return
        visited.add(address)

        out.write(self.safe_tp_name())
        self.write_field_repr('args', out, visited)
class PyBoolObjectPtr(PyObjectPtr):
    """
    Wrapper around a gdb.Value that is a PyBoolObject*, i.e. one of the
    two <bool> instances (Py_True/Py_False) within the process being
    debugged.
    """
    _typename = 'PyBoolObject'

    def proxyval(self, visited):
        """Return True if the object's ob_ival is non-zero, else False."""
        return bool(int_from_int(self.field('ob_ival')))
class PyClassObjectPtr(PyObjectPtr):
    """
    Wrapper around a gdb.Value that is a PyClassObject*, i.e. a
    <classobj> instance within the process being debugged.

    Adds nothing beyond the struct name used for casting.
    """
    _typename = 'PyClassObject'
class BuiltInFunctionProxy(object):
    """Local stand-in for a built-in function, identified by its ml_name."""
    def __init__(self, ml_name):
        self.ml_name = ml_name

    def __repr__(self):
        template = "<built-in function %s>"
        return template % self.ml_name
class BuiltInMethodProxy(object):
    """
    Local stand-in for a built-in method bound to an object.

    ml_name: the method's name
    pyop_m_self: wrapper for the object the method is bound to; it must
    provide safe_tp_name() and as_address()
    """
    def __init__(self, ml_name, pyop_m_self):
        self.ml_name = ml_name
        self.pyop_m_self = pyop_m_self

    def __repr__(self):
        bound_to = self.pyop_m_self
        details = (self.ml_name,
                   bound_to.safe_tp_name(),
                   bound_to.as_address())
        return '<built-in method %s of %s object at remote 0x%x>' % details
class PyCFunctionObjectPtr(PyObjectPtr):
    """
    Wrapper around a gdb.Value that is a PyCFunctionObject*
    (see Include/methodobject.h and Objects/methodobject.c)
    """
    _typename = 'PyCFunctionObject'

    def proxyval(self, visited):
        """
        Return a BuiltInFunctionProxy if m_self is NULL, otherwise a
        BuiltInMethodProxy that also carries the m_self wrapper.
        """
        method_def = self.field('m_ml')  # m_ml is a (PyMethodDef*)
        ml_name = method_def['ml_name'].string()

        pyop_m_self = self.pyop_field('m_self')
        if not pyop_m_self.is_null():
            return BuiltInMethodProxy(ml_name, pyop_m_self)
        return BuiltInFunctionProxy(ml_name)
class PyCodeObjectPtr(PyObjectPtr):
    """
    Wrapper around a gdb.Value that is a PyCodeObject*, i.e. a <code>
    instance within the process being debugged.
    """
    _typename = 'PyCodeObject'

    def addr2line(self, addrq):
        '''
        Return the line number for the bytecode offset addrq.

        co_lnotab is walked as (address increment, line increment) byte
        pairs until the accumulated address exceeds addrq.

        Analogous to PyCode_Addr2Line; translated from pseudocode in
        Objects/lnotab_notes.txt
        '''
        co_lnotab = self.pyop_field('co_lnotab').proxyval(set())

        # Start from co_firstlineno as per PyCode_Addr2Line, not from 0
        # as lnotab_notes.txt has it:
        lineno = int_from_int(self.field('co_firstlineno'))

        addr = 0
        # Only complete pairs are considered
        for pos in range(0, len(co_lnotab) - 1, 2):
            addr += ord(co_lnotab[pos])
            if addr > addrq:
                return lineno
            lineno += ord(co_lnotab[pos + 1])
        return lineno
class PyDictObjectPtr(PyObjectPtr):
    """
    Wrapper around a gdb.Value that is a PyDictObject*, i.e. a dict
    instance within the process being debugged.
    """
    _typename = 'PyDictObject'

    def iteritems(self):
        '''
        Yield (PyObjectPtr key, PyObjectPtr value) pairs, analogous to
        dict.iteritems().

        All ma_mask + 1 entries of ma_table are scanned; entries whose
        me_value is NULL are skipped.
        '''
        table = self.field('ma_table')
        for index in safe_range(self.field('ma_mask') + 1):
            entry = table + index
            pyop_value = PyObjectPtr.from_pyobject_ptr(entry['me_value'])
            if pyop_value.is_null():
                continue
            pyop_key = PyObjectPtr.from_pyobject_ptr(entry['me_key'])
            yield (pyop_key, pyop_value)

    def proxyval(self, visited):
        '''Return a local dict mapping proxied keys to proxied values.'''
        # Guard against infinite loops:
        address = self.as_address()
        if address in visited:
            return ProxyAlreadyVisited('{...}')
        visited.add(address)

        proxy = {}
        for pyop_key, pyop_value in self.iteritems():
            # The key is proxied before the value
            key = pyop_key.proxyval(visited)
            value = pyop_value.proxyval(visited)
            proxy[key] = value
        return proxy

    def write_repr(self, out, visited):
        '''Write "{key: value, ...}" to the file-like object out.'''
        # Guard against infinite loops:
        address = self.as_address()
        if address in visited:
            out.write('{...}')
            return
        visited.add(address)

        out.write('{')
        for position, (pyop_key, pyop_value) in enumerate(self.iteritems()):
            if position:
                out.write(', ')
            pyop_key.write_repr(out, visited)
            out.write(': ')
            pyop_value.write_repr(out, visited)
        out.write('}')
class PyInstanceObjectPtr(PyObjectPtr):
    """
    Wrapper around a gdb.Value that is a PyInstanceObject*, i.e. an
    instance of an old-style class.
    """
    _typename = 'PyInstanceObject'

    def _class_name(self, visited):
        # Proxy of the cl_name field of the instance's in_class
        in_class = self.pyop_field('in_class')
        return in_class.pyop_field('cl_name').proxyval(visited)

    def proxyval(self, visited):
        """Return an InstanceProxy with the class name and proxied in_dict."""
        # Guard against infinite loops:
        address = self.as_address()
        if address in visited:
            return ProxyAlreadyVisited('<...>')
        visited.add(address)

        # Get name of class:
        cl_name = self._class_name(visited)

        # Get dictionary of instance attributes:
        in_dict = self.pyop_field('in_dict').proxyval(visited)

        # Old-style class:
        return InstanceProxy(cl_name, in_dict, long(self._gdbval))

    def write_repr(self, out, visited):
        """Write "<CLASSNAME(attr=value, ...) at remote 0xADDR>" to out."""
        # Guard against infinite loops:
        address = self.as_address()
        if address in visited:
            out.write('<...>')
            return
        visited.add(address)

        # Old-style class:

        # Get name of class:
        cl_name = self._class_name(visited)

        # Get dictionary of instance attributes:
        pyop_in_dict = self.pyop_field('in_dict')

        _write_instance_repr(out, visited,
                             cl_name, pyop_in_dict, self.as_address())
class PyIntObjectPtr(PyObjectPtr):
    """Wrapper around a gdb.Value that is a PyIntObject*."""
    _typename = 'PyIntObject'

    def proxyval(self, visited):
        """Return the object's ob_ival as a local int."""
        return int_from_int(self.field('ob_ival'))
class PyListObjectPtr(PyObjectPtr):
    """Wrapper around a gdb.Value that is a PyListObject*."""
    _typename = 'PyListObject'

    def __getitem__(self, i):
        # Get the gdb.Value for the (PyObject*) with the given index:
        return self.field('ob_item')[i]

    def proxyval(self, visited):
        """Return a local list holding the proxies of the ob_size items."""
        # Guard against infinite loops:
        address = self.as_address()
        if address in visited:
            return ProxyAlreadyVisited('[...]')
        visited.add(address)

        proxies = []
        for index in safe_range(int_from_int(self.field('ob_size'))):
            pyop_item = PyObjectPtr.from_pyobject_ptr(self[index])
            proxies.append(pyop_item.proxyval(visited))
        return proxies

    def write_repr(self, out, visited):
        """Write "[item, item, ...]" to the file-like object out."""
        # Guard against infinite loops:
        address = self.as_address()
        if address in visited:
            out.write('[...]')
            return
        visited.add(address)

        out.write('[')
        for index in safe_range(int_from_int(self.field('ob_size'))):
            if index > 0:
                out.write(', ')
            pyop_item = PyObjectPtr.from_pyobject_ptr(self[index])
            pyop_item.write_repr(out, visited)
        out.write(']')
class PyLongObjectPtr(PyObjectPtr):
_typename = 'PyLongObject'
def proxyval(self, visited):
'''
Python's Include/longobjrep.h has this declaration:
struct _longobject {
PyObject_VAR_HEAD
digit ob_digit[1];
};
with this description:
The absolute value of a number is equal to
SUM(for i=0 through abs(ob_size)-1) ob_digit[i] * 2**(SHIFT*i)
Negative numbers are represented with ob_size < 0;
zero is represented by ob_size == 0.
where SHIFT can be either:
#define PyLong_SHIFT 30
#define PyLong_SHIFT 15
'''
ob_size = long(self.field('ob_size'))
if ob_size == 0:
return 0L
ob_digit = self.field('ob_digit')
if gdb.lookup_type('digit').sizeof == 2:
SHIFT = 15L
else:
SHIFT = 30L
digits = [long(ob_digit[i]) * 2**(SHIFT*i)
for i in safe_range(abs(ob_size))]
result = sum(digits)
if ob_size < 0:
result = -result
return result
class PyNoneStructPtr(PyObjectPtr):
    """
    Wrapper around a gdb.Value that is a PyObject* pointing to the
    singleton (we hope) _Py_NoneStruct with ob_type PyNone_Type
    """
    _typename = 'PyObject'

    def proxyval(self, visited):
        """The proxy for the inferior's None is simply the local None."""
        return None
class PyFrameObjectPtr(PyObjectPtr):
    """
    Wrapper around a gdb.Value that is a PyFrameObject*, i.e. a frame of
    Python code within the process being debugged.
    """
    _typename = 'PyFrameObject'

    def __init__(self, gdbval, cast_to=None):
        """
        Wrap gdbval and, unless the frame is optimized out, cache the
        fields of the frame and its code object that the other methods
        use.

        cast_to is optional so that the class can also be instantiated as
        cls(gdbval), which is what PyObjectPtr.from_pyobject_ptr does
        when it falls back to an uncast wrapper.
        """
        if cast_to is None:
            PyObjectPtr.__init__(self, gdbval)
        else:
            PyObjectPtr.__init__(self, gdbval, cast_to)

        if not self.is_optimized_out():
            self.co = PyCodeObjectPtr.from_pyobject_ptr(self.field('f_code'))
            self.co_name = self.co.pyop_field('co_name')
            self.co_filename = self.co.pyop_field('co_filename')

            self.f_lineno = int_from_int(self.field('f_lineno'))
            self.f_lasti = int_from_int(self.field('f_lasti'))
            self.co_nlocals = int_from_int(self.co.field('co_nlocals'))
            self.co_varnames = PyTupleObjectPtr.from_pyobject_ptr(self.co.field('co_varnames'))

    def iter_locals(self):
        '''
        Yield a sequence of (name,value) pairs of PyObjectPtr instances, for
        the local variables of this frame

        Locals whose slot in f_localsplus is NULL are skipped; nothing is
        yielded for an optimized-out frame.
        '''
        if self.is_optimized_out():
            return

        f_localsplus = self.field('f_localsplus')
        for i in safe_range(self.co_nlocals):
            pyop_value = PyObjectPtr.from_pyobject_ptr(f_localsplus[i])
            if not pyop_value.is_null():
                pyop_name = PyObjectPtr.from_pyobject_ptr(self.co_varnames[i])
                yield (pyop_name, pyop_value)

    def iter_globals(self):
        '''
        Return an iterator over (name,value) pairs of PyObjectPtr instances,
        for the global variables of this frame

        The iterator is empty for an optimized-out frame.
        '''
        if self.is_optimized_out():
            # Must still be iterable: get_var_by_name loops over the result
            return iter(())

        pyop_globals = self.pyop_field('f_globals')
        return pyop_globals.iteritems()

    def iter_builtins(self):
        '''
        Return an iterator over (name,value) pairs of PyObjectPtr instances,
        for the builtin variables

        The iterator is empty for an optimized-out frame.
        '''
        if self.is_optimized_out():
            # Must still be iterable: get_var_by_name loops over the result
            return iter(())

        pyop_builtins = self.pyop_field('f_builtins')
        return pyop_builtins.iteritems()

    def get_var_by_name(self, name):
        '''
        Look for the named variable, returning a (PyObjectPtr, scope) pair
        where scope is a string 'local', 'global', 'builtin'

        The scopes are searched in that order.
        If not found, return (None, None)
        '''
        scopes = ((self.iter_locals, 'local'),
                  (self.iter_globals, 'global'),
                  (self.iter_builtins, 'builtin'))
        for iter_scope, scope in scopes:
            for pyop_name, pyop_value in iter_scope():
                if name == pyop_name.proxyval(set()):
                    return pyop_value, scope
        return None, None

    def filename(self):
        '''Get the path of the current Python source file, as a string'''
        if self.is_optimized_out():
            return '(frame information optimized out)'
        return self.co_filename.proxyval(set())

    def current_line_num(self):
        '''Get current line number as an integer (1-based), or None if the
        frame is optimized out

        Translated from PyFrame_GetLineNumber and PyCode_Addr2Line

        See Objects/lnotab_notes.txt
        '''
        if self.is_optimized_out():
            return None
        f_trace = self.field('f_trace')
        if long(f_trace) != 0:
            # we have a non-NULL f_trace:
            return self.f_lineno
        return self.co.addr2line(self.f_lasti)

    def current_line(self):
        '''Get the text of the current source line as a string, with a trailing
        newline character

        A parenthesised placeholder string is returned instead if the
        frame is optimized out, if the source file cannot be read, or if
        the file has no such line.'''
        if self.is_optimized_out():
            return '(frame information optimized out)'
        try:
            with open(self.filename(), 'r') as f:
                all_lines = f.readlines()
        except IOError:
            # The source is often unavailable on the machine running gdb;
            # that must not abort a whole backtrace
            return '(unable to read source file)\n'
        try:
            # Convert from 1-based current_line_num to 0-based list offset:
            return all_lines[self.current_line_num()-1]
        except IndexError:
            # The file on disk is shorter than the code being executed
            return '(source line not available)\n'

    def write_repr(self, out, visited):
        '''
        Write "Frame 0xADDR, for file F, line N, in NAME (local=value, ...)"
        to the file-like object out.
        '''
        if self.is_optimized_out():
            out.write('(frame information optimized out)')
            return
        # co_filename and co_name are PyObjectPtr wrappers; %s formats
        # them via their str()
        out.write('Frame 0x%x, for file %s, line %i, in %s ('
                  % (self.as_address(),
                     self.co_filename,
                     self.current_line_num(),
                     self.co_name))
        first = True
        for pyop_name, pyop_value in self.iter_locals():
            if not first:
                out.write(', ')
            first = False

            out.write(pyop_name.proxyval(visited))
            out.write('=')
            pyop_value.write_repr(out, visited)

        out.write(')')
class PySetObjectPtr(PyObjectPtr):
    """
    Wrapper around a gdb.Value that is a PySetObject*; used for both set
    and frozenset instances.
    """
    _typename = 'PySetObject'

    def proxyval(self, visited):
        """
        Return a local set (or frozenset, if the type name is
        'frozenset') holding the proxies of the members.

        Slots whose key is 0, or whose key proxy equals '<dummy key>',
        are left out.
        """
        # Guard against infinite loops:
        address = self.as_address()
        if address in visited:
            return ProxyAlreadyVisited('%s(...)' % self.safe_tp_name())
        visited.add(address)

        members = []
        table = self.field('table')
        for slot in safe_range(self.field('mask')+1):
            key = table[slot]['key']
            if key != 0:
                key_proxy = PyObjectPtr.from_pyobject_ptr(key).proxyval(visited)
                if key_proxy != '<dummy key>':
                    members.append(key_proxy)

        container = frozenset if self.safe_tp_name() == 'frozenset' else set
        return container(members)

    def write_repr(self, out, visited):
        """Write "TYPENAME([member, member, ...])" to the file-like object out."""
        # The type name is written even when the loop guard below triggers
        out.write(self.safe_tp_name())

        # Guard against infinite loops:
        address = self.as_address()
        if address in visited:
            out.write('(...)')
            return
        visited.add(address)

        out.write('([')
        written_any = False
        table = self.field('table')
        for slot in safe_range(self.field('mask')+1):
            key = table[slot]['key']
            if key != 0:
                pyop_key = PyObjectPtr.from_pyobject_ptr(key)
                # The proxy is generated only to recognise dummy keys
                key_proxy = pyop_key.proxyval(visited) # FIXME!
                if key_proxy != '<dummy key>':
                    if written_any:
                        out.write(', ')
                    written_any = True
                    pyop_key.write_repr(out, visited)
        out.write('])')
class PyStringObjectPtr(PyObjectPtr):
    """Wrapper around a gdb.Value that is a PyStringObject*."""
    _typename = 'PyStringObject'

    def __str__(self):
        """Return the ob_size bytes starting at ob_sval as a local str."""
        length = self.field('ob_size')
        # Read through an unsigned char pointer so every byte is 0..255
        byte_ptr = self.field('ob_sval').address.cast(_type_unsigned_char_ptr)
        chars = []
        for index in safe_range(length):
            chars.append(chr(byte_ptr[index]))
        return ''.join(chars)

    def proxyval(self, visited):
        """The proxy value is the string itself."""
        return str(self)
class PyTupleObjectPtr(PyObjectPtr):
    """Wrapper around a gdb.Value that is a PyTupleObject*."""
    _typename = 'PyTupleObject'

    def __getitem__(self, i):
        # Get the gdb.Value for the (PyObject*) with the given index:
        return self.field('ob_item')[i]

    def proxyval(self, visited):
        """Return a local tuple holding the proxies of the ob_size items."""
        # Guard against infinite loops:
        address = self.as_address()
        if address in visited:
            return ProxyAlreadyVisited('(...)')
        visited.add(address)

        proxies = []
        for index in safe_range(int_from_int(self.field('ob_size'))):
            pyop_item = PyObjectPtr.from_pyobject_ptr(self[index])
            proxies.append(pyop_item.proxyval(visited))
        return tuple(proxies)

    def write_repr(self, out, visited):
        """
        Write "(item, item, ...)" to the file-like object out, with the
        trailing comma that a one-element tuple needs.
        """
        # Guard against infinite loops:
        address = self.as_address()
        if address in visited:
            out.write('(...)')
            return
        visited.add(address)

        out.write('(')
        for index in safe_range(int_from_int(self.field('ob_size'))):
            if index > 0:
                out.write(', ')
            pyop_item = PyObjectPtr.from_pyobject_ptr(self[index])
            pyop_item.write_repr(out, visited)

        closing = ',)' if self.field('ob_size') == 1 else ')'
        out.write(closing)
class PyTypeObjectPtr(PyObjectPtr):
    """
    Wrapper around a gdb.Value that is a PyTypeObject*.

    Adds nothing beyond the struct name used for casting.
    """
    _typename = 'PyTypeObject'
class PyUnicodeObjectPtr(PyObjectPtr):
    """Wrapper around a gdb.Value that is a PyUnicodeObject*."""
    _typename = 'PyUnicodeObject'

    def proxyval(self, visited):
        """Return the string as a local unicode instance."""
        # From unicodeobject.h:
        #     Py_ssize_t length;  /* Length of raw Unicode data in buffer */
        #     Py_UNICODE *str;    /* Raw Unicode buffer */
        length = long(self.field('length'))
        buf = self.field('str')

        # First read every Py_UNICODE as an int; these are either UCS-2
        # or UCS-4 code points:
        code_points = []
        for index in safe_range(length):
            code_points.append(int(buf[index]))

        # Then turn the code points into characters and join them into a
        # local unicode instance:
        chars = []
        for code_point in code_points:
            chars.append(unichr(code_point))
        return u''.join(chars)
def int_from_int(gdbval):
    """Convert gdbval to a Python int by parsing its str() form."""
    text = str(gdbval)
    return int(text)
def stringify(val, pretty=False):
    """
    Return a string form of val.

    val: any Python value
    pretty: if true use pprint.pformat, which can be nicer but can lead
    to very long results; by default use repr(), which puts everything
    on one line

    This function isolates the choice between the two.
    """
    if pretty:
        from pprint import pformat
        return pformat(val)
    return repr(val)
class PyObjectPtrPrinter:
    "Prints a (PyObject*)"

    def __init__ (self, gdbval):
        # gdbval: the gdb.Value to be printed
        self.gdbval = gdbval

    def to_string (self):
        """
        Return the repr of the wrapped value, truncated to MAX_OUTPUT_LEN.

        The truncated repr is used rather than generating the full proxy
        value and stringifying it, since doing so could be expensive.
        """
        pyop = PyObjectPtr.from_pyobject_ptr(self.gdbval)
        return pyop.get_truncated_repr(MAX_OUTPUT_LEN)
def pretty_printer_lookup(gdbval):
    """
    Return a PyObjectPtrPrinter for gdbval if it is a pointer to a
    PyObject or a PyFrameObject (qualifiers ignored), otherwise None.
    """
    gdb_type = gdbval.type.unqualified()
    if gdb_type.code != gdb.TYPE_CODE_PTR:
        return None

    target_name = str(gdb_type.target().unqualified())
    if target_name in ("PyObject", "PyFrameObject"):
        return PyObjectPtrPrinter(gdbval)
    return None
"""
During development, I've been manually invoking the code in this way:
(gdb) python
import sys
sys.path.append('/home/david/coding/python-gdb')
import libpython
end
then reloading it after each edit like this:
(gdb) python reload(libpython)
The following code should ensure that the prettyprinter is registered
if the code is autoloaded by gdb when visiting libpython.so, provided
that this python file is installed to the same path as the library (or its
.debug file) plus a "-gdb.py" suffix, e.g:
/usr/lib/libpython2.6.so.1.0-gdb.py
/usr/lib/debug/usr/lib/libpython2.6.so.1.0.debug-gdb.py
"""
def register (obj):
    """
    Append pretty_printer_lookup to obj.pretty_printers.

    obj: the object to register with; None means the gdb module itself
    """
    # Identity test: there is only one None, and "==" would go through
    # whatever comparison the object defines
    if obj is None:
        obj = gdb

    # Wire up the pretty-printer
    obj.pretty_printers.append(pretty_printer_lookup)

register (gdb.current_objfile ())
class Frame(object):
    '''
    Wrapper for gdb.Frame, adding various methods
    '''
    def __init__(self, gdbframe):
        self._gdbframe = gdbframe

    def older(self):
        '''Return the Frame that called this one, or None.'''
        gdb_older = self._gdbframe.older()
        if not gdb_older:
            return None
        return Frame(gdb_older)

    def newer(self):
        '''Return the Frame called by this one, or None.'''
        gdb_newer = self._gdbframe.newer()
        if not gdb_newer:
            return None
        return Frame(gdb_newer)

    def select(self):
        '''Make the wrapped frame the selected one.'''
        self._gdbframe.select()

    def get_index(self):
        '''Calculate index of frame, starting at 0 for the newest frame within
        this thread'''
        # Count the steps needed to reach the newest frame:
        steps = 0
        newer = self.newer()
        while newer:
            steps += 1
            newer = newer.newer()
        return steps

    def is_evalframeex(self):
        '''Return True if this is a normal (non-inline) frame of the
        function PyEval_EvalFrameEx, else False.'''
        function = self._gdbframe.function()
        if not function:
            return False
        if function.name != 'PyEval_EvalFrameEx':
            return False

        # I believe we also need to filter on the inline
        # struct frame_id.inline_depth, only regarding frames with
        # an inline depth of 0 as actually being this function
        #
        # So we reject those with type gdb.INLINE_FRAME
        if self._gdbframe.type() == gdb.NORMAL_FRAME:
            # We have a PyEval_EvalFrameEx frame:
            return True
        return False

    def get_pyop(self):
        '''Return a PyFrameObjectPtr for the variable "f" of the wrapped
        frame, or None if reading it raises ValueError.'''
        try:
            frame_var = self._gdbframe.read_var('f')
            return PyFrameObjectPtr.from_pyobject_ptr(frame_var)
        except ValueError:
            return None

    @classmethod
    def get_selected_frame(cls):
        '''Return a Frame for gdb's selected frame, or None.'''
        selected = gdb.selected_frame()
        if not selected:
            return None
        return Frame(selected)

    @classmethod
    def get_selected_python_frame(cls):
        '''Try to obtain the Frame for the python code in the selected frame,
        or None'''
        candidate = cls.get_selected_frame()
        while candidate:
            if candidate.is_evalframeex():
                return candidate
            candidate = candidate.older()

        # Not found:
        return None

    def print_summary(self):
        '''Write a summary of this frame to sys.stdout: its index and,
        for a readable python frame, its repr plus the current source
        line.'''
        if not self.is_evalframeex():
            sys.stdout.write('#%i\n' % self.get_index())
            return

        pyop = self.get_pyop()
        if not pyop:
            sys.stdout.write('#%i (unable to read python frame information)\n' % self.get_index())
            return

        sys.stdout.write('#%i %s\n' % (self.get_index(), pyop.get_truncated_repr(MAX_OUTPUT_LEN)))
        sys.stdout.write(pyop.current_line())
class PyList(gdb.Command):
    '''List the current Python source code, if any

    Use
       py-list START
    to list at a different line number within the python source.

    Use
       py-list START, END
    to list a specific range of lines within the python source.
    '''

    def __init__(self):
        gdb.Command.__init__ (self,
                              "py-list",
                              gdb.COMMAND_FILES,
                              gdb.COMPLETE_NONE)

    def invoke(self, args, from_tty):
        # Print lines [start, end] of the selected python frame's source
        # file, marking the current line with '>'.  With no arguments the
        # range is the five lines either side of the current line.
        import re

        start = None
        end = None

        # "START": list ten lines onwards from START
        m = re.match(r'\s*(\d+)\s*', args)
        if m:
            start = int(m.group(1))
            end = start + 10

        # "START, END": takes precedence over the single-number form
        m = re.match(r'\s*(\d+)\s*,\s*(\d+)\s*', args)
        if m:
            start, end = map(int, m.groups())

        frame = Frame.get_selected_python_frame()
        if not frame:
            print('Unable to locate python frame')
            return

        pyop = frame.get_pyop()
        if not pyop:
            print('Unable to read information on python frame')
            return

        filename = pyop.filename()
        lineno = pyop.current_line_num()
        if lineno is None:
            # Happens when the frame information is optimized out; there
            # is then no real filename to list either
            print('Unable to read python frame line number')
            return

        if start is None:
            start = lineno - 5
            end = lineno + 5

        if start < 1:
            start = 1

        try:
            with open(filename, 'r') as f:
                all_lines = f.readlines()
        except IOError as err:
            # The source may not exist on the machine running gdb
            print('Unable to open %s: %s' % (filename, err))
            return

        # start and end are 1-based, all_lines is 0-based;
        # so [start-1:end] as a python slice gives us [start, end] as a
        # closed interval
        for i, line in enumerate(all_lines[start-1:end]):
            linestr = str(i+start)
            # Highlight current line:
            if i + start == lineno:
                linestr = '>' + linestr
            sys.stdout.write('%4s    %s' % (linestr, line))


# ...and register the command:
PyList()
def move_in_stack(move_up):
    '''
    Select and summarise the nearest python frame above (move_up true)
    or below (move_up false) the selected python frame, for the
    py-up/py-down commands.

    Prints a message instead if there is no such frame.
    '''
    current = Frame.get_selected_python_frame()
    while current:
        neighbour = current.older() if move_up else current.newer()
        if not neighbour:
            break

        if neighbour.is_evalframeex():
            # Result:
            neighbour.select()
            neighbour.print_summary()
            return

        # Not a python frame; keep walking in the same direction
        current = neighbour

    if move_up:
        message = 'Unable to find an older python frame'
    else:
        message = 'Unable to find a newer python frame'
    print(message)
class PyUp(gdb.Command):
    'Select and print the python stack frame that called this one (if any)'

    def __init__(self):
        # Registers the command with gdb under the name "py-up"
        super(PyUp, self).__init__("py-up",
                                   gdb.COMMAND_STACK,
                                   gdb.COMPLETE_NONE)

    def invoke(self, args, from_tty):
        # Arguments are ignored; just move towards older frames
        move_in_stack(move_up=True)

PyUp()
class PyDown(gdb.Command):
    'Select and print the python stack frame called by this one (if any)'

    def __init__(self):
        # Registers the command with gdb under the name "py-down"
        super(PyDown, self).__init__("py-down",
                                     gdb.COMMAND_STACK,
                                     gdb.COMPLETE_NONE)

    def invoke(self, args, from_tty):
        # Arguments are ignored; just move towards newer frames
        move_in_stack(move_up=False)

PyDown()
class PyBacktrace(gdb.Command):
    'Display the current python frame and all the frames within its call stack (if any)'

    def __init__(self):
        # Registers the command with gdb under the name "py-bt"
        super(PyBacktrace, self).__init__("py-bt",
                                          gdb.COMMAND_STACK,
                                          gdb.COMPLETE_NONE)

    def invoke(self, args, from_tty):
        # Walk from the selected python frame towards the oldest frame,
        # summarising only the frames that are running python code
        current = Frame.get_selected_python_frame()
        while current:
            if current.is_evalframeex():
                current.print_summary()
            current = current.older()

PyBacktrace()
class PyPrint(gdb.Command):
    'Look up the given python variable name, and print it'

    def __init__(self):
        # Registers the command with gdb under the name "py-print"
        super(PyPrint, self).__init__("py-print",
                                      gdb.COMMAND_DATA,
                                      gdb.COMPLETE_NONE)

    def invoke(self, args, from_tty):
        # The whole argument string is taken as the variable name
        name = str(args)

        frame = Frame.get_selected_python_frame()
        if not frame:
            print('Unable to locate python frame')
            return

        pyop_frame = frame.get_pyop()
        if not pyop_frame:
            print('Unable to read information on python frame')
            return

        pyop_var, scope = pyop_frame.get_var_by_name(name)
        if not pyop_var:
            print('%r not found' % name)
            return

        summary = '%s %r = %s' % (scope,
                                  name,
                                  pyop_var.get_truncated_repr(MAX_OUTPUT_LEN))
        print(summary)

PyPrint()
class PyLocals(gdb.Command):
    'Print the local variables of the selected python frame'

    def __init__(self):
        # Registers the command with gdb under the name "py-locals"
        gdb.Command.__init__ (self,
                              "py-locals",
                              gdb.COMMAND_DATA,
                              gdb.COMPLETE_NONE)

    def invoke(self, args, from_tty):
        # The command takes no arguments; args is ignored
        frame = Frame.get_selected_python_frame()
        if not frame:
            print('Unable to locate python frame')
            return

        pyop_frame = frame.get_pyop()
        if not pyop_frame:
            print('Unable to read information on python frame')
            return

        # One "name = value" line per local, values truncated to
        # MAX_OUTPUT_LEN
        for pyop_name, pyop_value in pyop_frame.iter_locals():
            print('%s = %s'
                  % (pyop_name.proxyval(set()),
                     pyop_value.get_truncated_repr(MAX_OUTPUT_LEN)))

PyLocals()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment