diff options
-rw-r--r-- | .bzrignore | 1 | ||||
-rw-r--r-- | .hgignore | 1 | ||||
-rw-r--r-- | Lib/test/gdb_sample.py | 12 | ||||
-rw-r--r-- | Lib/test/test_gdb.py | 657 | ||||
-rw-r--r-- | Makefile.pre.in | 15 | ||||
-rw-r--r-- | Misc/ACKS | 2 | ||||
-rw-r--r-- | Tools/README | 3 | ||||
-rw-r--r-- | Tools/gdb/libpython.py | 1392 |
8 files changed, 2081 insertions, 2 deletions
@@ -14,6 +14,7 @@ platform pyconfig.h libpython*.a python.exe +python-gdb.py reflog.txt tags TAGS @@ -31,6 +31,7 @@ Modules/Setup.local Modules/config.c Parser/pgen core +python-gdb.py syntax: glob libpython*.a diff --git a/Lib/test/gdb_sample.py b/Lib/test/gdb_sample.py new file mode 100644 index 0000000..a732b25 --- /dev/null +++ b/Lib/test/gdb_sample.py @@ -0,0 +1,12 @@ +# Sample script for use by test_gdb.py + +def foo(a, b, c): + bar(a, b, c) + +def bar(a, b, c): + baz(a, b, c) + +def baz(*args): + print(42) + +foo(1, 2, 3) diff --git a/Lib/test/test_gdb.py b/Lib/test/test_gdb.py new file mode 100644 index 0000000..57e8aa0 --- /dev/null +++ b/Lib/test/test_gdb.py @@ -0,0 +1,657 @@ +# Verify that gdb can pretty-print the various PyObject* types +# +# The code for testing gdb was adapted from similar work in Unladen Swallow's +# Lib/test/test_jit_gdb.py + +import os +import re +import subprocess +import sys +import unittest + +from test.support import run_unittest + +try: + gdb_version, _ = subprocess.Popen(["gdb", "--version"], + stdout=subprocess.PIPE).communicate() +except OSError: + # This is what "no gdb" looks like. There may, however, be other + # errors that manifest this way too. + raise unittest.SkipTest("Couldn't find gdb on the path") +gdb_version_number = re.search(b"^GNU gdb [^\d]*(\d+)\.", gdb_version) +if int(gdb_version_number.group(1)) < 7: + raise unittest.SkipTest("gdb versions before 7.0 didn't support python embedding" + " Saw:\n" + gdb_version) + +# Verify that "gdb" was built with the embedded python support enabled: +cmd = "--eval-command=python import sys; print sys.version_info" +p = subprocess.Popen(["gdb", "--batch", cmd], + stdout=subprocess.PIPE) +gdbpy_version, _ = p.communicate() +if gdbpy_version == '': + raise unittest.SkipTest("gdb not built with embedded python support") + + +class DebuggerTests(unittest.TestCase): + + """Test that the debugger can debug Python.""" + + def run_gdb(self, *args): + """Runs gdb with the command line given by *args. + + Returns its stdout, stderr + """ + out, err = subprocess.Popen( + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE, + ).communicate() + return out, err + + def get_stack_trace(self, source=None, script=None, + breakpoint='PyObject_Print', + cmds_after_breakpoint=None, + import_site=False): + ''' + Run 'python -c SOURCE' under gdb with a breakpoint. + + Support injecting commands after the breakpoint is reached + + Returns the stdout from gdb + + cmds_after_breakpoint: if provided, a list of strings: gdb commands + ''' + # We use "set breakpoint pending yes" to avoid blocking with a: + # Function "foo" not defined. + # Make breakpoint pending on future shared library load? (y or [n]) + # error, which typically happens python is dynamically linked (the + # breakpoints of interest are to be found in the shared library) + # When this happens, we still get: + # Function "PyObject_Print" not defined. + # emitted to stderr each time, alas. + + # Initially I had "--eval-command=continue" here, but removed it to + # avoid repeated print breakpoints when traversing hierarchical data + # structures + + # Generate a list of commands in gdb's language: + commands = ['set breakpoint pending yes', + 'break %s' % breakpoint, + 'run'] + if cmds_after_breakpoint: + commands += cmds_after_breakpoint + else: + commands += ['backtrace'] + + # print commands + + # Use "commands" to generate the arguments with which to invoke "gdb": + args = ["gdb", "--batch"] + args += ['--eval-command=%s' % cmd for cmd in commands] + args += ["--args", + sys.executable] + + if not import_site: + # -S suppresses the default 'import site' + args += ["-S"] + + if source: + args += ["-c", source] + elif script: + args += [script] + + # print args + # print ' '.join(args) + + # Use "args" to invoke gdb, capturing stdout, stderr: + out, err = self.run_gdb(*args) + + # Ignore some noise on stderr due to the pending breakpoint: + err = err.replace('Function "%s" not defined.\n' % breakpoint, '') + + # Ensure no unexpected error messages: + self.assertEquals(err, '') + + return out + + def get_gdb_repr(self, source, + cmds_after_breakpoint=None, + import_site=False): + # Given an input python source representation of data, + # run "python -c'print DATA'" under gdb with a breakpoint on + # PyObject_Print and scrape out gdb's representation of the "op" + # parameter, and verify that the gdb displays the same string + # + # For a nested structure, the first time we hit the breakpoint will + # give us the top-level structure + gdb_output = self.get_stack_trace(source, breakpoint='PyObject_Print', + cmds_after_breakpoint=cmds_after_breakpoint, + import_site=import_site) + # gdb can insert additional '\n' and space characters in various places + # in its output, depending on the width of the terminal it's connected + # to (using its "wrap_here" function) + m = re.match('.*#0\s+PyObject_Print\s+\(\s*op\=\s*(.*?),\s+fp=.*\).*', + gdb_output, re.DOTALL) + if not m: + self.fail('Unexpected gdb output: %r\n%s' % (gdb_output, gdb_output)) + return m.group(1), gdb_output + + def assertEndsWith(self, actual, exp_end): + '''Ensure that the given "actual" string ends with "exp_end"''' + self.assert_(actual.endswith(exp_end), + msg='%r did not end with %r' % (actual, exp_end)) + + def assertMultilineMatches(self, actual, pattern): + m = re.match(pattern, actual, re.DOTALL) + self.assert_(m, + msg='%r did not match %r' % (actual, pattern)) + +class PrettyPrintTests(DebuggerTests): + def test_getting_backtrace(self): + gdb_output = self.get_stack_trace('print 42') + self.assertTrue('PyObject_Print' in gdb_output) + + def assertGdbRepr(self, val, cmds_after_breakpoint=None): + # Ensure that gdb's rendering of the value in a debugged process + # matches repr(value) in this process: + gdb_repr, gdb_output = self.get_gdb_repr('print ' + repr(val), + cmds_after_breakpoint) + self.assertEquals(gdb_repr, repr(val), gdb_output) + + def test_int(self): + 'Verify the pretty-printing of various "int" values' + self.assertGdbRepr(42) + self.assertGdbRepr(0) + self.assertGdbRepr(-7) + self.assertGdbRepr(sys.maxint) + self.assertGdbRepr(-sys.maxint) + + def test_long(self): + 'Verify the pretty-printing of various "long" values' + self.assertGdbRepr(0) + self.assertGdbRepr(1000000000000) + self.assertGdbRepr(-1) + self.assertGdbRepr(-1000000000000000) + + def test_singletons(self): + 'Verify the pretty-printing of True, False and None' + self.assertGdbRepr(True) + self.assertGdbRepr(False) + self.assertGdbRepr(None) + + def test_dicts(self): + 'Verify the pretty-printing of dictionaries' + self.assertGdbRepr({}) + self.assertGdbRepr({'foo': 'bar'}) + self.assertGdbRepr({'foo': 'bar', 'douglas':42}) + + def test_lists(self): + 'Verify the pretty-printing of lists' + self.assertGdbRepr([]) + self.assertGdbRepr(range(5)) + + def test_strings(self): + 'Verify the pretty-printing of strings' + self.assertGdbRepr('') + self.assertGdbRepr('And now for something hopefully the same') + self.assertGdbRepr('string with embedded NUL here \0 and then some more text') + self.assertGdbRepr('this is byte 255:\xff and byte 128:\x80') + + def test_tuples(self): + 'Verify the pretty-printing of tuples' + self.assertGdbRepr(tuple()) + self.assertGdbRepr((1,)) + self.assertGdbRepr(('foo', 'bar', 'baz')) + + def test_unicode(self): + 'Verify the pretty-printing of unicode values' + # Test the empty unicode string: + self.assertGdbRepr('') + + self.assertGdbRepr('hello world') + + # Test printing a single character: + # U+2620 SKULL AND CROSSBONES + self.assertGdbRepr('\u2620') + + # Test printing a Japanese unicode string + # (I believe this reads "mojibake", using 3 characters from the CJK + # Unified Ideographs area, followed by U+3051 HIRAGANA LETTER KE) + self.assertGdbRepr('\u6587\u5b57\u5316\u3051') + + # Test a character outside the BMP: + # U+1D121 MUSICAL SYMBOL C CLEF + # This is: + # UTF-8: 0xF0 0x9D 0x84 0xA1 + # UTF-16: 0xD834 0xDD21 + try: + # This will only work on wide-unicode builds: + self.assertGdbRepr(chr(0x1D121)) + except ValueError as e: + if e.message != 'chr() arg not in range(0x10000) (narrow Python build)': + raise e + + def test_sets(self): + 'Verify the pretty-printing of sets' + self.assertGdbRepr(set()) + self.assertGdbRepr(set(['a', 'b'])) + self.assertGdbRepr(set([4, 5, 6])) + + # Ensure that we handled sets containing the "dummy" key value, + # which happens on deletion: + gdb_repr, gdb_output = self.get_gdb_repr('''s = set(['a','b']) +s.pop() +print s''') + self.assertEquals(gdb_repr, "set(['b'])") + + def test_frozensets(self): + 'Verify the pretty-printing of frozensets' + self.assertGdbRepr(frozenset()) + self.assertGdbRepr(frozenset(['a', 'b'])) + self.assertGdbRepr(frozenset([4, 5, 6])) + + def test_exceptions(self): + # Test a RuntimeError + gdb_repr, gdb_output = self.get_gdb_repr(''' +try: + raise RuntimeError("I am an error") +except RuntimeError, e: + print e +''') + self.assertEquals(gdb_repr, + "exceptions.RuntimeError('I am an error',)") + + + # Test division by zero: + gdb_repr, gdb_output = self.get_gdb_repr(''' +try: + a = 1 / 0 +except ZeroDivisionError, e: + print e +''') + self.assertEquals(gdb_repr, + "exceptions.ZeroDivisionError('integer division or modulo by zero',)") + + def test_classic_class(self): + 'Verify the pretty-printing of classic class instances' + gdb_repr, gdb_output = self.get_gdb_repr(''' +class Foo: + pass +foo = Foo() +foo.an_int = 42 +print foo''') + m = re.match(r'<Foo\(an_int=42\) at remote 0x[0-9a-f]+>', gdb_repr) + self.assertTrue(m, + msg='Unexpected classic-class rendering %r' % gdb_repr) + + def test_modern_class(self): + 'Verify the pretty-printing of new-style class instances' + gdb_repr, gdb_output = self.get_gdb_repr(''' +class Foo(object): + pass +foo = Foo() +foo.an_int = 42 +print foo''') + m = re.match(r'<Foo\(an_int=42\) at remote 0x[0-9a-f]+>', gdb_repr) + self.assertTrue(m, + msg='Unexpected new-style class rendering %r' % gdb_repr) + + def test_subclassing_list(self): + 'Verify the pretty-printing of an instance of a list subclass' + gdb_repr, gdb_output = self.get_gdb_repr(''' +class Foo(list): + pass +foo = Foo() +foo += [1, 2, 3] +foo.an_int = 42 +print foo''') + m = re.match(r'<Foo\(an_int=42\) at remote 0x[0-9a-f]+>', gdb_repr) + self.assertTrue(m, + msg='Unexpected new-style class rendering %r' % gdb_repr) + + def test_subclassing_tuple(self): + 'Verify the pretty-printing of an instance of a tuple subclass' + # This should exercise the negative tp_dictoffset code in the + # new-style class support + gdb_repr, gdb_output = self.get_gdb_repr(''' +class Foo(tuple): + pass +foo = Foo((1, 2, 3)) +foo.an_int = 42 +print foo''') + m = re.match(r'<Foo\(an_int=42\) at remote 0x[0-9a-f]+>', gdb_repr) + self.assertTrue(m, + msg='Unexpected new-style class rendering %r' % gdb_repr) + + def assertSane(self, source, corruption, exp_type='unknown'): + '''Run Python under gdb, corrupting variables in the inferior process + immediately before taking a backtrace. + + Verify that the variable's representation is the expected failsafe + representation''' + if corruption: + cmds_after_breakpoint=[corruption, 'backtrace'] + else: + cmds_after_breakpoint=['backtrace'] + + gdb_repr, gdb_output = \ + self.get_gdb_repr(source, + cmds_after_breakpoint=cmds_after_breakpoint) + self.assertTrue(re.match('<%s at remote 0x[0-9a-f]+>' % exp_type, + gdb_repr), + 'Unexpected gdb representation: %r\n%s' % \ + (gdb_repr, gdb_output)) + + def test_NULL_ptr(self): + 'Ensure that a NULL PyObject* is handled gracefully' + gdb_repr, gdb_output = ( + self.get_gdb_repr('print 42', + cmds_after_breakpoint=['set variable op=0', + 'backtrace']) + ) + + self.assertEquals(gdb_repr, '0x0') + + def test_NULL_ob_type(self): + 'Ensure that a PyObject* with NULL ob_type is handled gracefully' + self.assertSane('print 42', + 'set op->ob_type=0') + + def test_corrupt_ob_type(self): + 'Ensure that a PyObject* with a corrupt ob_type is handled gracefully' + self.assertSane('print 42', + 'set op->ob_type=0xDEADBEEF') + + def test_corrupt_tp_flags(self): + 'Ensure that a PyObject* with a type with corrupt tp_flags is handled' + self.assertSane('print 42', + 'set op->ob_type->tp_flags=0x0', + exp_type='int') + + def test_corrupt_tp_name(self): + 'Ensure that a PyObject* with a type with corrupt tp_name is handled' + self.assertSane('print 42', + 'set op->ob_type->tp_name=0xDEADBEEF') + + def test_NULL_instance_dict(self): + 'Ensure that a PyInstanceObject with with a NULL in_dict is handled' + self.assertSane(''' +class Foo: + pass +foo = Foo() +foo.an_int = 42 +print foo''', + 'set ((PyInstanceObject*)op)->in_dict = 0', + exp_type='Foo') + + def test_builtins_help(self): + 'Ensure that the new-style class _Helper in site.py can be handled' + # (this was the issue causing tracebacks in + # http://bugs.python.org/issue8032#msg100537 ) + + gdb_repr, gdb_output = self.get_gdb_repr('print __builtins__.help', import_site=True) + m = re.match(r'<_Helper at remote 0x[0-9a-f]+>', gdb_repr) + self.assertTrue(m, + msg='Unexpected rendering %r' % gdb_repr) + + def test_selfreferential_list(self): + '''Ensure that a reference loop involving a list doesn't lead proxyval + into an infinite loop:''' + gdb_repr, gdb_output = \ + self.get_gdb_repr("a = [3, 4, 5] ; a.append(a) ; print a") + + self.assertEquals(gdb_repr, '[3, 4, 5, [...]]') + + gdb_repr, gdb_output = \ + self.get_gdb_repr("a = [3, 4, 5] ; b = [a] ; a.append(b) ; print a") + + self.assertEquals(gdb_repr, '[3, 4, 5, [[...]]]') + + def test_selfreferential_dict(self): + '''Ensure that a reference loop involving a dict doesn't lead proxyval + into an infinite loop:''' + gdb_repr, gdb_output = \ + self.get_gdb_repr("a = {} ; b = {'bar':a} ; a['foo'] = b ; print a") + + self.assertEquals(gdb_repr, "{'foo': {'bar': {...}}}") + + def test_selfreferential_old_style_instance(self): + gdb_repr, gdb_output = \ + self.get_gdb_repr(''' +class Foo: + pass +foo = Foo() +foo.an_attr = foo +print foo''') + self.assertTrue(re.match('<Foo\(an_attr=<\.\.\.>\) at remote 0x[0-9a-f]+>', + gdb_repr), + 'Unexpected gdb representation: %r\n%s' % \ + (gdb_repr, gdb_output)) + + def test_selfreferential_new_style_instance(self): + gdb_repr, gdb_output = \ + self.get_gdb_repr(''' +class Foo(object): + pass +foo = Foo() +foo.an_attr = foo +print foo''') + self.assertTrue(re.match('<Foo\(an_attr=<\.\.\.>\) at remote 0x[0-9a-f]+>', + gdb_repr), + 'Unexpected gdb representation: %r\n%s' % \ + (gdb_repr, gdb_output)) + + gdb_repr, gdb_output = \ + self.get_gdb_repr(''' +class Foo(object): + pass +a = Foo() +b = Foo() +a.an_attr = b +b.an_attr = a +print a''') + self.assertTrue(re.match('<Foo\(an_attr=<Foo\(an_attr=<\.\.\.>\) at remote 0x[0-9a-f]+>\) at remote 0x[0-9a-f]+>', + gdb_repr), + 'Unexpected gdb representation: %r\n%s' % \ + (gdb_repr, gdb_output)) + + def test_truncation(self): + 'Verify that very long output is truncated' + gdb_repr, gdb_output = self.get_gdb_repr('print range(1000)') + self.assertEquals(gdb_repr, + "[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, " + "14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, " + "27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, " + "40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, " + "53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, " + "66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, " + "79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, " + "92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, " + "104, 105, 106, 107, 108, 109, 110, 111, 112, 113, " + "114, 115, 116, 117, 118, 119, 120, 121, 122, 123, " + "124, 125, 126, 127, 128, 129, 130, 131, 132, 133, " + "134, 135, 136, 137, 138, 139, 140, 141, 142, 143, " + "144, 145, 146, 147, 148, 149, 150, 151, 152, 153, " + "154, 155, 156, 157, 158, 159, 160, 161, 162, 163, " + "164, 165, 166, 167, 168, 169, 170, 171, 172, 173, " + "174, 175, 176, 177, 178, 179, 180, 181, 182, 183, " + "184, 185, 186, 187, 188, 189, 190, 191, 192, 193, " + "194, 195, 196, 197, 198, 199, 200, 201, 202, 203, " + "204, 205, 206, 207, 208, 209, 210, 211, 212, 213, " + "214, 215, 216, 217, 218, 219, 220, 221, 222, 223, " + "224, 225, 226...(truncated)") + self.assertEquals(len(gdb_repr), + 1024 + len('...(truncated)')) + + def test_builtin_function(self): + gdb_repr, gdb_output = self.get_gdb_repr('print len') + self.assertEquals(gdb_repr, '<built-in function len>') + + def test_builtin_method(self): + gdb_repr, gdb_output = self.get_gdb_repr('import sys; print sys.stdout.readlines') + self.assertTrue(re.match('<built-in method readlines of file object at remote 0x[0-9a-f]+>', + gdb_repr), + 'Unexpected gdb representation: %r\n%s' % \ + (gdb_repr, gdb_output)) + + def test_frames(self): + gdb_output = self.get_stack_trace(''' +def foo(a, b, c): + pass + +foo(3, 4, 5) +print foo.__code__''', + breakpoint='PyObject_Print', + cmds_after_breakpoint=['print (PyFrameObject*)(((PyCodeObject*)op)->co_zombieframe)'] + ) + self.assertTrue(re.match(r'.*\s+\$1 =\s+Frame 0x[0-9a-f]+, for file <string>, line 3, in foo \(\)\s+.*', + gdb_output, + re.DOTALL), + 'Unexpected gdb representation: %r\n%s' % (gdb_output, gdb_output)) + +class PyListTests(DebuggerTests): + def assertListing(self, expected, actual): + self.assertEndsWith(actual, expected) + + def test_basic_command(self): + 'Verify that the "py-list" command works' + bt = self.get_stack_trace(script='Lib/test/test_gdb_sample.py', + cmds_after_breakpoint=['py-list']) + + self.assertListing(''' + 5 + 6 def bar(a, b, c): + 7 baz(a, b, c) + 8 + 9 def baz(*args): + >10 print(42) + 11 + 12 foo(1, 2, 3) +''', + bt) + + def test_one_abs_arg(self): + 'Verify the "py-list" command with one absolute argument' + bt = self.get_stack_trace(script='Lib/test/test_gdb_sample.py', + cmds_after_breakpoint=['py-list 9']) + + self.assertListing(''' + 9 def baz(*args): + >10 print(42) + 11 + 12 foo(1, 2, 3) +''', + bt) + + def test_two_abs_args(self): + 'Verify the "py-list" command with two absolute arguments' + bt = self.get_stack_trace(script='Lib/test/test_gdb_sample.py', + cmds_after_breakpoint=['py-list 1,3']) + + self.assertListing(''' + 1 # Sample script for use by test_gdb.py + 2 + 3 def foo(a, b, c): +''', + bt) + +class StackNavigationTests(DebuggerTests): + def test_pyup_command(self): + 'Verify that the "py-up" command works' + bt = self.get_stack_trace(script='Lib/test/test_gdb_sample.py', + cmds_after_breakpoint=['py-up']) + self.assertMultilineMatches(bt, + r'''^.* +#[0-9]+ Frame 0x[0-9a-f]+, for file Lib/test/test_gdb_sample.py, line 7, in bar \(a=1, b=2, c=3\) + baz\(a, b, c\) +$''') + + def test_down_at_bottom(self): + 'Verify handling of "py-down" at the bottom of the stack' + bt = self.get_stack_trace(script='Lib/test/test_gdb_sample.py', + cmds_after_breakpoint=['py-down']) + self.assertEndsWith(bt, + 'Unable to find a newer python frame\n') + + def test_up_at_top(self): + 'Verify handling of "py-up" at the top of the stack' + bt = self.get_stack_trace(script='Lib/test/test_gdb_sample.py', + cmds_after_breakpoint=['py-up'] * 4) + self.assertEndsWith(bt, + 'Unable to find an older python frame\n') + + def test_up_then_down(self): + 'Verify "py-up" followed by "py-down"' + bt = self.get_stack_trace(script='Lib/test/test_gdb_sample.py', + cmds_after_breakpoint=['py-up', 'py-down']) + self.assertMultilineMatches(bt, + r'''^.* +#[0-9]+ Frame 0x[0-9a-f]+, for file Lib/test/test_gdb_sample.py, line 7, in bar \(a=1, b=2, c=3\) + baz\(a, b, c\) +#[0-9]+ Frame 0x[0-9a-f]+, for file Lib/test/test_gdb_sample.py, line 10, in baz \(args=\(1, 2, 3\)\) + print\(42\) +$''') + +class PyBtTests(DebuggerTests): + def test_basic_command(self): + 'Verify that the "py-bt" command works' + bt = self.get_stack_trace(script='Lib/test/test_gdb_sample.py', + cmds_after_breakpoint=['py-bt']) + self.assertMultilineMatches(bt, + r'''^.* +#[0-9]+ Frame 0x[0-9a-f]+, for file Lib/test/test_gdb_sample.py, line 7, in bar \(a=1, b=2, c=3\) + baz\(a, b, c\) +#[0-9]+ Frame 0x[0-9a-f]+, for file Lib/test/test_gdb_sample.py, line 4, in foo \(a=1, b=2, c=3\) + bar\(a, b, c\) +#[0-9]+ Frame 0x[0-9a-f]+, for file Lib/test/test_gdb_sample.py, line 12, in <module> \(\) +foo\(1, 2, 3\) +''') + +class PyPrintTests(DebuggerTests): + def test_basic_command(self): + 'Verify that the "py-print" command works' + bt = self.get_stack_trace(script='Lib/test/test_gdb_sample.py', + cmds_after_breakpoint=['py-print args']) + self.assertMultilineMatches(bt, + r".*\nlocal 'args' = \(1, 2, 3\)\n.*") + + def test_print_after_up(self): + bt = self.get_stack_trace(script='Lib/test/test_gdb_sample.py', + cmds_after_breakpoint=['py-up', 'py-print c', 'py-print b', 'py-print a']) + self.assertMultilineMatches(bt, + r".*\nlocal 'c' = 3\nlocal 'b' = 2\nlocal 'a' = 1\n.*") + + def test_printing_global(self): + bt = self.get_stack_trace(script='Lib/test/test_gdb_sample.py', + cmds_after_breakpoint=['py-print __name__']) + self.assertMultilineMatches(bt, + r".*\nglobal '__name__' = '__main__'\n.*") + + def test_printing_builtin(self): + bt = self.get_stack_trace(script='Lib/test/test_gdb_sample.py', + cmds_after_breakpoint=['py-print len']) + self.assertMultilineMatches(bt, + r".*\nbuiltin 'len' = <built-in function len>\n.*") + +class PyLocalsTests(DebuggerTests): + def test_basic_command(self): + bt = self.get_stack_trace(script='Lib/test/test_gdb_sample.py', + cmds_after_breakpoint=['py-locals']) + self.assertMultilineMatches(bt, + r".*\nargs = \(1, 2, 3\)\n.*") + + def test_locals_after_up(self): + bt = self.get_stack_trace(script='Lib/test/test_gdb_sample.py', + cmds_after_breakpoint=['py-up', 'py-locals']) + self.assertMultilineMatches(bt, + r".*\na = 1\nb = 2\nc = 3\n.*") + +def test_main(): + run_unittest(#PrettyPrintTests, + #PyListTests, + #StackNavigationTests, + #PyBtTests, + #PyPrintTests, + #PyLocalsTests + ) + +if __name__ == "__main__": + test_main() diff --git a/Makefile.pre.in b/Makefile.pre.in index c74b200..eaff822 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -378,7 +378,7 @@ LIBRARY_OBJS= \ # Default target all: build_all -build_all: $(BUILDPYTHON) oldsharedmods sharedmods +build_all: $(BUILDPYTHON) oldsharedmods sharedmods gdbhooks # Compile a binary with gcc profile guided optimization. profile-opt: @@ -451,6 +451,17 @@ libpython$(VERSION).dylib: $(LIBRARY_OBJS) libpython$(VERSION).sl: $(LIBRARY_OBJS) $(LDSHARED) $(LDFLAGS) -o $@ $(LIBRARY_OBJS) $(MODLIBS) $(SHLIBS) $(LIBC) $(LIBM) $(LDLAST) +# Copy up the gdb python hooks into a position where they can be automatically +# loaded by gdb during Lib/test/test_gdb.py +# +# Distributors are likely to want to install this somewhere else e.g. relative +# to the stripped DWARF data for the shared library. +gdbhooks: $(BUILDPYTHON)-gdb.py + +SRC_GDB_HOOKS=$(srcdir)/Tools/gdb/libpython.py +$(BUILDPYTHON)-gdb.py: $(SRC_GDB_HOOKS) + $(INSTALL_DATA) $(SRC_GDB_HOOKS) $(BUILDPYTHON)-gdb.py + # This rule is here for OPENSTEP/Rhapsody/MacOSX. It builds a temporary # minimal framework (not including the Lib directory and such) in the current # directory. @@ -1187,6 +1198,7 @@ distclean: clobber -rm -f core Makefile Makefile.pre config.status \ Modules/Setup Modules/Setup.local Modules/Setup.config \ Misc/python.pc + -rm -f python*-gdb.py find $(srcdir) '(' -name '*.fdc' -o -name '*~' \ -o -name '[@,#]*' -o -name '*.old' \ -o -name '*.orig' -o -name '*.rej' \ @@ -1250,5 +1262,6 @@ Python/thread.o: @THREADHEADERS@ .PHONY: frameworkinstallmaclib frameworkinstallapps frameworkinstallunixtools .PHONY: frameworkaltinstallunixtools recheck autoconf clean clobber distclean .PHONY: smelly funny patchcheck +.PHONY: gdbhooks # IF YOU PUT ANYTHING HERE IT WILL GO AWAY @@ -484,7 +484,7 @@ Nick Maclaren Don MacMillen Steve Majewski Grzegorz Makarewicz -Dave Malcolm +David Malcolm Ken Manheimer Vladimir Marangozov David Marek diff --git a/Tools/README b/Tools/README index 8cde228..9a93bfa 100644 --- a/Tools/README +++ b/Tools/README @@ -16,6 +16,9 @@ faqwiz FAQ Wizard. freeze Create a stand-alone executable from a Python program. +gdb Python code to be run inside gdb, to make it easier to + debug Python itself (by David Malcolm). + i18n Tools for internationalization. pygettext.py parses Python source code and generates .pot files, and msgfmt.py generates a binary message catalog diff --git a/Tools/gdb/libpython.py b/Tools/gdb/libpython.py new file mode 100644 index 0000000..26b0704 --- /dev/null +++ b/Tools/gdb/libpython.py @@ -0,0 +1,1392 @@ +#!/usr/bin/python +''' +From gdb 7 onwards, gdb's build can be configured --with-python, allowing gdb +to be extended with Python code e.g. for library-specific data visualizations, +such as for the C++ STL types. Documentation on this API can be seen at: +http://sourceware.org/gdb/current/onlinedocs/gdb/Python-API.html + + +This python module deals with the case when the process being debugged (the +"inferior process" in gdb parlance) is itself python, or more specifically, +linked against libpython. In this situation, almost every item of data is a +(PyObject*), and having the debugger merely print their addresses is not very +enlightening. + +This module embeds knowledge about the implementation details of libpython so +that we can emit useful visualizations e.g. a string, a list, a dict, a frame +giving file/line information and the state of local variables + +In particular, given a gdb.Value corresponding to a PyObject* in the inferior +process, we can generate a "proxy value" within the gdb process. For example, +given a PyObject* in the inferior process that is in fact a PyListObject* +holding three PyObject* that turn out to be PyStringObject* instances, we can +generate a proxy value within the gdb process that is a list of strings: + ["foo", "bar", "baz"] + +Doing so can be expensive for complicated graphs of objects, and could take +some time, so we also have a "write_repr" method that writes a representation +of the data to a file-like object. This allows us to stop the traversal by +having the file-like object raise an exception if it gets too much data. + +With both "proxyval" and "write_repr" we keep track of the set of all addresses +visited so far in the traversal, to avoid infinite recursion due to cycles in +the graph of object references. + +We try to defer gdb.lookup_type() invocations for python types until as late as +possible: for a dynamically linked python binary, when the process starts in +the debugger, the libpython.so hasn't been dynamically loaded yet, so none of +the type names are known to the debugger + +The module also extends gdb with some python-specific commands. +''' +from __future__ import with_statement +import gdb + +# Look up the gdb.Type for some standard types: +_type_char_ptr = gdb.lookup_type('char').pointer() # char* +_type_unsigned_char_ptr = gdb.lookup_type('unsigned char').pointer() # unsigned char* +_type_void_ptr = gdb.lookup_type('void').pointer() # void* +_type_size_t = gdb.lookup_type('size_t') + +SIZEOF_VOID_P = _type_void_ptr.sizeof + + +Py_TPFLAGS_HEAPTYPE = (1L << 9) + +Py_TPFLAGS_INT_SUBCLASS = (1L << 23) +Py_TPFLAGS_LONG_SUBCLASS = (1L << 24) +Py_TPFLAGS_LIST_SUBCLASS = (1L << 25) +Py_TPFLAGS_TUPLE_SUBCLASS = (1L << 26) +Py_TPFLAGS_STRING_SUBCLASS = (1L << 27) +Py_TPFLAGS_UNICODE_SUBCLASS = (1L << 28) +Py_TPFLAGS_DICT_SUBCLASS = (1L << 29) +Py_TPFLAGS_BASE_EXC_SUBCLASS = (1L << 30) +Py_TPFLAGS_TYPE_SUBCLASS = (1L << 31) + + +MAX_OUTPUT_LEN=1024 + +class NullPyObjectPtr(RuntimeError): + pass + + +def safety_limit(val): + # Given a integer value from the process being debugged, limit it to some + # safety threshold so that arbitrary breakage within said process doesn't + # break the gdb process too much (e.g. sizes of iterations, sizes of lists) + return min(val, 1000) + + +def safe_range(val): + # As per range, but don't trust the value too much: cap it to a safety + # threshold in case the data was corrupted + return xrange(safety_limit(val)) + + +class StringTruncated(RuntimeError): + pass + +class TruncatedStringIO(object): + '''Similar to cStringIO, but can truncate the output by raising a + StringTruncated exception''' + def __init__(self, maxlen=None): + self._val = '' + self.maxlen = maxlen + + def write(self, data): + if self.maxlen: + if len(data) + len(self._val) > self.maxlen: + # Truncation: + self._val += data[0:self.maxlen - len(self._val)] + raise StringTruncated() + + self._val += data + + def getvalue(self): + return self._val + +class PyObjectPtr(object): + """ + Class wrapping a gdb.Value that's a either a (PyObject*) within the + inferior process, or some subclass pointer e.g. (PyStringObject*) + + There will be a subclass for every refined PyObject type that we care + about. + + Note that at every stage the underlying pointer could be NULL, point + to corrupt data, etc; this is the debugger, after all. + """ + _typename = 'PyObject' + + def __init__(self, gdbval, cast_to=None): + if cast_to: + self._gdbval = gdbval.cast(cast_to) + else: + self._gdbval = gdbval + + def field(self, name): + ''' + Get the gdb.Value for the given field within the PyObject, coping with + some python 2 versus python 3 differences. + + Various libpython types are defined using the "PyObject_HEAD" and + "PyObject_VAR_HEAD" macros. + + In Python 2, this these are defined so that "ob_type" and (for a var + object) "ob_size" are fields of the type in question. + + In Python 3, this is defined as an embedded PyVarObject type thus: + PyVarObject ob_base; + so that the "ob_size" field is located insize the "ob_base" field, and + the "ob_type" is most easily accessed by casting back to a (PyObject*). + ''' + if self.is_null(): + raise NullPyObjectPtr(self) + + if name == 'ob_type': + pyo_ptr = self._gdbval.cast(PyObjectPtr.get_gdb_type()) + return pyo_ptr.dereference()[name] + + if name == 'ob_size': + try: + # Python 2: + return self._gdbval.dereference()[name] + except RuntimeError: + # Python 3: + return self._gdbval.dereference()['ob_base'][name] + + # General case: look it up inside the object: + return self._gdbval.dereference()[name] + + def pyop_field(self, name): + ''' + Get a PyObjectPtr for the given PyObject* field within this PyObject, + coping with some python 2 versus python 3 differences. + ''' + return PyObjectPtr.from_pyobject_ptr(self.field(name)) + + def write_field_repr(self, name, out, visited): + ''' + Extract the PyObject* field named "name", and write its representation + to file-like object "out" + ''' + field_obj = self.pyop_field(name) + field_obj.write_repr(out, visited) + + def get_truncated_repr(self, maxlen): + ''' + Get a repr-like string for the data, but truncate it at "maxlen" bytes + (ending the object graph traversal as soon as you do) + ''' + out = TruncatedStringIO(maxlen) + try: + self.write_repr(out, set()) + except StringTruncated: + # Truncation occurred: + return out.getvalue() + '...(truncated)' + + # No truncation occurred: + return out.getvalue() + + def type(self): + return PyTypeObjectPtr(self.field('ob_type')) + + def is_null(self): + return 0 == long(self._gdbval) + + def is_optimized_out(self): + ''' + Is the value of the underlying PyObject* visible to the debugger? + + This can vary with the precise version of the compiler used to build + Python, and the precise version of gdb. + + See e.g. https://bugzilla.redhat.com/show_bug.cgi?id=556975 with + PyEval_EvalFrameEx's "f" + ''' + return self._gdbval.is_optimized_out + + def safe_tp_name(self): + try: + return self.type().field('tp_name').string() + except NullPyObjectPtr: + # NULL tp_name? + return 'unknown' + except RuntimeError: + # Can't even read the object at all? + return 'unknown' + + def proxyval(self, visited): + ''' + Scrape a value from the inferior process, and try to represent it + within the gdb process, whilst (hopefully) avoiding crashes when + the remote data is corrupt. + + Derived classes will override this. + + For example, a PyIntObject* with ob_ival 42 in the inferior process + should result in an int(42) in this process. + + visited: a set of all gdb.Value pyobject pointers already visited + whilst generating this value (to guard against infinite recursion when + visiting object graphs with loops). Analogous to Py_ReprEnter and + Py_ReprLeave + ''' + + class FakeRepr(object): + """ + Class representing a non-descript PyObject* value in the inferior + process for when we don't have a custom scraper, intended to have + a sane repr(). + """ + + def __init__(self, tp_name, address): + self.tp_name = tp_name + self.address = address + + def __repr__(self): + # For the NULL pointer, we have no way of knowing a type, so + # special-case it as per + # http://bugs.python.org/issue8032#msg100882 + if self.address == 0: + return '0x0' + return '<%s at remote 0x%x>' % (self.tp_name, self.address) + + return FakeRepr(self.safe_tp_name(), + long(self._gdbval)) + + def write_repr(self, out, visited): + ''' + Write a string representation of the value scraped from the inferior + process to "out", a file-like object. + ''' + # Default implementation: generate a proxy value and write its repr + # However, this could involve a lot of work for complicated objects, + # so for derived classes we specialize this + return out.write(repr(self.proxyval(visited))) + + @classmethod + def subclass_from_type(cls, t): + ''' + Given a PyTypeObjectPtr instance wrapping a gdb.Value that's a + (PyTypeObject*), determine the corresponding subclass of PyObjectPtr + to use + + Ideally, we would look up the symbols for the global types, but that + isn't working yet: + (gdb) python print gdb.lookup_symbol('PyList_Type')[0].value + Traceback (most recent call last): + File "<string>", line 1, in <module> + NotImplementedError: Symbol type not yet supported in Python scripts. + Error while executing Python code. + + For now, we use tp_flags, after doing some string comparisons on the + tp_name for some special-cases that don't seem to be visible through + flags + ''' + try: + tp_name = t.field('tp_name').string() + tp_flags = int(t.field('tp_flags')) + except RuntimeError: + # Handle any kind of error e.g. NULL ptrs by simply using the base + # class + return cls + + #print 'tp_flags = 0x%08x' % tp_flags + #print 'tp_name = %r' % tp_name + + name_map = {'bool': PyBoolObjectPtr, + 'classobj': PyClassObjectPtr, + 'instance': PyInstanceObjectPtr, + 'NoneType': PyNoneStructPtr, + 'frame': PyFrameObjectPtr, + 'set' : PySetObjectPtr, + 'frozenset' : PySetObjectPtr, + 'builtin_function_or_method' : PyCFunctionObjectPtr, + } + if tp_name in name_map: + return name_map[tp_name] + + if tp_flags & Py_TPFLAGS_HEAPTYPE: + return HeapTypeObjectPtr + + if tp_flags & Py_TPFLAGS_INT_SUBCLASS: + return PyIntObjectPtr + if tp_flags & Py_TPFLAGS_LONG_SUBCLASS: + return PyLongObjectPtr + if tp_flags & Py_TPFLAGS_LIST_SUBCLASS: + return PyListObjectPtr + if tp_flags & Py_TPFLAGS_TUPLE_SUBCLASS: + return PyTupleObjectPtr + if tp_flags & Py_TPFLAGS_STRING_SUBCLASS: + return PyStringObjectPtr + if tp_flags & Py_TPFLAGS_UNICODE_SUBCLASS: + return PyUnicodeObjectPtr + if tp_flags & Py_TPFLAGS_DICT_SUBCLASS: + return PyDictObjectPtr + if tp_flags & Py_TPFLAGS_BASE_EXC_SUBCLASS: + return PyBaseExceptionObjectPtr + #if tp_flags & Py_TPFLAGS_TYPE_SUBCLASS: + # return PyTypeObjectPtr + + # Use the base class: + return cls + + @classmethod + def from_pyobject_ptr(cls, gdbval): + ''' + Try to locate the appropriate derived class dynamically, and cast + the pointer accordingly. + ''' + try: + p = PyObjectPtr(gdbval) + cls = cls.subclass_from_type(p.type()) + return cls(gdbval, cast_to=cls.get_gdb_type()) + except RuntimeError: + # Handle any kind of error e.g. NULL ptrs by simply using the base + # class + pass + return cls(gdbval) + + @classmethod + def get_gdb_type(cls): + return gdb.lookup_type(cls._typename).pointer() + + def as_address(self): + return long(self._gdbval) + + +class ProxyAlreadyVisited(object): + ''' + Placeholder proxy to use when protecting against infinite recursion due to + loops in the object graph. + + Analogous to the values emitted by the users of Py_ReprEnter and Py_ReprLeave + ''' + def __init__(self, rep): + self._rep = rep + + def __repr__(self): + return self._rep + + +def _write_instance_repr(out, visited, name, pyop_attrdict, address): + '''Shared code for use by old-style and new-style classes: + write a representation to file-like object "out"''' + out.write('<') + out.write(name) + + # Write dictionary of instance attributes: + if isinstance(pyop_attrdict, PyDictObjectPtr): + out.write('(') + first = True + for pyop_arg, pyop_val in pyop_attrdict.iteritems(): + if not first: + out.write(', ') + first = False + out.write(pyop_arg.proxyval(visited)) + out.write('=') + pyop_val.write_repr(out, visited) + out.write(')') + out.write(' at remote 0x%x>' % address) + + +class InstanceProxy(object): + + def __init__(self, cl_name, attrdict, address): + self.cl_name = cl_name + self.attrdict = attrdict + self.address = address + + def __repr__(self): + if isinstance(self.attrdict, dict): + kwargs = ', '.join(["%s=%r" % (arg, val) + for arg, val in self.attrdict.iteritems()]) + return '<%s(%s) at remote 0x%x>' % (self.cl_name, + kwargs, self.address) + else: + return '<%s at remote 0x%x>' % (self.cl_name, + self.address) + +def _PyObject_VAR_SIZE(typeobj, nitems): + return ( ( typeobj.field('tp_basicsize') + + nitems * typeobj.field('tp_itemsize') + + (SIZEOF_VOID_P - 1) + ) & ~(SIZEOF_VOID_P - 1) + ).cast(_type_size_t) + +class HeapTypeObjectPtr(PyObjectPtr): + _typename = 'PyObject' + + def get_attr_dict(self): + ''' + Get the PyDictObject ptr representing the attribute dictionary + (or None if there's a problem) + ''' + try: + typeobj = self.type() + dictoffset = int_from_int(typeobj.field('tp_dictoffset')) + if dictoffset != 0: + if dictoffset < 0: + type_PyVarObject_ptr = gdb.lookup_type('PyVarObject').pointer() + tsize = int_from_int(self._gdbval.cast(type_PyVarObject_ptr)['ob_size']) + if tsize < 0: + tsize = -tsize + size = _PyObject_VAR_SIZE(typeobj, tsize) + dictoffset += size + assert dictoffset > 0 + assert dictoffset % SIZEOF_VOID_P == 0 + + dictptr = self._gdbval.cast(_type_char_ptr) + dictoffset + PyObjectPtrPtr = PyObjectPtr.get_gdb_type().pointer() + dictptr = dictptr.cast(PyObjectPtrPtr) + return PyObjectPtr.from_pyobject_ptr(dictptr.dereference()) + except RuntimeError: + # Corrupt data somewhere; fail safe + pass + + # Not found, or some kind of error: + return None + + def proxyval(self, visited): + ''' + Support for new-style classes. + + Currently we just locate the dictionary using a transliteration to + python of _PyObject_GetDictPtr, ignoring descriptors + ''' + # Guard against infinite loops: + if self.as_address() in visited: + return ProxyAlreadyVisited('<...>') + visited.add(self.as_address()) + + pyop_attr_dict = self.get_attr_dict() + if pyop_attr_dict: + attr_dict = pyop_attr_dict.proxyval(visited) + else: + attr_dict = {} + tp_name = self.safe_tp_name() + + # New-style class: + return InstanceProxy(tp_name, attr_dict, long(self._gdbval)) + + def write_repr(self, out, visited): + # Guard against infinite loops: + if self.as_address() in visited: + out.write('<...>') + return + visited.add(self.as_address()) + + pyop_attrdict = self.get_attr_dict() + _write_instance_repr(out, visited, + self.safe_tp_name(), pyop_attrdict, self.as_address()) + +class ProxyException(Exception): + def __init__(self, tp_name, args): + self.tp_name = tp_name + self.args = args + + def __repr__(self): + return '%s%r' % (self.tp_name, self.args) + +class PyBaseExceptionObjectPtr(PyObjectPtr): + """ + Class wrapping a gdb.Value that's a PyBaseExceptionObject* i.e. an exception + within the process being debugged. + """ + _typename = 'PyBaseExceptionObject' + + def proxyval(self, visited): + # Guard against infinite loops: + if self.as_address() in visited: + return ProxyAlreadyVisited('(...)') + visited.add(self.as_address()) + arg_proxy = self.pyop_field('args').proxyval(visited) + return ProxyException(self.safe_tp_name(), + arg_proxy) + + def write_repr(self, out, visited): + # Guard against infinite loops: + if self.as_address() in visited: + out.write('(...)') + return + visited.add(self.as_address()) + + out.write(self.safe_tp_name()) + self.write_field_repr('args', out, visited) + +class PyBoolObjectPtr(PyObjectPtr): + """ + Class wrapping a gdb.Value that's a PyBoolObject* i.e. one of the two + <bool> instances (Py_True/Py_False) within the process being debugged. + """ + _typename = 'PyBoolObject' + + def proxyval(self, visited): + if int_from_int(self.field('ob_ival')): + return True + else: + return False + + +class PyClassObjectPtr(PyObjectPtr): + """ + Class wrapping a gdb.Value that's a PyClassObject* i.e. a <classobj> + instance within the process being debugged. + """ + _typename = 'PyClassObject' + + +class BuiltInFunctionProxy(object): + def __init__(self, ml_name): + self.ml_name = ml_name + + def __repr__(self): + return "<built-in function %s>" % self.ml_name + +class BuiltInMethodProxy(object): + def __init__(self, ml_name, pyop_m_self): + self.ml_name = ml_name + self.pyop_m_self = pyop_m_self + + def __repr__(self): + return ('<built-in method %s of %s object at remote 0x%x>' + % (self.ml_name, + self.pyop_m_self.safe_tp_name(), + self.pyop_m_self.as_address()) + ) + +class PyCFunctionObjectPtr(PyObjectPtr): + """ + Class wrapping a gdb.Value that's a PyCFunctionObject* + (see Include/methodobject.h and Objects/methodobject.c) + """ + _typename = 'PyCFunctionObject' + + def proxyval(self, visited): + m_ml = self.field('m_ml') # m_ml is a (PyMethodDef*) + ml_name = m_ml['ml_name'].string() + + pyop_m_self = self.pyop_field('m_self') + if pyop_m_self.is_null(): + return BuiltInFunctionProxy(ml_name) + else: + return BuiltInMethodProxy(ml_name, pyop_m_self) + + +class PyCodeObjectPtr(PyObjectPtr): + """ + Class wrapping a gdb.Value that's a PyCodeObject* i.e. a <code> instance + within the process being debugged. + """ + _typename = 'PyCodeObject' + + def addr2line(self, addrq): + ''' + Get the line number for a given bytecode offset + + Analogous to PyCode_Addr2Line; translated from pseudocode in + Objects/lnotab_notes.txt + ''' + co_lnotab = self.pyop_field('co_lnotab').proxyval(set()) + + # Initialize lineno to co_firstlineno as per PyCode_Addr2Line + # not 0, as lnotab_notes.txt has it: + lineno = int_from_int(self.field('co_firstlineno')) + + addr = 0 + for addr_incr, line_incr in zip(co_lnotab[::2], co_lnotab[1::2]): + addr += ord(addr_incr) + if addr > addrq: + return lineno + lineno += ord(line_incr) + return lineno + + +class PyDictObjectPtr(PyObjectPtr): + """ + Class wrapping a gdb.Value that's a PyDictObject* i.e. a dict instance + within the process being debugged. + """ + _typename = 'PyDictObject' + + def iteritems(self): + ''' + Yields a sequence of (PyObjectPtr key, PyObjectPtr value) pairs, + analagous to dict.iteritems() + ''' + for i in safe_range(self.field('ma_mask') + 1): + ep = self.field('ma_table') + i + pyop_value = PyObjectPtr.from_pyobject_ptr(ep['me_value']) + if not pyop_value.is_null(): + pyop_key = PyObjectPtr.from_pyobject_ptr(ep['me_key']) + yield (pyop_key, pyop_value) + + def proxyval(self, visited): + # Guard against infinite loops: + if self.as_address() in visited: + return ProxyAlreadyVisited('{...}') + visited.add(self.as_address()) + + result = {} + for pyop_key, pyop_value in self.iteritems(): + proxy_key = pyop_key.proxyval(visited) + proxy_value = pyop_value.proxyval(visited) + result[proxy_key] = proxy_value + return result + + def write_repr(self, out, visited): + # Guard against infinite loops: + if self.as_address() in visited: + out.write('{...}') + return + visited.add(self.as_address()) + + out.write('{') + first = True + for pyop_key, pyop_value in self.iteritems(): + if not first: + out.write(', ') + first = False + pyop_key.write_repr(out, visited) + out.write(': ') + pyop_value.write_repr(out, visited) + out.write('}') + +class PyInstanceObjectPtr(PyObjectPtr): + _typename = 'PyInstanceObject' + + def proxyval(self, visited): + # Guard against infinite loops: + if self.as_address() in visited: + return ProxyAlreadyVisited('<...>') + visited.add(self.as_address()) + + # Get name of class: + in_class = self.pyop_field('in_class') + cl_name = in_class.pyop_field('cl_name').proxyval(visited) + + # Get dictionary of instance attributes: + in_dict = self.pyop_field('in_dict').proxyval(visited) + + # Old-style class: + return InstanceProxy(cl_name, in_dict, long(self._gdbval)) + + def write_repr(self, out, visited): + # Guard against infinite loops: + if self.as_address() in visited: + out.write('<...>') + return + visited.add(self.as_address()) + + # Old-style class: + + # Get name of class: + in_class = self.pyop_field('in_class') + cl_name = in_class.pyop_field('cl_name').proxyval(visited) + + # Get dictionary of instance attributes: + pyop_in_dict = self.pyop_field('in_dict') + + _write_instance_repr(out, visited, + cl_name, pyop_in_dict, self.as_address()) + +class PyIntObjectPtr(PyObjectPtr): + _typename = 'PyIntObject' + + def proxyval(self, visited): + result = int_from_int(self.field('ob_ival')) + return result + +class PyListObjectPtr(PyObjectPtr): + _typename = 'PyListObject' + + def __getitem__(self, i): + # Get the gdb.Value for the (PyObject*) with the given index: + field_ob_item = self.field('ob_item') + return field_ob_item[i] + + def proxyval(self, visited): + # Guard against infinite loops: + if self.as_address() in visited: + return ProxyAlreadyVisited('[...]') + visited.add(self.as_address()) + + result = [PyObjectPtr.from_pyobject_ptr(self[i]).proxyval(visited) + for i in safe_range(int_from_int(self.field('ob_size')))] + return result + + def write_repr(self, out, visited): + # Guard against infinite loops: + if self.as_address() in visited: + out.write('[...]') + return + visited.add(self.as_address()) + + out.write('[') + for i in safe_range(int_from_int(self.field('ob_size'))): + if i > 0: + out.write(', ') + element = PyObjectPtr.from_pyobject_ptr(self[i]) + element.write_repr(out, visited) + out.write(']') + +class PyLongObjectPtr(PyObjectPtr): + _typename = 'PyLongObject' + + def proxyval(self, visited): + ''' + Python's Include/longobjrep.h has this declaration: + struct _longobject { + PyObject_VAR_HEAD + digit ob_digit[1]; + }; + + with this description: + The absolute value of a number is equal to + SUM(for i=0 through abs(ob_size)-1) ob_digit[i] * 2**(SHIFT*i) + Negative numbers are represented with ob_size < 0; + zero is represented by ob_size == 0. + + where SHIFT can be either: + #define PyLong_SHIFT 30 + #define PyLong_SHIFT 15 + ''' + ob_size = long(self.field('ob_size')) + if ob_size == 0: + return 0L + + ob_digit = self.field('ob_digit') + + if gdb.lookup_type('digit').sizeof == 2: + SHIFT = 15L + else: + SHIFT = 30L + + digits = [long(ob_digit[i]) * 2**(SHIFT*i) + for i in safe_range(abs(ob_size))] + result = sum(digits) + if ob_size < 0: + result = -result + return result + + +class PyNoneStructPtr(PyObjectPtr): + """ + Class wrapping a gdb.Value that's a PyObject* pointing to the + singleton (we hope) _Py_NoneStruct with ob_type PyNone_Type + """ + _typename = 'PyObject' + + def proxyval(self, visited): + return None + + +class PyFrameObjectPtr(PyObjectPtr): + _typename = 'PyFrameObject' + + def __init__(self, gdbval, cast_to): + PyObjectPtr.__init__(self, gdbval, cast_to) + + if not self.is_optimized_out(): + self.co = PyCodeObjectPtr.from_pyobject_ptr(self.field('f_code')) + self.co_name = self.co.pyop_field('co_name') + self.co_filename = self.co.pyop_field('co_filename') + + self.f_lineno = int_from_int(self.field('f_lineno')) + self.f_lasti = int_from_int(self.field('f_lasti')) + self.co_nlocals = int_from_int(self.co.field('co_nlocals')) + self.co_varnames = PyTupleObjectPtr.from_pyobject_ptr(self.co.field('co_varnames')) + + def iter_locals(self): + ''' + Yield a sequence of (name,value) pairs of PyObjectPtr instances, for + the local variables of this frame + ''' + if self.is_optimized_out(): + return + + f_localsplus = self.field('f_localsplus') + for i in safe_range(self.co_nlocals): + pyop_value = PyObjectPtr.from_pyobject_ptr(f_localsplus[i]) + if not pyop_value.is_null(): + pyop_name = PyObjectPtr.from_pyobject_ptr(self.co_varnames[i]) + yield (pyop_name, pyop_value) + + def iter_globals(self): + ''' + Yield a sequence of (name,value) pairs of PyObjectPtr instances, for + the global variables of this frame + ''' + if self.is_optimized_out(): + return + + pyop_globals = self.pyop_field('f_globals') + return pyop_globals.iteritems() + + def iter_builtins(self): + ''' + Yield a sequence of (name,value) pairs of PyObjectPtr instances, for + the builtin variables + ''' + if self.is_optimized_out(): + return + + pyop_builtins = self.pyop_field('f_builtins') + return pyop_builtins.iteritems() + + def get_var_by_name(self, name): + ''' + Look for the named local variable, returning a (PyObjectPtr, scope) pair + where scope is a string 'local', 'global', 'builtin' + + If not found, return (None, None) + ''' + for pyop_name, pyop_value in self.iter_locals(): + if name == pyop_name.proxyval(set()): + return pyop_value, 'local' + for pyop_name, pyop_value in self.iter_globals(): + if name == pyop_name.proxyval(set()): + return pyop_value, 'global' + for pyop_name, pyop_value in self.iter_builtins(): + if name == pyop_name.proxyval(set()): + return pyop_value, 'builtin' + return None, None + + def filename(self): + '''Get the path of the current Python source file, as a string''' + if self.is_optimized_out(): + return '(frame information optimized out)' + return self.co_filename.proxyval(set()) + + def current_line_num(self): + '''Get current line number as an integer (1-based) + + Translated from PyFrame_GetLineNumber and PyCode_Addr2Line + + See Objects/lnotab_notes.txt + ''' + if self.is_optimized_out(): + return None + f_trace = self.field('f_trace') + if long(f_trace) != 0: + # we have a non-NULL f_trace: + return self.f_lineno + else: + #try: + return self.co.addr2line(self.f_lasti) + #except ValueError: + # return self.f_lineno + + def current_line(self): + '''Get the text of the current source line as a string, with a trailing + newline character''' + if self.is_optimized_out(): + return '(frame information optimized out)' + with open(self.filename(), 'r') as f: + all_lines = f.readlines() + # Convert from 1-based current_line_num to 0-based list offset: + return all_lines[self.current_line_num()-1] + + def write_repr(self, out, visited): + if self.is_optimized_out(): + out.write('(frame information optimized out)') + return + out.write('Frame 0x%x, for file %s, line %i, in %s (' + % (self.as_address(), + self.co_filename, + self.current_line_num(), + self.co_name)) + first = True + for pyop_name, pyop_value in self.iter_locals(): + if not first: + out.write(', ') + first = False + + out.write(pyop_name.proxyval(visited)) + out.write('=') + pyop_value.write_repr(out, visited) + + out.write(')') + +class PySetObjectPtr(PyObjectPtr): + _typename = 'PySetObject' + + def proxyval(self, visited): + # Guard against infinite loops: + if self.as_address() in visited: + return ProxyAlreadyVisited('%s(...)' % self.safe_tp_name()) + visited.add(self.as_address()) + + members = [] + table = self.field('table') + for i in safe_range(self.field('mask')+1): + setentry = table[i] + key = setentry['key'] + if key != 0: + key_proxy = PyObjectPtr.from_pyobject_ptr(key).proxyval(visited) + if key_proxy != '<dummy key>': + members.append(key_proxy) + if self.safe_tp_name() == 'frozenset': + return frozenset(members) + else: + return set(members) + + def write_repr(self, out, visited): + out.write(self.safe_tp_name()) + + # Guard against infinite loops: + if self.as_address() in visited: + out.write('(...)') + return + visited.add(self.as_address()) + + out.write('([') + first = True + table = self.field('table') + for i in safe_range(self.field('mask')+1): + setentry = table[i] + key = setentry['key'] + if key != 0: + pyop_key = PyObjectPtr.from_pyobject_ptr(key) + key_proxy = pyop_key.proxyval(visited) # FIXME! + if key_proxy != '<dummy key>': + if not first: + out.write(', ') + first = False + pyop_key.write_repr(out, visited) + out.write('])') + + +class PyStringObjectPtr(PyObjectPtr): + _typename = 'PyStringObject' + + def __str__(self): + field_ob_size = self.field('ob_size') + field_ob_sval = self.field('ob_sval') + char_ptr = field_ob_sval.address.cast(_type_unsigned_char_ptr) + return ''.join([chr(char_ptr[i]) for i in safe_range(field_ob_size)]) + + def proxyval(self, visited): + return str(self) + +class PyTupleObjectPtr(PyObjectPtr): + _typename = 'PyTupleObject' + + def __getitem__(self, i): + # Get the gdb.Value for the (PyObject*) with the given index: + field_ob_item = self.field('ob_item') + return field_ob_item[i] + + def proxyval(self, visited): + # Guard against infinite loops: + if self.as_address() in visited: + return ProxyAlreadyVisited('(...)') + visited.add(self.as_address()) + + result = tuple([PyObjectPtr.from_pyobject_ptr(self[i]).proxyval(visited) + for i in safe_range(int_from_int(self.field('ob_size')))]) + return result + + def write_repr(self, out, visited): + # Guard against infinite loops: + if self.as_address() in visited: + out.write('(...)') + return + visited.add(self.as_address()) + + out.write('(') + for i in safe_range(int_from_int(self.field('ob_size'))): + if i > 0: + out.write(', ') + element = PyObjectPtr.from_pyobject_ptr(self[i]) + element.write_repr(out, visited) + if self.field('ob_size') == 1: + out.write(',)') + else: + out.write(')') + +class PyTypeObjectPtr(PyObjectPtr): + _typename = 'PyTypeObject' + + +class PyUnicodeObjectPtr(PyObjectPtr): + _typename = 'PyUnicodeObject' + + def proxyval(self, visited): + # From unicodeobject.h: + # Py_ssize_t length; /* Length of raw Unicode data in buffer */ + # Py_UNICODE *str; /* Raw Unicode buffer */ + field_length = long(self.field('length')) + field_str = self.field('str') + + # Gather a list of ints from the Py_UNICODE array; these are either + # UCS-2 or UCS-4 code points: + Py_UNICODEs = [int(field_str[i]) for i in safe_range(field_length)] + + # Convert the int code points to unicode characters, and generate a + # local unicode instance: + result = u''.join([unichr(ucs) for ucs in Py_UNICODEs]) + return result + + +def int_from_int(gdbval): + return int(str(gdbval)) + + +def stringify(val): + # TODO: repr() puts everything on one line; pformat can be nicer, but + # can lead to v.long results; this function isolates the choice + if True: + return repr(val) + else: + from pprint import pformat + return pformat(val) + + +class PyObjectPtrPrinter: + "Prints a (PyObject*)" + + def __init__ (self, gdbval): + self.gdbval = gdbval + + def to_string (self): + pyop = PyObjectPtr.from_pyobject_ptr(self.gdbval) + if True: + return pyop.get_truncated_repr(MAX_OUTPUT_LEN) + else: + # Generate full proxy value then stringify it. + # Doing so could be expensive + proxyval = pyop.proxyval(set()) + return stringify(proxyval) + +def pretty_printer_lookup(gdbval): + type = gdbval.type.unqualified() + if type.code == gdb.TYPE_CODE_PTR: + type = type.target().unqualified() + t = str(type) + if t in ("PyObject", "PyFrameObject"): + return PyObjectPtrPrinter(gdbval) + +""" +During development, I've been manually invoking the code in this way: +(gdb) python + +import sys +sys.path.append('/home/david/coding/python-gdb') +import libpython +end + +then reloading it after each edit like this: +(gdb) python reload(libpython) + +The following code should ensure that the prettyprinter is registered +if the code is autoloaded by gdb when visiting libpython.so, provided +that this python file is installed to the same path as the library (or its +.debug file) plus a "-gdb.py" suffix, e.g: + /usr/lib/libpython2.6.so.1.0-gdb.py + /usr/lib/debug/usr/lib/libpython2.6.so.1.0.debug-gdb.py +""" +def register (obj): + if obj == None: + obj = gdb + + # Wire up the pretty-printer + obj.pretty_printers.append(pretty_printer_lookup) + +register (gdb.current_objfile ()) + + +class Frame(object): + ''' + Wrapper for gdb.Frame, adding various methods + ''' + def __init__(self, gdbframe): + self._gdbframe = gdbframe + + def older(self): + older = self._gdbframe.older() + if older: + return Frame(older) + else: + return None + + def newer(self): + newer = self._gdbframe.newer() + if newer: + return Frame(newer) + else: + return None + + def select(self): + self._gdbframe.select() + + def get_index(self): + '''Calculate index of frame, starting at 0 for the newest frame within + this thread''' + index = 0 + # Go down until you reach the newest frame: + iter_frame = self + while iter_frame.newer(): + index += 1 + iter_frame = iter_frame.newer() + return index + + def is_evalframeex(self): + if self._gdbframe.function(): + if self._gdbframe.function().name == 'PyEval_EvalFrameEx': + ''' + I believe we also need to filter on the inline + struct frame_id.inline_depth, only regarding frames with + an inline depth of 0 as actually being this function + + So we reject those with type gdb.INLINE_FRAME + ''' + if self._gdbframe.type() == gdb.NORMAL_FRAME: + # We have a PyEval_EvalFrameEx frame: + return True + + return False + + def get_pyop(self): + try: + f = self._gdbframe.read_var('f') + return PyFrameObjectPtr.from_pyobject_ptr(f) + except ValueError: + return None + + @classmethod + def get_selected_frame(cls): + _gdbframe = gdb.selected_frame() + if _gdbframe: + return Frame(_gdbframe) + return None + + @classmethod + def get_selected_python_frame(cls): + '''Try to obtain the Frame for the python code in the selected frame, + or None''' + frame = cls.get_selected_frame() + + while frame: + if frame.is_evalframeex(): + return frame + frame = frame.older() + + # Not found: + return None + + def print_summary(self): + if self.is_evalframeex(): + pyop = self.get_pyop() + if pyop: + sys.stdout.write('#%i %s\n' % (self.get_index(), pyop.get_truncated_repr(MAX_OUTPUT_LEN))) + sys.stdout.write(pyop.current_line()) + else: + sys.stdout.write('#%i (unable to read python frame information)\n' % self.get_index()) + else: + sys.stdout.write('#%i\n' % self.get_index()) + +class PyList(gdb.Command): + '''List the current Python source code, if any + + Use + py-list START + to list at a different line number within the python source. + + Use + py-list START, END + to list a specific range of lines within the python source. + ''' + + def __init__(self): + gdb.Command.__init__ (self, + "py-list", + gdb.COMMAND_FILES, + gdb.COMPLETE_NONE) + + + def invoke(self, args, from_tty): + import re + + start = None + end = None + + m = re.match(r'\s*(\d+)\s*', args) + if m: + start = int(m.group(0)) + end = start + 10 + + m = re.match(r'\s*(\d+)\s*,\s*(\d+)\s*', args) + if m: + start, end = map(int, m.groups()) + + frame = Frame.get_selected_python_frame() + if not frame: + print 'Unable to locate python frame' + return + + pyop = frame.get_pyop() + if not pyop: + print 'Unable to read information on python frame' + return + + filename = pyop.filename() + lineno = pyop.current_line_num() + + if start is None: + start = lineno - 5 + end = lineno + 5 + + if start<1: + start = 1 + + with open(filename, 'r') as f: + all_lines = f.readlines() + # start and end are 1-based, all_lines is 0-based; + # so [start-1:end] as a python slice gives us [start, end] as a + # closed interval + for i, line in enumerate(all_lines[start-1:end]): + linestr = str(i+start) + # Highlight current line: + if i + start == lineno: + linestr = '>' + linestr + sys.stdout.write('%4s %s' % (linestr, line)) + + +# ...and register the command: +PyList() + +def move_in_stack(move_up): + '''Move up or down the stack (for the py-up/py-down command)''' + frame = Frame.get_selected_python_frame() + while frame: + if move_up: + iter_frame = frame.older() + else: + iter_frame = frame.newer() + + if not iter_frame: + break + + if iter_frame.is_evalframeex(): + # Result: + iter_frame.select() + iter_frame.print_summary() + return + + frame = iter_frame + + if move_up: + print 'Unable to find an older python frame' + else: + print 'Unable to find a newer python frame' + +class PyUp(gdb.Command): + 'Select and print the python stack frame that called this one (if any)' + def __init__(self): + gdb.Command.__init__ (self, + "py-up", + gdb.COMMAND_STACK, + gdb.COMPLETE_NONE) + + + def invoke(self, args, from_tty): + move_in_stack(move_up=True) + +PyUp() + +class PyDown(gdb.Command): + 'Select and print the python stack frame called by this one (if any)' + def __init__(self): + gdb.Command.__init__ (self, + "py-down", + gdb.COMMAND_STACK, + gdb.COMPLETE_NONE) + + + def invoke(self, args, from_tty): + move_in_stack(move_up=False) + +PyDown() + +class PyBacktrace(gdb.Command): + 'Display the current python frame and all the frames within its call stack (if any)' + def __init__(self): + gdb.Command.__init__ (self, + "py-bt", + gdb.COMMAND_STACK, + gdb.COMPLETE_NONE) + + + def invoke(self, args, from_tty): + frame = Frame.get_selected_python_frame() + while frame: + if frame.is_evalframeex(): + frame.print_summary() + frame = frame.older() + +PyBacktrace() + +class PyPrint(gdb.Command): + 'Look up the given python variable name, and print it' + def __init__(self): + gdb.Command.__init__ (self, + "py-print", + gdb.COMMAND_DATA, + gdb.COMPLETE_NONE) + + + def invoke(self, args, from_tty): + name = str(args) + + frame = Frame.get_selected_python_frame() + if not frame: + print 'Unable to locate python frame' + return + + pyop_frame = frame.get_pyop() + if not pyop_frame: + print 'Unable to read information on python frame' + return + + pyop_var, scope = pyop_frame.get_var_by_name(name) + + if pyop_var: + print ('%s %r = %s' + % (scope, + name, + pyop_var.get_truncated_repr(MAX_OUTPUT_LEN))) + else: + print '%r not found' % name + +PyPrint() + +class PyLocals(gdb.Command): + 'Look up the given python variable name, and print it' + def __init__(self): + gdb.Command.__init__ (self, + "py-locals", + gdb.COMMAND_DATA, + gdb.COMPLETE_NONE) + + + def invoke(self, args, from_tty): + name = str(args) + + frame = Frame.get_selected_python_frame() + if not frame: + print 'Unable to locate python frame' + return + + pyop_frame = frame.get_pyop() + if not pyop_frame: + print 'Unable to read information on python frame' + return + + for pyop_name, pyop_value in pyop_frame.iter_locals(): + print ('%s = %s' + % (pyop_name.proxyval(set()), + pyop_value.get_truncated_repr(MAX_OUTPUT_LEN))) + +PyLocals() |