From 5ae681081975ac074bd030d3f23e99e3644cfe10 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20v=2E=20L=C3=B6wis?= Date: Wed, 21 Apr 2010 22:38:42 +0000 Subject: Issue #8380: Port gdb/libpython to 3.x. --- Lib/test/gdb_sample.py | 2 +- Lib/test/test_gdb.py | 231 ++++++++++++++++++++++--------------------------- Tools/gdb/libpython.py | 225 +++++++++++++++++++++++++++++++++++++++-------- 3 files changed, 292 insertions(+), 166 deletions(-) diff --git a/Lib/test/gdb_sample.py b/Lib/test/gdb_sample.py index a732b25..cab13fb 100644 --- a/Lib/test/gdb_sample.py +++ b/Lib/test/gdb_sample.py @@ -7,6 +7,6 @@ def bar(a, b, c): baz(a, b, c) def baz(*args): - print(42) + id(42) foo(1, 2, 3) diff --git a/Lib/test/test_gdb.py b/Lib/test/test_gdb.py index 2776c1d..489ceef 100644 --- a/Lib/test/test_gdb.py +++ b/Lib/test/test_gdb.py @@ -45,6 +45,8 @@ def gdb_has_frame_select(): HAS_PYUP_PYDOWN = gdb_has_frame_select() +BREAKPOINT_FN='builtin_id' + class DebuggerTests(unittest.TestCase): """Test that the debugger can debug Python.""" @@ -57,10 +59,10 @@ class DebuggerTests(unittest.TestCase): out, err = subprocess.Popen( args, stdout=subprocess.PIPE, stderr=subprocess.PIPE, ).communicate() - return out.decode('iso-8859-1'), err.decode('iso-8859-1') + return out.decode('utf-8'), err.decode('utf-8') def get_stack_trace(self, source=None, script=None, - breakpoint='textiowrapper_write', + breakpoint=BREAKPOINT_FN, cmds_after_breakpoint=None, import_site=False): ''' @@ -129,19 +131,21 @@ class DebuggerTests(unittest.TestCase): cmds_after_breakpoint=None, import_site=False): # Given an input python source representation of data, - # run "python -c'print DATA'" under gdb with a breakpoint on - # textiowrapper_write and scrape out gdb's representation of the "op" + # run "python -c'id(DATA)'" under gdb with a breakpoint on + # builtin_id and scrape out gdb's representation of the "op" # parameter, and verify that the gdb displays the same string # + # Verify that the gdb displays the expected string + # # For a nested structure, the first time we hit the breakpoint will # give us the top-level structure - gdb_output = self.get_stack_trace(source, breakpoint='textiowrapper_write', + gdb_output = self.get_stack_trace(source, breakpoint=BREAKPOINT_FN, cmds_after_breakpoint=cmds_after_breakpoint, import_site=import_site) # gdb can insert additional '\n' and space characters in various places # in its output, depending on the width of the terminal it's connected # to (using its "wrap_here" function) - m = re.match('.*#0\s+textiowrapper_write\s+\(\s*op\=\s*(.*?),\s+fp=.*\).*', + m = re.match('.*#0\s+builtin_id\s+\(self\=.*,\s+v=\s*(.*?)\)\s+at\s+Python/bltinmodule.c.*', gdb_output, re.DOTALL) if not m: self.fail('Unexpected gdb output: %r\n%s' % (gdb_output, gdb_output)) @@ -154,37 +158,34 @@ class DebuggerTests(unittest.TestCase): def assertMultilineMatches(self, actual, pattern): m = re.match(pattern, actual, re.DOTALL) - self.assert_(m, - msg='%r did not match %r' % (actual, pattern)) + if not m: + self.fail(msg='%r did not match %r' % (actual, pattern)) def get_sample_script(self): return findfile('gdb_sample.py') class PrettyPrintTests(DebuggerTests): def test_getting_backtrace(self): - gdb_output = self.get_stack_trace('print(42)') - self.assertTrue('textiowrapper_write' in gdb_output) + gdb_output = self.get_stack_trace('id(42)') + self.assertTrue(BREAKPOINT_FN in gdb_output) - def assertGdbRepr(self, val, cmds_after_breakpoint=None): + def assertGdbRepr(self, val, exp_repr=None, cmds_after_breakpoint=None): # Ensure that gdb's rendering of the value in a debugged process # matches repr(value) in this process: - gdb_repr, gdb_output = self.get_gdb_repr('print(' + repr(val) + ')', + gdb_repr, gdb_output = self.get_gdb_repr('id(' + repr(val) + ')', cmds_after_breakpoint) - self.assertEquals(gdb_repr, repr(val), gdb_output) + if not exp_repr: + exp_repr = repr(val) + self.assertEquals(gdb_repr, exp_repr, + ('%r did not equal expected %r; full output was:\n%s' + % (gdb_repr, exp_repr, gdb_output))) def test_int(self): - 'Verify the pretty-printing of various "int" values' + 'Verify the pretty-printing of various "int"/long values' self.assertGdbRepr(42) self.assertGdbRepr(0) self.assertGdbRepr(-7) - self.assertGdbRepr(sys.maxint) - self.assertGdbRepr(-sys.maxint) - - def test_long(self): - 'Verify the pretty-printing of various "long" values' - self.assertGdbRepr(0) self.assertGdbRepr(1000000000000) - self.assertGdbRepr(-1) self.assertGdbRepr(-1000000000000000) def test_singletons(self): @@ -202,27 +203,27 @@ class PrettyPrintTests(DebuggerTests): def test_lists(self): 'Verify the pretty-printing of lists' self.assertGdbRepr([]) - self.assertGdbRepr(range(5)) + self.assertGdbRepr(list(range(5))) + + def test_bytes(self): + 'Verify the pretty-printing of bytes' + self.assertGdbRepr(b'') + self.assertGdbRepr(b'And now for something hopefully the same') + self.assertGdbRepr(b'string with embedded NUL here \0 and then some more text') + self.assertGdbRepr(b'this is a tab:\t' + b' this is a slash-N:\n' + b' this is a slash-R:\r' + ) + + self.assertGdbRepr(b'this is byte 255:\xff and byte 128:\x80') + + self.assertGdbRepr(bytes([b for b in range(255)])) def test_strings(self): - 'Verify the pretty-printing of strings' + 'Verify the pretty-printing of unicode strings' self.assertGdbRepr('') self.assertGdbRepr('And now for something hopefully the same') self.assertGdbRepr('string with embedded NUL here \0 and then some more text') - self.assertGdbRepr('this is byte 255:\xff and byte 128:\x80') - - def test_tuples(self): - 'Verify the pretty-printing of tuples' - self.assertGdbRepr(tuple()) - self.assertGdbRepr((1,)) - self.assertGdbRepr(('foo', 'bar', 'baz')) - - def test_unicode(self): - 'Verify the pretty-printing of unicode values' - # Test the empty unicode string: - self.assertGdbRepr('') - - self.assertGdbRepr('hello world') # Test printing a single character: # U+2620 SKULL AND CROSSBONES @@ -238,14 +239,19 @@ class PrettyPrintTests(DebuggerTests): # This is: # UTF-8: 0xF0 0x9D 0x84 0xA1 # UTF-16: 0xD834 0xDD21 - try: - # This will only work on wide-unicode builds: - self.assertGdbRepr(unichr(0x1D121)) - except ValueError as e: - # We're probably on a narrow-unicode build; if we're seeing a - # different problem, then re-raise it: - if e.args != ('unichr() arg not in range(0x10000) (narrow Python build)',): - raise e + if sys.maxunicode == 0x10FFFF: + # wide unicode: + self.assertGdbRepr(chr(0x1D121)) + else: + # narrow unicode: + self.assertGdbRepr(chr(0x1D121), + "'\\U0000d834\\U0000dd21'") + + def test_tuples(self): + 'Verify the pretty-printing of tuples' + self.assertGdbRepr(tuple()) + self.assertGdbRepr((1,), '(1,)') + self.assertGdbRepr(('foo', 'bar', 'baz')) def test_sets(self): 'Verify the pretty-printing of sets' @@ -253,12 +259,12 @@ class PrettyPrintTests(DebuggerTests): self.assertGdbRepr(set(['a', 'b'])) self.assertGdbRepr(set([4, 5, 6])) - # Ensure that we handled sets containing the "dummy" key value, + # Ensure that we handle sets containing the "dummy" key value, # which happens on deletion: gdb_repr, gdb_output = self.get_gdb_repr('''s = set(['a','b']) s.pop() -print(s)''') - self.assertEquals(gdb_repr, "set(['b'])") +id(s)''') + self.assertEquals(gdb_repr, "{'b'}") def test_frozensets(self): 'Verify the pretty-printing of frozensets' @@ -271,43 +277,31 @@ print(s)''') gdb_repr, gdb_output = self.get_gdb_repr(''' try: raise RuntimeError("I am an error") -except RuntimeError, e: - print(e) +except RuntimeError as e: + id(e) ''') self.assertEquals(gdb_repr, - "exceptions.RuntimeError('I am an error',)") + "RuntimeError('I am an error',)") # Test division by zero: gdb_repr, gdb_output = self.get_gdb_repr(''' try: a = 1 / 0 -except ZeroDivisionError, e: - print(e) +except ZeroDivisionError as e: + id(e) ''') self.assertEquals(gdb_repr, - "exceptions.ZeroDivisionError('integer division or modulo by zero',)") - - def test_classic_class(self): - 'Verify the pretty-printing of classic class instances' - gdb_repr, gdb_output = self.get_gdb_repr(''' -class Foo: - pass -foo = Foo() -foo.an_int = 42 -print(foo)''') - m = re.match(r'', gdb_repr) - self.assertTrue(m, - msg='Unexpected classic-class rendering %r' % gdb_repr) + "ZeroDivisionError('division by zero',)") def test_modern_class(self): 'Verify the pretty-printing of new-style class instances' gdb_repr, gdb_output = self.get_gdb_repr(''' -class Foo(object): +class Foo: pass foo = Foo() foo.an_int = 42 -print(foo)''') +id(foo)''') m = re.match(r'', gdb_repr) self.assertTrue(m, msg='Unexpected new-style class rendering %r' % gdb_repr) @@ -320,8 +314,9 @@ class Foo(list): foo = Foo() foo += [1, 2, 3] foo.an_int = 42 -print(foo)''') +id(foo)''') m = re.match(r'', gdb_repr) + self.assertTrue(m, msg='Unexpected new-style class rendering %r' % gdb_repr) @@ -334,12 +329,13 @@ class Foo(tuple): pass foo = Foo((1, 2, 3)) foo.an_int = 42 -print(foo)''') +id(foo)''') m = re.match(r'', gdb_repr) + self.assertTrue(m, msg='Unexpected new-style class rendering %r' % gdb_repr) - def assertSane(self, source, corruption, expvalue=None, exptype=None): + def assertSane(self, source, corruption, exprepr=None): '''Run Python under gdb, corrupting variables in the inferior process immediately before taking a backtrace. @@ -353,19 +349,15 @@ print(foo)''') gdb_repr, gdb_output = \ self.get_gdb_repr(source, cmds_after_breakpoint=cmds_after_breakpoint) - - if expvalue: - if gdb_repr == repr(expvalue): + if exprepr: + if gdb_repr == exprepr: # gdb managed to print the value in spite of the corruption; # this is good (see http://bugs.python.org/issue8330) return - if exptype: - pattern = '<' + exptype + ' at remote 0x[0-9a-f]+>' - else: - # Match anything for the type name; 0xDEADBEEF could point to - # something arbitrary (see http://bugs.python.org/issue8330) - pattern = '<.* at remote 0x[0-9a-f]+>' + # Match anything for the type name; 0xDEADBEEF could point to + # something arbitrary (see http://bugs.python.org/issue8330) + pattern = '<.* at remote 0x[0-9a-f]+>' m = re.match(pattern, gdb_repr) if not m: @@ -375,8 +367,8 @@ print(foo)''') def test_NULL_ptr(self): 'Ensure that a NULL PyObject* is handled gracefully' gdb_repr, gdb_output = ( - self.get_gdb_repr('print(42)', - cmds_after_breakpoint=['set variable op=0', + self.get_gdb_repr('id(42)', + cmds_after_breakpoint=['set variable v=0', 'backtrace']) ) @@ -384,44 +376,33 @@ print(foo)''') def test_NULL_ob_type(self): 'Ensure that a PyObject* with NULL ob_type is handled gracefully' - self.assertSane('print(42)', - 'set op->ob_type=0') + self.assertSane('id(42)', + 'set v->ob_type=0') def test_corrupt_ob_type(self): 'Ensure that a PyObject* with a corrupt ob_type is handled gracefully' - self.assertSane('print(42)', - 'set op->ob_type=0xDEADBEEF', - expvalue=42) + self.assertSane('id(42)', + 'set v->ob_type=0xDEADBEEF', + exprepr='42') def test_corrupt_tp_flags(self): 'Ensure that a PyObject* with a type with corrupt tp_flags is handled' - self.assertSane('print(42)', - 'set op->ob_type->tp_flags=0x0', - expvalue=42) + self.assertSane('id(42)', + 'set v->ob_type->tp_flags=0x0', + exprepr='42') def test_corrupt_tp_name(self): 'Ensure that a PyObject* with a type with corrupt tp_name is handled' - self.assertSane('print(42)', - 'set op->ob_type->tp_name=0xDEADBEEF', - expvalue=42) - - def test_NULL_instance_dict(self): - 'Ensure that a PyInstanceObject with with a NULL in_dict is handled' - self.assertSane(''' -class Foo: - pass -foo = Foo() -foo.an_int = 42 -print(foo)''', - 'set ((PyInstanceObject*)op)->in_dict = 0', - exptype='Foo') + self.assertSane('id(42)', + 'set v->ob_type->tp_name=0xDEADBEEF', + exprepr='42') def test_builtins_help(self): 'Ensure that the new-style class _Helper in site.py can be handled' # (this was the issue causing tracebacks in # http://bugs.python.org/issue8032#msg100537 ) + gdb_repr, gdb_output = self.get_gdb_repr('id(__builtins__.help)', import_site=True) - gdb_repr, gdb_output = self.get_gdb_repr('print(__builtins__.help)', import_site=True) m = re.match(r'<_Helper at remote 0x[0-9a-f]+>', gdb_repr) self.assertTrue(m, msg='Unexpected rendering %r' % gdb_repr) @@ -430,20 +411,18 @@ print(foo)''', '''Ensure that a reference loop involving a list doesn't lead proxyval into an infinite loop:''' gdb_repr, gdb_output = \ - self.get_gdb_repr("a = [3, 4, 5] ; a.append(a) ; print(a)") - + self.get_gdb_repr("a = [3, 4, 5] ; a.append(a) ; id(a)") self.assertEquals(gdb_repr, '[3, 4, 5, [...]]') gdb_repr, gdb_output = \ - self.get_gdb_repr("a = [3, 4, 5] ; b = [a] ; a.append(b) ; print(a)") - + self.get_gdb_repr("a = [3, 4, 5] ; b = [a] ; a.append(b) ; id(a)") self.assertEquals(gdb_repr, '[3, 4, 5, [[...]]]') def test_selfreferential_dict(self): '''Ensure that a reference loop involving a dict doesn't lead proxyval into an infinite loop:''' gdb_repr, gdb_output = \ - self.get_gdb_repr("a = {} ; b = {'bar':a} ; a['foo'] = b ; print(a)") + self.get_gdb_repr("a = {} ; b = {'bar':a} ; a['foo'] = b ; id(a)") self.assertEquals(gdb_repr, "{'foo': {'bar': {...}}}") @@ -454,7 +433,7 @@ class Foo: pass foo = Foo() foo.an_attr = foo -print(foo)''') +id(foo)''') self.assertTrue(re.match('\) at remote 0x[0-9a-f]+>', gdb_repr), 'Unexpected gdb representation: %r\n%s' % \ @@ -467,7 +446,7 @@ class Foo(object): pass foo = Foo() foo.an_attr = foo -print(foo)''') +id(foo)''') self.assertTrue(re.match('\) at remote 0x[0-9a-f]+>', gdb_repr), 'Unexpected gdb representation: %r\n%s' % \ @@ -481,7 +460,7 @@ a = Foo() b = Foo() a.an_attr = b b.an_attr = a -print(a)''') +id(a)''') self.assertTrue(re.match('\) at remote 0x[0-9a-f]+>\) at remote 0x[0-9a-f]+>', gdb_repr), 'Unexpected gdb representation: %r\n%s' % \ @@ -489,7 +468,7 @@ print(a)''') def test_truncation(self): 'Verify that very long output is truncated' - gdb_repr, gdb_output = self.get_gdb_repr('print(range(1000))') + gdb_repr, gdb_output = self.get_gdb_repr('id(list(range(1000)))') self.assertEquals(gdb_repr, "[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, " "14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, " @@ -515,13 +494,9 @@ print(a)''') self.assertEquals(len(gdb_repr), 1024 + len('...(truncated)')) - def test_builtin_function(self): - gdb_repr, gdb_output = self.get_gdb_repr('print(len)') - self.assertEquals(gdb_repr, '') - def test_builtin_method(self): - gdb_repr, gdb_output = self.get_gdb_repr('import sys; print(sys.stdout.readlines)') - self.assertTrue(re.match('', + gdb_repr, gdb_output = self.get_gdb_repr('import sys; id(sys.stdout.readlines)') + self.assertTrue(re.match('', gdb_repr), 'Unexpected gdb representation: %r\n%s' % \ (gdb_repr, gdb_output)) @@ -532,11 +507,11 @@ def foo(a, b, c): pass foo(3, 4, 5) -print foo.__code__''', - breakpoint='textiowrapper_write', - cmds_after_breakpoint=['print (PyFrameObject*)(((PyCodeObject*)op)->co_zombieframe)'] +id(foo.__code__)''', + breakpoint='builtin_id', + cmds_after_breakpoint=['print (PyFrameObject*)(((PyCodeObject*)v)->co_zombieframe)'] ) - self.assertTrue(re.match(r'.*\s+\$1 =\s+Frame 0x[0-9a-f]+, for file , line 3, in foo \(\)\s+.*', + self.assertTrue(re.match('.*\s+\$1 =\s+Frame 0x[0-9a-f]+, for file , line 3, in foo \(\)\s+.*', gdb_output, re.DOTALL), 'Unexpected gdb representation: %r\n%s' % (gdb_output, gdb_output)) @@ -555,7 +530,7 @@ class PyListTests(DebuggerTests): ' 7 baz(a, b, c)\n' ' 8 \n' ' 9 def baz(*args):\n' - ' >10 print(42)\n' + ' >10 id(42)\n' ' 11 \n' ' 12 foo(1, 2, 3)\n', bt) @@ -566,7 +541,7 @@ class PyListTests(DebuggerTests): cmds_after_breakpoint=['py-list 9']) self.assertListing(' 9 def baz(*args):\n' - ' >10 print(42)\n' + ' >10 id(42)\n' ' 11 \n' ' 12 foo(1, 2, 3)\n', bt) @@ -619,7 +594,7 @@ $''') #[0-9]+ Frame 0x[0-9a-f]+, for file .*gdb_sample.py, line 7, in bar \(a=1, b=2, c=3\) baz\(a, b, c\) #[0-9]+ Frame 0x[0-9a-f]+, for file .*gdb_sample.py, line 10, in baz \(args=\(1, 2, 3\)\) - print\(42\) + id\(42\) $''') class PyBtTests(DebuggerTests): @@ -662,7 +637,7 @@ class PyPrintTests(DebuggerTests): bt = self.get_stack_trace(script=self.get_sample_script(), cmds_after_breakpoint=['py-print len']) self.assertMultilineMatches(bt, - r".*\nbuiltin 'len' = \n.*") + r".*\nbuiltin 'len' = \n.*") class PyLocalsTests(DebuggerTests): def test_basic_command(self): diff --git a/Tools/gdb/libpython.py b/Tools/gdb/libpython.py index a8f28d7..426fb7b 100644 --- a/Tools/gdb/libpython.py +++ b/Tools/gdb/libpython.py @@ -20,8 +20,9 @@ In particular, given a gdb.Value corresponding to a PyObject* in the inferior process, we can generate a "proxy value" within the gdb process. For example, given a PyObject* in the inferior process that is in fact a PyListObject* holding three PyObject* that turn out to be PyBytesObject* instances, we can -generate a proxy value within the gdb process that is a list of strings: - ["foo", "bar", "baz"] +generate a proxy value within the gdb process that is a list of bytes +instances: + [b"foo", b"bar", b"baz"] Doing so can be expensive for complicated graphs of objects, and could take some time, so we also have a "write_repr" method that writes a representation @@ -57,7 +58,7 @@ Py_TPFLAGS_INT_SUBCLASS = (1L << 23) Py_TPFLAGS_LONG_SUBCLASS = (1L << 24) Py_TPFLAGS_LIST_SUBCLASS = (1L << 25) Py_TPFLAGS_TUPLE_SUBCLASS = (1L << 26) -Py_TPFLAGS_STRING_SUBCLASS = (1L << 27) +Py_TPFLAGS_BYTES_SUBCLASS = (1L << 27) Py_TPFLAGS_UNICODE_SUBCLASS = (1L << 28) Py_TPFLAGS_DICT_SUBCLASS = (1L << 29) Py_TPFLAGS_BASE_EXC_SUBCLASS = (1L << 30) @@ -66,6 +67,9 @@ Py_TPFLAGS_TYPE_SUBCLASS = (1L << 31) MAX_OUTPUT_LEN=1024 +hexdigits = "0123456789abcdef" + + class NullPyObjectPtr(RuntimeError): pass @@ -148,12 +152,8 @@ class PyObjectPtr(object): return pyo_ptr.dereference()[name] if name == 'ob_size': - try: - # Python 2: - return self._gdbval.dereference()[name] - except RuntimeError: - # Python 3: - return self._gdbval.dereference()['ob_base'][name] + pyo_ptr = self._gdbval.cast(PyVarObjectPtr.get_gdb_type()) + return pyo_ptr.dereference()[name] # General case: look it up inside the object: return self._gdbval.dereference()[name] @@ -318,7 +318,7 @@ class PyObjectPtr(object): return PyListObjectPtr if tp_flags & Py_TPFLAGS_TUPLE_SUBCLASS: return PyTupleObjectPtr - if tp_flags & Py_TPFLAGS_STRING_SUBCLASS: + if tp_flags & Py_TPFLAGS_BYTES_SUBCLASS: return PyBytesObjectPtr if tp_flags & Py_TPFLAGS_UNICODE_SUBCLASS: return PyUnicodeObjectPtr @@ -355,6 +355,8 @@ class PyObjectPtr(object): def as_address(self): return long(self._gdbval) +class PyVarObjectPtr(PyObjectPtr): + _typename = 'PyVarObject' class ProxyAlreadyVisited(object): ''' @@ -515,20 +517,6 @@ class PyBaseExceptionObjectPtr(PyObjectPtr): out.write(self.safe_tp_name()) self.write_field_repr('args', out, visited) -class PyBoolObjectPtr(PyObjectPtr): - """ - Class wrapping a gdb.Value that's a PyBoolObject* i.e. one of the two - instances (Py_True/Py_False) within the process being debugged. - """ - _typename = 'PyBoolObject' - - def proxyval(self, visited): - if int_from_int(self.field('ob_ival')): - return True - else: - return False - - class PyClassObjectPtr(PyObjectPtr): """ Class wrapping a gdb.Value that's a PyClassObject* i.e. a @@ -691,13 +679,6 @@ class PyInstanceObjectPtr(PyObjectPtr): _write_instance_repr(out, visited, cl_name, pyop_in_dict, self.as_address()) -class PyIntObjectPtr(PyObjectPtr): - _typename = 'PyIntObject' - - def proxyval(self, visited): - result = int_from_int(self.field('ob_ival')) - return result - class PyListObjectPtr(PyObjectPtr): _typename = 'PyListObject' @@ -770,6 +751,22 @@ class PyLongObjectPtr(PyObjectPtr): result = -result return result + def write_repr(self, out, visited): + # Write this out as a Python 3 int literal, i.e. without the "L" suffix + proxy = self.proxyval(visited) + out.write("%s" % proxy) + + +class PyBoolObjectPtr(PyLongObjectPtr): + """ + Class wrapping a gdb.Value that's a PyBoolObject* i.e. one of the two + instances (Py_True/Py_False) within the process being debugged. + """ + def proxyval(self, visited): + if PyLongObjectPtr.proxyval(self, visited): + return True + else: + return False class PyNoneStructPtr(PyObjectPtr): """ @@ -894,9 +891,9 @@ class PyFrameObjectPtr(PyObjectPtr): return out.write('Frame 0x%x, for file %s, line %i, in %s (' % (self.as_address(), - self.co_filename, + self.co_filename.proxyval(visited), self.current_line_num(), - self.co_name)) + self.co_name.proxyval(visited))) first = True for pyop_name, pyop_value in self.iter_locals(): if not first: @@ -933,7 +930,8 @@ class PySetObjectPtr(PyObjectPtr): return set(members) def write_repr(self, out, visited): - out.write(self.safe_tp_name()) + # Emulate Python 3's set_repr + tp_name = self.safe_tp_name() # Guard against infinite loops: if self.as_address() in visited: @@ -941,7 +939,18 @@ class PySetObjectPtr(PyObjectPtr): return visited.add(self.as_address()) - out.write('([') + # Python 3's set_repr special-cases the empty set: + if not self.field('used'): + out.write(tp_name) + out.write('()') + return + + # Python 3 uses {} for set literals: + if tp_name != 'set': + out.write(tp_name) + out.write('(') + + out.write('{') first = True table = self.field('table') for i in safe_range(self.field('mask')+1): @@ -955,7 +964,10 @@ class PySetObjectPtr(PyObjectPtr): out.write(', ') first = False pyop_key.write_repr(out, visited) - out.write('])') + out.write('}') + + if tp_name != 'set': + out.write(')') class PyBytesObjectPtr(PyObjectPtr): @@ -970,6 +982,37 @@ class PyBytesObjectPtr(PyObjectPtr): def proxyval(self, visited): return str(self) + def write_repr(self, out, visited): + # Write this out as a Python 3 bytes literal, i.e. with a "b" prefix + + # Get a PyStringObject* within the Python 2 gdb process: + proxy = self.proxyval(visited) + + # Transliteration of Python 3's Objects/bytesobject.c:PyBytes_Repr + # to Python 2 code: + quote = "'" + if "'" in proxy and not '"' in proxy: + quote = '"' + out.write('b') + out.write(quote) + for byte in proxy: + if byte == quote or byte == '\\': + out.write('\\') + out.write(byte) + elif byte == '\t': + out.write('\\t') + elif byte == '\n': + out.write('\\n') + elif byte == '\r': + out.write('\\r') + elif byte < ' ' or ord(byte) >= 0x7f: + out.write('\\x') + out.write(hexdigits[(ord(byte) & 0xf0) >> 4]) + out.write(hexdigits[ord(byte) & 0xf]) + else: + out.write(byte) + out.write(quote) + class PyTupleObjectPtr(PyObjectPtr): _typename = 'PyTupleObject' @@ -1010,9 +1053,21 @@ class PyTypeObjectPtr(PyObjectPtr): _typename = 'PyTypeObject' +def _unichr_is_printable(char): + # Logic adapted from Python 3's Tools/unicode/makeunicodedata.py + if char == u" ": + return True + import unicodedata + return unicodedata.category(char)[0] not in ("C", "Z") + + class PyUnicodeObjectPtr(PyObjectPtr): _typename = 'PyUnicodeObject' + def char_width(self): + _type_Py_UNICODE = gdb.lookup_type('Py_UNICODE') + return _type_Py_UNICODE.sizeof + def proxyval(self, visited): # From unicodeobject.h: # Py_ssize_t length; /* Length of raw Unicode data in buffer */ @@ -1029,6 +1084,102 @@ class PyUnicodeObjectPtr(PyObjectPtr): result = u''.join([unichr(ucs) for ucs in Py_UNICODEs]) return result + def write_repr(self, out, visited): + # Write this out as a Python 3 str literal, i.e. without a "u" prefix + + # Get a PyUnicodeObject* within the Python 2 gdb process: + proxy = self.proxyval(visited) + + # Transliteration of Python 3's Object/unicodeobject.c:unicode_repr + # to Python 2: + if "'" in proxy and '"' not in proxy: + quote = '"' + else: + quote = "'" + out.write(quote) + + i = 0 + while i < len(proxy): + ch = proxy[i] + i += 1 + + # Escape quotes and backslashes + if ch == quote or ch == '\\': + out.write('\\') + out.write(ch) + + # Map special whitespace to '\t', \n', '\r' + elif ch == '\t': + out.write('\\t') + elif ch == '\n': + out.write('\\n') + elif ch == '\r': + out.write('\\r') + + # Map non-printable US ASCII to '\xhh' */ + elif ch < ' ' or ch == 0x7F: + out.write('\\x') + out.write(hexdigits[(ord(ch) >> 4) & 0x000F]) + out.write(hexdigits[ord(ch) & 0x000F]) + + # Copy ASCII characters as-is + elif ord(ch) < 0x7F: + out.write(ch) + + # Non-ASCII characters + else: + ucs = ch; + + if self.char_width == 2: + ch2 = 0 + # Get code point from surrogate pair + if i < len(proxy): + ch2 = proxy[i] + if (ord(ch) >= 0xD800 and ord(ch) < 0xDC00 + and ord(ch2) >= 0xDC00 and ord(ch2) <= 0xDFFF): + ucs = (((ch & 0x03FF) << 10) | (ch2 & 0x03FF)) + 0x00010000; + i += 1 + + # Map Unicode whitespace and control characters + # (categories Z* and C* except ASCII space) + if not _unichr_is_printable(ucs): + # Unfortuately, Python 2's unicode type doesn't seem + # to expose the "isprintable" method + + # Map 8-bit characters to '\\xhh' + if ucs <= 0xff: + out.write('\\x') + out.write(hexdigits[(ord(ucs) >> 4) & 0x000F]) + out.write(hexdigits[ord(ucs) & 0x000F]) + # Map 21-bit characters to '\U00xxxxxx' + elif ucs >= 0x10000: + out.write('\\U') + out.write(hexdigits[(ord(ucs) >> 28) & 0x0000000F]) + out.write(hexdigits[(ord(ucs) >> 24) & 0x0000000F]) + out.write(hexdigits[(ord(ucs) >> 20) & 0x0000000F]) + out.write(hexdigits[(ord(ucs) >> 16) & 0x0000000F]) + out.write(hexdigits[(ord(ucs) >> 12) & 0x0000000F]) + out.write(hexdigits[(ord(ucs) >> 8) & 0x0000000F]) + out.write(hexdigits[(ord(ucs) >> 4) & 0x0000000F]) + out.write(hexdigits[ord(ucs) & 0x0000000F]) + # Map 16-bit characters to '\uxxxx' + else: + out.write('\\u') + out.write(hexdigits[(ord(ucs) >> 12) & 0x000F]) + out.write(hexdigits[(ord(ucs) >> 8) & 0x000F]) + out.write(hexdigits[(ord(ucs) >> 4) & 0x000F]) + out.write(hexdigits[ord(ucs) & 0x000F]) + else: + # Copy characters as-is + out.write(ch) + if self.char_width == 2: + if ord(ucs) >= 0x10000: + out.write(ch2) + + out.write(quote) + + + def int_from_int(gdbval): return int(str(gdbval)) @@ -1065,7 +1216,7 @@ def pretty_printer_lookup(gdbval): if type.code == gdb.TYPE_CODE_PTR: type = type.target().unqualified() t = str(type) - if t in ("PyObject", "PyFrameObject"): + if t in ("PyObject", "PyFrameObject", "PyUnicodeObject"): return PyObjectPtrPrinter(gdbval) """ -- cgit v0.12