diff options
author | Martin v. Löwis <martin@v.loewis.de> | 2010-04-21 22:38:42 (GMT) |
---|---|---|
committer | Martin v. Löwis <martin@v.loewis.de> | 2010-04-21 22:38:42 (GMT) |
commit | 5ae681081975ac074bd030d3f23e99e3644cfe10 (patch) | |
tree | b67876132e2572d1512af292da1b5cedfc0e8c54 /Tools/gdb/libpython.py | |
parent | 4c4b078101fcb8502efac17d3f8d335700195337 (diff) | |
download | cpython-5ae681081975ac074bd030d3f23e99e3644cfe10.zip cpython-5ae681081975ac074bd030d3f23e99e3644cfe10.tar.gz cpython-5ae681081975ac074bd030d3f23e99e3644cfe10.tar.bz2 |
Issue #8380: Port gdb/libpython to 3.x.
Diffstat (limited to 'Tools/gdb/libpython.py')
-rw-r--r-- | Tools/gdb/libpython.py | 225 |
1 files changed, 188 insertions, 37 deletions
diff --git a/Tools/gdb/libpython.py b/Tools/gdb/libpython.py index a8f28d7..426fb7b 100644 --- a/Tools/gdb/libpython.py +++ b/Tools/gdb/libpython.py @@ -20,8 +20,9 @@ In particular, given a gdb.Value corresponding to a PyObject* in the inferior process, we can generate a "proxy value" within the gdb process. For example, given a PyObject* in the inferior process that is in fact a PyListObject* holding three PyObject* that turn out to be PyBytesObject* instances, we can -generate a proxy value within the gdb process that is a list of strings: - ["foo", "bar", "baz"] +generate a proxy value within the gdb process that is a list of bytes +instances: + [b"foo", b"bar", b"baz"] Doing so can be expensive for complicated graphs of objects, and could take some time, so we also have a "write_repr" method that writes a representation @@ -57,7 +58,7 @@ Py_TPFLAGS_INT_SUBCLASS = (1L << 23) Py_TPFLAGS_LONG_SUBCLASS = (1L << 24) Py_TPFLAGS_LIST_SUBCLASS = (1L << 25) Py_TPFLAGS_TUPLE_SUBCLASS = (1L << 26) -Py_TPFLAGS_STRING_SUBCLASS = (1L << 27) +Py_TPFLAGS_BYTES_SUBCLASS = (1L << 27) Py_TPFLAGS_UNICODE_SUBCLASS = (1L << 28) Py_TPFLAGS_DICT_SUBCLASS = (1L << 29) Py_TPFLAGS_BASE_EXC_SUBCLASS = (1L << 30) @@ -66,6 +67,9 @@ Py_TPFLAGS_TYPE_SUBCLASS = (1L << 31) MAX_OUTPUT_LEN=1024 +hexdigits = "0123456789abcdef" + + class NullPyObjectPtr(RuntimeError): pass @@ -148,12 +152,8 @@ class PyObjectPtr(object): return pyo_ptr.dereference()[name] if name == 'ob_size': - try: - # Python 2: - return self._gdbval.dereference()[name] - except RuntimeError: - # Python 3: - return self._gdbval.dereference()['ob_base'][name] + pyo_ptr = self._gdbval.cast(PyVarObjectPtr.get_gdb_type()) + return pyo_ptr.dereference()[name] # General case: look it up inside the object: return self._gdbval.dereference()[name] @@ -318,7 +318,7 @@ class PyObjectPtr(object): return PyListObjectPtr if tp_flags & Py_TPFLAGS_TUPLE_SUBCLASS: return PyTupleObjectPtr - if tp_flags & Py_TPFLAGS_STRING_SUBCLASS: + if tp_flags & Py_TPFLAGS_BYTES_SUBCLASS: return PyBytesObjectPtr if tp_flags & Py_TPFLAGS_UNICODE_SUBCLASS: return PyUnicodeObjectPtr @@ -355,6 +355,8 @@ class PyObjectPtr(object): def as_address(self): return long(self._gdbval) +class PyVarObjectPtr(PyObjectPtr): + _typename = 'PyVarObject' class ProxyAlreadyVisited(object): ''' @@ -515,20 +517,6 @@ class PyBaseExceptionObjectPtr(PyObjectPtr): out.write(self.safe_tp_name()) self.write_field_repr('args', out, visited) -class PyBoolObjectPtr(PyObjectPtr): - """ - Class wrapping a gdb.Value that's a PyBoolObject* i.e. one of the two - <bool> instances (Py_True/Py_False) within the process being debugged. - """ - _typename = 'PyBoolObject' - - def proxyval(self, visited): - if int_from_int(self.field('ob_ival')): - return True - else: - return False - - class PyClassObjectPtr(PyObjectPtr): """ Class wrapping a gdb.Value that's a PyClassObject* i.e. a <classobj> @@ -691,13 +679,6 @@ class PyInstanceObjectPtr(PyObjectPtr): _write_instance_repr(out, visited, cl_name, pyop_in_dict, self.as_address()) -class PyIntObjectPtr(PyObjectPtr): - _typename = 'PyIntObject' - - def proxyval(self, visited): - result = int_from_int(self.field('ob_ival')) - return result - class PyListObjectPtr(PyObjectPtr): _typename = 'PyListObject' @@ -770,6 +751,22 @@ class PyLongObjectPtr(PyObjectPtr): result = -result return result + def write_repr(self, out, visited): + # Write this out as a Python 3 int literal, i.e. without the "L" suffix + proxy = self.proxyval(visited) + out.write("%s" % proxy) + + +class PyBoolObjectPtr(PyLongObjectPtr): + """ + Class wrapping a gdb.Value that's a PyBoolObject* i.e. one of the two + <bool> instances (Py_True/Py_False) within the process being debugged. + """ + def proxyval(self, visited): + if PyLongObjectPtr.proxyval(self, visited): + return True + else: + return False class PyNoneStructPtr(PyObjectPtr): """ @@ -894,9 +891,9 @@ class PyFrameObjectPtr(PyObjectPtr): return out.write('Frame 0x%x, for file %s, line %i, in %s (' % (self.as_address(), - self.co_filename, + self.co_filename.proxyval(visited), self.current_line_num(), - self.co_name)) + self.co_name.proxyval(visited))) first = True for pyop_name, pyop_value in self.iter_locals(): if not first: @@ -933,7 +930,8 @@ class PySetObjectPtr(PyObjectPtr): return set(members) def write_repr(self, out, visited): - out.write(self.safe_tp_name()) + # Emulate Python 3's set_repr + tp_name = self.safe_tp_name() # Guard against infinite loops: if self.as_address() in visited: @@ -941,7 +939,18 @@ class PySetObjectPtr(PyObjectPtr): return visited.add(self.as_address()) - out.write('([') + # Python 3's set_repr special-cases the empty set: + if not self.field('used'): + out.write(tp_name) + out.write('()') + return + + # Python 3 uses {} for set literals: + if tp_name != 'set': + out.write(tp_name) + out.write('(') + + out.write('{') first = True table = self.field('table') for i in safe_range(self.field('mask')+1): @@ -955,7 +964,10 @@ class PySetObjectPtr(PyObjectPtr): out.write(', ') first = False pyop_key.write_repr(out, visited) - out.write('])') + out.write('}') + + if tp_name != 'set': + out.write(')') class PyBytesObjectPtr(PyObjectPtr): @@ -970,6 +982,37 @@ class PyBytesObjectPtr(PyObjectPtr): def proxyval(self, visited): return str(self) + def write_repr(self, out, visited): + # Write this out as a Python 3 bytes literal, i.e. with a "b" prefix + + # Get a PyStringObject* within the Python 2 gdb process: + proxy = self.proxyval(visited) + + # Transliteration of Python 3's Objects/bytesobject.c:PyBytes_Repr + # to Python 2 code: + quote = "'" + if "'" in proxy and not '"' in proxy: + quote = '"' + out.write('b') + out.write(quote) + for byte in proxy: + if byte == quote or byte == '\\': + out.write('\\') + out.write(byte) + elif byte == '\t': + out.write('\\t') + elif byte == '\n': + out.write('\\n') + elif byte == '\r': + out.write('\\r') + elif byte < ' ' or ord(byte) >= 0x7f: + out.write('\\x') + out.write(hexdigits[(ord(byte) & 0xf0) >> 4]) + out.write(hexdigits[ord(byte) & 0xf]) + else: + out.write(byte) + out.write(quote) + class PyTupleObjectPtr(PyObjectPtr): _typename = 'PyTupleObject' @@ -1010,9 +1053,21 @@ class PyTypeObjectPtr(PyObjectPtr): _typename = 'PyTypeObject' +def _unichr_is_printable(char): + # Logic adapted from Python 3's Tools/unicode/makeunicodedata.py + if char == u" ": + return True + import unicodedata + return unicodedata.category(char)[0] not in ("C", "Z") + + class PyUnicodeObjectPtr(PyObjectPtr): _typename = 'PyUnicodeObject' + def char_width(self): + _type_Py_UNICODE = gdb.lookup_type('Py_UNICODE') + return _type_Py_UNICODE.sizeof + def proxyval(self, visited): # From unicodeobject.h: # Py_ssize_t length; /* Length of raw Unicode data in buffer */ @@ -1029,6 +1084,102 @@ class PyUnicodeObjectPtr(PyObjectPtr): result = u''.join([unichr(ucs) for ucs in Py_UNICODEs]) return result + def write_repr(self, out, visited): + # Write this out as a Python 3 str literal, i.e. without a "u" prefix + + # Get a PyUnicodeObject* within the Python 2 gdb process: + proxy = self.proxyval(visited) + + # Transliteration of Python 3's Object/unicodeobject.c:unicode_repr + # to Python 2: + if "'" in proxy and '"' not in proxy: + quote = '"' + else: + quote = "'" + out.write(quote) + + i = 0 + while i < len(proxy): + ch = proxy[i] + i += 1 + + # Escape quotes and backslashes + if ch == quote or ch == '\\': + out.write('\\') + out.write(ch) + + # Map special whitespace to '\t', \n', '\r' + elif ch == '\t': + out.write('\\t') + elif ch == '\n': + out.write('\\n') + elif ch == '\r': + out.write('\\r') + + # Map non-printable US ASCII to '\xhh' */ + elif ch < ' ' or ch == 0x7F: + out.write('\\x') + out.write(hexdigits[(ord(ch) >> 4) & 0x000F]) + out.write(hexdigits[ord(ch) & 0x000F]) + + # Copy ASCII characters as-is + elif ord(ch) < 0x7F: + out.write(ch) + + # Non-ASCII characters + else: + ucs = ch; + + if self.char_width == 2: + ch2 = 0 + # Get code point from surrogate pair + if i < len(proxy): + ch2 = proxy[i] + if (ord(ch) >= 0xD800 and ord(ch) < 0xDC00 + and ord(ch2) >= 0xDC00 and ord(ch2) <= 0xDFFF): + ucs = (((ch & 0x03FF) << 10) | (ch2 & 0x03FF)) + 0x00010000; + i += 1 + + # Map Unicode whitespace and control characters + # (categories Z* and C* except ASCII space) + if not _unichr_is_printable(ucs): + # Unfortuately, Python 2's unicode type doesn't seem + # to expose the "isprintable" method + + # Map 8-bit characters to '\\xhh' + if ucs <= 0xff: + out.write('\\x') + out.write(hexdigits[(ord(ucs) >> 4) & 0x000F]) + out.write(hexdigits[ord(ucs) & 0x000F]) + # Map 21-bit characters to '\U00xxxxxx' + elif ucs >= 0x10000: + out.write('\\U') + out.write(hexdigits[(ord(ucs) >> 28) & 0x0000000F]) + out.write(hexdigits[(ord(ucs) >> 24) & 0x0000000F]) + out.write(hexdigits[(ord(ucs) >> 20) & 0x0000000F]) + out.write(hexdigits[(ord(ucs) >> 16) & 0x0000000F]) + out.write(hexdigits[(ord(ucs) >> 12) & 0x0000000F]) + out.write(hexdigits[(ord(ucs) >> 8) & 0x0000000F]) + out.write(hexdigits[(ord(ucs) >> 4) & 0x0000000F]) + out.write(hexdigits[ord(ucs) & 0x0000000F]) + # Map 16-bit characters to '\uxxxx' + else: + out.write('\\u') + out.write(hexdigits[(ord(ucs) >> 12) & 0x000F]) + out.write(hexdigits[(ord(ucs) >> 8) & 0x000F]) + out.write(hexdigits[(ord(ucs) >> 4) & 0x000F]) + out.write(hexdigits[ord(ucs) & 0x000F]) + else: + # Copy characters as-is + out.write(ch) + if self.char_width == 2: + if ord(ucs) >= 0x10000: + out.write(ch2) + + out.write(quote) + + + def int_from_int(gdbval): return int(str(gdbval)) @@ -1065,7 +1216,7 @@ def pretty_printer_lookup(gdbval): if type.code == gdb.TYPE_CODE_PTR: type = type.target().unqualified() t = str(type) - if t in ("PyObject", "PyFrameObject"): + if t in ("PyObject", "PyFrameObject", "PyUnicodeObject"): return PyObjectPtrPrinter(gdbval) """ |