diff options
Diffstat (limited to 'Tools/gdb/libpython.py')
-rw-r--r-- | Tools/gdb/libpython.py | 43 |
1 files changed, 39 insertions, 4 deletions
diff --git a/Tools/gdb/libpython.py b/Tools/gdb/libpython.py index ca1dc60..4b42c8b 100644 --- a/Tools/gdb/libpython.py +++ b/Tools/gdb/libpython.py @@ -50,6 +50,10 @@ _type_char_ptr = gdb.lookup_type('char').pointer() # char* _type_unsigned_char_ptr = gdb.lookup_type('unsigned char').pointer() # unsigned char* _type_void_ptr = gdb.lookup_type('void').pointer() # void* _type_size_t = gdb.lookup_type('size_t') +_type_unsigned_short_ptr = gdb.lookup_type('unsigned short').pointer() +_type_unsigned_int_ptr = gdb.lookup_type('unsigned int').pointer() + +_is_pep393 = 'data' in [f.name for f in gdb.lookup_type('PyUnicodeObject').target().fields()] SIZEOF_VOID_P = _type_void_ptr.sizeof @@ -1122,12 +1126,43 @@ class PyUnicodeObjectPtr(PyObjectPtr): # From unicodeobject.h: # Py_ssize_t length; /* Length of raw Unicode data in buffer */ # Py_UNICODE *str; /* Raw Unicode buffer */ - field_length = long(self.field('length')) - field_str = self.field('str') + if _is_pep393: + # Python 3.3 and newer + may_have_surrogates = False + compact = self.field('_base') + ascii = compact['_base'] + state = ascii['state'] + is_compact_ascii = (int(state['ascii']) and int(state['compact'])) + field_length = long(ascii['length']) + if not int(state['ready']): + # string is not ready + may_have_surrogates = True + field_str = ascii['wstr'] + if not is_compact_ascii: + field_length = compact('wstr_length') + else: + if is_compact_ascii: + field_str = ascii.address + 1 + elif int(state['compact']): + field_str = compact.address + 1 + else: + field_str = self.field('data')['any'] + repr_kind = int(state['kind']) + if repr_kind == 1: + field_str = field_str.cast(_type_unsigned_char_ptr) + elif repr_kind == 2: + field_str = field_str.cast(_type_unsigned_short_ptr) + elif repr_kind == 3: + field_str = field_str.cast(_type_unsigned_int_ptr) + else: + # Python 3.2 and earlier + field_length = long(self.field('length')) + field_str = self.field('str') + may_have_surrogates = self.char_width() == 2 # Gather a list of ints from the Py_UNICODE array; these are either - # UCS-2 or UCS-4 code points: - if self.char_width() > 2: + # UCS-1, UCS-2 or UCS-4 code points: + if not may_have_surrogates: Py_UNICODEs = [int(field_str[i]) for i in safe_range(field_length)] else: # A more elaborate routine if sizeof(Py_UNICODE) is 2 in the |