1 files changed, 39 insertions, 4 deletions
diff --git a/Tools/gdb/libpython.py b/Tools/gdb/libpython.py
index ca1dc60..4b42c8b 100644
--- a/Tools/gdb/libpython.py
+++ b/Tools/gdb/libpython.py
@@ -50,6 +50,10 @@ _type_char_ptr = gdb.lookup_type('char').pointer() # char*
 _type_unsigned_char_ptr = gdb.lookup_type('unsigned char').pointer() # unsigned char*
 _type_void_ptr = gdb.lookup_type('void').pointer() # void*
 _type_size_t = gdb.lookup_type('size_t')
+_type_unsigned_short_ptr = gdb.lookup_type('unsigned short').pointer() # unsigned short*
+_type_unsigned_int_ptr = gdb.lookup_type('unsigned int').pointer() # unsigned int*
+
+_is_pep393 = 'data' in [f.name for f in gdb.lookup_type('PyUnicodeObject').target().fields()] # 'data' field present => Python 3.3+ struct layout
 
 SIZEOF_VOID_P = _type_void_ptr.sizeof
 
@@ -1122,12 +1126,43 @@ class PyUnicodeObjectPtr(PyObjectPtr):
         # From unicodeobject.h:
         #     Py_ssize_t length;  /* Length of raw Unicode data in buffer */
         #     Py_UNICODE *str;    /* Raw Unicode buffer */
-        field_length = long(self.field('length'))
-        field_str = self.field('str')
+        if _is_pep393:
+            # Python 3.3 and newer: walk the nested '_base' structs to find the
+            may_have_surrogates = False  # ... state flags, length and buffer
+            compact = self.field('_base')
+            ascii = compact['_base']
+            state = ascii['state']
+            is_compact_ascii = (int(state['ascii']) and int(state['compact']))
+            field_length = long(ascii['length'])
+            if not int(state['ready']):
+                # string is not ready: read it from the 'wstr' buffer instead
+                may_have_surrogates = True  # use the surrogate-aware decoding path
+                field_str = ascii['wstr']
+                if not is_compact_ascii:
+                    field_length = long(compact['wstr_length'])  # length of the wstr buffer
+            else:
+                if is_compact_ascii:
+                    field_str = ascii.address + 1  # buffer starts right after this struct
+                elif int(state['compact']):
+                    field_str = compact.address + 1  # buffer starts right after this struct
+                else:
+                    field_str = self.field('data')['any']
+                repr_kind = int(state['kind'])  # selects the pointer type to read through
+                if repr_kind == 1:
+                    field_str = field_str.cast(_type_unsigned_char_ptr)
+                elif repr_kind == 2:
+                    field_str = field_str.cast(_type_unsigned_short_ptr)
+                elif repr_kind == 3:  # NOTE(review): confirm the 4-byte kind value
+                    field_str = field_str.cast(_type_unsigned_int_ptr)
+        else:
+            # Python 3.2 and earlier: flat 'length' / 'str' fields
+            field_length = long(self.field('length'))
+            field_str = self.field('str')
+            may_have_surrogates = self.char_width() == 2
 
         # Gather a list of ints from the Py_UNICODE array; these are either
-        # UCS-2 or UCS-4 code points:
-        if self.char_width() > 2:
+        # UCS-1, UCS-2 or UCS-4 code points:
+        if not may_have_surrogates:
             Py_UNICODEs = [int(field_str[i]) for i in safe_range(field_length)]
         else:
             # A more elaborate routine if sizeof(Py_UNICODE) is 2 in the