diff options
author | Antoine Pitrou <solipsis@pitrou.net> | 2010-09-08 20:57:48 (GMT) |
---|---|---|
committer | Antoine Pitrou <solipsis@pitrou.net> | 2010-09-08 20:57:48 (GMT) |
commit | b41e128fe1e2a511748926d0837d1a87f090b9a9 (patch) | |
tree | 1f27cccacc9099c65e35634089a18a714d4d2d38 | |
parent | 63b17671f00aafefc01c9b6d541d48c842e523b7 (diff) | |
download | cpython-b41e128fe1e2a511748926d0837d1a87f090b9a9.zip cpython-b41e128fe1e2a511748926d0837d1a87f090b9a9.tar.gz cpython-b41e128fe1e2a511748926d0837d1a87f090b9a9.tar.bz2 |
Issue #9188: The gdb extension now handles correctly narrow (UCS2) as well
as wide (UCS4) unicode builds for both the host interpreter (embedded
inside gdb) and the interpreter under test.
-rw-r--r-- | Misc/NEWS | 7 | ||||
-rw-r--r-- | Tools/gdb/libpython.py | 56 |
2 files changed, 50 insertions, 13 deletions
@@ -76,6 +76,13 @@ Library
   guaranteed to exist in all Python implementations and the names of hash
   algorithms available in the current process.
 
+Tools/Demos
+-----------
+
+- Issue #9188: The gdb extension now handles correctly narrow (UCS2) as well
+  as wide (UCS4) unicode builds for both the host interpreter (embedded
+  inside gdb) and the interpreter under test.
+
 Build
 -----
 
diff --git a/Tools/gdb/libpython.py b/Tools/gdb/libpython.py
index b23a22e..79f21e3 100644
--- a/Tools/gdb/libpython.py
+++ b/Tools/gdb/libpython.py
@@ -1065,7 +1065,19 @@ def _unichr_is_printable(char):
     if char == u" ":
         return True
     import unicodedata
-    return unicodedata.category(char)[0] not in ("C", "Z")
+    return unicodedata.category(char) not in ("C", "Z")
+
+if sys.maxunicode >= 0x10000:
+    _unichr = unichr
+else:
+    # Needed for proper surrogate support if sizeof(Py_UNICODE) is 2 in gdb
+    def _unichr(x):
+        if x < 0x10000:
+            return unichr(x)
+        x -= 0x10000
+        ch1 = 0xD800 | (x >> 10)
+        ch2 = 0xDC00 | (x & 0x3FF)
+        return unichr(ch1) + unichr(ch2)
 
 
 class PyUnicodeObjectPtr(PyObjectPtr):
@@ -1084,11 +1096,33 @@ class PyUnicodeObjectPtr(PyObjectPtr):
 
         # Gather a list of ints from the Py_UNICODE array; these are either
         # UCS-2 or UCS-4 code points:
-        Py_UNICODEs = [int(field_str[i]) for i in safe_range(field_length)]
+        if self.char_width() > 2:
+            Py_UNICODEs = [int(field_str[i]) for i in safe_range(field_length)]
+        else:
+            # A more elaborate routine if sizeof(Py_UNICODE) is 2 in the
+            # inferior process: we must join surrogate pairs.
+            Py_UNICODEs = []
+            i = 0
+            while i < field_length:
+                ucs = int(field_str[i])
+                i += 1
+                if ucs < 0xD800 or ucs >= 0xDC00 or i == field_length:
+                    Py_UNICODEs.append(ucs)
+                    continue
+                # This could be a surrogate pair.
+                ucs2 = int(field_str[i])
+                if ucs2 < 0xDC00 or ucs2 > 0xDFFF:
+                    continue
+                code = (ucs & 0x03FF) << 10
+                code |= ucs2 & 0x03FF
+                code += 0x00010000
+                Py_UNICODEs.append(code)
+                i += 1
 
         # Convert the int code points to unicode characters, and generate a
-        # local unicode instance:
-        result = u''.join([unichr(ucs) for ucs in Py_UNICODEs])
+        # local unicode instance.
+        # This splits surrogate pairs if sizeof(Py_UNICODE) is 2 here (in gdb).
+        result = u''.join([_unichr(ucs) for ucs in Py_UNICODEs])
         return result
 
     def write_repr(self, out, visited):
@@ -1137,20 +1171,16 @@ class PyUnicodeObjectPtr(PyObjectPtr):
             else:
                 ucs = ch
                 orig_ucs = None
+                ch2 = None
                 if self.char_width() == 2:
-                    # Get code point from surrogate pair
+                    # If sizeof(Py_UNICODE) is 2 here (in gdb), join
+                    # surrogate pairs before calling _unichr_is_printable.
                     if (i < len(proxy)
                     and 0xD800 <= ord(ch) < 0xDC00 \
                     and 0xDC00 <= ord(proxy[i]) <= 0xDFFF):
                         ch2 = proxy[i]
-                        code = (ord(ch) & 0x03FF) << 10
-                        code |= ord(ch2) & 0x03FF
-                        code += 0x00010000
-                        orig_ucs = ucs
-                        ucs = unichr(code)
+                        ucs = ch + ch2
                         i += 1
-                    else:
-                        ch2 = None
 
                 printable = _unichr_is_printable(ucs)
                 if printable:
@@ -1195,7 +1225,7 @@ class PyUnicodeObjectPtr(PyObjectPtr):
                 else:
                     # Copy characters as-is
                     out.write(ch)
-                    if self.char_width() == 2 and (ch2 is not None):
+                    if ch2 is not None:
                         out.write(ch2)
 
         out.write(quote)