author    | Antoine Pitrou <solipsis@pitrou.net> | 2010-09-08 21:12:36 (GMT)
committer | Antoine Pitrou <solipsis@pitrou.net> | 2010-09-08 21:12:36 (GMT)
commit    | 2fba0b3dfcd37d26e5c36987e87b099b7ecf1b4e (patch)
tree      | 680f38665d62ff8d1a970420660caf78fde2767b /Tools
parent    | 0a7b65b7da3df16bc8c81b3654756d64eb28ff80 (diff)
Merged revisions 84635-84636 via svnmerge from
svn+ssh://pythondev@svn.python.org/python/branches/py3k
........
r84635 | antoine.pitrou | 2010-09-08 22:57:48 +0200 (Wed, 08 Sep 2010) | 5 lines
Issue #9188: The gdb extension now correctly handles narrow (UCS2) as well
as wide (UCS4) unicode builds, for both the host interpreter (embedded
inside gdb) and the interpreter under test.
........
r84636 | antoine.pitrou | 2010-09-08 23:07:40 +0200 (Wed, 08 Sep 2010) | 4 lines
Add a safety limit to the number of unicode characters we fetch
(follow-up to r84635, suggested by Dave Malcolm).
........
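
The bit-level trick behind r84635 can be exercised outside gdb. Below is a minimal standalone sketch (the name split_to_surrogates is invented for the example and does not exist in libpython.py) of the same arithmetic the new _unichr() helper uses when the host interpreter is a narrow (UCS2) build: a supplementary-plane code point is split into a high/low UTF-16 surrogate pair.

import sys

def split_to_surrogates(code):
    # Same arithmetic as _unichr() in the patch: code points above U+FFFF
    # are expressed as a UTF-16 surrogate pair on a narrow (UCS2) build.
    if code < 0x10000:
        return (code,)
    code -= 0x10000
    return (0xD800 | (code >> 10), 0xDC00 | (code & 0x3FF))

# U+1D11E (MUSICAL SYMBOL G CLEF) becomes the pair D834/DD1E.
assert split_to_surrogates(0x1D11E) == (0xD834, 0xDD1E)
# BMP code points pass through unchanged.
assert split_to_surrogates(0x20AC) == (0x20AC,)

# A wide (UCS4) host never needs the split; the patch checks this the same way.
print(sys.maxunicode >= 0x10000)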
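
The safety limit added in r84636 bounds how many Py_UNICODE elements get read from the inferior process, so a corrupt length field cannot make gdb loop over an arbitrarily large array. safety_limit() already exists in libpython.py; the snippet below is only a stand-in with an assumed maximum, showing the shape of the check rather than the real helper.

HYPOTHETICAL_MAX_CHARS = 1000   # placeholder cap; not the value libpython.py uses

def capped_fetch_length(reported_length, maximum=HYPOTHETICAL_MAX_CHARS):
    # Never trust a length read out of the debugged process: clamp it
    # before using it as a loop bound.
    return min(int(reported_length), maximum)

assert capped_fetch_length(42) == 42
assert capped_fetch_length(10**9) == HYPOTHETICAL_MAX_CHARS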
Diffstat (limited to 'Tools')
-rw-r--r-- | Tools/gdb/libpython.py | 65
1 file changed, 38 insertions, 27 deletions
diff --git a/Tools/gdb/libpython.py b/Tools/gdb/libpython.py
index 22c0066..21e74d8 100644
--- a/Tools/gdb/libpython.py
+++ b/Tools/gdb/libpython.py
@@ -1011,6 +1011,18 @@ class PyTypeObjectPtr(PyObjectPtr):
     _typename = 'PyTypeObject'
 
+if sys.maxunicode >= 0x10000:
+    _unichr = unichr
+else:
+    # Needed for proper surrogate support if sizeof(Py_UNICODE) is 2 in gdb
+    def _unichr(x):
+        if x < 0x10000:
+            return unichr(x)
+        x -= 0x10000
+        ch1 = 0xD800 | (x >> 10)
+        ch2 = 0xDC00 | (x & 0x3FF)
+        return unichr(ch1) + unichr(ch2)
+
 class PyUnicodeObjectPtr(PyObjectPtr):
     _typename = 'PyUnicodeObject'
 
@@ -1027,37 +1039,36 @@ class PyUnicodeObjectPtr(PyObjectPtr):
 
         # Gather a list of ints from the Py_UNICODE array; these are either
         # UCS-2 or UCS-4 code points:
-        Py_UNICODEs = [int(field_str[i]) for i in safe_range(field_length)]
+        if self.char_width() > 2:
+            Py_UNICODEs = [int(field_str[i]) for i in safe_range(field_length)]
+        else:
+            # A more elaborate routine if sizeof(Py_UNICODE) is 2 in the
+            # inferior process: we must join surrogate pairs.
+            Py_UNICODEs = []
+            i = 0
+            limit = safety_limit(field_length)
+            while i < limit:
+                ucs = int(field_str[i])
+                i += 1
+                if ucs < 0xD800 or ucs >= 0xDC00 or i == field_length:
+                    Py_UNICODEs.append(ucs)
+                    continue
+                # This could be a surrogate pair.
+                ucs2 = int(field_str[i])
+                if ucs2 < 0xDC00 or ucs2 > 0xDFFF:
+                    continue
+                code = (ucs & 0x03FF) << 10
+                code |= ucs2 & 0x03FF
+                code += 0x00010000
+                Py_UNICODEs.append(code)
+                i += 1
 
         # Convert the int code points to unicode characters, and generate a
-        # local unicode instance:
-        result = u''.join([unichr(ucs) for ucs in Py_UNICODEs])
+        # local unicode instance.
+        # This splits surrogate pairs if sizeof(Py_UNICODE) is 2 here (in gdb).
+        result = u''.join([_unichr(ucs) for ucs in Py_UNICODEs])
         return result
 
-    def write_repr(self, out, visited):
-        proxy = self.proxyval(visited)
-        if self.char_width() == 2:
-            # sizeof(Py_UNICODE)==2: join surrogates
-            proxy2 = []
-            i = 0
-            while i < len(proxy):
-                ch = proxy[i]
-                i += 1
-                if (i < len(proxy)
-                    and 0xD800 <= ord(ch) < 0xDC00 \
-                    and 0xDC00 <= ord(proxy[i]) <= 0xDFFF):
-                    # Get code point from surrogate pair
-                    ch2 = proxy[i]
-                    code = (ord(ch) & 0x03FF) << 10
-                    code |= ord(ch2) & 0x03FF
-                    code += 0x00010000
-                    i += 1
-                    proxy2.append(unichr(code))
-                else:
-                    proxy2.append(ch)
-            proxy = u''.join(proxy2)
-        out.write(repr(proxy))
-
 def int_from_int(gdbval):
     return int(str(gdbval))
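
To see what the new narrow-build branch of proxyval() computes, the pair-joining loop can be reproduced on plain Python ints standing in for the gdb.Value elements that field_str[i] yields in the real code. This is a self-contained sketch of the same logic (join_surrogates is a made-up name), not the patched method itself:

def join_surrogates(units):
    # 'units' is a list of 16-bit Py_UNICODE values read from a narrow
    # (sizeof(Py_UNICODE) == 2) inferior; return the joined code points.
    code_points = []
    i = 0
    n = len(units)
    while i < n:
        ucs = units[i]
        i += 1
        if ucs < 0xD800 or ucs >= 0xDC00 or i == n:
            # Not a high surrogate (or nothing left to pair it with).
            code_points.append(ucs)
            continue
        ucs2 = units[i]
        if ucs2 < 0xDC00 or ucs2 > 0xDFFF:
            # High surrogate not followed by a low one; like the patch,
            # drop the lone high surrogate.
            continue
        code_points.append((((ucs & 0x03FF) << 10) | (ucs2 & 0x03FF)) + 0x10000)
        i += 1
    return code_points

# 'a', U+1D11E encoded as a surrogate pair, then 'b'.
assert join_surrogates([0x61, 0xD834, 0xDD1E, 0x62]) == [0x61, 0x1D11E, 0x62]

In the patch itself the loop bound is additionally capped with safety_limit(), as introduced in r84636.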