summaryrefslogtreecommitdiffstats
path: root/Tools
diff options
context:
space:
mode:
authorAntoine Pitrou <solipsis@pitrou.net>2010-09-08 21:12:36 (GMT)
committerAntoine Pitrou <solipsis@pitrou.net>2010-09-08 21:12:36 (GMT)
commit2fba0b3dfcd37d26e5c36987e87b099b7ecf1b4e (patch)
tree680f38665d62ff8d1a970420660caf78fde2767b /Tools
parent0a7b65b7da3df16bc8c81b3654756d64eb28ff80 (diff)
downloadcpython-2fba0b3dfcd37d26e5c36987e87b099b7ecf1b4e.zip
cpython-2fba0b3dfcd37d26e5c36987e87b099b7ecf1b4e.tar.gz
cpython-2fba0b3dfcd37d26e5c36987e87b099b7ecf1b4e.tar.bz2
Merged revisions 84635-84636 via svnmerge from
svn+ssh://pythondev@svn.python.org/python/branches/py3k ........ r84635 | antoine.pitrou | 2010-09-08 22:57:48 +0200 (mer., 08 sept. 2010) | 5 lines Issue #9188: The gdb extension now handles correctly narrow (UCS2) as well as wide (UCS4) unicode builds for both the host interpreter (embedded inside gdb) and the interpreter under test. ........ r84636 | antoine.pitrou | 2010-09-08 23:07:40 +0200 (mer., 08 sept. 2010) | 4 lines Add a safety limit to the number of unicode characters we fetch (followup to r84635, suggested by Dave Malcolm). ........
Diffstat (limited to 'Tools')
-rw-r--r--Tools/gdb/libpython.py65
1 file changed, 38 insertions, 27 deletions
diff --git a/Tools/gdb/libpython.py b/Tools/gdb/libpython.py
index 22c0066..21e74d8 100644
--- a/Tools/gdb/libpython.py
+++ b/Tools/gdb/libpython.py
@@ -1011,6 +1011,18 @@ class PyTypeObjectPtr(PyObjectPtr):
_typename = 'PyTypeObject'
+if sys.maxunicode >= 0x10000:
+ _unichr = unichr
+else:
+ # Needed for proper surrogate support if sizeof(Py_UNICODE) is 2 in gdb
+ def _unichr(x):
+ if x < 0x10000:
+ return unichr(x)
+ x -= 0x10000
+ ch1 = 0xD800 | (x >> 10)
+ ch2 = 0xDC00 | (x & 0x3FF)
+ return unichr(ch1) + unichr(ch2)
+
class PyUnicodeObjectPtr(PyObjectPtr):
_typename = 'PyUnicodeObject'
@@ -1027,37 +1039,36 @@ class PyUnicodeObjectPtr(PyObjectPtr):
# Gather a list of ints from the Py_UNICODE array; these are either
# UCS-2 or UCS-4 code points:
- Py_UNICODEs = [int(field_str[i]) for i in safe_range(field_length)]
+ if self.char_width() > 2:
+ Py_UNICODEs = [int(field_str[i]) for i in safe_range(field_length)]
+ else:
+ # A more elaborate routine if sizeof(Py_UNICODE) is 2 in the
+ # inferior process: we must join surrogate pairs.
+ Py_UNICODEs = []
+ i = 0
+ limit = safety_limit(field_length)
+ while i < limit:
+ ucs = int(field_str[i])
+ i += 1
+ if ucs < 0xD800 or ucs >= 0xDC00 or i == field_length:
+ Py_UNICODEs.append(ucs)
+ continue
+ # This could be a surrogate pair.
+ ucs2 = int(field_str[i])
+ if ucs2 < 0xDC00 or ucs2 > 0xDFFF:
+ continue
+ code = (ucs & 0x03FF) << 10
+ code |= ucs2 & 0x03FF
+ code += 0x00010000
+ Py_UNICODEs.append(code)
+ i += 1
# Convert the int code points to unicode characters, and generate a
- # local unicode instance:
- result = u''.join([unichr(ucs) for ucs in Py_UNICODEs])
+ # local unicode instance.
+ # This splits surrogate pairs if sizeof(Py_UNICODE) is 2 here (in gdb).
+ result = u''.join([_unichr(ucs) for ucs in Py_UNICODEs])
return result
- def write_repr(self, out, visited):
- proxy = self.proxyval(visited)
- if self.char_width() == 2:
- # sizeof(Py_UNICODE)==2: join surrogates
- proxy2 = []
- i = 0
- while i < len(proxy):
- ch = proxy[i]
- i += 1
- if (i < len(proxy)
- and 0xD800 <= ord(ch) < 0xDC00 \
- and 0xDC00 <= ord(proxy[i]) <= 0xDFFF):
- # Get code point from surrogate pair
- ch2 = proxy[i]
- code = (ord(ch) & 0x03FF) << 10
- code |= ord(ch2) & 0x03FF
- code += 0x00010000
- i += 1
- proxy2.append(unichr(code))
- else:
- proxy2.append(ch)
- proxy = u''.join(proxy2)
- out.write(repr(proxy))
-
def int_from_int(gdbval):
return int(str(gdbval))