summary | refs | log | tree | commit | diff | stats
path: root/Tools
diff options
context:
space:
mode:
author: Victor Stinner <victor.stinner@haypocalc.com> 2010-05-20 11:29:45 (GMT)
committer: Victor Stinner <victor.stinner@haypocalc.com> 2010-05-20 11:29:45 (GMT)
commit: b1556c537d7c49978fa40594a9c9f40c6f88cdde (patch)
tree: dee0eb7ce8283451d3e407ce1b299094e93acafd /Tools
parent: 8f692275e9c2cd4e5f7959328f6f9da8538ffe9e (diff)
download: cpython-b1556c537d7c49978fa40594a9c9f40c6f88cdde.zip
cpython-b1556c537d7c49978fa40594a9c9f40c6f88cdde.tar.gz
cpython-b1556c537d7c49978fa40594a9c9f40c6f88cdde.tar.bz2
libpython.py: fix support of non-BMP unicode characters
Forward port some code from Python3:

 * join surrogate pairs if sizeof(Py_UNICODE)==2
 * Enable non-BMP test on narrow builds using u"\U0001D121" instead of unichr(0x1D121)
Diffstat (limited to 'Tools')
-rw-r--r-- Tools/gdb/libpython.py | 28
1 files changed, 28 insertions, 0 deletions
diff --git a/Tools/gdb/libpython.py b/Tools/gdb/libpython.py
index f62735f..3481f71 100644
--- a/Tools/gdb/libpython.py
+++ b/Tools/gdb/libpython.py
@@ -1013,6 +1013,10 @@ class PyTypeObjectPtr(PyObjectPtr):
class PyUnicodeObjectPtr(PyObjectPtr):
_typename = 'PyUnicodeObject'
def char_width(self):
    """Return the size of the Py_UNICODE type as reported by gdb.

    The type is looked up by name with gdb.lookup_type() on every call and
    its ``sizeof`` attribute is returned.
    """
    return gdb.lookup_type('Py_UNICODE').sizeof
+
def proxyval(self, visited):
# From unicodeobject.h:
# Py_ssize_t length; /* Length of raw Unicode data in buffer */
@@ -1029,6 +1033,30 @@ class PyUnicodeObjectPtr(PyObjectPtr):
result = u''.join([unichr(ucs) for ucs in Py_UNICODEs])
return result
def write_repr(self, out, visited):
    """Write the repr() of this unicode object's text to *out*.

    The text is obtained from self.proxyval(visited).  When
    self.char_width() reports 2-byte Py_UNICODE units, every high
    surrogate (0xD800-0xDBFF) that is immediately followed by a low
    surrogate (0xDC00-0xDFFF) is merged into the single code point the
    pair encodes before repr() is taken.

    out     -- object with a write() method; receives the repr text
    visited -- passed through unchanged to proxyval()
    """
    proxy = self.proxyval(visited)
    if self.char_width() == 2:
        # sizeof(Py_UNICODE)==2: join surrogates
        joined = []
        length = len(proxy)
        i = 0
        while i < length:
            ch = proxy[i]
            i += 1
            if (i < length
                    and 0xD800 <= ord(ch) < 0xDC00
                    and 0xDC00 <= ord(proxy[i]) <= 0xDFFF):
                # Get code point from surrogate pair
                ch2 = proxy[i]
                i += 1
                code = (ord(ch) & 0x03FF) << 10
                code |= ord(ch2) & 0x03FF
                code += 0x00010000
                try:
                    joined.append(unichr(code))
                except ValueError:
                    # unichr() rejects code points above 0xFFFF when the
                    # Python running this script is itself a narrow
                    # build; keep the surrogate pair instead of failing.
                    joined.append(ch)
                    joined.append(ch2)
            else:
                joined.append(ch)
        proxy = u''.join(joined)
    out.write(repr(proxy))
+
def int_from_int(gdbval):
    """Return *gdbval* as a Python int by parsing its str() form."""
    text = str(gdbval)
    return int(text)