diff options
author | Victor Stinner <victor.stinner@haypocalc.com> | 2010-05-20 11:29:45 (GMT) |
---|---|---|
committer | Victor Stinner <victor.stinner@haypocalc.com> | 2010-05-20 11:29:45 (GMT) |
commit | b1556c537d7c49978fa40594a9c9f40c6f88cdde (patch) | |
tree | dee0eb7ce8283451d3e407ce1b299094e93acafd /Tools | |
parent | 8f692275e9c2cd4e5f7959328f6f9da8538ffe9e (diff) | |
download | cpython-b1556c537d7c49978fa40594a9c9f40c6f88cdde.zip cpython-b1556c537d7c49978fa40594a9c9f40c6f88cdde.tar.gz cpython-b1556c537d7c49978fa40594a9c9f40c6f88cdde.tar.bz2 |
libpython.py: fix support of non-BMP unicode characters

Forward port some code from Python3:

* join surrogate pairs if sizeof(Py_UNICODE)==2
* Enable non-BMP test on narrow builds using u"\U0001D121" instead of
  unichr(0x1D121)

Diffstat (limited to 'Tools')
-rw-r--r-- | Tools/gdb/libpython.py | 28 |
1 files changed, 28 insertions, 0 deletions
```diff
diff --git a/Tools/gdb/libpython.py b/Tools/gdb/libpython.py
index f62735f..3481f71 100644
--- a/Tools/gdb/libpython.py
+++ b/Tools/gdb/libpython.py
@@ -1013,6 +1013,10 @@ class PyTypeObjectPtr(PyObjectPtr):
 class PyUnicodeObjectPtr(PyObjectPtr):
     _typename = 'PyUnicodeObject'
 
+    def char_width(self):
+        _type_Py_UNICODE = gdb.lookup_type('Py_UNICODE')
+        return _type_Py_UNICODE.sizeof
+
     def proxyval(self, visited):
         # From unicodeobject.h:
         # Py_ssize_t length; /* Length of raw Unicode data in buffer */
@@ -1029,6 +1033,30 @@ class PyUnicodeObjectPtr(PyObjectPtr):
         result = u''.join([unichr(ucs) for ucs in Py_UNICODEs])
         return result
 
+    def write_repr(self, out, visited):
+        proxy = self.proxyval(visited)
+        if self.char_width() == 2:
+            # sizeof(Py_UNICODE)==2: join surrogates
+            proxy2 = []
+            i = 0
+            while i < len(proxy):
+                ch = proxy[i]
+                i += 1
+                if (i < len(proxy)
+                and 0xD800 <= ord(ch) < 0xDC00 \
+                and 0xDC00 <= ord(proxy[i]) <= 0xDFFF):
+                    # Get code point from surrogate pair
+                    ch2 = proxy[i]
+                    code = (ord(ch) & 0x03FF) << 10
+                    code |= ord(ch2) & 0x03FF
+                    code += 0x00010000
+                    i += 1
+                    proxy2.append(unichr(code))
+                else:
+                    proxy2.append(ch)
+            proxy = u''.join(proxy2)
+        out.write(repr(proxy))
+
 
 def int_from_int(gdbval):
     return int(str(gdbval))
```