summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Antoine Pitrou <solipsis@pitrou.net> 2010-09-08 20:57:48 (GMT)
committer: Antoine Pitrou <solipsis@pitrou.net> 2010-09-08 20:57:48 (GMT)
commit: b41e128fe1e2a511748926d0837d1a87f090b9a9 (patch)
tree: 1f27cccacc9099c65e35634089a18a714d4d2d38
parent: 63b17671f00aafefc01c9b6d541d48c842e523b7 (diff)
download: cpython-b41e128fe1e2a511748926d0837d1a87f090b9a9.zip
cpython-b41e128fe1e2a511748926d0837d1a87f090b9a9.tar.gz
cpython-b41e128fe1e2a511748926d0837d1a87f090b9a9.tar.bz2
Issue #9188: The gdb extension now handles correctly narrow (UCS2) as well
as wide (UCS4) unicode builds for both the host interpreter (embedded inside gdb) and the interpreter under test.
-rw-r--r-- Misc/NEWS 7
-rw-r--r-- Tools/gdb/libpython.py 56
2 files changed, 50 insertions, 13 deletions
diff --git a/Misc/NEWS b/Misc/NEWS
index 894983a..148ee79 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -76,6 +76,13 @@ Library
guaranteed to exist in all Python implementations and the names of hash
algorithms available in the current process.
+Tools/Demos
+-----------
+
+- Issue #9188: The gdb extension now handles correctly narrow (UCS2) as well
+ as wide (UCS4) unicode builds for both the host interpreter (embedded
+ inside gdb) and the interpreter under test.
+
Build
-----
diff --git a/Tools/gdb/libpython.py b/Tools/gdb/libpython.py
index b23a22e..79f21e3 100644
--- a/Tools/gdb/libpython.py
+++ b/Tools/gdb/libpython.py
@@ -1065,7 +1065,19 @@ def _unichr_is_printable(char):
if char == u" ":
return True
import unicodedata
- return unicodedata.category(char)[0] not in ("C", "Z")
+ return unicodedata.category(char) not in ("C", "Z")
+
+if sys.maxunicode >= 0x10000:
+ _unichr = unichr  # host interpreter is a wide build: use the builtin unchanged
+else:
+ # Needed for proper surrogate support if sizeof(Py_UNICODE) is 2 in gdb
+ def _unichr(x):  # x: integer code point; returns a unicode string of one or two code units
+ if x < 0x10000:
+ return unichr(x)  # fits in a single 16-bit code unit
+ x -= 0x10000  # offset above 0xFFFF, split across the two surrogates below
+ ch1 = 0xD800 | (x >> 10)  # high surrogate: bits above the low 10
+ ch2 = 0xDC00 | (x & 0x3FF)  # low surrogate: the low 10 bits
+ return unichr(ch1) + unichr(ch2)
class PyUnicodeObjectPtr(PyObjectPtr):
@@ -1084,11 +1096,33 @@ class PyUnicodeObjectPtr(PyObjectPtr):
# Gather a list of ints from the Py_UNICODE array; these are either
# UCS-2 or UCS-4 code points:
- Py_UNICODEs = [int(field_str[i]) for i in safe_range(field_length)]
+ if self.char_width() > 2:
+ Py_UNICODEs = [int(field_str[i]) for i in safe_range(field_length)]
+ else:
+ # A more elaborate routine if sizeof(Py_UNICODE) is 2 in the
+ # inferior process: we must join surrogate pairs.
+ Py_UNICODEs = []
+ i = 0
+ while i < field_length:
+ ucs = int(field_str[i])
+ i += 1
+ if ucs < 0xD800 or ucs >= 0xDC00 or i == field_length:
+ Py_UNICODEs.append(ucs)
+ continue
+ # This could be a surrogate pair.
+ ucs2 = int(field_str[i])
+ if ucs2 < 0xDC00 or ucs2 > 0xDFFF:
+ continue
+ code = (ucs & 0x03FF) << 10
+ code |= ucs2 & 0x03FF
+ code += 0x00010000
+ Py_UNICODEs.append(code)
+ i += 1
# Convert the int code points to unicode characters, and generate a
- # local unicode instance:
- result = u''.join([unichr(ucs) for ucs in Py_UNICODEs])
+ # local unicode instance.
+ # This splits surrogate pairs if sizeof(Py_UNICODE) is 2 here (in gdb).
+ result = u''.join([_unichr(ucs) for ucs in Py_UNICODEs])
return result
def write_repr(self, out, visited):
@@ -1137,20 +1171,16 @@ class PyUnicodeObjectPtr(PyObjectPtr):
else:
ucs = ch
orig_ucs = None
+ ch2 = None
if self.char_width() == 2:
- # Get code point from surrogate pair
+ # If sizeof(Py_UNICODE) is 2 here (in gdb), join
+ # surrogate pairs before calling _unichr_is_printable.
if (i < len(proxy)
and 0xD800 <= ord(ch) < 0xDC00 \
and 0xDC00 <= ord(proxy[i]) <= 0xDFFF):
ch2 = proxy[i]
- code = (ord(ch) & 0x03FF) << 10
- code |= ord(ch2) & 0x03FF
- code += 0x00010000
- orig_ucs = ucs
- ucs = unichr(code)
+ ucs = ch + ch2
i += 1
- else:
- ch2 = None
printable = _unichr_is_printable(ucs)
if printable:
@@ -1195,7 +1225,7 @@ class PyUnicodeObjectPtr(PyObjectPtr):
else:
# Copy characters as-is
out.write(ch)
- if self.char_width() == 2 and (ch2 is not None):
+ if ch2 is not None:
out.write(ch2)
out.write(quote)