summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- Lib/test/test_gdb.py 25
-rw-r--r-- Tools/gdb/libpython.py 75
2 files changed, 61 insertions, 39 deletions
diff --git a/Lib/test/test_gdb.py b/Lib/test/test_gdb.py
index 15d8034..e5d1222 100644
--- a/Lib/test/test_gdb.py
+++ b/Lib/test/test_gdb.py
@@ -8,6 +8,7 @@ import re
import subprocess
import sys
import unittest
+import locale
from test.support import run_unittest, findfile
@@ -177,7 +178,7 @@ class PrettyPrintTests(DebuggerTests):
def assertGdbRepr(self, val, exp_repr=None, cmds_after_breakpoint=None):
# Ensure that gdb's rendering of the value in a debugged process
# matches repr(value) in this process:
- gdb_repr, gdb_output = self.get_gdb_repr('id(' + repr(val) + ')',
+ gdb_repr, gdb_output = self.get_gdb_repr('id(' + ascii(val) + ')',
cmds_after_breakpoint)
if not exp_repr:
exp_repr = repr(val)
@@ -226,31 +227,35 @@ class PrettyPrintTests(DebuggerTests):
def test_strings(self):
'Verify the pretty-printing of unicode strings'
+ encoding = locale.getpreferredencoding()
+ def check_repr(text):
+ try:
+ text.encode(encoding)
+ printable = True
+ except UnicodeEncodeError:
+ self.assertGdbRepr(text, ascii(text))
+ else:
+ self.assertGdbRepr(text)
+
self.assertGdbRepr('')
self.assertGdbRepr('And now for something hopefully the same')
self.assertGdbRepr('string with embedded NUL here \0 and then some more text')
# Test printing a single character:
# U+2620 SKULL AND CROSSBONES
- self.assertGdbRepr('\u2620')
+ check_repr('\u2620')
# Test printing a Japanese unicode string
# (I believe this reads "mojibake", using 3 characters from the CJK
# Unified Ideographs area, followed by U+3051 HIRAGANA LETTER KE)
- self.assertGdbRepr('\u6587\u5b57\u5316\u3051')
+ check_repr('\u6587\u5b57\u5316\u3051')
# Test a character outside the BMP:
# U+1D121 MUSICAL SYMBOL C CLEF
# This is:
# UTF-8: 0xF0 0x9D 0x84 0xA1
# UTF-16: 0xD834 0xDD21
- if sys.maxunicode == 0x10FFFF:
- # wide unicode:
- self.assertGdbRepr(chr(0x1D121))
- else:
- # narrow unicode:
- self.assertGdbRepr(chr(0x1D121),
- "'\\U0000d834\\U0000dd21'")
+ check_repr(chr(0x1D121))
def test_tuples(self):
'Verify the pretty-printing of tuples'
diff --git a/Tools/gdb/libpython.py b/Tools/gdb/libpython.py
index 426fb7b..6ed6087 100644
--- a/Tools/gdb/libpython.py
+++ b/Tools/gdb/libpython.py
@@ -42,6 +42,7 @@ The module also extends gdb with some python-specific commands.
'''
from __future__ import with_statement
import gdb
+import locale
# Look up the gdb.Type for some standard types:
_type_char_ptr = gdb.lookup_type('char').pointer() # char*
@@ -69,6 +70,7 @@ MAX_OUTPUT_LEN=1024
hexdigits = "0123456789abcdef"
+ENCODING = locale.getpreferredencoding()
class NullPyObjectPtr(RuntimeError):
pass
@@ -1128,53 +1130,68 @@ class PyUnicodeObjectPtr(PyObjectPtr):
# Non-ASCII characters
else:
- ucs = ch;
-
- if self.char_width == 2:
- ch2 = 0
+ ucs = ch
+ orig_ucs = None
+ if self.char_width() == 2:
# Get code point from surrogate pair
- if i < len(proxy):
+ if (i < len(proxy)
+ and 0xD800 <= ord(ch) < 0xDC00 \
+ and 0xDC00 <= ord(proxy[i]) <= 0xDFFF):
ch2 = proxy[i]
- if (ord(ch) >= 0xD800 and ord(ch) < 0xDC00
- and ord(ch2) >= 0xDC00 and ord(ch2) <= 0xDFFF):
- ucs = (((ch & 0x03FF) << 10) | (ch2 & 0x03FF)) + 0x00010000;
- i += 1
+ code = (ord(ch) & 0x03FF) << 10
+ code |= ord(ch2) & 0x03FF
+ code += 0x00010000
+ orig_ucs = ucs
+ ucs = unichr(code)
+ i += 1
+ else:
+ ch2 = None
+
+ printable = _unichr_is_printable(ucs)
+ if printable:
+ try:
+ ucs.encode(ENCODING)
+ except UnicodeEncodeError:
+ printable = False
+ if orig_ucs is not None:
+ ucs = orig_ucs
+ i -= 1
# Map Unicode whitespace and control characters
# (categories Z* and C* except ASCII space)
- if not _unichr_is_printable(ucs):
+ if not printable:
# Unfortunately, Python 2's unicode type doesn't seem
# to expose the "isprintable" method
+ code = ord(ucs)
# Map 8-bit characters to '\\xhh'
- if ucs <= 0xff:
+ if code <= 0xff:
out.write('\\x')
- out.write(hexdigits[(ord(ucs) >> 4) & 0x000F])
- out.write(hexdigits[ord(ucs) & 0x000F])
+ out.write(hexdigits[(code >> 4) & 0x000F])
+ out.write(hexdigits[code & 0x000F])
# Map 21-bit characters to '\U00xxxxxx'
- elif ucs >= 0x10000:
+ elif code >= 0x10000:
out.write('\\U')
- out.write(hexdigits[(ord(ucs) >> 28) & 0x0000000F])
- out.write(hexdigits[(ord(ucs) >> 24) & 0x0000000F])
- out.write(hexdigits[(ord(ucs) >> 20) & 0x0000000F])
- out.write(hexdigits[(ord(ucs) >> 16) & 0x0000000F])
- out.write(hexdigits[(ord(ucs) >> 12) & 0x0000000F])
- out.write(hexdigits[(ord(ucs) >> 8) & 0x0000000F])
- out.write(hexdigits[(ord(ucs) >> 4) & 0x0000000F])
- out.write(hexdigits[ord(ucs) & 0x0000000F])
+ out.write(hexdigits[(code >> 28) & 0x0000000F])
+ out.write(hexdigits[(code >> 24) & 0x0000000F])
+ out.write(hexdigits[(code >> 20) & 0x0000000F])
+ out.write(hexdigits[(code >> 16) & 0x0000000F])
+ out.write(hexdigits[(code >> 12) & 0x0000000F])
+ out.write(hexdigits[(code >> 8) & 0x0000000F])
+ out.write(hexdigits[(code >> 4) & 0x0000000F])
+ out.write(hexdigits[code & 0x0000000F])
# Map 16-bit characters to '\uxxxx'
else:
out.write('\\u')
- out.write(hexdigits[(ord(ucs) >> 12) & 0x000F])
- out.write(hexdigits[(ord(ucs) >> 8) & 0x000F])
- out.write(hexdigits[(ord(ucs) >> 4) & 0x000F])
- out.write(hexdigits[ord(ucs) & 0x000F])
+ out.write(hexdigits[(code >> 12) & 0x000F])
+ out.write(hexdigits[(code >> 8) & 0x000F])
+ out.write(hexdigits[(code >> 4) & 0x000F])
+ out.write(hexdigits[code & 0x000F])
else:
# Copy characters as-is
out.write(ch)
- if self.char_width == 2:
- if ord(ucs) >= 0x10000:
- out.write(ch2)
+ if self.char_width() == 2 and (ch2 is not None):
+ out.write(ch2)
out.write(quote)