summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- Lib/test/test_unicode.py 13
-rw-r--r-- Misc/NEWS 3
-rw-r--r-- Objects/unicodeobject.c 10
3 files changed, 25 insertions, 1 deletion
diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py
index 96c15f0..4b2d055 100644
--- a/Lib/test/test_unicode.py
+++ b/Lib/test/test_unicode.py
@@ -393,6 +393,19 @@ class UnicodeTest(
self.assertEqual(u'%c' % 0x1234, u'\u1234')
self.assertRaises(OverflowError, u"%c".__mod__, (sys.maxunicode+1,))
+ for num in range(0x00,0x80):
+ char = chr(num)
+ self.assertEqual(u"%c" % char, char)
+ self.assertEqual(u"%c" % num, char)
+ # Issue 7649
+ for num in range(0x80,0x100):
+ uchar = unichr(num)
+ self.assertEqual(uchar, u"%c" % num) # works only with ints
+ self.assertEqual(uchar, u"%c" % uchar) # and unicode chars
+ # the implicit decoding should fail for non-ascii chars
+ self.assertRaises(UnicodeDecodeError, u"%c".__mod__, chr(num))
+ self.assertRaises(UnicodeDecodeError, u"%s".__mod__, chr(num))
+
# formatting jobs delegated from the string implementation:
self.assertEqual('...%(foo)s...' % {'foo':u"abc"}, u'...abc...')
self.assertEqual('...%(foo)s...' % {'foo':"abc"}, '...abc...')
diff --git a/Misc/NEWS b/Misc/NEWS
index 8d8a66b..75c8f5c 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -12,6 +12,9 @@ What's New in Python 2.6.5
Core and Builtins
-----------------
+- Issue #7649: Fix u'%c' % char for a byte-string character in range
+ 0x80..0xFF: it now raises a UnicodeDecodeError.
+
- Issue #5677: Explicitly forbid write operations on read-only file objects,
and read operations on write-only file objects. On Windows, the system C
library would return a bogus result; on Solaris, it was possible to crash
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 3731ac7..667afae 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -8357,6 +8357,7 @@ formatchar(Py_UNICODE *buf,
size_t buflen,
PyObject *v)
{
+ PyObject *s;
/* presume that the buffer is at least 2 characters long */
if (PyUnicode_Check(v)) {
if (PyUnicode_GET_SIZE(v) != 1)
@@ -8367,7 +8368,14 @@ formatchar(Py_UNICODE *buf,
else if (PyString_Check(v)) {
if (PyString_GET_SIZE(v) != 1)
goto onError;
- buf[0] = (Py_UNICODE)PyString_AS_STRING(v)[0];
+ /* #7649: if the char is a non-ascii (i.e. in range(0x80,0x100)) byte
+ string, "u'%c' % char" should fail with a UnicodeDecodeError */
+ s = PyUnicode_FromStringAndSize(PyString_AS_STRING(v), 1);
+ /* if the char is not decodable return -1 */
+ if (s == NULL)
+ return -1;
+ buf[0] = PyUnicode_AS_UNICODE(s)[0];
+ Py_DECREF(s);
}
else {