Fix PyUnicode_FromFormatV("%c") for non-BMP characters on narrow build

Issue #10830: Fix PyUnicode_FromFormatV("%c") for non-BMP characters on narrow build.
author: Victor Stinner <victor.stinner@haypocalc.com> 2011-02-21 21:13:44 (GMT)
committer: Victor Stinner <victor.stinner@haypocalc.com> 2011-02-21 21:13:44 (GMT)
commit: 5ed8b2c737a71d6fd56757bd9fe108f2cf886664 (patch)
tree: 5dfc9fd7b3f19fcd1deb7c11e9367f4834c8e0b5
parent: ed4c71112a07a2adad9cb26b9b2d790725443b1f (diff)
download: cpython-5ed8b2c737a71d6fd56757bd9fe108f2cf886664.zip
cpython-5ed8b2c737a71d6fd56757bd9fe108f2cf886664.tar.gz
cpython-5ed8b2c737a71d6fd56757bd9fe108f2cf886664.tar.bz2
3 files changed, 30 insertions, 3 deletions
diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py
index e2d67cd..9ad9eed 100644
--- a/Lib/test/test_unicode.py
+++ b/Lib/test/test_unicode.py
@@ -1427,7 +1427,7 @@ class UnicodeTest(string_tests.CommonTest,
     # Test PyUnicode_FromFormat()
     def test_from_format(self):
         support.import_module('ctypes')
-        from ctypes import pythonapi, py_object
+        from ctypes import pythonapi, py_object, c_int
         if sys.maxunicode == 65535:
             name = "PyUnicodeUCS2_FromFormat"
         else:
@@ -1452,6 +1452,9 @@ class UnicodeTest(string_tests.CommonTest,
             'string, got a non-ASCII byte: 0xe9$',
             PyUnicode_FromFormat, b'unicode\xe9=%s', 'ascii')
 
+        self.assertEqual(PyUnicode_FromFormat(b'%c', c_int(0xabcd)), '\uabcd')
+        self.assertEqual(PyUnicode_FromFormat(b'%c', c_int(0x10ffff)), '\U0010ffff')
+
         # other tests
         text = PyUnicode_FromFormat(b'%%A:%A', 'abc\xe9\uabcd\U0010ffff')
         self.assertEqual(text, r"%A:'abc\xe9\uabcd\U0010ffff'")
diff --git a/Misc/NEWS b/Misc/NEWS
index 7b55d20..5f2f99c 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -10,6 +10,9 @@ What's New in Python 3.3 Alpha 1?
 Core and Builtins
 -----------------
 
+- Issue #10830: Fix PyUnicode_FromFormatV("%c") for non-BMP characters on
+  narrow build.
+
 - Issue #11168: Remove filename debug variable from PyEval_EvalFrameEx().
   It encoded the Unicode filename to UTF-8, but the encoding fails on
   undecodable filename (on surrogate characters) which raises an unexpected
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 069be7b..57baebd 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -813,8 +813,19 @@ PyUnicode_FromFormatV(const char *format, va_list vargs)
 
             switch (*f) {
             case 'c':
+            {
+#ifndef Py_UNICODE_WIDE
+                int ordinal = va_arg(count, int);
+                if (ordinal > 0xffff)
+                    n += 2;
+                else
+                    n++;
+#else
                 (void)va_arg(count, int);
-                /* fall through... */
+                n++;
+#endif
+                break;
+            }
             case '%':
                 n++;
                 break;
@@ -992,8 +1003,18 @@ PyUnicode_FromFormatV(const char *format, va_list vargs)
 
             switch (*f) {
             case 'c':
-                *s++ = va_arg(vargs, int);
+            {
+                int ordinal = va_arg(vargs, int);
+#ifndef Py_UNICODE_WIDE
+                if (ordinal > 0xffff) {
+                    ordinal -= 0x10000;
+                    *s++ = 0xD800 | (ordinal >> 10);
+                    *s++ = 0xDC00 | (ordinal & 0x3FF);
+                } else
+#endif
+                *s++ = ordinal;
                 break;
+            }
             case 'd':
                 makefmt(fmt, longflag, longlongflag, size_tflag, zeropad,
                         width, precision, 'd');
author	Victor Stinner <victor.stinner@haypocalc.com>	2011-02-21 21:13:44 (GMT)
committer	Victor Stinner <victor.stinner@haypocalc.com>	2011-02-21 21:13:44 (GMT)
commit	5ed8b2c737a71d6fd56757bd9fe108f2cf886664 (patch)
tree	5dfc9fd7b3f19fcd1deb7c11e9367f4834c8e0b5
parent	ed4c71112a07a2adad9cb26b9b2d790725443b1f (diff)
download	cpython-5ed8b2c737a71d6fd56757bd9fe108f2cf886664.zip cpython-5ed8b2c737a71d6fd56757bd9fe108f2cf886664.tar.gz cpython-5ed8b2c737a71d6fd56757bd9fe108f2cf886664.tar.bz2