summary | refs | log | tree | commit | diff | stats
path: root/Objects
diff options
context:
space:
mode:
author: Mark Dickinson <dickinsm@gmail.com> 2009-03-18 16:07:26 (GMT)
committer: Mark Dickinson <dickinsm@gmail.com> 2009-03-18 16:07:26 (GMT)
commit6b265f1bf875762ba871028056613d1dd7ab6e11 (patch)
tree8910d66c857741a18cbf92993b563a9a6476c1e2 /Objects
parenteb15863a97a6b9d95c8b2c7ad13125c6a2c7c67e (diff)
downloadcpython-6b265f1bf875762ba871028056613d1dd7ab6e11.zip
cpython-6b265f1bf875762ba871028056613d1dd7ab6e11.tar.gz
cpython-6b265f1bf875762ba871028056613d1dd7ab6e11.tar.bz2
Issue 4474: On platforms with sizeof(wchar_t) == 4 and
sizeof(Py_UNICODE) == 2, PyUnicode_FromWideChar now converts each character outside the BMP to the appropriate surrogate pair. Thanks Victor Stinner for the patch. (backport of r70452 from py3k to trunk)
Diffstat (limited to 'Objects')
-rw-r--r--Objects/unicodeobject.c58
1 files changed, 58 insertions, 0 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 03bccaf..079eebf 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -529,6 +529,60 @@ PyObject *PyUnicode_FromString(const char *u)
#ifdef HAVE_WCHAR_H
+#if (Py_UNICODE_SIZE == 2) && defined(SIZEOF_WCHAR_T) && (SIZEOF_WCHAR_T == 4)
+# define CONVERT_WCHAR_TO_SURROGATES
+#endif
+
+#ifdef CONVERT_WCHAR_TO_SURROGATES
+
+/* Here sizeof(wchar_t) is 4 but Py_UNICODE_SIZE == 2, so we need
+ to convert from UTF32 to UTF16: every wchar_t above 0xFFFF is written as a surrogate pair (two Py_UNICODE units). Returns the new object, or NULL if w is NULL or _PyUnicode_New fails. */
+
+PyObject *PyUnicode_FromWideChar(register const wchar_t *w,
+ Py_ssize_t size) /* size: number of wchar_t elements read from w */
+{
+ PyUnicodeObject *unicode;
+ register Py_ssize_t i;
+ Py_ssize_t alloc; /* output length in Py_UNICODE units */
+ const wchar_t *orig_w; /* saved start of the input so it can be scanned twice */
+
+ if (w == NULL) { /* a NULL buffer is reported via PyErr_BadInternalCall */
+ PyErr_BadInternalCall();
+ return NULL;
+ }
+
+ alloc = size; /* start from one output unit per input character */
+ orig_w = w;
+ for (i = size; i > 0; i--) { /* first pass: count characters that need a second unit */
+ if (*w > 0xFFFF) /* NOTE(review): if wchar_t is signed, negative values take the single-unit path -- confirm intended */
+ alloc++;
+ w++;
+ }
+ w = orig_w; /* rewind for the copy pass */
+ unicode = _PyUnicode_New(alloc); /* presumably sized for alloc Py_UNICODE units; helper is defined outside this hunk */
+ if (!unicode)
+ return NULL;
+
+ /* Copy the wchar_t data into the new object */
+ {
+ register Py_UNICODE *u;
+ u = PyUnicode_AS_UNICODE(unicode);
+ for (i = size; i > 0; i--) { /* second pass: must make the same > 0xFFFF decision as the counting pass */
+ if (*w > 0xFFFF) {
+ wchar_t ordinal = *w++;
+ ordinal -= 0x10000; /* offset of the character above the 16-bit range */
+ *u++ = 0xD800 | (ordinal >> 10); /* first unit of the pair; NOTE(review): no check that the input is <= 0x10FFFF, so the shifted value may exceed 10 bits */
+ *u++ = 0xDC00 | (ordinal & 0x3FF); /* second unit of the pair: low 10 bits of the offset */
+ }
+ else
+ *u++ = *w++; /* value not above 0xFFFF: stored as a single unit */
+ }
+ }
+ return (PyObject *)unicode;
+}
+
+#else
+
PyObject *PyUnicode_FromWideChar(register const wchar_t *w,
Py_ssize_t size)
{
@@ -559,6 +613,10 @@ PyObject *PyUnicode_FromWideChar(register const wchar_t *w,
return (PyObject *)unicode;
}
+#endif /* CONVERT_WCHAR_TO_SURROGATES */
+
+#undef CONVERT_WCHAR_TO_SURROGATES
+
static void
makefmt(char *fmt, int longflag, int size_tflag, int zeropad, int width, int precision, char c)
{