diff options
author | Mark Dickinson <dickinsm@gmail.com> | 2009-03-18 14:47:41 (GMT) |
---|---|---|
committer | Mark Dickinson <dickinsm@gmail.com> | 2009-03-18 14:47:41 (GMT) |
commit | 081dfee4f154f4dfd11a3cf14516340f385049bd (patch) | |
tree | 2b4fd8b8827acc861ee7e6ecbd6f39bd3a4bdaea /Objects | |
parent | ecdfd513a2a506f70c4d5aa0f3d39b9323f91e6e (diff) | |
download | cpython-081dfee4f154f4dfd11a3cf14516340f385049bd.zip cpython-081dfee4f154f4dfd11a3cf14516340f385049bd.tar.gz cpython-081dfee4f154f4dfd11a3cf14516340f385049bd.tar.bz2 |
Issue 4474: On platforms with sizeof(wchar_t) == 4 and
sizeof(Py_UNICODE) == 2, PyUnicode_FromWideChar now converts
each character outside the BMP to the appropriate surrogate pair.
Thanks Victor Stinner for the patch.
Diffstat (limited to 'Objects')
-rw-r--r-- | Objects/unicodeobject.c | 64 |
1 file changed, 64 insertions, 0 deletions
```diff
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index e88c8c1..03c65e3 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -561,6 +561,66 @@ PyObject *PyUnicode_FromString(const char *u)
 
 #ifdef HAVE_WCHAR_H
 
+#if (Py_UNICODE_SIZE == 2) && defined(SIZEOF_WCHAR_T) && (SIZEOF_WCHAR_T == 4)
+# define CONVERT_WCHAR_TO_SURROGATES
+#endif
+
+#ifdef CONVERT_WCHAR_TO_SURROGATES
+
+/* Here sizeof(wchar_t) is 4 but Py_UNICODE_SIZE == 2, so we need
+   to convert from UTF32 to UTF16. */
+
+PyObject *PyUnicode_FromWideChar(register const wchar_t *w,
+                                 Py_ssize_t size)
+{
+    PyUnicodeObject *unicode;
+    register Py_ssize_t i;
+    Py_ssize_t alloc;
+    const wchar_t *orig_w;
+
+    if (w == NULL) {
+        if (size == 0)
+            return PyUnicode_FromStringAndSize(NULL, 0);
+        PyErr_BadInternalCall();
+        return NULL;
+    }
+
+    if (size == -1) {
+        size = wcslen(w);
+    }
+
+    alloc = size;
+    orig_w = w;
+    for (i = size; i > 0; i--) {
+        if (*w > 0xFFFF)
+            alloc++;
+        w++;
+    }
+    w = orig_w;
+    unicode = _PyUnicode_New(alloc);
+    if (!unicode)
+        return NULL;
+
+    /* Copy the wchar_t data into the new object */
+    {
+        register Py_UNICODE *u;
+        u = PyUnicode_AS_UNICODE(unicode);
+        for (i = size; i > 0; i--) {
+            if (*w > 0xFFFF) {
+                wchar_t ordinal = *w++;
+                ordinal -= 0x10000;
+                *u++ = 0xD800 | (ordinal >> 10);
+                *u++ = 0xDC00 | (ordinal & 0x3FF);
+            }
+            else
+                *u++ = *w++;
+        }
+    }
+    return (PyObject *)unicode;
+}
+
+#else
+
 PyObject *PyUnicode_FromWideChar(register const wchar_t *w,
                                  Py_ssize_t size)
 {
@@ -597,6 +657,10 @@ PyObject *PyUnicode_FromWideChar(register const wchar_t *w,
     return (PyObject *)unicode;
 }
 
+#endif /* CONVERT_WCHAR_TO_SURROGATES */
+
+#undef CONVERT_WCHAR_TO_SURROGATES
+
 static void
 makefmt(char *fmt, int longflag, int size_tflag, int zeropad, int width, int precision, char c)
 {
```