diff options
author | Jakub Kulík <Kulikjak@gmail.com> | 2021-04-30 13:21:42 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-04-30 13:21:42 (GMT) |
commit | 9032cf5cb1e33c0349089cfb0f6bf11ed3c30e86 (patch) | |
tree | 86ccc15aac78e1225299e09c12215d942b147d6f /Objects | |
parent | 4908fae3d57f68694cf006e89fd7761f45003447 (diff) | |
download | cpython-9032cf5cb1e33c0349089cfb0f6bf11ed3c30e86.zip cpython-9032cf5cb1e33c0349089cfb0f6bf11ed3c30e86.tar.gz cpython-9032cf5cb1e33c0349089cfb0f6bf11ed3c30e86.tar.bz2 |
bpo-43667: Fix broken Unicode encoding in non-UTF locales on Solaris (GH-25096)
Diffstat (limited to 'Objects')
-rw-r--r-- | Objects/unicodeobject.c | 40 |
1 files changed, 40 insertions, 0 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 74c5888..bfd5c88 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -57,6 +57,10 @@ OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. #include <windows.h> #endif +#ifdef HAVE_NON_UNICODE_WCHAR_T_REPRESENTATION +#include "pycore_fileutils.h" // _Py_LocaleUsesNonUnicodeWchar() +#endif + /* Uncomment to display statistics on interned strings at exit in _PyUnicode_ClearInterned(). */ /* #define INTERNED_STATS 1 */ @@ -2217,6 +2221,20 @@ PyUnicode_FromWideChar(const wchar_t *u, Py_ssize_t size) if (size == 0) _Py_RETURN_UNICODE_EMPTY(); +#ifdef HAVE_NON_UNICODE_WCHAR_T_REPRESENTATION + /* Oracle Solaris uses non-Unicode internal wchar_t form for + non-Unicode locales and hence needs conversion to UCS-4 first. */ + if (_Py_LocaleUsesNonUnicodeWchar()) { + wchar_t* converted = _Py_DecodeNonUnicodeWchar(u, size); + if (!converted) { + return NULL; + } + PyObject *unicode = _PyUnicode_FromUCS4(converted, size); + PyMem_Free(converted); + return unicode; + } +#endif + /* Single character Unicode objects in the Latin-1 range are shared when using this constructor */ if (size == 1 && (Py_UCS4)*u < 256) @@ -3295,6 +3313,17 @@ PyUnicode_AsWideChar(PyObject *unicode, res = size; } unicode_copy_as_widechar(unicode, w, size); + +#if HAVE_NON_UNICODE_WCHAR_T_REPRESENTATION + /* Oracle Solaris uses non-Unicode internal wchar_t form for + non-Unicode locales and hence needs conversion first. */ + if (_Py_LocaleUsesNonUnicodeWchar()) { + if (_Py_EncodeNonUnicodeWchar_InPlace(w, size) < 0) { + return -1; + } + } +#endif + return res; } @@ -3321,6 +3350,17 @@ PyUnicode_AsWideCharString(PyObject *unicode, return NULL; } unicode_copy_as_widechar(unicode, buffer, buflen + 1); + +#if HAVE_NON_UNICODE_WCHAR_T_REPRESENTATION + /* Oracle Solaris uses non-Unicode internal wchar_t form for + non-Unicode locales and hence needs conversion first. 
*/ + if (_Py_LocaleUsesNonUnicodeWchar()) { + if (_Py_EncodeNonUnicodeWchar_InPlace(buffer, (buflen + 1)) < 0) { + return NULL; + } + } +#endif + if (size != NULL) { *size = buflen; } |