summary refs log tree commit diff stats
path: root/Objects
diff options
context:
space:
mode:
author Jakub Kulík <Kulikjak@gmail.com> 2021-04-30 13:21:42 (GMT)
committer GitHub <noreply@github.com> 2021-04-30 13:21:42 (GMT)
commit 9032cf5cb1e33c0349089cfb0f6bf11ed3c30e86 (patch)
tree 86ccc15aac78e1225299e09c12215d942b147d6f /Objects
parent 4908fae3d57f68694cf006e89fd7761f45003447 (diff)
download cpython-9032cf5cb1e33c0349089cfb0f6bf11ed3c30e86.zip
cpython-9032cf5cb1e33c0349089cfb0f6bf11ed3c30e86.tar.gz
cpython-9032cf5cb1e33c0349089cfb0f6bf11ed3c30e86.tar.bz2
bpo-43667: Fix broken Unicode encoding in non-UTF locales on Solaris (GH-25096)
Diffstat (limited to 'Objects')
-rw-r--r-- Objects/unicodeobject.c 40
1 files changed, 40 insertions, 0 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 74c5888..bfd5c88 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -57,6 +57,10 @@ OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
#include <windows.h>
#endif
+#ifdef HAVE_NON_UNICODE_WCHAR_T_REPRESENTATION
+#include "pycore_fileutils.h" // _Py_LocaleUsesNonUnicodeWchar()
+#endif
+
/* Uncomment to display statistics on interned strings at exit
in _PyUnicode_ClearInterned(). */
/* #define INTERNED_STATS 1 */
@@ -2217,6 +2221,20 @@ PyUnicode_FromWideChar(const wchar_t *u, Py_ssize_t size)
if (size == 0)
_Py_RETURN_UNICODE_EMPTY();
+#ifdef HAVE_NON_UNICODE_WCHAR_T_REPRESENTATION
+ /* Oracle Solaris uses non-Unicode internal wchar_t form for
+ non-Unicode locales and hence needs conversion to UCS-4 first. */
+ if (_Py_LocaleUsesNonUnicodeWchar()) {
+ wchar_t* converted = _Py_DecodeNonUnicodeWchar(u, size);
+ if (!converted) {
+ return NULL;
+ }
+ PyObject *unicode = _PyUnicode_FromUCS4(converted, size);
+ PyMem_Free(converted);
+ return unicode;
+ }
+#endif
+
/* Single character Unicode objects in the Latin-1 range are
shared when using this constructor */
if (size == 1 && (Py_UCS4)*u < 256)
@@ -3295,6 +3313,17 @@ PyUnicode_AsWideChar(PyObject *unicode,
res = size;
}
unicode_copy_as_widechar(unicode, w, size);
+
+#if HAVE_NON_UNICODE_WCHAR_T_REPRESENTATION
+ /* Oracle Solaris uses non-Unicode internal wchar_t form for
+ non-Unicode locales and hence needs conversion first. */
+ if (_Py_LocaleUsesNonUnicodeWchar()) {
+ if (_Py_EncodeNonUnicodeWchar_InPlace(w, size) < 0) {
+ return -1;
+ }
+ }
+#endif
+
return res;
}
@@ -3321,6 +3350,17 @@ PyUnicode_AsWideCharString(PyObject *unicode,
return NULL;
}
unicode_copy_as_widechar(unicode, buffer, buflen + 1);
+
+#if HAVE_NON_UNICODE_WCHAR_T_REPRESENTATION
+ /* Oracle Solaris uses non-Unicode internal wchar_t form for
+ non-Unicode locales and hence needs conversion first. */
+ if (_Py_LocaleUsesNonUnicodeWchar()) {
+ if (_Py_EncodeNonUnicodeWchar_InPlace(buffer, (buflen + 1)) < 0) {
+ return NULL;
+ }
+ }
+#endif
+
if (size != NULL) {
*size = buflen;
}