Issue #16416: On Mac OS X, operating system data are now always encoded/decoded to/from UTF-8/surrogateescape, instead of the locale encoding (which may be ASCII if no locale environment variable is set), to avoid inconsistencies with the os.fsencode() and os.fsdecode() functions, which already use UTF-8/surrogateescape.
author: Victor Stinner <victor.stinner@gmail.com> 2012-12-03 11:47:59 (GMT)
committer: Victor Stinner <victor.stinner@gmail.com> 2012-12-03 11:47:59 (GMT)
commit: 27b1ca29ccf523e736a47c02f554de5374e241fc (patch)
tree: daf9a3fdd3e0fdd67b9b95795fa03f7a6c895398 /Python/fileutils.c
parent: ce31f66a6d23a5df75eb692c2991e7602b2b6571 (diff)
download: cpython-27b1ca29ccf523e736a47c02f554de5374e241fc.zip
cpython-27b1ca29ccf523e736a47c02f554de5374e241fc.tar.gz
cpython-27b1ca29ccf523e736a47c02f554de5374e241fc.tar.bz2
1 files changed, 54 insertions, 6 deletions
diff --git a/Python/fileutils.c b/Python/fileutils.c
index c563eaa..cba6696 100644
--- a/Python/fileutils.c
+++ b/Python/fileutils.c
@@ -3,6 +3,10 @@
 #  include <windows.h>
 #endif
 
+#ifdef __APPLE__
+extern wchar_t* _Py_DecodeUTF8_surrogateescape(const char *s, Py_ssize_t size);
+#endif
+
 #ifdef HAVE_STAT
 
 /* Decode a byte string from the locale encoding with the
@@ -23,6 +27,17 @@
 wchar_t*
 _Py_char2wchar(const char* arg, size_t *size)
 {
+#ifdef __APPLE__
+    wchar_t *wstr;
+    wstr = _Py_DecodeUTF8_surrogateescape(arg, strlen(arg));
+    if (size != NULL) {
+        if (wstr != NULL)
+            *size = wcslen(wstr);
+        else
+            *size = (size_t)-1;
+    }
+    return wstr;
+#else
     wchar_t *res;
 #ifdef HAVE_BROKEN_MBSTOWCS
     /* Some platforms have a broken implementation of
@@ -107,7 +122,7 @@ _Py_char2wchar(const char* arg, size_t *size)
         argsize -= converted;
         out++;
     }
-#else
+#else   /* HAVE_MBRTOWC */
     /* Cannot use C locale for escaping; manually escape as if charset
        is ASCII (i.e. escape all bytes > 128. This will still roundtrip
        correctly in the locale's charset, which must be an ASCII superset. */
@@ -121,13 +136,14 @@ _Py_char2wchar(const char* arg, size_t *size)
         else
             *out++ = 0xdc00 + *in++;
     *out = 0;
-#endif
+#endif   /* HAVE_MBRTOWC */
     if (size != NULL)
         *size = out - res;
     return res;
 oom:
     fprintf(stderr, "out of memory\n");
     return NULL;
+#endif   /* __APPLE__ */
 }
 
 /* Encode a (wide) character string to the locale encoding with the
@@ -144,14 +160,42 @@ oom:
 char*
 _Py_wchar2char(const wchar_t *text, size_t *error_pos)
 {
+#ifdef __APPLE__
+    Py_ssize_t len;
+    PyObject *unicode, *bytes = NULL;
+    char *cpath;
+
+    unicode = PyUnicode_FromWideChar(text, wcslen(text));
+    if (unicode == NULL)
+        return NULL;
+
+    bytes = _PyUnicode_AsUTF8String(unicode, "surrogateescape");
+    Py_DECREF(unicode);
+    if (bytes == NULL) {
+        PyErr_Clear();
+        if (error_pos != NULL)
+            *error_pos = (size_t)-1;
+        return NULL;
+    }
+
+    len = PyBytes_GET_SIZE(bytes);
+    cpath = PyMem_Malloc(len+1);
+    if (cpath == NULL) {
+        PyErr_Clear();
+        Py_DECREF(bytes);
+        if (error_pos != NULL)
+            *error_pos = (size_t)-1;
+        return NULL;
+    }
+    memcpy(cpath, PyBytes_AsString(bytes), len + 1);
+    Py_DECREF(bytes);
+    return cpath;
+#else   /* __APPLE__ */
     const size_t len = wcslen(text);
     char *result = NULL, *bytes = NULL;
     size_t i, size, converted;
     wchar_t c, buf[2];
 
-    if (error_pos != NULL)
-        *error_pos = (size_t)-1;
-
     /* The function works in two steps:
        1. compute the length of the output buffer in bytes (size)
        2. outputs the bytes */
@@ -198,11 +242,15 @@ _Py_wchar2char(const wchar_t *text, size_t *error_pos)
 
         size += 1; /* nul byte at the end */
         result = PyMem_Malloc(size);
-        if (result == NULL)
+        if (result == NULL) {
+            if (error_pos != NULL)
+                *error_pos = (size_t)-1;
             return NULL;
+        }
         bytes = result;
     }
     return result;
+#endif   /* __APPLE__ */
 }
 
 /* In principle, this should use HAVE__WSTAT, and _wstat
author	Victor Stinner <victor.stinner@gmail.com>	2012-12-03 11:47:59 (GMT)
committer	Victor Stinner <victor.stinner@gmail.com>	2012-12-03 11:47:59 (GMT)
commit	27b1ca29ccf523e736a47c02f554de5374e241fc (patch)
tree	daf9a3fdd3e0fdd67b9b95795fa03f7a6c895398 /Python/fileutils.c
parent	ce31f66a6d23a5df75eb692c2991e7602b2b6571 (diff)
download	cpython-27b1ca29ccf523e736a47c02f554de5374e241fc.zip cpython-27b1ca29ccf523e736a47c02f554de5374e241fc.tar.gz cpython-27b1ca29ccf523e736a47c02f554de5374e241fc.tar.bz2