diff options
author | Victor Stinner <victor.stinner@gmail.com> | 2012-12-03 11:48:53 (GMT) |
---|---|---|
committer | Victor Stinner <victor.stinner@gmail.com> | 2012-12-03 11:48:53 (GMT) |
commit | 2660e427d1abcae9fc60115c45619ed2286fc560 (patch) | |
tree | 2bc8119cac17fb963f323f964f8774cffbaa1879 /Python | |
parent | a2816c2b11aeb24b232570b4073a9644bfdd82c7 (diff) | |
parent | 27b1ca29ccf523e736a47c02f554de5374e241fc (diff) | |
download | cpython-2660e427d1abcae9fc60115c45619ed2286fc560.zip cpython-2660e427d1abcae9fc60115c45619ed2286fc560.tar.gz cpython-2660e427d1abcae9fc60115c45619ed2286fc560.tar.bz2 |
(Merge 3.2) Issue #16416: On Mac OS X, operating system data are now always
encoded/decoded to/from UTF-8/surrogateescape, instead of the locale encoding
(which may be ASCII if no locale environment variable is set), to avoid
inconsistencies with os.fsencode() and os.fsdecode() functions which are
already using UTF-8/surrogateescape.
Diffstat (limited to 'Python')
-rw-r--r-- | Python/fileutils.c | 60 |
1 files changed, 54 insertions, 6 deletions
diff --git a/Python/fileutils.c b/Python/fileutils.c index 976c04b..2e9ea35 100644 --- a/Python/fileutils.c +++ b/Python/fileutils.c @@ -8,6 +8,10 @@ #include <langinfo.h> #endif +#ifdef __APPLE__ +extern wchar_t* _Py_DecodeUTF8_surrogateescape(const char *s, Py_ssize_t size); +#endif + PyObject * _Py_device_encoding(int fd) { @@ -60,6 +64,17 @@ _Py_device_encoding(int fd) wchar_t* _Py_char2wchar(const char* arg, size_t *size) { +#ifdef __APPLE__ + wchar_t *wstr; + wstr = _Py_DecodeUTF8_surrogateescape(arg, strlen(arg)); + if (size != NULL) { + if (wstr != NULL) + *size = wcslen(wstr); + else + *size = (size_t)-1; + } + return wstr; +#else wchar_t *res; #ifdef HAVE_BROKEN_MBSTOWCS /* Some platforms have a broken implementation of @@ -145,7 +160,7 @@ _Py_char2wchar(const char* arg, size_t *size) argsize -= converted; out++; } -#else +#else /* HAVE_MBRTOWC */ /* Cannot use C locale for escaping; manually escape as if charset is ASCII (i.e. escape all bytes > 128. This will still roundtrip correctly in the locale's charset, which must be an ASCII superset. */ @@ -160,7 +175,7 @@ _Py_char2wchar(const char* arg, size_t *size) else *out++ = 0xdc00 + *in++; *out = 0; -#endif +#endif /* HAVE_MBRTOWC */ if (size != NULL) *size = out - res; return res; @@ -168,6 +183,7 @@ oom: if (size != NULL) *size = (size_t)-1; return NULL; +#endif /* __APPLE__ */ } /* Encode a (wide) character string to the locale encoding with the @@ -184,14 +200,42 @@ oom: char* _Py_wchar2char(const wchar_t *text, size_t *error_pos) { +#ifdef __APPLE__ + Py_ssize_t len; + PyObject *unicode, *bytes = NULL; + char *cpath; + + unicode = PyUnicode_FromWideChar(text, wcslen(text)); + if (unicode == NULL) + return NULL; + + bytes = _PyUnicode_AsUTF8String(unicode, "surrogateescape"); + Py_DECREF(unicode); + if (bytes == NULL) { + PyErr_Clear(); + if (error_pos != NULL) + *error_pos = (size_t)-1; + return NULL; + } + + len = PyBytes_GET_SIZE(bytes); + cpath = PyMem_Malloc(len+1); + if (cpath == NULL) { + PyErr_Clear(); + Py_DECREF(bytes); + if (error_pos != NULL) + *error_pos = (size_t)-1; + return NULL; + } + memcpy(cpath, PyBytes_AsString(bytes), len + 1); + Py_DECREF(bytes); + return cpath; +#else /* __APPLE__ */ const size_t len = wcslen(text); char *result = NULL, *bytes = NULL; size_t i, size, converted; wchar_t c, buf[2]; - if (error_pos != NULL) - *error_pos = (size_t)-1; - /* The function works in two steps: 1. compute the length of the output buffer in bytes (size) 2. outputs the bytes */ @@ -238,11 +282,15 @@ _Py_wchar2char(const wchar_t *text, size_t *error_pos) size += 1; /* nul byte at the end */ result = PyMem_Malloc(size); - if (result == NULL) + if (result == NULL) { + if (error_pos != NULL) + *error_pos = (size_t)-1; return NULL; + } bytes = result; } return result; +#endif /* __APPLE__ */ } /* In principle, this should use HAVE__WSTAT, and _wstat |