summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Victor Stinner <victor.stinner@gmail.com> 2012-12-03 11:47:59 (GMT)
committer: Victor Stinner <victor.stinner@gmail.com> 2012-12-03 11:47:59 (GMT)
commit: 27b1ca29ccf523e736a47c02f554de5374e241fc (patch)
tree: daf9a3fdd3e0fdd67b9b95795fa03f7a6c895398
parent: ce31f66a6d23a5df75eb692c2991e7602b2b6571 (diff)
download: cpython-27b1ca29ccf523e736a47c02f554de5374e241fc.zip
cpython-27b1ca29ccf523e736a47c02f554de5374e241fc.tar.gz
cpython-27b1ca29ccf523e736a47c02f554de5374e241fc.tar.bz2
Issue #16416: On Mac OS X, operating system data are now always
encoded/decoded to/from UTF-8/surrogateescape, instead of the locale encoding (which may be ASCII if no locale environment variable is set), to avoid inconsistencies with os.fsencode() and os.fsdecode() functions which are already using UTF-8/surrogateescape.
-rw-r--r-- Misc/NEWS 6
-rw-r--r-- Modules/python.c 8
-rw-r--r-- Objects/unicodeobject.c 9
-rw-r--r-- Python/fileutils.c 60
4 files changed, 65 insertions, 18 deletions
diff --git a/Misc/NEWS b/Misc/NEWS
index 9d8db75..fbcfe90 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -10,6 +10,12 @@ What's New in Python 3.2.4
Core and Builtins
-----------------
+- Issue #16416: On Mac OS X, operating system data are now always
+ encoded/decoded to/from UTF-8/surrogateescape, instead of the locale encoding
+ (which may be ASCII if no locale environment variable is set), to avoid
+ inconsistencies with os.fsencode() and os.fsdecode() functions which are
+ already using UTF-8/surrogateescape.
+
- Issue #16588: Silence unused-but-set warnings in Python/thread_pthread.h
- Issue #16306: Fix multiple error messages when unknown command line
diff --git a/Modules/python.c b/Modules/python.c
index cf9383f..2be69f1 100644
--- a/Modules/python.c
+++ b/Modules/python.c
@@ -15,10 +15,6 @@ wmain(int argc, wchar_t **argv)
}
#else
-#ifdef __APPLE__
-extern wchar_t* _Py_DecodeUTF8_surrogateescape(const char *s, Py_ssize_t size);
-#endif
-
int
main(int argc, char **argv)
{
@@ -45,11 +41,7 @@ main(int argc, char **argv)
oldloc = strdup(setlocale(LC_ALL, NULL));
setlocale(LC_ALL, "");
for (i = 0; i < argc; i++) {
-#ifdef __APPLE__
- argv_copy[i] = _Py_DecodeUTF8_surrogateescape(argv[i], strlen(argv[i]));
-#else
argv_copy[i] = _Py_char2wchar(argv[i], NULL);
-#endif
if (!argv_copy[i]) {
fprintf(stderr, "Fatal Python error: "
"unable to decode the command line argument #%i\n",
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 35b424e..565d298 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -2792,7 +2792,10 @@ PyObject *PyUnicode_DecodeUTF8Stateful(const char *s,
#ifdef __APPLE__
/* Simplified UTF-8 decoder using surrogateescape error handler,
- used to decode the command line arguments on Mac OS X. */
+ used to decode the command line arguments on Mac OS X.
+
+ Return a pointer to a newly allocated wide character string (use
+ PyMem_Free() to free the memory), or NULL on memory allocation error. */
wchar_t*
_Py_DecodeUTF8_surrogateescape(const char *s, Py_ssize_t size)
@@ -2803,10 +2806,8 @@ _Py_DecodeUTF8_surrogateescape(const char *s, Py_ssize_t size)
/* Note: size will always be longer than the resulting Unicode
character count */
- if (PY_SSIZE_T_MAX / sizeof(wchar_t) < (size + 1)) {
- PyErr_NoMemory();
+ if (PY_SSIZE_T_MAX / sizeof(wchar_t) < (size + 1))
return NULL;
- }
unicode = PyMem_Malloc((size + 1) * sizeof(wchar_t));
if (!unicode)
return NULL;
diff --git a/Python/fileutils.c b/Python/fileutils.c
index c563eaa..cba6696 100644
--- a/Python/fileutils.c
+++ b/Python/fileutils.c
@@ -3,6 +3,10 @@
# include <windows.h>
#endif
+#ifdef __APPLE__
+extern wchar_t* _Py_DecodeUTF8_surrogateescape(const char *s, Py_ssize_t size);
+#endif
+
#ifdef HAVE_STAT
/* Decode a byte string from the locale encoding with the
@@ -23,6 +27,17 @@
wchar_t*
_Py_char2wchar(const char* arg, size_t *size)
{
+#ifdef __APPLE__
+ wchar_t *wstr;
+ wstr = _Py_DecodeUTF8_surrogateescape(arg, strlen(arg));
+ if (size != NULL) {
+ if (wstr != NULL)
+ *size = wcslen(wstr);
+ else
+ *size = (size_t)-1;
+ }
+ return wstr;
+#else
wchar_t *res;
#ifdef HAVE_BROKEN_MBSTOWCS
/* Some platforms have a broken implementation of
@@ -107,7 +122,7 @@ _Py_char2wchar(const char* arg, size_t *size)
argsize -= converted;
out++;
}
-#else
+#else /* HAVE_MBRTOWC */
/* Cannot use C locale for escaping; manually escape as if charset
is ASCII (i.e. escape all bytes > 128. This will still roundtrip
correctly in the locale's charset, which must be an ASCII superset. */
@@ -121,13 +136,14 @@ _Py_char2wchar(const char* arg, size_t *size)
else
*out++ = 0xdc00 + *in++;
*out = 0;
-#endif
+#endif /* HAVE_MBRTOWC */
if (size != NULL)
*size = out - res;
return res;
oom:
fprintf(stderr, "out of memory\n");
return NULL;
+#endif /* __APPLE__ */
}
/* Encode a (wide) character string to the locale encoding with the
@@ -144,14 +160,42 @@ oom:
char*
_Py_wchar2char(const wchar_t *text, size_t *error_pos)
{
+#ifdef __APPLE__
+ Py_ssize_t len;
+ PyObject *unicode, *bytes = NULL;
+ char *cpath;
+
+ unicode = PyUnicode_FromWideChar(text, wcslen(text));
+ if (unicode == NULL)
+ return NULL;
+
+ bytes = _PyUnicode_AsUTF8String(unicode, "surrogateescape");
+ Py_DECREF(unicode);
+ if (bytes == NULL) {
+ PyErr_Clear();
+ if (error_pos != NULL)
+ *error_pos = (size_t)-1;
+ return NULL;
+ }
+
+ len = PyBytes_GET_SIZE(bytes);
+ cpath = PyMem_Malloc(len+1);
+ if (cpath == NULL) {
+ PyErr_Clear();
+ Py_DECREF(bytes);
+ if (error_pos != NULL)
+ *error_pos = (size_t)-1;
+ return NULL;
+ }
+ memcpy(cpath, PyBytes_AsString(bytes), len + 1);
+ Py_DECREF(bytes);
+ return cpath;
+#else /* __APPLE__ */
const size_t len = wcslen(text);
char *result = NULL, *bytes = NULL;
size_t i, size, converted;
wchar_t c, buf[2];
- if (error_pos != NULL)
- *error_pos = (size_t)-1;
-
/* The function works in two steps:
1. compute the length of the output buffer in bytes (size)
2. outputs the bytes */
@@ -198,11 +242,15 @@ _Py_wchar2char(const wchar_t *text, size_t *error_pos)
size += 1; /* nul byte at the end */
result = PyMem_Malloc(size);
- if (result == NULL)
+ if (result == NULL) {
+ if (error_pos != NULL)
+ *error_pos = (size_t)-1;
return NULL;
+ }
bytes = result;
}
return result;
+#endif /* __APPLE__ */
}
/* In principle, this should use HAVE__WSTAT, and _wstat