summaryrefslogtreecommitdiffstats
path: root/Objects
diff options
context:
space:
mode:
authorVictor Stinner <victor.stinner@haypocalc.com>2011-12-16 22:56:01 (GMT)
committerVictor Stinner <victor.stinner@haypocalc.com>2011-12-16 22:56:01 (GMT)
commitaf02e1c85a66009cdc645a64de7d7ee1335c8301 (patch)
tree5bc78c3a8628589cf5a4c246afc0076871d51c62 /Objects
parent3607e3de278c89660f773064a94385066eebda1b (diff)
downloadcpython-af02e1c85a66009cdc645a64de7d7ee1335c8301.zip
cpython-af02e1c85a66009cdc645a64de7d7ee1335c8301.tar.gz
cpython-af02e1c85a66009cdc645a64de7d7ee1335c8301.tar.bz2
Add PyUnicode_DecodeLocaleAndSize() and PyUnicode_DecodeLocale()
* PyUnicode_DecodeLocaleAndSize() and PyUnicode_DecodeLocale() decode a string from the current locale encoding * _Py_char2wchar() writes an "error code" in the size argument to indicate if the function failed because of memory allocation failure or because of a decoding error. The function doesn't write the error message directly to stderr. * Fix time.strftime() (if wcsftime() is missing): decode strftime() result from the current locale encoding, not from the filesystem encoding.
Diffstat (limited to 'Objects')
-rw-r--r--Objects/unicodeobject.c95
1 files changed, 78 insertions, 17 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 5758ffa..7444c8b 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -3235,6 +3235,83 @@ PyUnicode_AsEncodedUnicode(PyObject *unicode,
}
PyObject*
+PyUnicode_DecodeLocaleAndSize(const char *str, Py_ssize_t len,
+ int surrogateescape)
+{
+ wchar_t smallbuf[256];
+ size_t smallbuf_len = Py_ARRAY_LENGTH(smallbuf);
+ wchar_t *wstr;
+ size_t wlen, wlen2;
+ PyObject *unicode;
+
+ if (str[len] != '\0' || len != strlen(str)) {
+ PyErr_SetString(PyExc_TypeError, "embedded null character");
+ return NULL;
+ }
+
+ if (surrogateescape)
+ {
+ wstr = _Py_char2wchar(str, &wlen);
+ if (wstr == NULL) {
+ if (wlen == (size_t)-1)
+ PyErr_NoMemory();
+ else
+ PyErr_SetFromErrno(PyExc_OSError);
+ return NULL;
+ }
+
+ unicode = PyUnicode_FromWideChar(wstr, wlen);
+ PyMem_Free(wstr);
+ }
+ else {
+#ifndef HAVE_BROKEN_MBSTOWCS
+ wlen = mbstowcs(NULL, str, 0);
+#else
+ wlen = len;
+#endif
+ if (wlen == (size_t)-1) {
+ PyErr_SetFromErrno(PyExc_OSError);
+ return NULL;
+ }
+ if (wlen+1 <= smallbuf_len) {
+ wstr = smallbuf;
+ }
+ else {
+ if (wlen > PY_SSIZE_T_MAX / sizeof(wchar_t) - 1)
+ return PyErr_NoMemory();
+
+ wstr = PyMem_Malloc((wlen+1) * sizeof(wchar_t));
+ if (!wstr)
+ return PyErr_NoMemory();
+ }
+
+ /* This shouldn't fail now */
+ wlen2 = mbstowcs(wstr, str, wlen+1);
+ if (wlen2 == (size_t)-1) {
+ if (wstr != smallbuf)
+ PyMem_Free(wstr);
+ PyErr_SetFromErrno(PyExc_OSError);
+ return NULL;
+ }
+#ifdef HAVE_BROKEN_MBSTOWCS
+ assert(wlen2 == wlen);
+#endif
+ unicode = PyUnicode_FromWideChar(wstr, wlen2);
+ if (wstr != smallbuf)
+ PyMem_Free(wstr);
+ }
+ return unicode;
+}
+
+PyObject*
+PyUnicode_DecodeLocale(const char *str, int surrogateescape)
+{
+ Py_ssize_t size = (Py_ssize_t)strlen(str);
+ return PyUnicode_DecodeLocaleAndSize(str, size, surrogateescape);
+}
+
+
+PyObject*
PyUnicode_DecodeFSDefault(const char *s) {
Py_ssize_t size = (Py_ssize_t)strlen(s);
return PyUnicode_DecodeFSDefaultAndSize(s, size);
@@ -3264,23 +3341,7 @@ PyUnicode_DecodeFSDefaultAndSize(const char *s, Py_ssize_t size)
"surrogateescape");
}
else {
- /* locale encoding with surrogateescape */
- wchar_t *wchar;
- PyObject *unicode;
- size_t len;
-
- if (s[size] != '\0' || size != strlen(s)) {
- PyErr_SetString(PyExc_TypeError, "embedded NUL character");
- return NULL;
- }
-
- wchar = _Py_char2wchar(s, &len);
- if (wchar == NULL)
- return PyErr_NoMemory();
-
- unicode = PyUnicode_FromWideChar(wchar, len);
- PyMem_Free(wchar);
- return unicode;
+ return PyUnicode_DecodeLocaleAndSize(s, size, 1);
}
#endif
}