diff options
author | Victor Stinner <victor.stinner@haypocalc.com> | 2011-12-16 22:56:01 (GMT) |
---|---|---|
committer | Victor Stinner <victor.stinner@haypocalc.com> | 2011-12-16 22:56:01 (GMT) |
commit | af02e1c85a66009cdc645a64de7d7ee1335c8301 (patch) | |
tree | 5bc78c3a8628589cf5a4c246afc0076871d51c62 /Objects | |
parent | 3607e3de278c89660f773064a94385066eebda1b (diff) | |
download | cpython-af02e1c85a66009cdc645a64de7d7ee1335c8301.zip cpython-af02e1c85a66009cdc645a64de7d7ee1335c8301.tar.gz cpython-af02e1c85a66009cdc645a64de7d7ee1335c8301.tar.bz2 |
Add PyUnicode_DecodeLocaleAndSize() and PyUnicode_DecodeLocale()
* PyUnicode_DecodeLocaleAndSize() and PyUnicode_DecodeLocale() decode a string
from the current locale encoding.
* _Py_char2wchar() writes an "error code" in the size argument to indicate
whether the function failed because of a memory allocation failure or because
of a decoding error. The function doesn't write the error message directly to
stderr.
* Fix time.strftime() (if wcsftime() is missing): decode strftime() result
from the current locale encoding, not from the filesystem encoding.
Diffstat (limited to 'Objects')
-rw-r--r-- | Objects/unicodeobject.c | 95 |
1 file changed, 78 insertions, 17 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 5758ffa..7444c8b 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -3235,6 +3235,83 @@ PyUnicode_AsEncodedUnicode(PyObject *unicode, } PyObject* +PyUnicode_DecodeLocaleAndSize(const char *str, Py_ssize_t len, + int surrogateescape) +{ + wchar_t smallbuf[256]; + size_t smallbuf_len = Py_ARRAY_LENGTH(smallbuf); + wchar_t *wstr; + size_t wlen, wlen2; + PyObject *unicode; + + if (str[len] != '\0' || len != strlen(str)) { + PyErr_SetString(PyExc_TypeError, "embedded null character"); + return NULL; + } + + if (surrogateescape) + { + wstr = _Py_char2wchar(str, &wlen); + if (wstr == NULL) { + if (wlen == (size_t)-1) + PyErr_NoMemory(); + else + PyErr_SetFromErrno(PyExc_OSError); + return NULL; + } + + unicode = PyUnicode_FromWideChar(wstr, wlen); + PyMem_Free(wstr); + } + else { +#ifndef HAVE_BROKEN_MBSTOWCS + wlen = mbstowcs(NULL, str, 0); +#else + wlen = len; +#endif + if (wlen == (size_t)-1) { + PyErr_SetFromErrno(PyExc_OSError); + return NULL; + } + if (wlen+1 <= smallbuf_len) { + wstr = smallbuf; + } + else { + if (wlen > PY_SSIZE_T_MAX / sizeof(wchar_t) - 1) + return PyErr_NoMemory(); + + wstr = PyMem_Malloc((wlen+1) * sizeof(wchar_t)); + if (!wstr) + return PyErr_NoMemory(); + } + + /* This shouldn't fail now */ + wlen2 = mbstowcs(wstr, str, wlen+1); + if (wlen2 == (size_t)-1) { + if (wstr != smallbuf) + PyMem_Free(wstr); + PyErr_SetFromErrno(PyExc_OSError); + return NULL; + } +#ifdef HAVE_BROKEN_MBSTOWCS + assert(wlen2 == wlen); +#endif + unicode = PyUnicode_FromWideChar(wstr, wlen2); + if (wstr != smallbuf) + PyMem_Free(wstr); + } + return unicode; +} + +PyObject* +PyUnicode_DecodeLocale(const char *str, int surrogateescape) +{ + Py_ssize_t size = (Py_ssize_t)strlen(str); + return PyUnicode_DecodeLocaleAndSize(str, size, surrogateescape); +} + + +PyObject* PyUnicode_DecodeFSDefault(const char *s) { Py_ssize_t size = (Py_ssize_t)strlen(s); return 
PyUnicode_DecodeFSDefaultAndSize(s, size); @@ -3264,23 +3341,7 @@ PyUnicode_DecodeFSDefaultAndSize(const char *s, Py_ssize_t size) "surrogateescape"); } else { - /* locale encoding with surrogateescape */ - wchar_t *wchar; - PyObject *unicode; - size_t len; - - if (s[size] != '\0' || size != strlen(s)) { - PyErr_SetString(PyExc_TypeError, "embedded NUL character"); - return NULL; - } - - wchar = _Py_char2wchar(s, &len); - if (wchar == NULL) - return PyErr_NoMemory(); - - unicode = PyUnicode_FromWideChar(wchar, len); - PyMem_Free(wchar); - return unicode; + return PyUnicode_DecodeLocaleAndSize(s, size, 1); } #endif } |