From 1b57967b96daeb505e9d2dbe3cd347625dcb0739 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Sat, 17 Dec 2011 05:47:23 +0100 Subject: Issue #13560: Locale codec functions use the classic "errors" parameter, instead of surrogateescape So it would be possible to support more error handlers later. --- Include/unicodeobject.h | 6 +++--- Modules/main.c | 2 +- Modules/posixmodule.c | 2 +- Modules/timemodule.c | 9 +++++---- Objects/unicodeobject.c | 45 ++++++++++++++++++++++++++++++++++++++------- Python/errors.c | 2 +- 6 files changed, 49 insertions(+), 17 deletions(-) diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h index 8a23c7d..c5480f1 100644 --- a/Include/unicodeobject.h +++ b/Include/unicodeobject.h @@ -1608,14 +1608,14 @@ PyAPI_FUNC(PyObject*) _PyUnicode_TransformDecimalAndSpaceToASCII( PyAPI_FUNC(PyObject*) PyUnicode_DecodeLocaleAndSize( const char *str, Py_ssize_t len, - int surrogateescape); + const char *errors); /* Similar to PyUnicode_DecodeLocaleAndSize(), but compute the string length using strlen(). */ PyAPI_FUNC(PyObject*) PyUnicode_DecodeLocale( const char *str, - int surrogateescape); + const char *errors); /* Encode a Unicode object to the current locale encoding. The encoder is strict is *surrogateescape* is equal to zero, otherwise the @@ -1624,7 +1624,7 @@ PyAPI_FUNC(PyObject*) PyUnicode_DecodeLocale( PyAPI_FUNC(PyObject*) PyUnicode_EncodeLocale( PyObject *unicode, - int surrogateescape + const char *errors ); /* --- File system encoding ---------------------------------------------- */ diff --git a/Modules/main.c b/Modules/main.c index 4899378..d8c5172 100644 --- a/Modules/main.c +++ b/Modules/main.c @@ -495,7 +495,7 @@ Py_Main(int argc, wchar_t **argv) /* Use utf-8 on Mac OS X */ unicode = PyUnicode_FromString(p); #else - unicode = PyUnicode_DecodeLocale(p, 1); + unicode = PyUnicode_DecodeLocale(p, "surrogateescape"); #endif if (unicode == NULL) { /* ignore errors */ diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c index 6b832c0..acdc00c 100644 --- a/Modules/posixmodule.c +++ b/Modules/posixmodule.c @@ -7891,7 +7891,7 @@ posix_strerror(PyObject *self, PyObject *args) "strerror() argument out of range"); return NULL; } - return PyUnicode_DecodeLocale(message, 1); + return PyUnicode_DecodeLocale(message, "surrogateescape"); } diff --git a/Modules/timemodule.c b/Modules/timemodule.c index ad1c54e..ad28e58 100644 --- a/Modules/timemodule.c +++ b/Modules/timemodule.c @@ -486,7 +486,7 @@ time_strftime(PyObject *self, PyObject *args) fmt = format; #else /* Convert the unicode string to an ascii one */ - format = PyUnicode_EncodeLocale(format_arg, 1); + format = PyUnicode_EncodeLocale(format_arg, "surrogateescape"); if (format == NULL) return NULL; fmt = PyBytes_AS_STRING(format); @@ -532,7 +532,8 @@ time_strftime(PyObject *self, PyObject *args) #ifdef HAVE_WCSFTIME ret = PyUnicode_FromWideChar(outbuf, buflen); #else - ret = PyUnicode_DecodeLocaleAndSize(outbuf, buflen, 1); + ret = PyUnicode_DecodeLocaleAndSize(outbuf, buflen, + "surrogateescape"); #endif PyMem_Free(outbuf); break; @@ -764,8 +765,8 @@ PyInit_timezone(PyObject *m) { #endif /* PYOS_OS2 */ #endif PyModule_AddIntConstant(m, "daylight", daylight); - otz0 = PyUnicode_DecodeLocale(tzname[0], 1); - otz1 = PyUnicode_DecodeLocale(tzname[1], 1); + otz0 = PyUnicode_DecodeLocale(tzname[0], "surrogateescape"); + otz1 = PyUnicode_DecodeLocale(tzname[1], "surrogateescape"); PyModule_AddObject(m, "tzname", Py_BuildValue("(NN)", otz0, otz1)); #else /* !HAVE_TZNAME || __GLIBC__ || __CYGWIN__*/ #ifdef HAVE_STRUCT_TM_TM_ZONE diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index aeccfe4..b3d6de2 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -3125,8 +3125,31 @@ wcstombs_errorpos(const wchar_t *wstr) return 0; } +static int +locale_error_handler(const char *errors, int *surrogateescape) +{ + if (errors == NULL) { + *surrogateescape = 0; + return 0; + } + + if (strcmp(errors, "strict") == 0) { + *surrogateescape = 0; + return 0; + } + if (strcmp(errors, "surrogateescape") == 0) { + *surrogateescape = 1; + return 0; + } + PyErr_Format(PyExc_ValueError, + "only 'strict' and 'surrogateescape' error handlers " + "are supported, not '%s'", + errors); + return -1; +} + PyObject * -PyUnicode_EncodeLocale(PyObject *unicode, int surrogateescape) +PyUnicode_EncodeLocale(PyObject *unicode, const char *errors) { Py_ssize_t wlen, wlen2; wchar_t *wstr; @@ -3135,6 +3158,10 @@ PyUnicode_EncodeLocale(PyObject *unicode, int surrogateescape) PyObject *reason; PyObject *exc; size_t error_pos; + int surrogateescape; + + if (locale_error_handler(errors, &surrogateescape) < 0) + return NULL; wstr = PyUnicode_AsWideCharString(unicode, &wlen); if (wstr == NULL) @@ -3198,7 +3225,7 @@ encode_error: Py_XDECREF(bytes); if (errmsg != NULL) - reason = PyUnicode_DecodeLocale(errmsg, 1); + reason = PyUnicode_DecodeLocale(errmsg, "surrogateescape"); else reason = PyUnicode_FromString( "wcstombs() encountered an unencodable " @@ -3243,7 +3270,7 @@ PyUnicode_EncodeFSDefault(PyObject *unicode) "surrogateescape"); } else { - return PyUnicode_EncodeLocale(unicode, 1); + return PyUnicode_EncodeLocale(unicode, "surrogateescape"); } #endif } @@ -3351,13 +3378,17 @@ PyUnicode_AsEncodedUnicode(PyObject *unicode, PyObject* PyUnicode_DecodeLocaleAndSize(const char *str, Py_ssize_t len, - int surrogateescape) + const char *errors) { wchar_t smallbuf[256]; size_t smallbuf_len = Py_ARRAY_LENGTH(smallbuf); wchar_t *wstr; size_t wlen, wlen2; PyObject *unicode; + int surrogateescape; + + if (locale_error_handler(errors, &surrogateescape) < 0) + return NULL; if (str[len] != '\0' || len != strlen(str)) { PyErr_SetString(PyExc_TypeError, "embedded null character"); @@ -3419,10 +3450,10 @@ PyUnicode_DecodeLocaleAndSize(const char *str, Py_ssize_t len, } PyObject* -PyUnicode_DecodeLocale(const char *str, int surrogateescape) +PyUnicode_DecodeLocale(const char *str, const char *errors) { Py_ssize_t size = (Py_ssize_t)strlen(str); - return PyUnicode_DecodeLocaleAndSize(str, size, surrogateescape); + return PyUnicode_DecodeLocaleAndSize(str, size, errors); } @@ -3456,7 +3487,7 @@ PyUnicode_DecodeFSDefaultAndSize(const char *s, Py_ssize_t size) "surrogateescape"); } else { - return PyUnicode_DecodeLocaleAndSize(s, size, 1); + return PyUnicode_DecodeLocaleAndSize(s, size, "surrogateescape"); } #endif } diff --git a/Python/errors.c b/Python/errors.c index 122e444..31fa9e2 100644 --- a/Python/errors.c +++ b/Python/errors.c @@ -355,7 +355,7 @@ PyErr_SetFromErrnoWithFilenameObject(PyObject *exc, PyObject *filenameObject) #ifndef MS_WINDOWS if (i != 0) { char *s = strerror(i); - message = PyUnicode_DecodeLocale(s, 1); + message = PyUnicode_DecodeLocale(s, "surrogateescape"); } else { /* Sometimes errno didn't get set */ -- cgit v0.12