diff options
Diffstat (limited to 'Objects/unicodeobject.c')
-rw-r--r-- | Objects/unicodeobject.c | 68 |
1 files changed, 63 insertions, 5 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index cb1456e..b6f3d8f 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -427,6 +427,48 @@ get_error_handler_wide(const wchar_t *errors) } +static inline int +unicode_check_encoding_errors(const char *encoding, const char *errors) +{ + if (encoding == NULL && errors == NULL) { + return 0; + } + + PyInterpreterState *interp = _PyInterpreterState_GET_UNSAFE(); +#ifndef Py_DEBUG + /* In release mode, only check in development mode (-X dev) */ + if (!interp->config.dev_mode) { + return 0; + } +#else + /* Always check in debug mode */ +#endif + + /* Avoid calling _PyCodec_Lookup() and PyCodec_LookupError() before the + codec registry is ready: before_PyUnicode_InitEncodings() is called. */ + if (!interp->fs_codec.encoding) { + return 0; + } + + if (encoding != NULL) { + PyObject *handler = _PyCodec_Lookup(encoding); + if (handler == NULL) { + return -1; + } + Py_DECREF(handler); + } + + if (errors != NULL) { + PyObject *handler = PyCodec_LookupError(errors); + if (handler == NULL) { + return -1; + } + Py_DECREF(handler); + } + return 0; +} + + /* The max unicode value is always 0x10FFFF while using the PEP-393 API. This function is kept for backward compatibility with the old API. */ Py_UNICODE @@ -3211,12 +3253,15 @@ PyUnicode_FromEncodedObject(PyObject *obj, /* Decoding bytes objects is the most common case and should be fast */ if (PyBytes_Check(obj)) { - if (PyBytes_GET_SIZE(obj) == 0) + if (PyBytes_GET_SIZE(obj) == 0) { + if (unicode_check_encoding_errors(encoding, errors) < 0) { + return NULL; + } _Py_RETURN_UNICODE_EMPTY(); - v = PyUnicode_Decode( + } + return PyUnicode_Decode( PyBytes_AS_STRING(obj), PyBytes_GET_SIZE(obj), encoding, errors); - return v; } if (PyUnicode_Check(obj)) { @@ -3235,6 +3280,9 @@ PyUnicode_FromEncodedObject(PyObject *obj, if (buffer.len == 0) { PyBuffer_Release(&buffer); + if (unicode_check_encoding_errors(encoding, errors) < 0) { + return NULL; + } _Py_RETURN_UNICODE_EMPTY(); } @@ -3302,6 +3350,10 @@ PyUnicode_Decode(const char *s, Py_buffer info; char buflower[11]; /* strlen("iso-8859-1\0") == 11, longest shortcut */ + if (unicode_check_encoding_errors(encoding, errors) < 0) { + return NULL; + } + if (encoding == NULL) { return PyUnicode_DecodeUTF8Stateful(s, size, errors, NULL); } @@ -3562,7 +3614,8 @@ PyUnicode_EncodeFSDefault(PyObject *unicode) cannot use it to encode and decode filenames before it is loaded. Load the Python codec requires to encode at least its own filename. Use the C implementation of the locale codec until the codec registry is - initialized and the Python codec is loaded. See initfsencoding(). */ + initialized and the Python codec is loaded. + See _PyUnicode_InitEncodings(). */ if (interp->fs_codec.encoding) { return PyUnicode_AsEncodedString(unicode, interp->fs_codec.encoding, @@ -3591,6 +3644,10 @@ PyUnicode_AsEncodedString(PyObject *unicode, return NULL; } + if (unicode_check_encoding_errors(encoding, errors) < 0) { + return NULL; + } + if (encoding == NULL) { return _PyUnicode_AsUTF8String(unicode, errors); } @@ -3800,7 +3857,8 @@ PyUnicode_DecodeFSDefaultAndSize(const char *s, Py_ssize_t size) cannot use it to encode and decode filenames before it is loaded. Load the Python codec requires to encode at least its own filename. Use the C implementation of the locale codec until the codec registry is - initialized and the Python codec is loaded. See initfsencoding(). */ + initialized and the Python codec is loaded. + See _PyUnicode_InitEncodings(). */ if (interp->fs_codec.encoding) { return PyUnicode_Decode(s, size, interp->fs_codec.encoding, |