diff options
author | Nick Coghlan <ncoghlan@gmail.com> | 2013-11-13 13:49:21 (GMT) |
---|---|---|
committer | Nick Coghlan <ncoghlan@gmail.com> | 2013-11-13 13:49:21 (GMT) |
commit | 8b097b4ed726b8282fce582cb2c20ab9c986fc21 (patch) | |
tree | ca9b18d186c9132f62378e1bde87e766beb2b379 /Objects | |
parent | 59799a83995f135bdb1b1a0994052c1f24c68e83 (diff) | |
download | cpython-8b097b4ed726b8282fce582cb2c20ab9c986fc21.zip cpython-8b097b4ed726b8282fce582cb2c20ab9c986fc21.tar.gz cpython-8b097b4ed726b8282fce582cb2c20ab9c986fc21.tar.bz2 |
Close #17828: better handling of codec errors
- output type errors now redirect users to the type-neutral
convenience functions in the codecs module
- stateless errors that occur during encoding and decoding
will now be automatically wrapped in exceptions that give
the name of the codec involved
Diffstat (limited to 'Objects')
-rw-r--r-- | Objects/exceptions.c | 113 | ||||
-rw-r--r-- | Objects/unicodeobject.c | 27 |
2 files changed, 131 insertions, 9 deletions
diff --git a/Objects/exceptions.c b/Objects/exceptions.c index de5d746..53d8b66 100644 --- a/Objects/exceptions.c +++ b/Objects/exceptions.c @@ -2591,3 +2591,116 @@ _PyExc_Fini(void) free_preallocated_memerrors(); Py_CLEAR(errnomap); } + +/* Helper to do the equivalent of "raise X from Y" in C, but always using + * the current exception rather than passing one in. + * + * We currently limit this to *only* exceptions that use the BaseException + * tp_init and tp_new methods, since we can be reasonably sure we can wrap + * those correctly without losing data and without losing backwards + * compatibility. + * + * We also aim to rule out *all* exceptions that might be storing additional + * state, whether by having a size difference relative to BaseException, + * additional arguments passed in during construction or by having a + * non-empty instance dict. + * + * We need to be very careful with what we wrap, since changing types to + * a broader exception type would be backwards incompatible for + * existing codecs, and with different init or new method implementations + * may either not support instantiation with PyErr_Format or lose + * information when instantiated that way. + * + * XXX (ncoghlan): This could be made more comprehensive by exploiting the + * fact that exceptions are expected to support pickling. If more builtin + * exceptions (e.g. AttributeError) start to be converted to rich + * exceptions with additional attributes, that's probably a better approach + * to pursue over adding special cases for particular stateful subclasses. + * + * Returns a borrowed reference to the new exception (if any), NULL if the + * existing exception was left in place. + */ +PyObject * +_PyErr_TrySetFromCause(const char *format, ...) +{ + PyObject* msg_prefix; + PyObject *exc, *val, *tb; + PyTypeObject *caught_type; + PyObject *instance_dict; + PyObject *instance_args; + Py_ssize_t num_args; + PyObject *new_exc, *new_val, *new_tb; + va_list vargs; + +#ifdef HAVE_STDARG_PROTOTYPES + va_start(vargs, format); +#else + va_start(vargs); +#endif + + PyErr_Fetch(&exc, &val, &tb); + caught_type = (PyTypeObject *) exc; + /* Ensure type info indicates no extra state is stored at the C level */ + if (caught_type->tp_init != (initproc) BaseException_init || + caught_type->tp_new != BaseException_new || + caught_type->tp_basicsize != _PyExc_BaseException.tp_basicsize || + caught_type->tp_itemsize != _PyExc_BaseException.tp_itemsize + ) { + /* We can't be sure we can wrap this safely, since it may contain + * more state than just the exception type. Accordingly, we just + * leave it alone. + */ + PyErr_Restore(exc, val, tb); + return NULL; + } + + /* Check the args are empty or contain a single string */ + PyErr_NormalizeException(&exc, &val, &tb); + instance_args = ((PyBaseExceptionObject *) val)->args; + num_args = PyTuple_GET_SIZE(instance_args); + if ((num_args > 1) || + (num_args == 1 && + !PyUnicode_CheckExact(PyTuple_GET_ITEM(instance_args, 0)) + ) + ) { + /* More than 1 arg, or the one arg we do have isn't a string + */ + PyErr_Restore(exc, val, tb); + return NULL; + } + + /* Ensure the instance dict is also empty */ + instance_dict = *_PyObject_GetDictPtr(val); + if (instance_dict != NULL && PyObject_Length(instance_dict) > 0) { + /* While we could potentially copy a non-empty instance dictionary + * to the replacement exception, for now we take the more + * conservative path of leaving exceptions with attributes set + * alone. + */ + PyErr_Restore(exc, val, tb); + return NULL; + } + + /* For exceptions that we can wrap safely, we chain the original + * exception to a new one of the exact same type with an + * error message that mentions the additional details and the + * original exception. + * + * It would be nice to wrap OSError and various other exception + * types as well, but that's quite a bit trickier due to the extra + * state potentially stored on OSError instances. + */ + msg_prefix = PyUnicode_FromFormatV(format, vargs); + if (msg_prefix == NULL) + return NULL; + + PyErr_Format(exc, "%U (%s: %S)", + msg_prefix, Py_TYPE(val)->tp_name, val); + Py_DECREF(exc); + Py_XDECREF(tb); + PyErr_Fetch(&new_exc, &new_val, &new_tb); + PyErr_NormalizeException(&new_exc, &new_val, &new_tb); + PyException_SetCause(new_val, val); + PyErr_Restore(new_exc, new_val, new_tb); + return new_val; +} diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 224a80b..7789816 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -3054,8 +3054,10 @@ PyUnicode_Decode(const char *s, goto onError; if (!PyUnicode_Check(unicode)) { PyErr_Format(PyExc_TypeError, - "decoder did not return a str object (type=%.400s)", - Py_TYPE(unicode)->tp_name); + "'%.400s' decoder returned '%.400s' instead of 'str'; " + "use codecs.decode() to decode to arbitrary types", + encoding, + Py_TYPE(unicode)->tp_name, Py_TYPE(unicode)->tp_name); Py_DECREF(unicode); goto onError; } @@ -3113,8 +3115,10 @@ PyUnicode_AsDecodedUnicode(PyObject *unicode, goto onError; if (!PyUnicode_Check(v)) { PyErr_Format(PyExc_TypeError, - "decoder did not return a str object (type=%.400s)", - Py_TYPE(v)->tp_name); + "'%.400s' decoder returned '%.400s' instead of 'str'; " + "use codecs.decode() to decode to arbitrary types", + encoding, + Py_TYPE(unicode)->tp_name, Py_TYPE(unicode)->tp_name); Py_DECREF(v); goto onError; } @@ -3425,7 +3429,8 @@ PyUnicode_AsEncodedString(PyObject *unicode, PyObject *b; error = PyErr_WarnFormat(PyExc_RuntimeWarning, 1, - "encoder %s returned bytearray instead of bytes", + "encoder %s returned bytearray instead of bytes; " + "use codecs.encode() to encode to arbitrary types", encoding); if (error) { Py_DECREF(v); @@ -3438,8 +3443,10 @@ PyUnicode_AsEncodedString(PyObject *unicode, } PyErr_Format(PyExc_TypeError, - "encoder did not return a bytes object (type=%.400s)", - Py_TYPE(v)->tp_name); + "'%.400s' encoder returned '%.400s' instead of 'bytes'; " + "use codecs.encode() to encode to arbitrary types", + encoding, + Py_TYPE(v)->tp_name, Py_TYPE(v)->tp_name); Py_DECREF(v); return NULL; } @@ -3465,8 +3472,10 @@ PyUnicode_AsEncodedUnicode(PyObject *unicode, goto onError; if (!PyUnicode_Check(v)) { PyErr_Format(PyExc_TypeError, - "encoder did not return an str object (type=%.400s)", - Py_TYPE(v)->tp_name); + "'%.400s' encoder returned '%.400s' instead of 'str'; " + "use codecs.encode() to encode to arbitrary types", + encoding, + Py_TYPE(v)->tp_name, Py_TYPE(v)->tp_name); Py_DECREF(v); goto onError; } |