summaryrefslogtreecommitdiffstats
path: root/Objects
diff options
context:
space:
mode:
authorNick Coghlan <ncoghlan@gmail.com>2013-11-13 13:49:21 (GMT)
committerNick Coghlan <ncoghlan@gmail.com>2013-11-13 13:49:21 (GMT)
commit8b097b4ed726b8282fce582cb2c20ab9c986fc21 (patch)
treeca9b18d186c9132f62378e1bde87e766beb2b379 /Objects
parent59799a83995f135bdb1b1a0994052c1f24c68e83 (diff)
downloadcpython-8b097b4ed726b8282fce582cb2c20ab9c986fc21.zip
cpython-8b097b4ed726b8282fce582cb2c20ab9c986fc21.tar.gz
cpython-8b097b4ed726b8282fce582cb2c20ab9c986fc21.tar.bz2
Close #17828: better handling of codec errors
- output type errors now redirect users to the type-neutral convenience functions in the codecs module
- stateless errors that occur during encoding and decoding will now be automatically wrapped in exceptions that give the name of the codec involved
Diffstat (limited to 'Objects')
-rw-r--r--Objects/exceptions.c113
-rw-r--r--Objects/unicodeobject.c27
2 files changed, 131 insertions, 9 deletions
diff --git a/Objects/exceptions.c b/Objects/exceptions.c
index de5d746..53d8b66 100644
--- a/Objects/exceptions.c
+++ b/Objects/exceptions.c
@@ -2591,3 +2591,116 @@ _PyExc_Fini(void)
free_preallocated_memerrors();
Py_CLEAR(errnomap);
}
+
+/* Helper to do the equivalent of "raise X from Y" in C, but always using
+ * the current exception rather than passing one in.
+ *
+ * We currently limit this to *only* exceptions that use the BaseException
+ * tp_init and tp_new methods, since we can be reasonably sure we can wrap
+ * those correctly without losing data and without losing backwards
+ * compatibility.
+ *
+ * We also aim to rule out *all* exceptions that might be storing additional
+ * state, whether by having a size difference relative to BaseException,
+ * additional arguments passed in during construction or by having a
+ * non-empty instance dict.
+ *
+ * We need to be very careful with what we wrap, since changing types to
+ * a broader exception type would be backwards incompatible for
+ * existing codecs, and types with different init or new method
+ * implementations may either not support instantiation with PyErr_Format
+ * or lose information when instantiated that way.
+ *
+ * XXX (ncoghlan): This could be made more comprehensive by exploiting the
+ * fact that exceptions are expected to support pickling. If more builtin
+ * exceptions (e.g. AttributeError) start to be converted to rich
+ * exceptions with additional attributes, that's probably a better approach
+ * to pursue over adding special cases for particular stateful subclasses.
+ *
+ * Arguments:
+ *   format, ...  PyUnicode_FromFormatV-style format string and arguments
+ *                used to build the prefix of the replacement message.
+ *
+ * Precondition: an exception must currently be set; the fetched exception
+ * type is dereferenced unconditionally.
+ *
+ * Returns a borrowed reference to the new exception (if any), NULL if the
+ * existing exception was left in place.  NULL is also returned when the
+ * message prefix cannot be built; in that case the original exception is
+ * released and whatever error the formatting call raised is left set.
+ */
+PyObject *
+_PyErr_TrySetFromCause(const char *format, ...)
+{
+    PyObject *msg_prefix;
+    PyObject *exc, *val, *tb;
+    PyTypeObject *caught_type;
+    PyObject *instance_dict;
+    PyObject *instance_args;
+    Py_ssize_t num_args;
+    PyObject *new_exc, *new_val, *new_tb;
+    va_list vargs;
+
+    PyErr_Fetch(&exc, &val, &tb);
+    caught_type = (PyTypeObject *) exc;
+    /* Ensure type info indicates no extra state is stored at the C level */
+    if (caught_type->tp_init != (initproc) BaseException_init ||
+        caught_type->tp_new != BaseException_new ||
+        caught_type->tp_basicsize != _PyExc_BaseException.tp_basicsize ||
+        caught_type->tp_itemsize != _PyExc_BaseException.tp_itemsize
+       ) {
+        /* We can't be sure we can wrap this safely, since it may contain
+         * more state than just the exception type. Accordingly, we just
+         * leave it alone.
+         */
+        PyErr_Restore(exc, val, tb);
+        return NULL;
+    }
+
+    /* Check the args are empty or contain a single string */
+    PyErr_NormalizeException(&exc, &val, &tb);
+    instance_args = ((PyBaseExceptionObject *) val)->args;
+    num_args = PyTuple_GET_SIZE(instance_args);
+    if ((num_args > 1) ||
+        (num_args == 1 &&
+         !PyUnicode_CheckExact(PyTuple_GET_ITEM(instance_args, 0))
+        )
+       ) {
+        /* More than 1 arg, or the one arg we do have isn't a string
+         */
+        PyErr_Restore(exc, val, tb);
+        return NULL;
+    }
+
+    /* Ensure the instance dict is also empty */
+    instance_dict = *_PyObject_GetDictPtr(val);
+    if (instance_dict != NULL && PyObject_Length(instance_dict) > 0) {
+        /* While we could potentially copy a non-empty instance dictionary
+         * to the replacement exception, for now we take the more
+         * conservative path of leaving exceptions with attributes set
+         * alone.
+         */
+        PyErr_Restore(exc, val, tb);
+        return NULL;
+    }
+
+    /* For exceptions that we can wrap safely, we chain the original
+     * exception to a new one of the exact same type with an
+     * error message that mentions the additional details and the
+     * original exception.
+     *
+     * It would be nice to wrap OSError and various other exception
+     * types as well, but that's quite a bit trickier due to the extra
+     * state potentially stored on OSError instances.
+     */
+
+    /* The variable arguments are only consumed here, so start and end the
+     * va_list around this single use; that way every va_start is matched
+     * by a va_end regardless of which return path is taken.
+     */
+#ifdef HAVE_STDARG_PROTOTYPES
+    va_start(vargs, format);
+#else
+    va_start(vargs);
+#endif
+    msg_prefix = PyUnicode_FromFormatV(format, vargs);
+    va_end(vargs);
+    if (msg_prefix == NULL) {
+        /* We still own the references obtained from PyErr_Fetch; release
+         * them so they don't leak. The error raised by the formatting
+         * call stays set for the caller.
+         */
+        Py_DECREF(exc);
+        Py_XDECREF(val);
+        Py_XDECREF(tb);
+        return NULL;
+    }
+
+    PyErr_Format(exc, "%U (%s: %S)",
+                 msg_prefix, Py_TYPE(val)->tp_name, val);
+    /* The formatted message holds its own copy of the prefix text */
+    Py_DECREF(msg_prefix);
+    Py_DECREF(exc);
+    Py_XDECREF(tb);
+    PyErr_Fetch(&new_exc, &new_val, &new_tb);
+    PyErr_NormalizeException(&new_exc, &new_val, &new_tb);
+    /* PyException_SetCause takes over our reference to the original
+     * exception instance, so val must not be released here.
+     */
+    PyException_SetCause(new_val, val);
+    /* PyErr_Restore takes over the three new references, which is why
+     * the returned pointer is only borrowed.
+     */
+    PyErr_Restore(new_exc, new_val, new_tb);
+    return new_val;
+}
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 224a80b..7789816 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -3054,8 +3054,10 @@ PyUnicode_Decode(const char *s,
goto onError;
if (!PyUnicode_Check(unicode)) {
PyErr_Format(PyExc_TypeError,
- "decoder did not return a str object (type=%.400s)",
- Py_TYPE(unicode)->tp_name);
+ "'%.400s' decoder returned '%.400s' instead of 'str'; "
+ "use codecs.decode() to decode to arbitrary types",
+ encoding,
+ Py_TYPE(unicode)->tp_name, Py_TYPE(unicode)->tp_name);
Py_DECREF(unicode);
goto onError;
}
@@ -3113,8 +3115,10 @@ PyUnicode_AsDecodedUnicode(PyObject *unicode,
goto onError;
if (!PyUnicode_Check(v)) {
PyErr_Format(PyExc_TypeError,
- "decoder did not return a str object (type=%.400s)",
- Py_TYPE(v)->tp_name);
+ "'%.400s' decoder returned '%.400s' instead of 'str'; "
+ "use codecs.decode() to decode to arbitrary types",
+ encoding,
+ Py_TYPE(unicode)->tp_name, Py_TYPE(unicode)->tp_name);
Py_DECREF(v);
goto onError;
}
@@ -3425,7 +3429,8 @@ PyUnicode_AsEncodedString(PyObject *unicode,
PyObject *b;
error = PyErr_WarnFormat(PyExc_RuntimeWarning, 1,
- "encoder %s returned bytearray instead of bytes",
+ "encoder %s returned bytearray instead of bytes; "
+ "use codecs.encode() to encode to arbitrary types",
encoding);
if (error) {
Py_DECREF(v);
@@ -3438,8 +3443,10 @@ PyUnicode_AsEncodedString(PyObject *unicode,
}
PyErr_Format(PyExc_TypeError,
- "encoder did not return a bytes object (type=%.400s)",
- Py_TYPE(v)->tp_name);
+ "'%.400s' encoder returned '%.400s' instead of 'bytes'; "
+ "use codecs.encode() to encode to arbitrary types",
+ encoding,
+ Py_TYPE(v)->tp_name, Py_TYPE(v)->tp_name);
Py_DECREF(v);
return NULL;
}
@@ -3465,8 +3472,10 @@ PyUnicode_AsEncodedUnicode(PyObject *unicode,
goto onError;
if (!PyUnicode_Check(v)) {
PyErr_Format(PyExc_TypeError,
- "encoder did not return an str object (type=%.400s)",
- Py_TYPE(v)->tp_name);
+ "'%.400s' encoder returned '%.400s' instead of 'str'; "
+ "use codecs.encode() to encode to arbitrary types",
+ encoding,
+ Py_TYPE(v)->tp_name, Py_TYPE(v)->tp_name);
Py_DECREF(v);
goto onError;
}