summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorNick Coghlan <ncoghlan@gmail.com>2013-11-13 13:49:21 (GMT)
committerNick Coghlan <ncoghlan@gmail.com>2013-11-13 13:49:21 (GMT)
commit8b097b4ed726b8282fce582cb2c20ab9c986fc21 (patch)
treeca9b18d186c9132f62378e1bde87e766beb2b379
parent59799a83995f135bdb1b1a0994052c1f24c68e83 (diff)
downloadcpython-8b097b4ed726b8282fce582cb2c20ab9c986fc21.zip
cpython-8b097b4ed726b8282fce582cb2c20ab9c986fc21.tar.gz
cpython-8b097b4ed726b8282fce582cb2c20ab9c986fc21.tar.bz2
Close #17828: better handling of codec errors
- output type errors now redirect users to the type-neutral convenience functions in the codecs module - stateless errors that occur during encoding and decoding will now be automatically wrapped in exceptions that give the name of the codec involved
-rw-r--r--Doc/whatsnew/3.4.rst78
-rw-r--r--Include/pyerrors.h22
-rw-r--r--Lib/test/test_codecs.py193
-rw-r--r--Misc/NEWS9
-rw-r--r--Objects/exceptions.c113
-rw-r--r--Objects/unicodeobject.c27
-rw-r--r--Python/codecs.c18
7 files changed, 414 insertions, 46 deletions
diff --git a/Doc/whatsnew/3.4.rst b/Doc/whatsnew/3.4.rst
index dd992ed..a04aee8 100644
--- a/Doc/whatsnew/3.4.rst
+++ b/Doc/whatsnew/3.4.rst
@@ -102,6 +102,7 @@ New expected features for Python implementations:
* :ref:`PEP 446: Make newly created file descriptors non-inheritable <pep-446>`.
* command line option for :ref:`isolated mode <using-on-misc-options>`,
(:issue:`16499`).
+* improvements to handling of non-Unicode codecs
Significantly Improved Library Modules:
@@ -170,6 +171,70 @@ PEP 446: Make newly created file descriptors non-inheritable
PEP written and implemented by Victor Stinner.
+Improvements to handling of non-Unicode codecs
+==============================================
+
+Since it was first introduced, the :mod:`codecs` module has always been
+intended to operate as a type-neutral dynamic encoding and decoding
+system. However, its close coupling with the Python text model, especially
+the type restricted convenience methods on the builtin :class:`str`,
+:class:`bytes` and :class:`bytearray` types, has historically obscured that
+fact.
+
+As a key step in clarifying the situation, the :meth:`codecs.encode` and
+:meth:`codecs.decode` convenience functions are now properly documented in
+Python 2.7, 3.3 and 3.4. These functions have existed in the :mod:`codecs`
+module and have been covered by the regression test suite since Python 2.4,
+but were previously only discoverable through runtime introspection.
+
+Unlike the convenience methods on :class:`str`, :class:`bytes` and
+:class:`bytearray`, these convenience functions support arbitrary codecs
+in both Python 2 and Python 3, rather than being limited to Unicode text
+encodings (in Python 3) or ``basestring`` <-> ``basestring`` conversions
+(in Python 2).
+
+In Python 3.4, the errors raised by the convenience methods when a codec
+produces the incorrect output type have also been updated to direct users
+towards these general purpose convenience functions::
+
+ >>> import codecs
+
+ >>> codecs.encode(b"hello", "bz2_codec").decode("bz2_codec")
+ Traceback (most recent call last):
+ File "<stdin>", line 1, in <module>
+ TypeError: 'bz2_codec' decoder returned 'bytes' instead of 'str'; use codecs.decode() to decode to arbitrary types
+
+ >>> "hello".encode("rot_13")
+ Traceback (most recent call last):
+ File "<stdin>", line 1, in <module>
+ TypeError: 'rot_13' encoder returned 'str' instead of 'bytes'; use codecs.encode() to encode to arbitrary types
+
+In a related change, whenever it is feasible without breaking backwards
+compatibility, exceptions raised during encoding and decoding operations
+will be wrapped in a chained exception of the same type that mentions the
+name of the codec responsible for producing the error::
+
+ >>> b"hello".decode("uu_codec")
+ ValueError: Missing "begin" line in input data
+
+ The above exception was the direct cause of the following exception:
+
+ Traceback (most recent call last):
+ File "<stdin>", line 1, in <module>
+ ValueError: decoding with 'uu_codec' codec failed (ValueError: Missing "begin" line in input data)
+
+ >>> "hello".encode("bz2_codec")
+ TypeError: 'str' does not support the buffer interface
+
+ The above exception was the direct cause of the following exception:
+
+ Traceback (most recent call last):
+ File "<stdin>", line 1, in <module>
+ TypeError: encoding with 'bz2_codec' codec failed (TypeError: 'str' does not support the buffer interface)
+
+(Contributed by Nick Coghlan in :issue:`17827` and :issue:`17828`)
+
+
Other Language Changes
======================
@@ -262,19 +327,6 @@ audioop
Added support for 24-bit samples (:issue:`12866`).
-codecs
-------
-
-The :meth:`codecs.encode` and :meth:`codecs.decode` convenience functions are
-now properly documented. These functions have existed in the :mod:`codecs`
-module since ~2004, but were previously only discoverable through runtime
-introspection.
-
-Unlike the convenience methods on :class:`str`, :class:`bytes` and
-:class:`bytearray`, these convenience functions support arbitrary codecs,
-rather than being limited to Unicode text encodings.
-
-
colorsys
--------
diff --git a/Include/pyerrors.h b/Include/pyerrors.h
index 224567b..6a8e0e8 100644
--- a/Include/pyerrors.h
+++ b/Include/pyerrors.h
@@ -285,6 +285,28 @@ PyAPI_FUNC(PyObject *) PyErr_NewExceptionWithDoc(
const char *name, const char *doc, PyObject *base, PyObject *dict);
PyAPI_FUNC(void) PyErr_WriteUnraisable(PyObject *);
+/* In exceptions.c */
+#ifndef Py_LIMITED_API
+/* Helper that attempts to replace the current exception with one of the
+ * same type but with a prefix added to the exception text. The resulting
+ * exception description looks like:
+ *
+ * prefix (exc_type: original_exc_str)
+ *
+ * Only some exceptions can be safely replaced. If the function determines
+ * it isn't safe to perform the replacement, it will leave the original
+ * unmodified exception in place.
+ *
+ * Returns a borrowed reference to the new exception (if any), NULL if the
+ * existing exception was left in place.
+ */
+PyAPI_FUNC(PyObject *) _PyErr_TrySetFromCause(
+ const char *prefix_format, /* ASCII-encoded string */
+ ...
+ );
+#endif
+
+
/* In sigcheck.c or signalmodule.c */
PyAPI_FUNC(int) PyErr_CheckSignals(void);
PyAPI_FUNC(void) PyErr_SetInterrupt(void);
diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py
index 5cef4da..f43ac3a 100644
--- a/Lib/test/test_codecs.py
+++ b/Lib/test/test_codecs.py
@@ -1,5 +1,6 @@
import _testcapi
import codecs
+import contextlib
import io
import locale
import sys
@@ -2292,28 +2293,31 @@ class TransformCodecTest(unittest.TestCase):
def test_basics(self):
binput = bytes(range(256))
for encoding in bytes_transform_encodings:
- # generic codecs interface
- (o, size) = codecs.getencoder(encoding)(binput)
- self.assertEqual(size, len(binput))
- (i, size) = codecs.getdecoder(encoding)(o)
- self.assertEqual(size, len(o))
- self.assertEqual(i, binput)
+ with self.subTest(encoding=encoding):
+ # generic codecs interface
+ (o, size) = codecs.getencoder(encoding)(binput)
+ self.assertEqual(size, len(binput))
+ (i, size) = codecs.getdecoder(encoding)(o)
+ self.assertEqual(size, len(o))
+ self.assertEqual(i, binput)
def test_read(self):
for encoding in bytes_transform_encodings:
- sin = codecs.encode(b"\x80", encoding)
- reader = codecs.getreader(encoding)(io.BytesIO(sin))
- sout = reader.read()
- self.assertEqual(sout, b"\x80")
+ with self.subTest(encoding=encoding):
+ sin = codecs.encode(b"\x80", encoding)
+ reader = codecs.getreader(encoding)(io.BytesIO(sin))
+ sout = reader.read()
+ self.assertEqual(sout, b"\x80")
def test_readline(self):
for encoding in bytes_transform_encodings:
if encoding in ['uu_codec', 'zlib_codec']:
continue
- sin = codecs.encode(b"\x80", encoding)
- reader = codecs.getreader(encoding)(io.BytesIO(sin))
- sout = reader.readline()
- self.assertEqual(sout, b"\x80")
+ with self.subTest(encoding=encoding):
+ sin = codecs.encode(b"\x80", encoding)
+ reader = codecs.getreader(encoding)(io.BytesIO(sin))
+ sout = reader.readline()
+ self.assertEqual(sout, b"\x80")
def test_buffer_api_usage(self):
# We check all the transform codecs accept memoryview input
@@ -2321,17 +2325,158 @@ class TransformCodecTest(unittest.TestCase):
# and also that they roundtrip correctly
original = b"12345\x80"
for encoding in bytes_transform_encodings:
- data = original
- view = memoryview(data)
- data = codecs.encode(data, encoding)
- view_encoded = codecs.encode(view, encoding)
- self.assertEqual(view_encoded, data)
- view = memoryview(data)
- data = codecs.decode(data, encoding)
- self.assertEqual(data, original)
- view_decoded = codecs.decode(view, encoding)
- self.assertEqual(view_decoded, data)
+ with self.subTest(encoding=encoding):
+ data = original
+ view = memoryview(data)
+ data = codecs.encode(data, encoding)
+ view_encoded = codecs.encode(view, encoding)
+ self.assertEqual(view_encoded, data)
+ view = memoryview(data)
+ data = codecs.decode(data, encoding)
+ self.assertEqual(data, original)
+ view_decoded = codecs.decode(view, encoding)
+ self.assertEqual(view_decoded, data)
+
+ def test_type_error_for_text_input(self):
+ # Check binary -> binary codecs give a good error for str input
+ bad_input = "bad input type"
+ for encoding in bytes_transform_encodings:
+ with self.subTest(encoding=encoding):
+ msg = "^encoding with '{}' codec failed".format(encoding)
+ with self.assertRaisesRegex(TypeError, msg) as failure:
+ bad_input.encode(encoding)
+ self.assertTrue(isinstance(failure.exception.__cause__,
+ TypeError))
+
+ def test_type_error_for_binary_input(self):
+ # Check str -> str codec gives a good error for binary input
+ for bad_input in (b"immutable", bytearray(b"mutable")):
+ with self.subTest(bad_input=bad_input):
+ msg = "^decoding with 'rot_13' codec failed"
+ with self.assertRaisesRegex(AttributeError, msg) as failure:
+ bad_input.decode("rot_13")
+ self.assertTrue(isinstance(failure.exception.__cause__,
+ AttributeError))
+
+ def test_bad_decoding_output_type(self):
+ # Check bytes.decode and bytearray.decode give a good error
+ # message for binary -> binary codecs
+ data = b"encode first to ensure we meet any format restrictions"
+ for encoding in bytes_transform_encodings:
+ with self.subTest(encoding=encoding):
+ encoded_data = codecs.encode(data, encoding)
+ fmt = ("'{}' decoder returned 'bytes' instead of 'str'; "
+ "use codecs.decode\(\) to decode to arbitrary types")
+ msg = fmt.format(encoding)
+ with self.assertRaisesRegex(TypeError, msg):
+ encoded_data.decode(encoding)
+ with self.assertRaisesRegex(TypeError, msg):
+ bytearray(encoded_data).decode(encoding)
+
+ def test_bad_encoding_output_type(self):
+ # Check str.encode gives a good error message for str -> str codecs
+ msg = ("'rot_13' encoder returned 'str' instead of 'bytes'; "
+ "use codecs.encode\(\) to encode to arbitrary types")
+ with self.assertRaisesRegex(TypeError, msg):
+ "just an example message".encode("rot_13")
+
+
+# The codec system tries to wrap exceptions in order to ensure the error
+# mentions the operation being performed and the codec involved. We
+# currently *only* want this to happen for relatively stateless
+# exceptions, where the only significant information they contain is their
+# type and a single str argument.
+class ExceptionChainingTest(unittest.TestCase):
+ def setUp(self):
+ # There's no way to unregister a codec search function, so we just
+ # ensure we render this one fairly harmless after the test
+ # case finishes by using the test case repr as the codec name
+ # The codecs module normalizes codec names, although this doesn't
+ # appear to be formally documented...
+ self.codec_name = repr(self).lower().replace(" ", "-")
+ self.codec_info = None
+ codecs.register(self.get_codec)
+
+ def get_codec(self, codec_name):
+ if codec_name != self.codec_name:
+ return None
+ return self.codec_info
+
+ def set_codec(self, obj_to_raise):
+ def raise_obj(*args, **kwds):
+ raise obj_to_raise
+ self.codec_info = codecs.CodecInfo(raise_obj, raise_obj,
+ name=self.codec_name)
+
+ @contextlib.contextmanager
+ def assertWrapped(self, operation, exc_type, msg):
+ full_msg = "{} with '{}' codec failed \({}: {}\)".format(
+ operation, self.codec_name, exc_type.__name__, msg)
+ with self.assertRaisesRegex(exc_type, full_msg) as caught:
+ yield caught
+
+ def check_wrapped(self, obj_to_raise, msg):
+ self.set_codec(obj_to_raise)
+ with self.assertWrapped("encoding", RuntimeError, msg):
+ "str_input".encode(self.codec_name)
+ with self.assertWrapped("encoding", RuntimeError, msg):
+ codecs.encode("str_input", self.codec_name)
+ with self.assertWrapped("decoding", RuntimeError, msg):
+ b"bytes input".decode(self.codec_name)
+ with self.assertWrapped("decoding", RuntimeError, msg):
+ codecs.decode(b"bytes input", self.codec_name)
+
+ def test_raise_by_type(self):
+ self.check_wrapped(RuntimeError, "")
+
+ def test_raise_by_value(self):
+ msg = "This should be wrapped"
+ self.check_wrapped(RuntimeError(msg), msg)
+
+ @contextlib.contextmanager
+ def assertNotWrapped(self, operation, exc_type, msg):
+ with self.assertRaisesRegex(exc_type, msg) as caught:
+ yield caught
+ actual_msg = str(caught.exception)
+ self.assertNotIn(operation, actual_msg)
+ self.assertNotIn(self.codec_name, actual_msg)
+
+ def check_not_wrapped(self, obj_to_raise, msg):
+ self.set_codec(obj_to_raise)
+ with self.assertNotWrapped("encoding", RuntimeError, msg):
+ "str input".encode(self.codec_name)
+ with self.assertNotWrapped("encoding", RuntimeError, msg):
+ codecs.encode("str input", self.codec_name)
+ with self.assertNotWrapped("decoding", RuntimeError, msg):
+ b"bytes input".decode(self.codec_name)
+ with self.assertNotWrapped("decoding", RuntimeError, msg):
+ codecs.decode(b"bytes input", self.codec_name)
+
+ def test_init_override_is_not_wrapped(self):
+ class CustomInit(RuntimeError):
+ def __init__(self):
+ pass
+ self.check_not_wrapped(CustomInit, "")
+
+ def test_new_override_is_not_wrapped(self):
+ class CustomNew(RuntimeError):
+ def __new__(cls):
+ return super().__new__(cls)
+ self.check_not_wrapped(CustomNew, "")
+
+ def test_instance_attribute_is_not_wrapped(self):
+ msg = "This should NOT be wrapped"
+ exc = RuntimeError(msg)
+ exc.attr = 1
+ self.check_not_wrapped(exc, msg)
+
+ def test_non_str_arg_is_not_wrapped(self):
+ self.check_not_wrapped(RuntimeError(1), "1")
+
+ def test_multiple_args_is_not_wrapped(self):
+ msg = "\('a', 'b', 'c'\)"
+ self.check_not_wrapped(RuntimeError('a', 'b', 'c'), msg)
@unittest.skipUnless(sys.platform == 'win32',
diff --git a/Misc/NEWS b/Misc/NEWS
index 16c0475..0083fe0 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -10,6 +10,15 @@ Projected release date: 2013-11-24
Core and Builtins
-----------------
+- Issue #17828: Output type errors in str.encode(), bytes.decode() and
+ bytearray.decode() now direct users to codecs.encode() or codecs.decode()
+ as appropriate.
+
+- Issue #17828: The interpreter now attempts to chain errors that occur in
+ codec processing with a replacement exception of the same type that
+ includes the codec name in the error message. It ensures it only does this
+ when the creation of the replacement exception won't lose any information.
+
- Issue #19466: Clear the frames of daemon threads earlier during the
Python shutdown to call objects destructors. So "unclosed file" resource
warnings are now corretly emitted for daemon threads.
diff --git a/Objects/exceptions.c b/Objects/exceptions.c
index de5d746..53d8b66 100644
--- a/Objects/exceptions.c
+++ b/Objects/exceptions.c
@@ -2591,3 +2591,116 @@ _PyExc_Fini(void)
free_preallocated_memerrors();
Py_CLEAR(errnomap);
}
+
+/* Helper to do the equivalent of "raise X from Y" in C, but always using
+ * the current exception rather than passing one in.
+ *
+ * We currently limit this to *only* exceptions that use the BaseException
+ * tp_init and tp_new methods, since we can be reasonably sure we can wrap
+ * those correctly without losing data and without losing backwards
+ * compatibility.
+ *
+ * We also aim to rule out *all* exceptions that might be storing additional
+ * state, whether by having a size difference relative to BaseException,
+ * additional arguments passed in during construction or by having a
+ * non-empty instance dict.
+ *
+ * We need to be very careful with what we wrap, since changing types to
+ * a broader exception type would be backwards incompatible for
+ * existing codecs, and with different init or new method implementations
+ * may either not support instantiation with PyErr_Format or lose
+ * information when instantiated that way.
+ *
+ * XXX (ncoghlan): This could be made more comprehensive by exploiting the
+ * fact that exceptions are expected to support pickling. If more builtin
+ * exceptions (e.g. AttributeError) start to be converted to rich
+ * exceptions with additional attributes, that's probably a better approach
+ * to pursue over adding special cases for particular stateful subclasses.
+ *
+ * Returns a borrowed reference to the new exception (if any), NULL if the
+ * existing exception was left in place.
+ */
+PyObject *
+_PyErr_TrySetFromCause(const char *format, ...)
+{
+ PyObject* msg_prefix;
+ PyObject *exc, *val, *tb;
+ PyTypeObject *caught_type;
+ PyObject *instance_dict;
+ PyObject *instance_args;
+ Py_ssize_t num_args;
+ PyObject *new_exc, *new_val, *new_tb;
+ va_list vargs;
+
+#ifdef HAVE_STDARG_PROTOTYPES
+ va_start(vargs, format);
+#else
+ va_start(vargs);
+#endif
+
+ PyErr_Fetch(&exc, &val, &tb);
+ caught_type = (PyTypeObject *) exc;
+ /* Ensure type info indicates no extra state is stored at the C level */
+ if (caught_type->tp_init != (initproc) BaseException_init ||
+ caught_type->tp_new != BaseException_new ||
+ caught_type->tp_basicsize != _PyExc_BaseException.tp_basicsize ||
+ caught_type->tp_itemsize != _PyExc_BaseException.tp_itemsize
+ ) {
+ /* We can't be sure we can wrap this safely, since it may contain
+ * more state than just the exception type. Accordingly, we just
+ * leave it alone.
+ */
+ PyErr_Restore(exc, val, tb);
+ return NULL;
+ }
+
+ /* Check the args are empty or contain a single string */
+ PyErr_NormalizeException(&exc, &val, &tb);
+ instance_args = ((PyBaseExceptionObject *) val)->args;
+ num_args = PyTuple_GET_SIZE(instance_args);
+ if ((num_args > 1) ||
+ (num_args == 1 &&
+ !PyUnicode_CheckExact(PyTuple_GET_ITEM(instance_args, 0))
+ )
+ ) {
+ /* More than 1 arg, or the one arg we do have isn't a string
+ */
+ PyErr_Restore(exc, val, tb);
+ return NULL;
+ }
+
+ /* Ensure the instance dict is also empty */
+ instance_dict = *_PyObject_GetDictPtr(val);
+ if (instance_dict != NULL && PyObject_Length(instance_dict) > 0) {
+ /* While we could potentially copy a non-empty instance dictionary
+ * to the replacement exception, for now we take the more
+ * conservative path of leaving exceptions with attributes set
+ * alone.
+ */
+ PyErr_Restore(exc, val, tb);
+ return NULL;
+ }
+
+ /* For exceptions that we can wrap safely, we chain the original
+ * exception to a new one of the exact same type with an
+ * error message that mentions the additional details and the
+ * original exception.
+ *
+ * It would be nice to wrap OSError and various other exception
+ * types as well, but that's quite a bit trickier due to the extra
+ * state potentially stored on OSError instances.
+ */
+ msg_prefix = PyUnicode_FromFormatV(format, vargs);
+ if (msg_prefix == NULL)
+ return NULL;
+
+ PyErr_Format(exc, "%U (%s: %S)",
+ msg_prefix, Py_TYPE(val)->tp_name, val);
+ Py_DECREF(exc);
+ Py_XDECREF(tb);
+ PyErr_Fetch(&new_exc, &new_val, &new_tb);
+ PyErr_NormalizeException(&new_exc, &new_val, &new_tb);
+ PyException_SetCause(new_val, val);
+ PyErr_Restore(new_exc, new_val, new_tb);
+ return new_val;
+}
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 224a80b..7789816 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -3054,8 +3054,10 @@ PyUnicode_Decode(const char *s,
goto onError;
if (!PyUnicode_Check(unicode)) {
PyErr_Format(PyExc_TypeError,
- "decoder did not return a str object (type=%.400s)",
- Py_TYPE(unicode)->tp_name);
+ "'%.400s' decoder returned '%.400s' instead of 'str'; "
+ "use codecs.decode() to decode to arbitrary types",
+ encoding,
+ Py_TYPE(unicode)->tp_name, Py_TYPE(unicode)->tp_name);
Py_DECREF(unicode);
goto onError;
}
@@ -3113,8 +3115,10 @@ PyUnicode_AsDecodedUnicode(PyObject *unicode,
goto onError;
if (!PyUnicode_Check(v)) {
PyErr_Format(PyExc_TypeError,
- "decoder did not return a str object (type=%.400s)",
- Py_TYPE(v)->tp_name);
+ "'%.400s' decoder returned '%.400s' instead of 'str'; "
+ "use codecs.decode() to decode to arbitrary types",
+ encoding,
+ Py_TYPE(unicode)->tp_name, Py_TYPE(unicode)->tp_name);
Py_DECREF(v);
goto onError;
}
@@ -3425,7 +3429,8 @@ PyUnicode_AsEncodedString(PyObject *unicode,
PyObject *b;
error = PyErr_WarnFormat(PyExc_RuntimeWarning, 1,
- "encoder %s returned bytearray instead of bytes",
+ "encoder %s returned bytearray instead of bytes; "
+ "use codecs.encode() to encode to arbitrary types",
encoding);
if (error) {
Py_DECREF(v);
@@ -3438,8 +3443,10 @@ PyUnicode_AsEncodedString(PyObject *unicode,
}
PyErr_Format(PyExc_TypeError,
- "encoder did not return a bytes object (type=%.400s)",
- Py_TYPE(v)->tp_name);
+ "'%.400s' encoder returned '%.400s' instead of 'bytes'; "
+ "use codecs.encode() to encode to arbitrary types",
+ encoding,
+ Py_TYPE(v)->tp_name, Py_TYPE(v)->tp_name);
Py_DECREF(v);
return NULL;
}
@@ -3465,8 +3472,10 @@ PyUnicode_AsEncodedUnicode(PyObject *unicode,
goto onError;
if (!PyUnicode_Check(v)) {
PyErr_Format(PyExc_TypeError,
- "encoder did not return an str object (type=%.400s)",
- Py_TYPE(v)->tp_name);
+ "'%.400s' encoder returned '%.400s' instead of 'str'; "
+ "use codecs.encode() to encode to arbitrary types",
+ encoding,
+ Py_TYPE(v)->tp_name, Py_TYPE(v)->tp_name);
Py_DECREF(v);
goto onError;
}
diff --git a/Python/codecs.c b/Python/codecs.c
index c541ba0..e2edc26 100644
--- a/Python/codecs.c
+++ b/Python/codecs.c
@@ -332,6 +332,22 @@ PyObject *PyCodec_StreamWriter(const char *encoding,
return codec_getstreamcodec(encoding, stream, errors, 3);
}
+/* Helper that tries to ensure the reported exception chain indicates the
+ * codec that was invoked to trigger the failure without changing the type
+ * of the exception raised.
+ */
+static void
+wrap_codec_error(const char *operation,
+ const char *encoding)
+{
+ /* TrySetFromCause will replace the active exception with a suitably
+ * updated clone if it can, otherwise it will leave the original
+ * exception alone.
+ */
+ _PyErr_TrySetFromCause("%s with '%s' codec failed",
+ operation, encoding);
+}
+
/* Encode an object (e.g. an Unicode object) using the given encoding
and return the resulting encoded object (usually a Python string).
@@ -376,6 +392,7 @@ PyObject *PyCodec_Encode(PyObject *object,
Py_XDECREF(result);
Py_XDECREF(args);
Py_XDECREF(encoder);
+ wrap_codec_error("encoding", encoding);
return NULL;
}
@@ -422,6 +439,7 @@ PyObject *PyCodec_Decode(PyObject *object,
Py_XDECREF(args);
Py_XDECREF(decoder);
Py_XDECREF(result);
+ wrap_codec_error("decoding", encoding);
return NULL;
}