From 76350e85ebf96df588384f3d9bdc20d11045bef4 Mon Sep 17 00:00:00 2001 From: Irit Katriel <1055913+iritkatriel@users.noreply.github.com> Date: Tue, 21 Mar 2023 21:36:31 +0000 Subject: gh-102406: replace exception chaining by PEP-678 notes in codecs (#102407) --- Include/cpython/pyerrors.h | 18 ---- Lib/test/test_codecs.py | 106 ++++++++------------ .../2023-03-03-23-21-16.gh-issue-102406.XLqYO3.rst | 1 + Objects/exceptions.c | 110 --------------------- Python/codecs.c | 35 ++++--- 5 files changed, 64 insertions(+), 206 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2023-03-03-23-21-16.gh-issue-102406.XLqYO3.rst diff --git a/Include/cpython/pyerrors.h b/Include/cpython/pyerrors.h index d0300f6..65bdc94 100644 --- a/Include/cpython/pyerrors.h +++ b/Include/cpython/pyerrors.h @@ -116,24 +116,6 @@ PyAPI_FUNC(int) _PyException_AddNote( PyObject *exc, PyObject *note); -/* Helper that attempts to replace the current exception with one of the - * same type but with a prefix added to the exception text. The resulting - * exception description looks like: - * - * prefix (exc_type: original_exc_str) - * - * Only some exceptions can be safely replaced. If the function determines - * it isn't safe to perform the replacement, it will leave the original - * unmodified exception in place. - * - * Returns a borrowed reference to the new exception (if any), NULL if the - * existing exception was left in place. - */ -PyAPI_FUNC(PyObject *) _PyErr_TrySetFromCause( - const char *prefix_format, /* ASCII-encoded string */ - ... - ); - /* In signalmodule.c */ int PySignal_SetWakeupFd(int fd); diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py index e3add0c..376175f 100644 --- a/Lib/test/test_codecs.py +++ b/Lib/test/test_codecs.py @@ -2819,24 +2819,19 @@ class TransformCodecTest(unittest.TestCase): self.assertIsNone(failure.exception.__cause__) @unittest.skipUnless(zlib, "Requires zlib support") - def test_custom_zlib_error_is_wrapped(self): + def test_custom_zlib_error_is_noted(self): # Check zlib codec gives a good error for malformed input - msg = "^decoding with 'zlib_codec' codec failed" - with self.assertRaisesRegex(Exception, msg) as failure: + msg = "decoding with 'zlib_codec' codec failed" + with self.assertRaises(Exception) as failure: codecs.decode(b"hello", "zlib_codec") - self.assertIsInstance(failure.exception.__cause__, - type(failure.exception)) + self.assertEqual(msg, failure.exception.__notes__[0]) - def test_custom_hex_error_is_wrapped(self): + def test_custom_hex_error_is_noted(self): # Check hex codec gives a good error for malformed input - msg = "^decoding with 'hex_codec' codec failed" - with self.assertRaisesRegex(Exception, msg) as failure: + msg = "decoding with 'hex_codec' codec failed" + with self.assertRaises(Exception) as failure: codecs.decode(b"hello", "hex_codec") - self.assertIsInstance(failure.exception.__cause__, - type(failure.exception)) - - # Unfortunately, the bz2 module throws OSError, which the codec - # machinery currently can't wrap :( + self.assertEqual(msg, failure.exception.__notes__[0]) # Ensure codec aliases from http://bugs.python.org/issue7475 work def test_aliases(self): @@ -2860,11 +2855,8 @@ class TransformCodecTest(unittest.TestCase): self.assertRaises(ValueError, codecs.decode, b"", "uu-codec") -# The codec system tries to wrap exceptions in order to ensure the error -# mentions the operation being performed and the codec involved. We -# currently *only* want this to happen for relatively stateless -# exceptions, where the only significant information they contain is their -# type and a single str argument. +# The codec system tries to add notes to exceptions in order to ensure +# the error mentions the operation being performed and the codec involved. # Use a local codec registry to avoid appearing to leak objects when # registering multiple search functions @@ -2874,10 +2866,10 @@ def _get_test_codec(codec_name): return _TEST_CODECS.get(codec_name) -class ExceptionChainingTest(unittest.TestCase): +class ExceptionNotesTest(unittest.TestCase): def setUp(self): - self.codec_name = 'exception_chaining_test' + self.codec_name = 'exception_notes_test' codecs.register(_get_test_codec) self.addCleanup(codecs.unregister, _get_test_codec) @@ -2901,91 +2893,77 @@ class ExceptionChainingTest(unittest.TestCase): _TEST_CODECS[self.codec_name] = codec_info @contextlib.contextmanager - def assertWrapped(self, operation, exc_type, msg): - full_msg = r"{} with {!r} codec failed \({}: {}\)".format( - operation, self.codec_name, exc_type.__name__, msg) - with self.assertRaisesRegex(exc_type, full_msg) as caught: + def assertNoted(self, operation, exc_type, msg): + full_msg = r"{} with {!r} codec failed".format( + operation, self.codec_name) + with self.assertRaises(exc_type) as caught: yield caught - self.assertIsInstance(caught.exception.__cause__, exc_type) - self.assertIsNotNone(caught.exception.__cause__.__traceback__) + self.assertIn(full_msg, caught.exception.__notes__[0]) + caught.exception.__notes__.clear() def raise_obj(self, *args, **kwds): # Helper to dynamically change the object raised by a test codec raise self.obj_to_raise - def check_wrapped(self, obj_to_raise, msg, exc_type=RuntimeError): + def check_note(self, obj_to_raise, msg, exc_type=RuntimeError): self.obj_to_raise = obj_to_raise self.set_codec(self.raise_obj, self.raise_obj) - with self.assertWrapped("encoding", exc_type, msg): + with self.assertNoted("encoding", exc_type, msg): "str_input".encode(self.codec_name) - with self.assertWrapped("encoding", exc_type, msg): + with self.assertNoted("encoding", exc_type, msg): codecs.encode("str_input", self.codec_name) - with self.assertWrapped("decoding", exc_type, msg): + with self.assertNoted("decoding", exc_type, msg): b"bytes input".decode(self.codec_name) - with self.assertWrapped("decoding", exc_type, msg): + with self.assertNoted("decoding", exc_type, msg): codecs.decode(b"bytes input", self.codec_name) def test_raise_by_type(self): - self.check_wrapped(RuntimeError, "") + self.check_note(RuntimeError, "") def test_raise_by_value(self): - msg = "This should be wrapped" - self.check_wrapped(RuntimeError(msg), msg) + msg = "This should be noted" + self.check_note(RuntimeError(msg), msg) def test_raise_grandchild_subclass_exact_size(self): - msg = "This should be wrapped" + msg = "This should be noted" class MyRuntimeError(RuntimeError): __slots__ = () - self.check_wrapped(MyRuntimeError(msg), msg, MyRuntimeError) + self.check_note(MyRuntimeError(msg), msg, MyRuntimeError) def test_raise_subclass_with_weakref_support(self): - msg = "This should be wrapped" + msg = "This should be noted" class MyRuntimeError(RuntimeError): pass - self.check_wrapped(MyRuntimeError(msg), msg, MyRuntimeError) - - def check_not_wrapped(self, obj_to_raise, msg): - def raise_obj(*args, **kwds): - raise obj_to_raise - self.set_codec(raise_obj, raise_obj) - with self.assertRaisesRegex(RuntimeError, msg): - "str input".encode(self.codec_name) - with self.assertRaisesRegex(RuntimeError, msg): - codecs.encode("str input", self.codec_name) - with self.assertRaisesRegex(RuntimeError, msg): - b"bytes input".decode(self.codec_name) - with self.assertRaisesRegex(RuntimeError, msg): - codecs.decode(b"bytes input", self.codec_name) + self.check_note(MyRuntimeError(msg), msg, MyRuntimeError) - def test_init_override_is_not_wrapped(self): + def test_init_override(self): class CustomInit(RuntimeError): def __init__(self): pass - self.check_not_wrapped(CustomInit, "") + self.check_note(CustomInit, "") - def test_new_override_is_not_wrapped(self): + def test_new_override(self): class CustomNew(RuntimeError): def __new__(cls): return super().__new__(cls) - self.check_not_wrapped(CustomNew, "") + self.check_note(CustomNew, "") - def test_instance_attribute_is_not_wrapped(self): - msg = "This should NOT be wrapped" + def test_instance_attribute(self): + msg = "This should be noted" exc = RuntimeError(msg) exc.attr = 1 - self.check_not_wrapped(exc, "^{}$".format(msg)) + self.check_note(exc, "^{}$".format(msg)) - def test_non_str_arg_is_not_wrapped(self): - self.check_not_wrapped(RuntimeError(1), "1") + def test_non_str_arg(self): + self.check_note(RuntimeError(1), "1") - def test_multiple_args_is_not_wrapped(self): + def test_multiple_args(self): msg_re = r"^\('a', 'b', 'c'\)$" - self.check_not_wrapped(RuntimeError('a', 'b', 'c'), msg_re) + self.check_note(RuntimeError('a', 'b', 'c'), msg_re) # http://bugs.python.org/issue19609 - def test_codec_lookup_failure_not_wrapped(self): + def test_codec_lookup_failure(self): msg = "^unknown encoding: {}$".format(self.codec_name) - # The initial codec lookup should not be wrapped with self.assertRaisesRegex(LookupError, msg): "str input".encode(self.codec_name) with self.assertRaisesRegex(LookupError, msg): diff --git a/Misc/NEWS.d/next/Core and Builtins/2023-03-03-23-21-16.gh-issue-102406.XLqYO3.rst b/Misc/NEWS.d/next/Core and Builtins/2023-03-03-23-21-16.gh-issue-102406.XLqYO3.rst new file mode 100644 index 0000000..e0d061c --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2023-03-03-23-21-16.gh-issue-102406.XLqYO3.rst @@ -0,0 +1 @@ +:mod:`codecs` encoding/decoding errors now get the context information (which operation and which codecs) attached as :pep:`678` notes instead of through chaining a new instance of the exception. diff --git a/Objects/exceptions.c b/Objects/exceptions.c index d69f740..a355244 100644 --- a/Objects/exceptions.c +++ b/Objects/exceptions.c @@ -3764,113 +3764,3 @@ _PyException_AddNote(PyObject *exc, PyObject *note) return res; } -/* Helper to do the equivalent of "raise X from Y" in C, but always using - * the current exception rather than passing one in. - * - * We currently limit this to *only* exceptions that use the BaseException - * tp_init and tp_new methods, since we can be reasonably sure we can wrap - * those correctly without losing data and without losing backwards - * compatibility. - * - * We also aim to rule out *all* exceptions that might be storing additional - * state, whether by having a size difference relative to BaseException, - * additional arguments passed in during construction or by having a - * non-empty instance dict. - * - * We need to be very careful with what we wrap, since changing types to - * a broader exception type would be backwards incompatible for - * existing codecs, and with different init or new method implementations - * may either not support instantiation with PyErr_Format or lose - * information when instantiated that way. - * - * XXX (ncoghlan): This could be made more comprehensive by exploiting the - * fact that exceptions are expected to support pickling. If more builtin - * exceptions (e.g. AttributeError) start to be converted to rich - * exceptions with additional attributes, that's probably a better approach - * to pursue over adding special cases for particular stateful subclasses. - * - * Returns a borrowed reference to the new exception (if any), NULL if the - * existing exception was left in place. - */ -PyObject * -_PyErr_TrySetFromCause(const char *format, ...) -{ - PyObject* msg_prefix; - PyObject *instance_args; - Py_ssize_t num_args, caught_type_size, base_exc_size; - va_list vargs; - int same_basic_size; - - PyObject *exc = PyErr_GetRaisedException(); - PyTypeObject *caught_type = Py_TYPE(exc); - /* Ensure type info indicates no extra state is stored at the C level - * and that the type can be reinstantiated using PyErr_Format - */ - caught_type_size = caught_type->tp_basicsize; - base_exc_size = _PyExc_BaseException.tp_basicsize; - same_basic_size = ( - caught_type_size == base_exc_size || - (_PyType_SUPPORTS_WEAKREFS(caught_type) && - (caught_type_size == base_exc_size + (Py_ssize_t)sizeof(PyObject *)) - ) - ); - if (caught_type->tp_init != (initproc)BaseException_init || - caught_type->tp_new != BaseException_new || - !same_basic_size || - caught_type->tp_itemsize != _PyExc_BaseException.tp_itemsize) { - /* We can't be sure we can wrap this safely, since it may contain - * more state than just the exception type. Accordingly, we just - * leave it alone. - */ - PyErr_SetRaisedException(exc); - return NULL; - } - - /* Check the args are empty or contain a single string */ - instance_args = ((PyBaseExceptionObject *)exc)->args; - num_args = PyTuple_GET_SIZE(instance_args); - if (num_args > 1 || - (num_args == 1 && - !PyUnicode_CheckExact(PyTuple_GET_ITEM(instance_args, 0)))) { - /* More than 1 arg, or the one arg we do have isn't a string - */ - PyErr_SetRaisedException(exc); - return NULL; - } - - /* Ensure the instance dict is also empty */ - if (!_PyObject_IsInstanceDictEmpty(exc)) { - /* While we could potentially copy a non-empty instance dictionary - * to the replacement exception, for now we take the more - * conservative path of leaving exceptions with attributes set - * alone. - */ - PyErr_SetRaisedException(exc); - return NULL; - } - - /* For exceptions that we can wrap safely, we chain the original - * exception to a new one of the exact same type with an - * error message that mentions the additional details and the - * original exception. - * - * It would be nice to wrap OSError and various other exception - * types as well, but that's quite a bit trickier due to the extra - * state potentially stored on OSError instances. - */ - va_start(vargs, format); - msg_prefix = PyUnicode_FromFormatV(format, vargs); - va_end(vargs); - if (msg_prefix == NULL) { - Py_DECREF(exc); - return NULL; - } - - PyErr_Format((PyObject*)Py_TYPE(exc), "%U (%s: %S)", - msg_prefix, Py_TYPE(exc)->tp_name, exc); - Py_DECREF(msg_prefix); - PyObject *new_exc = PyErr_GetRaisedException(); - PyException_SetCause(new_exc, exc); - PyErr_SetRaisedException(new_exc); - return new_exc; -} diff --git a/Python/codecs.c b/Python/codecs.c index b2087b4..9d800f9 100644 --- a/Python/codecs.c +++ b/Python/codecs.c @@ -382,20 +382,27 @@ PyObject *PyCodec_StreamWriter(const char *encoding, return codec_getstreamcodec(encoding, stream, errors, 3); } -/* Helper that tries to ensure the reported exception chain indicates the - * codec that was invoked to trigger the failure without changing the type - * of the exception raised. - */ static void -wrap_codec_error(const char *operation, - const char *encoding) +add_note_to_codec_error(const char *operation, + const char *encoding) { - /* TrySetFromCause will replace the active exception with a suitably - * updated clone if it can, otherwise it will leave the original - * exception alone. - */ - _PyErr_TrySetFromCause("%s with '%s' codec failed", - operation, encoding); + PyObject *exc = PyErr_GetRaisedException(); + if (exc == NULL) { + return; + } + PyObject *note = PyUnicode_FromFormat("%s with '%s' codec failed", + operation, encoding); + if (note == NULL) { + _PyErr_ChainExceptions1(exc); + return; + } + int res = _PyException_AddNote(exc, note); + Py_DECREF(note); + if (res < 0) { + _PyErr_ChainExceptions1(exc); + return; + } + PyErr_SetRaisedException(exc); } /* Encode an object (e.g. a Unicode object) using the given encoding @@ -418,7 +425,7 @@ _PyCodec_EncodeInternal(PyObject *object, result = PyObject_Call(encoder, args, NULL); if (result == NULL) { - wrap_codec_error("encoding", encoding); + add_note_to_codec_error("encoding", encoding); goto onError; } @@ -463,7 +470,7 @@ _PyCodec_DecodeInternal(PyObject *object, result = PyObject_Call(decoder, args, NULL); if (result == NULL) { - wrap_codec_error("decoding", encoding); + add_note_to_codec_error("decoding", encoding); goto onError; } if (!PyTuple_Check(result) || -- cgit v0.12