From e6b25e9a09dbe09839b36f97b9174a30b1db2dbf Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Tue, 23 Jul 2024 20:02:54 +0300 Subject: gh-122163: Add notes for JSON serialization errors (GH-122165) This allows to identify the source of the error. --- Doc/whatsnew/3.14.rst | 7 ++ Include/internal/pycore_pyerrors.h | 3 +- Lib/json/encoder.py | 119 ++++++++++++--------- Lib/test/test_json/test_default.py | 18 ++++ Lib/test/test_json/test_fail.py | 21 +++- Lib/test/test_json/test_recursion.py | 18 ++-- .../2024-07-23-15-11-13.gh-issue-122163.4wRUuM.rst | 2 + Modules/_json.c | 13 ++- 8 files changed, 135 insertions(+), 66 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-07-23-15-11-13.gh-issue-122163.4wRUuM.rst diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index f45a44b..bd8bdcb 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -112,6 +112,13 @@ Added support for converting any objects that have the :meth:`!as_integer_ratio` method to a :class:`~fractions.Fraction`. (Contributed by Serhiy Storchaka in :gh:`82017`.) +json +---- + +Add notes for JSON serialization errors that allow to identify the source +of the error. +(Contributed by Serhiy Storchaka in :gh:`122163`.) + os -- diff --git a/Include/internal/pycore_pyerrors.h b/Include/internal/pycore_pyerrors.h index 1507163..9835e49 100644 --- a/Include/internal/pycore_pyerrors.h +++ b/Include/internal/pycore_pyerrors.h @@ -161,7 +161,8 @@ extern PyObject* _Py_Offer_Suggestions(PyObject* exception); PyAPI_FUNC(Py_ssize_t) _Py_UTF8_Edit_Cost(PyObject *str_a, PyObject *str_b, Py_ssize_t max_cost); -void _PyErr_FormatNote(const char *format, ...); +// Export for '_json' shared extension +PyAPI_FUNC(void) _PyErr_FormatNote(const char *format, ...); /* Context manipulation (PEP 3134) */ diff --git a/Lib/json/encoder.py b/Lib/json/encoder.py index 323332f..b804224 100644 --- a/Lib/json/encoder.py +++ b/Lib/json/encoder.py @@ -293,37 +293,40 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, else: newline_indent = None separator = _item_separator - first = True - for value in lst: - if first: - first = False - else: + for i, value in enumerate(lst): + if i: buf = separator - if isinstance(value, str): - yield buf + _encoder(value) - elif value is None: - yield buf + 'null' - elif value is True: - yield buf + 'true' - elif value is False: - yield buf + 'false' - elif isinstance(value, int): - # Subclasses of int/float may override __repr__, but we still - # want to encode them as integers/floats in JSON. One example - # within the standard library is IntEnum. - yield buf + _intstr(value) - elif isinstance(value, float): - # see comment above for int - yield buf + _floatstr(value) - else: - yield buf - if isinstance(value, (list, tuple)): - chunks = _iterencode_list(value, _current_indent_level) - elif isinstance(value, dict): - chunks = _iterencode_dict(value, _current_indent_level) + try: + if isinstance(value, str): + yield buf + _encoder(value) + elif value is None: + yield buf + 'null' + elif value is True: + yield buf + 'true' + elif value is False: + yield buf + 'false' + elif isinstance(value, int): + # Subclasses of int/float may override __repr__, but we still + # want to encode them as integers/floats in JSON. One example + # within the standard library is IntEnum. + yield buf + _intstr(value) + elif isinstance(value, float): + # see comment above for int + yield buf + _floatstr(value) else: - chunks = _iterencode(value, _current_indent_level) - yield from chunks + yield buf + if isinstance(value, (list, tuple)): + chunks = _iterencode_list(value, _current_indent_level) + elif isinstance(value, dict): + chunks = _iterencode_dict(value, _current_indent_level) + else: + chunks = _iterencode(value, _current_indent_level) + yield from chunks + except GeneratorExit: + raise + except BaseException as exc: + exc.add_note(f'when serializing {type(lst).__name__} item {i}') + raise if newline_indent is not None: _current_indent_level -= 1 yield '\n' + _indent * _current_indent_level @@ -382,28 +385,34 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, yield item_separator yield _encoder(key) yield _key_separator - if isinstance(value, str): - yield _encoder(value) - elif value is None: - yield 'null' - elif value is True: - yield 'true' - elif value is False: - yield 'false' - elif isinstance(value, int): - # see comment for int/float in _make_iterencode - yield _intstr(value) - elif isinstance(value, float): - # see comment for int/float in _make_iterencode - yield _floatstr(value) - else: - if isinstance(value, (list, tuple)): - chunks = _iterencode_list(value, _current_indent_level) - elif isinstance(value, dict): - chunks = _iterencode_dict(value, _current_indent_level) + try: + if isinstance(value, str): + yield _encoder(value) + elif value is None: + yield 'null' + elif value is True: + yield 'true' + elif value is False: + yield 'false' + elif isinstance(value, int): + # see comment for int/float in _make_iterencode + yield _intstr(value) + elif isinstance(value, float): + # see comment for int/float in _make_iterencode + yield _floatstr(value) else: - chunks = _iterencode(value, _current_indent_level) - yield from chunks + if isinstance(value, (list, tuple)): + chunks = _iterencode_list(value, _current_indent_level) + elif isinstance(value, dict): + chunks = _iterencode_dict(value, _current_indent_level) + else: + chunks = _iterencode(value, _current_indent_level) + yield from chunks + except GeneratorExit: + raise + except BaseException as exc: + exc.add_note(f'when serializing {type(dct).__name__} item {key!r}') + raise if newline_indent is not None: _current_indent_level -= 1 yield '\n' + _indent * _current_indent_level @@ -436,8 +445,14 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, if markerid in markers: raise ValueError("Circular reference detected") markers[markerid] = o - o = _default(o) - yield from _iterencode(o, _current_indent_level) + newobj = _default(o) + try: + yield from _iterencode(newobj, _current_indent_level) + except GeneratorExit: + raise + except BaseException as exc: + exc.add_note(f'when serializing {type(o).__name__} object') + raise if markers is not None: del markers[markerid] return _iterencode diff --git a/Lib/test/test_json/test_default.py b/Lib/test/test_json/test_default.py index 3ce1668..811880a 100644 --- a/Lib/test/test_json/test_default.py +++ b/Lib/test/test_json/test_default.py @@ -8,6 +8,24 @@ class TestDefault: self.dumps(type, default=repr), self.dumps(repr(type))) + def test_bad_default(self): + def default(obj): + if obj is NotImplemented: + raise ValueError + if obj is ...: + return NotImplemented + if obj is type: + return collections + return [...] + + with self.assertRaises(ValueError) as cm: + self.dumps(type, default=default) + self.assertEqual(cm.exception.__notes__, + ['when serializing ellipsis object', + 'when serializing list item 0', + 'when serializing module object', + 'when serializing type object']) + def test_ordereddict(self): od = collections.OrderedDict(a=1, b=2, c=3, d=4) od.move_to_end('b') diff --git a/Lib/test/test_json/test_fail.py b/Lib/test/test_json/test_fail.py index a74240f..7c1696c 100644 --- a/Lib/test/test_json/test_fail.py +++ b/Lib/test/test_json/test_fail.py @@ -100,8 +100,27 @@ class TestFail: def test_not_serializable(self): import sys with self.assertRaisesRegex(TypeError, - 'Object of type module is not JSON serializable'): + 'Object of type module is not JSON serializable') as cm: self.dumps(sys) + self.assertFalse(hasattr(cm.exception, '__notes__')) + + with self.assertRaises(TypeError) as cm: + self.dumps([1, [2, 3, sys]]) + self.assertEqual(cm.exception.__notes__, + ['when serializing list item 2', + 'when serializing list item 1']) + + with self.assertRaises(TypeError) as cm: + self.dumps((1, (2, 3, sys))) + self.assertEqual(cm.exception.__notes__, + ['when serializing tuple item 2', + 'when serializing tuple item 1']) + + with self.assertRaises(TypeError) as cm: + self.dumps({'a': {'b': sys}}) + self.assertEqual(cm.exception.__notes__, + ["when serializing dict item 'b'", + "when serializing dict item 'a'"]) def test_truncated_input(self): test_cases = [ diff --git a/Lib/test/test_json/test_recursion.py b/Lib/test/test_json/test_recursion.py index 164ff20..290207e 100644 --- a/Lib/test/test_json/test_recursion.py +++ b/Lib/test/test_json/test_recursion.py @@ -12,8 +12,8 @@ class TestRecursion: x.append(x) try: self.dumps(x) - except ValueError: - pass + except ValueError as exc: + self.assertEqual(exc.__notes__, ["when serializing list item 0"]) else: self.fail("didn't raise ValueError on list recursion") x = [] @@ -21,8 +21,8 @@ class TestRecursion: x.append(y) try: self.dumps(x) - except ValueError: - pass + except ValueError as exc: + self.assertEqual(exc.__notes__, ["when serializing list item 0"]*2) else: self.fail("didn't raise ValueError on alternating list recursion") y = [] @@ -35,8 +35,8 @@ class TestRecursion: x["test"] = x try: self.dumps(x) - except ValueError: - pass + except ValueError as exc: + self.assertEqual(exc.__notes__, ["when serializing dict item 'test'"]) else: self.fail("didn't raise ValueError on dict recursion") x = {} @@ -60,8 +60,10 @@ class TestRecursion: enc.recurse = True try: enc.encode(JSONTestObject) - except ValueError: - pass + except ValueError as exc: + self.assertEqual(exc.__notes__, + ["when serializing list item 0", + "when serializing type object"]) else: self.fail("didn't raise ValueError on default recursion") diff --git a/Misc/NEWS.d/next/Library/2024-07-23-15-11-13.gh-issue-122163.4wRUuM.rst b/Misc/NEWS.d/next/Library/2024-07-23-15-11-13.gh-issue-122163.4wRUuM.rst new file mode 100644 index 0000000..a4625c2 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-07-23-15-11-13.gh-issue-122163.4wRUuM.rst @@ -0,0 +1,2 @@ +Add notes for JSON serialization errors that allow to identify the source of +the error. diff --git a/Modules/_json.c b/Modules/_json.c index c7fe156..9e29de0 100644 --- a/Modules/_json.c +++ b/Modules/_json.c @@ -11,6 +11,7 @@ #include "Python.h" #include "pycore_ceval.h" // _Py_EnterRecursiveCall() #include "pycore_runtime.h" // _PyRuntime +#include "pycore_pyerrors.h" // _PyErr_FormatNote #include "pycore_global_strings.h" // _Py_ID() #include // bool @@ -1461,6 +1462,7 @@ encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer, Py_DECREF(newobj); if (rv) { + _PyErr_FormatNote("when serializing %T object", obj); Py_XDECREF(ident); return -1; } @@ -1477,7 +1479,7 @@ encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer, static int encoder_encode_key_value(PyEncoderObject *s, _PyUnicodeWriter *writer, bool *first, - PyObject *key, PyObject *value, + PyObject *dct, PyObject *key, PyObject *value, PyObject *newline_indent, PyObject *item_separator) { @@ -1535,6 +1537,7 @@ encoder_encode_key_value(PyEncoderObject *s, _PyUnicodeWriter *writer, bool *fir return -1; } if (encoder_listencode_obj(s, writer, value, newline_indent) < 0) { + _PyErr_FormatNote("when serializing %T item %R", dct, key); return -1; } return 0; @@ -1606,7 +1609,7 @@ encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer, key = PyTuple_GET_ITEM(item, 0); value = PyTuple_GET_ITEM(item, 1); - if (encoder_encode_key_value(s, writer, &first, key, value, + if (encoder_encode_key_value(s, writer, &first, dct, key, value, new_newline_indent, current_item_separator) < 0) goto bail; @@ -1616,7 +1619,7 @@ encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer, } else { Py_ssize_t pos = 0; while (PyDict_Next(dct, &pos, &key, &value)) { - if (encoder_encode_key_value(s, writer, &first, key, value, + if (encoder_encode_key_value(s, writer, &first, dct, key, value, new_newline_indent, current_item_separator) < 0) goto bail; @@ -1710,8 +1713,10 @@ encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer, if (_PyUnicodeWriter_WriteStr(writer, separator) < 0) goto bail; } - if (encoder_listencode_obj(s, writer, obj, new_newline_indent)) + if (encoder_listencode_obj(s, writer, obj, new_newline_indent)) { + _PyErr_FormatNote("when serializing %T item %zd", seq, i); goto bail; + } } if (ident != NULL) { if (PyDict_DelItem(s->markers, ident)) -- cgit v0.12