From 37623ab5f12b9034b1935f9507c3a5d1786dfd9e Mon Sep 17 00:00:00 2001 From: Ezio Melotti Date: Thu, 3 Jan 2013 08:44:15 +0200 Subject: #16009: JSON error messages now provide more information. Patch by Serhiy Storchaka. --- Lib/json/decoder.py | 14 ++++---- Lib/json/scanner.py | 4 +-- Lib/test/json_tests/test_fail.py | 77 ++++++++++++++++++++++++++++++++++++++++ Misc/NEWS | 2 ++ Modules/_json.c | 48 ++++++++++++------------- 5 files changed, 112 insertions(+), 33 deletions(-) diff --git a/Lib/json/decoder.py b/Lib/json/decoder.py index 9b7438c..7e5a099 100644 --- a/Lib/json/decoder.py +++ b/Lib/json/decoder.py @@ -188,8 +188,8 @@ def JSONObject(s_and_end, strict, scan_once, object_hook, object_pairs_hook, try: value, end = scan_once(s, end) - except StopIteration: - raise ValueError(errmsg("Expecting object", s, end)) + except StopIteration as err: + raise ValueError(errmsg("Expecting value", s, err.value)) from None pairs_append((key, value)) try: nextchar = s[end] @@ -232,8 +232,8 @@ def JSONArray(s_and_end, scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR): while True: try: value, end = scan_once(s, end) - except StopIteration: - raise ValueError(errmsg("Expecting object", s, end)) + except StopIteration as err: + raise ValueError(errmsg("Expecting value", s, err.value)) from None _append(value) nextchar = s[end:end + 1] if nextchar in _ws: @@ -243,7 +243,7 @@ def JSONArray(s_and_end, scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR): if nextchar == ']': break elif nextchar != ',': - raise ValueError(errmsg("Expecting ',' delimiter", s, end)) + raise ValueError(errmsg("Expecting ',' delimiter", s, end - 1)) try: if s[end] in _ws: end += 1 @@ -358,6 +358,6 @@ class JSONDecoder(object): """ try: obj, end = self.scan_once(s, idx) - except StopIteration: - raise ValueError("No JSON object could be decoded") + except StopIteration as err: + raise ValueError(errmsg("Expecting value", s, err.value)) from None return obj, end diff --git a/Lib/json/scanner.py b/Lib/json/scanner.py index 23eef61..86426cd 100644 --- a/Lib/json/scanner.py +++ b/Lib/json/scanner.py @@ -29,7 +29,7 @@ def py_make_scanner(context): try: nextchar = string[idx] except IndexError: - raise StopIteration + raise StopIteration(idx) if nextchar == '"': return parse_string(string, idx + 1, strict) @@ -60,7 +60,7 @@ def py_make_scanner(context): elif nextchar == '-' and string[idx:idx + 9] == '-Infinity': return parse_constant('-Infinity'), idx + 9 else: - raise StopIteration + raise StopIteration(idx) def scan_once(string, idx): try: diff --git a/Lib/test/json_tests/test_fail.py b/Lib/test/json_tests/test_fail.py index 7809056..a2dc29a 100644 --- a/Lib/test/json_tests/test_fail.py +++ b/Lib/test/json_tests/test_fail.py @@ -1,4 +1,5 @@ from test.json_tests import PyTest, CTest +import re # 2007-10-05 JSONDOCS = [ @@ -100,6 +101,82 @@ class TestFail: #This is for python encoder self.assertRaises(TypeError, self.dumps, data, indent=True) + def test_truncated_input(self): + test_cases = [ + ('', 'Expecting value', 0), + ('[', 'Expecting value', 1), + ('[42', "Expecting ',' delimiter", 3), + ('[42,', 'Expecting value', 4), + ('["', 'Unterminated string starting at', 1), + ('["spam', 'Unterminated string starting at', 1), + ('["spam"', "Expecting ',' delimiter", 7), + ('["spam",', 'Expecting value', 8), + ('{', 'Expecting property name enclosed in double quotes', 1), + ('{"', 'Unterminated string starting at', 1), + ('{"spam', 'Unterminated string starting at', 1), + ('{"spam"', "Expecting ':' delimiter", 7), + ('{"spam":', 'Expecting value', 8), + ('{"spam":42', "Expecting ',' delimiter", 10), + ('{"spam":42,', 'Expecting property name enclosed in double quotes', 11), + ] + test_cases += [ + ('"', 'Unterminated string starting at', 0), + ('"spam', 'Unterminated string starting at', 0), + ] + for data, msg, idx in test_cases: + self.assertRaisesRegex(ValueError, + r'^{0}: line 1 column {1} \(char {1}\)'.format( + re.escape(msg), idx), + self.loads, data) + + def test_unexpected_data(self): + test_cases = [ + ('[,', 'Expecting value', 1), + ('{"spam":[}', 'Expecting value', 9), + ('[42:', "Expecting ',' delimiter", 3), + ('[42 "spam"', "Expecting ',' delimiter", 4), + ('[42,]', 'Expecting value', 4), + ('{"spam":[42}', "Expecting ',' delimiter", 11), + ('["]', 'Unterminated string starting at', 1), + ('["spam":', "Expecting ',' delimiter", 7), + ('["spam",]', 'Expecting value', 8), + ('{:', 'Expecting property name enclosed in double quotes', 1), + ('{,', 'Expecting property name enclosed in double quotes', 1), + ('{42', 'Expecting property name enclosed in double quotes', 1), + ('[{]', 'Expecting property name enclosed in double quotes', 2), + ('{"spam",', "Expecting ':' delimiter", 7), + ('{"spam"}', "Expecting ':' delimiter", 7), + ('[{"spam"]', "Expecting ':' delimiter", 8), + ('{"spam":}', 'Expecting value', 8), + ('[{"spam":]', 'Expecting value', 9), + ('{"spam":42 "ham"', "Expecting ',' delimiter", 11), + ('[{"spam":42]', "Expecting ',' delimiter", 11), + ('{"spam":42,}', 'Expecting property name enclosed in double quotes', 11), + ] + for data, msg, idx in test_cases: + self.assertRaisesRegex(ValueError, + r'^{0}: line 1 column {1} \(char {1}\)'.format( + re.escape(msg), idx), + self.loads, data) + + def test_extra_data(self): + test_cases = [ + ('[]]', 'Extra data', 2), + ('{}}', 'Extra data', 2), + ('[],[]', 'Extra data', 2), + ('{},{}', 'Extra data', 2), + ] + test_cases += [ + ('42,"spam"', 'Extra data', 2), + ('"spam",42', 'Extra data', 6), + ] + for data, msg, idx in test_cases: + self.assertRaisesRegex(ValueError, + r'^{0}: line 1 column {1} - line 1 column {2}' + r' \(char {1} - {2}\)'.format( + re.escape(msg), idx, len(data)), + self.loads, data) + class TestPyFail(TestFail, PyTest): pass class TestCFail(TestFail, CTest): pass diff --git a/Misc/NEWS b/Misc/NEWS index b00dc53..f1d55dd 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -201,6 +201,8 @@ Core and Builtins Library ------- +- Issue #16009: JSON error messages now provide more information. + - Issue #16828: Fix error incorrectly raised by bz2.compress(b'') and bz2.BZ2Compressor.compress(b''). Initial patch by Martin Packman. diff --git a/Modules/_json.c b/Modules/_json.c index 9b6bbd3..68ebe99 100644 --- a/Modules/_json.c +++ b/Modules/_json.c @@ -237,6 +237,16 @@ raise_errmsg(char *msg, PyObject *s, Py_ssize_t end) } } +static void +raise_stop_iteration(Py_ssize_t idx) +{ + PyObject *value = PyLong_FromSsize_t(idx); + if (value != NULL) { + PyErr_SetObject(PyExc_StopIteration, value); + Py_DECREF(value); + } +} + static PyObject * _build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) { /* return (rval, idx) tuple, stealing reference to rval */ @@ -306,7 +316,7 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next buf = PyUnicode_DATA(pystr); kind = PyUnicode_KIND(pystr); - if (end < 0 || len <= end) { + if (end < 0 || len < end) { PyErr_SetString(PyExc_ValueError, "end is out of bounds"); goto bail; } @@ -604,12 +614,12 @@ _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ss while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind,str, idx))) idx++; /* only loop if the object is non-empty */ - if (idx <= end_idx && PyUnicode_READ(kind, str, idx) != '}') { - while (idx <= end_idx) { + if (idx > end_idx || PyUnicode_READ(kind, str, idx) != '}') { + while (1) { PyObject *memokey; /* read key */ - if (PyUnicode_READ(kind, str, idx) != '"') { + if (idx > end_idx || PyUnicode_READ(kind, str, idx) != '"') { raise_errmsg("Expecting property name enclosed in double quotes", pystr, idx); goto bail; } @@ -666,11 +676,9 @@ _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ss while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++; /* bail if the object is closed or we didn't get the , delimiter */ - if (idx > end_idx) break; - if (PyUnicode_READ(kind, str, idx) == '}') { + if (idx <= end_idx && PyUnicode_READ(kind, str, idx) == '}') break; - } - else if (PyUnicode_READ(kind, str, idx) != ',') { + if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ',') { raise_errmsg("Expecting ',' delimiter", pystr, idx); goto bail; } @@ -681,12 +689,6 @@ _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ss } } - /* verify that idx < end_idx, str[idx] should be '}' */ - if (idx > end_idx || PyUnicode_READ(kind, str, idx) != '}') { - raise_errmsg("Expecting object", pystr, end_idx); - goto bail; - } - *next_idx_ptr = idx + 1; if (has_pairs_hook) { @@ -738,8 +740,8 @@ _parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssi while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++; /* only loop if the array is non-empty */ - if (idx <= end_idx && PyUnicode_READ(kind, str, idx) != ']') { - while (idx <= end_idx) { + if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ']') { + while (1) { /* read any JSON term */ val = scan_once_unicode(s, pystr, idx, &next_idx); @@ -756,11 +758,9 @@ _parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssi while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++; /* bail if the array is closed or we didn't get the , delimiter */ - if (idx > end_idx) break; - if (PyUnicode_READ(kind, str, idx) == ']') { + if (idx <= end_idx && PyUnicode_READ(kind, str, idx) == ']') break; - } - else if (PyUnicode_READ(kind, str, idx) != ',') { + if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ',') { raise_errmsg("Expecting ',' delimiter", pystr, idx); goto bail; } @@ -773,7 +773,7 @@ _parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssi /* verify that idx < end_idx, PyUnicode_READ(kind, str, idx) should be ']' */ if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ']') { - raise_errmsg("Expecting object", pystr, end_idx); + raise_errmsg("Expecting value", pystr, end_idx); goto bail; } *next_idx_ptr = idx + 1; @@ -841,7 +841,7 @@ _match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ if (PyUnicode_READ(kind, str, idx) == '-') { idx++; if (idx > end_idx) { - PyErr_SetNone(PyExc_StopIteration); + raise_stop_iteration(start); return NULL; } } @@ -857,7 +857,7 @@ _match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ } /* no integer digits, error */ else { - PyErr_SetNone(PyExc_StopIteration); + raise_stop_iteration(start); return NULL; } @@ -950,7 +950,7 @@ scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_ length = PyUnicode_GET_LENGTH(pystr); if (idx >= length) { - PyErr_SetNone(PyExc_StopIteration); + raise_stop_iteration(idx); return NULL; } -- cgit v0.12