summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- Lib/json/decoder.py 14
-rw-r--r-- Lib/json/scanner.py 4
-rw-r--r-- Lib/test/json_tests/test_fail.py 77
-rw-r--r-- Misc/NEWS 2
-rw-r--r-- Modules/_json.c 48
5 files changed, 112 insertions, 33 deletions
diff --git a/Lib/json/decoder.py b/Lib/json/decoder.py
index 9b7438c..7e5a099 100644
--- a/Lib/json/decoder.py
+++ b/Lib/json/decoder.py
@@ -188,8 +188,8 @@ def JSONObject(s_and_end, strict, scan_once, object_hook, object_pairs_hook,
try:
value, end = scan_once(s, end)
- except StopIteration:
- raise ValueError(errmsg("Expecting object", s, end))
+ except StopIteration as err:
+ raise ValueError(errmsg("Expecting value", s, err.value)) from None
pairs_append((key, value))
try:
nextchar = s[end]
@@ -232,8 +232,8 @@ def JSONArray(s_and_end, scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
while True:
try:
value, end = scan_once(s, end)
- except StopIteration:
- raise ValueError(errmsg("Expecting object", s, end))
+ except StopIteration as err:
+ raise ValueError(errmsg("Expecting value", s, err.value)) from None
_append(value)
nextchar = s[end:end + 1]
if nextchar in _ws:
@@ -243,7 +243,7 @@ def JSONArray(s_and_end, scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
if nextchar == ']':
break
elif nextchar != ',':
- raise ValueError(errmsg("Expecting ',' delimiter", s, end))
+ raise ValueError(errmsg("Expecting ',' delimiter", s, end - 1))
try:
if s[end] in _ws:
end += 1
@@ -358,6 +358,6 @@ class JSONDecoder(object):
"""
try:
obj, end = self.scan_once(s, idx)
- except StopIteration:
- raise ValueError("No JSON object could be decoded")
+ except StopIteration as err:
+ raise ValueError(errmsg("Expecting value", s, err.value)) from None
return obj, end
diff --git a/Lib/json/scanner.py b/Lib/json/scanner.py
index 23eef61..86426cd 100644
--- a/Lib/json/scanner.py
+++ b/Lib/json/scanner.py
@@ -29,7 +29,7 @@ def py_make_scanner(context):
try:
nextchar = string[idx]
except IndexError:
- raise StopIteration
+ raise StopIteration(idx)
if nextchar == '"':
return parse_string(string, idx + 1, strict)
@@ -60,7 +60,7 @@ def py_make_scanner(context):
elif nextchar == '-' and string[idx:idx + 9] == '-Infinity':
return parse_constant('-Infinity'), idx + 9
else:
- raise StopIteration
+ raise StopIteration(idx)
def scan_once(string, idx):
try:
diff --git a/Lib/test/json_tests/test_fail.py b/Lib/test/json_tests/test_fail.py
index 7809056..a2dc29a 100644
--- a/Lib/test/json_tests/test_fail.py
+++ b/Lib/test/json_tests/test_fail.py
@@ -1,4 +1,5 @@
from test.json_tests import PyTest, CTest
+import re
# 2007-10-05
JSONDOCS = [
@@ -100,6 +101,82 @@ class TestFail:
#This is for python encoder
self.assertRaises(TypeError, self.dumps, data, indent=True)
+ def test_truncated_input(self):
+ test_cases = [
+ ('', 'Expecting value', 0),
+ ('[', 'Expecting value', 1),
+ ('[42', "Expecting ',' delimiter", 3),
+ ('[42,', 'Expecting value', 4),
+ ('["', 'Unterminated string starting at', 1),
+ ('["spam', 'Unterminated string starting at', 1),
+ ('["spam"', "Expecting ',' delimiter", 7),
+ ('["spam",', 'Expecting value', 8),
+ ('{', 'Expecting property name enclosed in double quotes', 1),
+ ('{"', 'Unterminated string starting at', 1),
+ ('{"spam', 'Unterminated string starting at', 1),
+ ('{"spam"', "Expecting ':' delimiter", 7),
+ ('{"spam":', 'Expecting value', 8),
+ ('{"spam":42', "Expecting ',' delimiter", 10),
+ ('{"spam":42,', 'Expecting property name enclosed in double quotes', 11),
+ ]
+ test_cases += [
+ ('"', 'Unterminated string starting at', 0),
+ ('"spam', 'Unterminated string starting at', 0),
+ ]
+ for data, msg, idx in test_cases:
+ self.assertRaisesRegex(ValueError,
+ r'^{0}: line 1 column {1} \(char {1}\)'.format(
+ re.escape(msg), idx),
+ self.loads, data)
+
+ def test_unexpected_data(self):
+ test_cases = [
+ ('[,', 'Expecting value', 1),
+ ('{"spam":[}', 'Expecting value', 9),
+ ('[42:', "Expecting ',' delimiter", 3),
+ ('[42 "spam"', "Expecting ',' delimiter", 4),
+ ('[42,]', 'Expecting value', 4),
+ ('{"spam":[42}', "Expecting ',' delimiter", 11),
+ ('["]', 'Unterminated string starting at', 1),
+ ('["spam":', "Expecting ',' delimiter", 7),
+ ('["spam",]', 'Expecting value', 8),
+ ('{:', 'Expecting property name enclosed in double quotes', 1),
+ ('{,', 'Expecting property name enclosed in double quotes', 1),
+ ('{42', 'Expecting property name enclosed in double quotes', 1),
+ ('[{]', 'Expecting property name enclosed in double quotes', 2),
+ ('{"spam",', "Expecting ':' delimiter", 7),
+ ('{"spam"}', "Expecting ':' delimiter", 7),
+ ('[{"spam"]', "Expecting ':' delimiter", 8),
+ ('{"spam":}', 'Expecting value', 8),
+ ('[{"spam":]', 'Expecting value', 9),
+ ('{"spam":42 "ham"', "Expecting ',' delimiter", 11),
+ ('[{"spam":42]', "Expecting ',' delimiter", 11),
+ ('{"spam":42,}', 'Expecting property name enclosed in double quotes', 11),
+ ]
+ for data, msg, idx in test_cases:
+ self.assertRaisesRegex(ValueError,
+ r'^{0}: line 1 column {1} \(char {1}\)'.format(
+ re.escape(msg), idx),
+ self.loads, data)
+
+ def test_extra_data(self):
+ test_cases = [
+ ('[]]', 'Extra data', 2),
+ ('{}}', 'Extra data', 2),
+ ('[],[]', 'Extra data', 2),
+ ('{},{}', 'Extra data', 2),
+ ]
+ test_cases += [
+ ('42,"spam"', 'Extra data', 2),
+ ('"spam",42', 'Extra data', 6),
+ ]
+ for data, msg, idx in test_cases:
+ self.assertRaisesRegex(ValueError,
+ r'^{0}: line 1 column {1} - line 1 column {2}'
+ r' \(char {1} - {2}\)'.format(
+ re.escape(msg), idx, len(data)),
+ self.loads, data)
+
class TestPyFail(TestFail, PyTest): pass
class TestCFail(TestFail, CTest): pass
diff --git a/Misc/NEWS b/Misc/NEWS
index b00dc53..f1d55dd 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -201,6 +201,8 @@ Core and Builtins
Library
-------
+- Issue #16009: JSON error messages now provide more information.
+
- Issue #16828: Fix error incorrectly raised by bz2.compress(b'') and
bz2.BZ2Compressor.compress(b''). Initial patch by Martin Packman.
diff --git a/Modules/_json.c b/Modules/_json.c
index 9b6bbd3..68ebe99 100644
--- a/Modules/_json.c
+++ b/Modules/_json.c
@@ -237,6 +237,16 @@ raise_errmsg(char *msg, PyObject *s, Py_ssize_t end)
}
}
+static void
+raise_stop_iteration(Py_ssize_t idx)
+{
+ PyObject *value = PyLong_FromSsize_t(idx);
+ if (value != NULL) {
+ PyErr_SetObject(PyExc_StopIteration, value);
+ Py_DECREF(value);
+ }
+}
+
static PyObject *
_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) {
/* return (rval, idx) tuple, stealing reference to rval */
@@ -306,7 +316,7 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next
buf = PyUnicode_DATA(pystr);
kind = PyUnicode_KIND(pystr);
- if (end < 0 || len <= end) {
+ if (end < 0 || len < end) {
PyErr_SetString(PyExc_ValueError, "end is out of bounds");
goto bail;
}
@@ -604,12 +614,12 @@ _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ss
while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind,str, idx))) idx++;
/* only loop if the object is non-empty */
- if (idx <= end_idx && PyUnicode_READ(kind, str, idx) != '}') {
- while (idx <= end_idx) {
+ if (idx > end_idx || PyUnicode_READ(kind, str, idx) != '}') {
+ while (1) {
PyObject *memokey;
/* read key */
- if (PyUnicode_READ(kind, str, idx) != '"') {
+ if (idx > end_idx || PyUnicode_READ(kind, str, idx) != '"') {
raise_errmsg("Expecting property name enclosed in double quotes", pystr, idx);
goto bail;
}
@@ -666,11 +676,9 @@ _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ss
while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
/* bail if the object is closed or we didn't get the , delimiter */
- if (idx > end_idx) break;
- if (PyUnicode_READ(kind, str, idx) == '}') {
+ if (idx <= end_idx && PyUnicode_READ(kind, str, idx) == '}')
break;
- }
- else if (PyUnicode_READ(kind, str, idx) != ',') {
+ if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ',') {
raise_errmsg("Expecting ',' delimiter", pystr, idx);
goto bail;
}
@@ -681,12 +689,6 @@ _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ss
}
}
- /* verify that idx < end_idx, str[idx] should be '}' */
- if (idx > end_idx || PyUnicode_READ(kind, str, idx) != '}') {
- raise_errmsg("Expecting object", pystr, end_idx);
- goto bail;
- }
-
*next_idx_ptr = idx + 1;
if (has_pairs_hook) {
@@ -738,8 +740,8 @@ _parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssi
while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
/* only loop if the array is non-empty */
- if (idx <= end_idx && PyUnicode_READ(kind, str, idx) != ']') {
- while (idx <= end_idx) {
+ if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ']') {
+ while (1) {
/* read any JSON term */
val = scan_once_unicode(s, pystr, idx, &next_idx);
@@ -756,11 +758,9 @@ _parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssi
while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
/* bail if the array is closed or we didn't get the , delimiter */
- if (idx > end_idx) break;
- if (PyUnicode_READ(kind, str, idx) == ']') {
+ if (idx <= end_idx && PyUnicode_READ(kind, str, idx) == ']')
break;
- }
- else if (PyUnicode_READ(kind, str, idx) != ',') {
+ if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ',') {
raise_errmsg("Expecting ',' delimiter", pystr, idx);
goto bail;
}
@@ -773,7 +773,7 @@ _parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssi
/* verify that idx < end_idx, PyUnicode_READ(kind, str, idx) should be ']' */
if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ']') {
- raise_errmsg("Expecting object", pystr, end_idx);
+ raise_errmsg("Expecting value", pystr, end_idx);
goto bail;
}
*next_idx_ptr = idx + 1;
@@ -841,7 +841,7 @@ _match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_
if (PyUnicode_READ(kind, str, idx) == '-') {
idx++;
if (idx > end_idx) {
- PyErr_SetNone(PyExc_StopIteration);
+ raise_stop_iteration(start);
return NULL;
}
}
@@ -857,7 +857,7 @@ _match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_
}
/* no integer digits, error */
else {
- PyErr_SetNone(PyExc_StopIteration);
+ raise_stop_iteration(start);
return NULL;
}
@@ -950,7 +950,7 @@ scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_
length = PyUnicode_GET_LENGTH(pystr);
if (idx >= length) {
- PyErr_SetNone(PyExc_StopIteration);
+ raise_stop_iteration(idx);
return NULL;
}