summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Ezio Melotti <ezio.melotti@gmail.com> 2013-10-20 23:10:55 (GMT)
committer: Ezio Melotti <ezio.melotti@gmail.com> 2013-10-20 23:10:55 (GMT)
commit: 566a2be95c3aba65302d3d5b8108b27f810e1eaf (patch)
tree: e093ba0634bea66fa1561c13c88eccccc0aae299
parent: a0e768ccc250dbe6ffab812b7964538013ae36c4 (diff)
download: cpython-566a2be95c3aba65302d3d5b8108b27f810e1eaf.zip
cpython-566a2be95c3aba65302d3d5b8108b27f810e1eaf.tar.gz
cpython-566a2be95c3aba65302d3d5b8108b27f810e1eaf.tar.bz2
#18958: Improve error message for json.load(s) while passing a string that starts with a UTF-8 BOM.
-rw-r--r-- Lib/json/__init__.py 2
-rw-r--r-- Lib/test/test_json/test_decode.py 14
-rw-r--r-- Misc/NEWS 3
3 files changed, 19 insertions, 0 deletions
diff --git a/Lib/json/__init__.py b/Lib/json/__init__.py
index 6cedb6e..a459f77 100644
--- a/Lib/json/__init__.py
+++ b/Lib/json/__init__.py
@@ -313,6 +313,8 @@ def loads(s, encoding=None, cls=None, object_hook=None, parse_float=None,
if not isinstance(s, str):
raise TypeError('the JSON object must be str, not {!r}'.format(
s.__class__.__name__))
+ if s.startswith(u'\ufeff'):
+ raise ValueError("Unexpected UTF-8 BOM (decode using utf-8-sig)")
if (cls is None and object_hook is None and
parse_int is None and parse_float is None and
parse_constant is None and object_pairs_hook is None and not kw):
diff --git a/Lib/test/test_json/test_decode.py b/Lib/test/test_json/test_decode.py
index 05d07b7..35c02de 100644
--- a/Lib/test/test_json/test_decode.py
+++ b/Lib/test/test_json/test_decode.py
@@ -77,5 +77,19 @@ class TestDecode:
with self.assertRaisesRegex(TypeError, msg):
self.json.load(BytesIO(b'[1,2,3]'))
+ def test_string_with_utf8_bom(self):
+ # see #18958
+ bom_json = "[1,2,3]".encode('utf-8-sig').decode('utf-8')
+ with self.assertRaises(ValueError) as cm:
+ self.loads(bom_json)
+ self.assertIn('BOM', str(cm.exception))
+ with self.assertRaises(ValueError) as cm:
+ self.json.load(StringIO(bom_json))
+ self.assertIn('BOM', str(cm.exception))
+ # make sure that the BOM is not detected in the middle of a string
+ bom_in_str = '"{}"'.format(''.encode('utf-8-sig').decode('utf-8'))
+ self.assertEqual(self.loads(bom_in_str), '\ufeff')
+ self.assertEqual(self.json.load(StringIO(bom_in_str)), '\ufeff')
+
class TestPyDecode(TestDecode, PyTest): pass
class TestCDecode(TestDecode, CTest): pass
diff --git a/Misc/NEWS b/Misc/NEWS
index bff7097..af66063 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -62,6 +62,9 @@ Core and Builtins
Library
-------
+- Issue #18958: Improve error message for json.load(s) while passing a string
+ that starts with a UTF-8 BOM.
+
- Issue #19307: Improve error message for json.load(s) while passing objects
of the wrong type.