summary | refs | log | tree | commit | diff | stats
path: root/Lib
diff options
context:
space:
mode:
author Serhiy Storchaka <storchaka@gmail.com> 2016-10-30 21:00:01 (GMT)
committer Serhiy Storchaka <storchaka@gmail.com> 2016-10-30 21:00:01 (GMT)
commit c4a35daa9771fbdcc58cad4656b8680541623e0e (patch)
tree 6c8b9f667c932d9a56aadb8dc685546555b08227 /Lib
parent a0d9c685d0c3f817c5a7143098ce1a89380ee514 (diff)
download cpython-c4a35daa9771fbdcc58cad4656b8680541623e0e.zip
cpython-c4a35daa9771fbdcc58cad4656b8680541623e0e.tar.gz
cpython-c4a35daa9771fbdcc58cad4656b8680541623e0e.tar.bz2
Issue #28541: Improve test coverage for encoding detection in json library.
Original patch by Eric Appelt.
Diffstat (limited to 'Lib')
-rw-r--r-- Lib/json/__init__.py 3
-rw-r--r-- Lib/test/test_json/test_unicode.py 13
2 files changed, 15 insertions, 1 deletion
diff --git a/Lib/json/__init__.py b/Lib/json/__init__.py
index 8dcc678..94397aa 100644
--- a/Lib/json/__init__.py
+++ b/Lib/json/__init__.py
@@ -257,7 +257,8 @@ def detect_encoding(b):
return 'utf-16-be' if b[1] else 'utf-32-be'
if not b[1]:
# XX 00 00 00 - utf-32-le
- # XX 00 XX XX - utf-16-le
+ # XX 00 00 XX - utf-16-le
+ # XX 00 XX -- - utf-16-le
return 'utf-16-le' if b[2] or b[3] else 'utf-32-le'
elif len(b) == 2:
if not b[0]:
diff --git a/Lib/test/test_json/test_unicode.py b/Lib/test/test_json/test_unicode.py
index eda177a..2e8bba2 100644
--- a/Lib/test/test_json/test_unicode.py
+++ b/Lib/test/test_json/test_unicode.py
@@ -65,6 +65,19 @@ class TestUnicode:
self.assertEqual(self.loads(bom + encoded), data)
self.assertEqual(self.loads(encoded), data)
self.assertRaises(UnicodeDecodeError, self.loads, b'["\x80"]')
+ # RFC-7159 and ECMA-404 extend JSON to allow documents that
+ # consist of only a string, which can present a special case
+ # not covered by the encoding detection patterns specified in
+ # RFC-4627 for utf-16-le (XX 00 XX 00).
+ self.assertEqual(self.loads('"\u2600"'.encode('utf-16-le')),
+ '\u2600')
+ # Encoding detection for small (<4) bytes objects
+ # is implemented as a special case. RFC-7159 and ECMA-404
+ # allow single codepoint JSON documents which are only two
+ # bytes in utf-16 encodings w/o BOM.
+ self.assertEqual(self.loads(b'5\x00'), 5)
+ self.assertEqual(self.loads(b'\x007'), 7)
+ self.assertEqual(self.loads(b'57'), 57)
def test_object_pairs_hook_with_unicode(self):
s = '{"xkd":1, "kcw":2, "art":3, "hxm":4, "qrt":5, "pad":6, "hoy":7}'