diff options
author | Serhiy Storchaka <storchaka@gmail.com> | 2016-10-30 21:00:01 (GMT) |
---|---|---|
committer | Serhiy Storchaka <storchaka@gmail.com> | 2016-10-30 21:00:01 (GMT) |
commit | c4a35daa9771fbdcc58cad4656b8680541623e0e (patch) | |
tree | 6c8b9f667c932d9a56aadb8dc685546555b08227 /Lib | |
parent | a0d9c685d0c3f817c5a7143098ce1a89380ee514 (diff) | |
download | cpython-c4a35daa9771fbdcc58cad4656b8680541623e0e.zip cpython-c4a35daa9771fbdcc58cad4656b8680541623e0e.tar.gz cpython-c4a35daa9771fbdcc58cad4656b8680541623e0e.tar.bz2 |
Issue #28541: Improve test coverage for encoding detection in json library.
Original patch by Eric Appelt.
Diffstat (limited to 'Lib')
-rw-r--r-- | Lib/json/__init__.py | 3 | ||||
-rw-r--r-- | Lib/test/test_json/test_unicode.py | 13 |
2 files changed, 15 insertions, 1 deletion
diff --git a/Lib/json/__init__.py b/Lib/json/__init__.py
index 8dcc678..94397aa 100644
--- a/Lib/json/__init__.py
+++ b/Lib/json/__init__.py
@@ -257,7 +257,8 @@ def detect_encoding(b):
             return 'utf-16-be' if b[1] else 'utf-32-be'
         if not b[1]:
             # XX 00 00 00 - utf-32-le
-            # XX 00 XX XX - utf-16-le
+            # XX 00 00 XX - utf-16-le
+            # XX 00 XX -- - utf-16-le
             return 'utf-16-le' if b[2] or b[3] else 'utf-32-le'
     elif len(b) == 2:
         if not b[0]:
diff --git a/Lib/test/test_json/test_unicode.py b/Lib/test/test_json/test_unicode.py
index eda177a..2e8bba2 100644
--- a/Lib/test/test_json/test_unicode.py
+++ b/Lib/test/test_json/test_unicode.py
@@ -65,6 +65,19 @@ class TestUnicode:
             self.assertEqual(self.loads(bom + encoded), data)
             self.assertEqual(self.loads(encoded), data)
         self.assertRaises(UnicodeDecodeError, self.loads, b'["\x80"]')
+        # RFC-7159 and ECMA-404 extend JSON to allow documents that
+        # consist of only a string, which can present a special case
+        # not covered by the encoding detection patterns specified in
+        # RFC-4627 for utf-16-le (XX 00 XX 00).
+        self.assertEqual(self.loads('"\u2600"'.encode('utf-16-le')),
+                         '\u2600')
+        # Encoding detection for small (<4) bytes objects
+        # is implemented as a special case. RFC-7159 and ECMA-404
+        # allow single codepoint JSON documents which are only two
+        # bytes in utf-16 encodings w/o BOM.
+        self.assertEqual(self.loads(b'5\x00'), 5)
+        self.assertEqual(self.loads(b'\x007'), 7)
+        self.assertEqual(self.loads(b'57'), 57)
 
     def test_object_pairs_hook_with_unicode(self):
         s = '{"xkd":1, "kcw":2, "art":3, "hxm":4, "qrt":5, "pad":6, "hoy":7}'