summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Victor Stinner <victor.stinner@haypocalc.com> 2010-12-08 22:25:45 (GMT)
committer: Victor Stinner <victor.stinner@haypocalc.com> 2010-12-08 22:25:45 (GMT)
commit: 53a9dd776e62f6bc3b1884f3aa82e49a78bd83a8 (patch)
tree: b62f83eb883c6ffceb9c0fff6083c4f914c832a2
parent: 84cc06288d88cb12b382bf3e4695f8538ab498ff (diff)
download: cpython-53a9dd776e62f6bc3b1884f3aa82e49a78bd83a8.zip
cpython-53a9dd776e62f6bc3b1884f3aa82e49a78bd83a8.tar.gz
cpython-53a9dd776e62f6bc3b1884f3aa82e49a78bd83a8.tar.bz2
Issue #10546: UTF-16-LE and UTF-16-BE *do* support non-BMP characters
Fix the doc and add tests.
-rw-r--r-- Doc/library/codecs.rst 4
-rw-r--r-- Lib/test/test_codecs.py 12
2 files changed, 14 insertions, 2 deletions
diff --git a/Doc/library/codecs.rst b/Doc/library/codecs.rst
index 5416d3b..26e31a4 100644
--- a/Doc/library/codecs.rst
+++ b/Doc/library/codecs.rst
@@ -1114,9 +1114,9 @@ particular, the following variants typically exist:
+-----------------+--------------------------------+--------------------------------+
| utf_16 | U16, utf16 | all languages |
+-----------------+--------------------------------+--------------------------------+
-| utf_16_be | UTF-16BE | all languages (BMP only) |
+| utf_16_be | UTF-16BE | all languages |
+-----------------+--------------------------------+--------------------------------+
-| utf_16_le | UTF-16LE | all languages (BMP only) |
+| utf_16_le | UTF-16LE | all languages |
+-----------------+--------------------------------+--------------------------------+
| utf_7 | U7, unicode-1-1-utf-7 | all languages |
+-----------------+--------------------------------+--------------------------------+
diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py
index bc29e06..8287a5b 100644
--- a/Lib/test/test_codecs.py
+++ b/Lib/test/test_codecs.py
@@ -544,6 +544,12 @@ class UTF16LETest(ReadTest):
self.assertRaises(UnicodeDecodeError, codecs.utf_16_le_decode,
b"\xff", "strict", True)
+ def test_nonbmp(self):
+ self.assertEqual("\U00010203".encode(self.encoding),
+ b'\x00\xd8\x03\xde')
+ self.assertEqual(b'\x00\xd8\x03\xde'.decode(self.encoding),
+ "\U00010203")
+
class UTF16BETest(ReadTest):
encoding = "utf-16-be"
@@ -566,6 +572,12 @@ class UTF16BETest(ReadTest):
self.assertRaises(UnicodeDecodeError, codecs.utf_16_be_decode,
b"\xff", "strict", True)
+ def test_nonbmp(self):
+ self.assertEqual("\U00010203".encode(self.encoding),
+ b'\xd8\x00\xde\x03')
+ self.assertEqual(b'\xd8\x00\xde\x03'.decode(self.encoding),
+ "\U00010203")
+
class UTF8Test(ReadTest):
encoding = "utf-8"