diff options
author | Walter Dörwald <walter@livinglogic.de> | 2007-08-17 16:41:28 (GMT) |
---|---|---|
committer | Walter Dörwald <walter@livinglogic.de> | 2007-08-17 16:41:28 (GMT) |
commit | 6e390806495cf30c836615996b94e5ffa258cbef (patch) | |
tree | eef913ca3061a114ff6d301a042408d4d3243ecc /Lib/test/test_codecs.py | |
parent | 437e6a3b1588ece44abbb4d65f74f9a841638e1d (diff) | |
download | cpython-6e390806495cf30c836615996b94e5ffa258cbef.zip cpython-6e390806495cf30c836615996b94e5ffa258cbef.tar.gz cpython-6e390806495cf30c836615996b94e5ffa258cbef.tar.bz2 |
Backport r57105 and r57145 from the py3k branch: UTF-32 codecs.
Diffstat (limited to 'Lib/test/test_codecs.py')
-rw-r--r-- | Lib/test/test_codecs.py | 134 |
1 files changed, 134 insertions, 0 deletions
diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py
index 0389623..7c5eb57 100644
--- a/Lib/test/test_codecs.py
+++ b/Lib/test/test_codecs.py
@@ -244,6 +244,137 @@ class ReadTest(unittest.TestCase):
         self.assertEqual(reader.readline(), s5)
         self.assertEqual(reader.readline(), u"")
 
+class UTF32Test(ReadTest):
+    encoding = "utf-32"
+
+    spamle = ('\xff\xfe\x00\x00'
+              's\x00\x00\x00p\x00\x00\x00a\x00\x00\x00m\x00\x00\x00'
+              's\x00\x00\x00p\x00\x00\x00a\x00\x00\x00m\x00\x00\x00')
+    spambe = ('\x00\x00\xfe\xff'
+              '\x00\x00\x00s\x00\x00\x00p\x00\x00\x00a\x00\x00\x00m'
+              '\x00\x00\x00s\x00\x00\x00p\x00\x00\x00a\x00\x00\x00m')
+
+    def test_only_one_bom(self):
+        _,_,reader,writer = codecs.lookup(self.encoding)
+        # encode some stream
+        s = StringIO.StringIO()
+        f = writer(s)
+        f.write(u"spam")
+        f.write(u"spam")
+        d = s.getvalue()
+        # check whether there is exactly one BOM in it
+        self.assert_(d == self.spamle or d == self.spambe)
+        # try to read it back
+        s = StringIO.StringIO(d)
+        f = reader(s)
+        self.assertEquals(f.read(), u"spamspam")
+
+    def test_badbom(self):
+        s = StringIO.StringIO(4*"\xff")
+        f = codecs.getreader(self.encoding)(s)
+        self.assertRaises(UnicodeError, f.read)
+
+        s = StringIO.StringIO(8*"\xff")
+        f = codecs.getreader(self.encoding)(s)
+        self.assertRaises(UnicodeError, f.read)
+
+    def test_partial(self):
+        self.check_partial(
+            u"\x00\xff\u0100\uffff",
+            [
+                u"", # first byte of BOM read
+                u"", # second byte of BOM read
+                u"", # third byte of BOM read
+                u"", # fourth byte of BOM read => byteorder known
+                u"",
+                u"",
+                u"",
+                u"\x00",
+                u"\x00",
+                u"\x00",
+                u"\x00",
+                u"\x00\xff",
+                u"\x00\xff",
+                u"\x00\xff",
+                u"\x00\xff",
+                u"\x00\xff\u0100",
+                u"\x00\xff\u0100",
+                u"\x00\xff\u0100",
+                u"\x00\xff\u0100",
+                u"\x00\xff\u0100\uffff",
+            ]
+        )
+
+    def test_errors(self):
+        self.assertRaises(UnicodeDecodeError, codecs.utf_32_decode,
+                          "\xff", "strict", True)
+
+class UTF32LETest(ReadTest):
+    encoding = "utf-32-le"
+
+    def test_partial(self):
+        self.check_partial(
+            u"\x00\xff\u0100\uffff",
+            [
+                u"",
+                u"",
+                u"",
+                u"\x00",
+                u"\x00",
+                u"\x00",
+                u"\x00",
+                u"\x00\xff",
+                u"\x00\xff",
+                u"\x00\xff",
+                u"\x00\xff",
+                u"\x00\xff\u0100",
+                u"\x00\xff\u0100",
+                u"\x00\xff\u0100",
+                u"\x00\xff\u0100",
+                u"\x00\xff\u0100\uffff",
+            ]
+        )
+
+    def test_simple(self):
+        self.assertEqual(u"\U00010203".encode(self.encoding), "\x03\x02\x01\x00")
+
+    def test_errors(self):
+        self.assertRaises(UnicodeDecodeError, codecs.utf_32_le_decode,
+                          "\xff", "strict", True)
+
+class UTF32BETest(ReadTest):
+    encoding = "utf-32-be"
+
+    def test_partial(self):
+        self.check_partial(
+            u"\x00\xff\u0100\uffff",
+            [
+                u"",
+                u"",
+                u"",
+                u"\x00",
+                u"\x00",
+                u"\x00",
+                u"\x00",
+                u"\x00\xff",
+                u"\x00\xff",
+                u"\x00\xff",
+                u"\x00\xff",
+                u"\x00\xff\u0100",
+                u"\x00\xff\u0100",
+                u"\x00\xff\u0100",
+                u"\x00\xff\u0100",
+                u"\x00\xff\u0100\uffff",
+            ]
+        )
+
+    def test_simple(self):
+        self.assertEqual(u"\U00010203".encode(self.encoding), "\x00\x01\x02\x03")
+
+    def test_errors(self):
+        self.assertRaises(UnicodeDecodeError, codecs.utf_32_be_decode,
+                          "\xff", "strict", True)
+
 class UTF16Test(ReadTest):
     encoding = "utf-16"
 
@@ -1278,6 +1409,9 @@ class WithStmtTest(unittest.TestCase):
 
 def test_main():
     test_support.run_unittest(
+        UTF32Test,
+        UTF32LETest,
+        UTF32BETest,
         UTF16Test,
         UTF16LETest,
         UTF16BETest,