summary | refs | log | tree | commit | diff | stats
path: root/Lib/test/test_codecs.py
diff options
context:
space:
mode:
author: Walter Dörwald <walter@livinglogic.de>, 2007-08-17 16:41:28 (GMT)
committer: Walter Dörwald <walter@livinglogic.de>, 2007-08-17 16:41:28 (GMT)
commit: 6e390806495cf30c836615996b94e5ffa258cbef (patch)
tree: eef913ca3061a114ff6d301a042408d4d3243ecc /Lib/test/test_codecs.py
parent: 437e6a3b1588ece44abbb4d65f74f9a841638e1d (diff)
download: cpython-6e390806495cf30c836615996b94e5ffa258cbef.zip
download: cpython-6e390806495cf30c836615996b94e5ffa258cbef.tar.gz
download: cpython-6e390806495cf30c836615996b94e5ffa258cbef.tar.bz2
Backport r57105 and r57145 from the py3k branch: UTF-32 codecs.
Diffstat (limited to 'Lib/test/test_codecs.py')
-rw-r--r-- Lib/test/test_codecs.py 134
1 file changed, 134 insertions, 0 deletions
diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py
index 0389623..7c5eb57 100644
--- a/Lib/test/test_codecs.py
+++ b/Lib/test/test_codecs.py
@@ -244,6 +244,137 @@ class ReadTest(unittest.TestCase):
self.assertEqual(reader.readline(), s5)
self.assertEqual(reader.readline(), u"")
+class UTF32Test(ReadTest):
+ encoding = "utf-32"
+
+ spamle = ('\xff\xfe\x00\x00'
+ 's\x00\x00\x00p\x00\x00\x00a\x00\x00\x00m\x00\x00\x00'
+ 's\x00\x00\x00p\x00\x00\x00a\x00\x00\x00m\x00\x00\x00')
+ spambe = ('\x00\x00\xfe\xff'
+ '\x00\x00\x00s\x00\x00\x00p\x00\x00\x00a\x00\x00\x00m'
+ '\x00\x00\x00s\x00\x00\x00p\x00\x00\x00a\x00\x00\x00m')
+
+ def test_only_one_bom(self):
+ _,_,reader,writer = codecs.lookup(self.encoding)
+ # encode some stream
+ s = StringIO.StringIO()
+ f = writer(s)
+ f.write(u"spam")
+ f.write(u"spam")
+ d = s.getvalue()
+ # check whether there is exactly one BOM in it
+ self.assert_(d == self.spamle or d == self.spambe)
+ # try to read it back
+ s = StringIO.StringIO(d)
+ f = reader(s)
+ self.assertEquals(f.read(), u"spamspam")
+
+ def test_badbom(self):
+ s = StringIO.StringIO(4*"\xff")
+ f = codecs.getreader(self.encoding)(s)
+ self.assertRaises(UnicodeError, f.read)
+
+ s = StringIO.StringIO(8*"\xff")
+ f = codecs.getreader(self.encoding)(s)
+ self.assertRaises(UnicodeError, f.read)
+
+ def test_partial(self):
+ self.check_partial(
+ u"\x00\xff\u0100\uffff",
+ [
+ u"", # first byte of BOM read
+ u"", # second byte of BOM read
+ u"", # third byte of BOM read
+ u"", # fourth byte of BOM read => byteorder known
+ u"",
+ u"",
+ u"",
+ u"\x00",
+ u"\x00",
+ u"\x00",
+ u"\x00",
+ u"\x00\xff",
+ u"\x00\xff",
+ u"\x00\xff",
+ u"\x00\xff",
+ u"\x00\xff\u0100",
+ u"\x00\xff\u0100",
+ u"\x00\xff\u0100",
+ u"\x00\xff\u0100",
+ u"\x00\xff\u0100\uffff",
+ ]
+ )
+
+ def test_errors(self):
+ self.assertRaises(UnicodeDecodeError, codecs.utf_32_decode,
+ "\xff", "strict", True)
+
+class UTF32LETest(ReadTest):
+ encoding = "utf-32-le"
+
+ def test_partial(self):
+ self.check_partial(
+ u"\x00\xff\u0100\uffff",
+ [
+ u"",
+ u"",
+ u"",
+ u"\x00",
+ u"\x00",
+ u"\x00",
+ u"\x00",
+ u"\x00\xff",
+ u"\x00\xff",
+ u"\x00\xff",
+ u"\x00\xff",
+ u"\x00\xff\u0100",
+ u"\x00\xff\u0100",
+ u"\x00\xff\u0100",
+ u"\x00\xff\u0100",
+ u"\x00\xff\u0100\uffff",
+ ]
+ )
+
+ def test_simple(self):
+ self.assertEqual(u"\U00010203".encode(self.encoding), "\x03\x02\x01\x00")
+
+ def test_errors(self):
+ self.assertRaises(UnicodeDecodeError, codecs.utf_32_le_decode,
+ "\xff", "strict", True)
+
+class UTF32BETest(ReadTest):
+ encoding = "utf-32-be"
+
+ def test_partial(self):
+ self.check_partial(
+ u"\x00\xff\u0100\uffff",
+ [
+ u"",
+ u"",
+ u"",
+ u"\x00",
+ u"\x00",
+ u"\x00",
+ u"\x00",
+ u"\x00\xff",
+ u"\x00\xff",
+ u"\x00\xff",
+ u"\x00\xff",
+ u"\x00\xff\u0100",
+ u"\x00\xff\u0100",
+ u"\x00\xff\u0100",
+ u"\x00\xff\u0100",
+ u"\x00\xff\u0100\uffff",
+ ]
+ )
+
+ def test_simple(self):
+ self.assertEqual(u"\U00010203".encode(self.encoding), "\x00\x01\x02\x03")
+
+ def test_errors(self):
+ self.assertRaises(UnicodeDecodeError, codecs.utf_32_be_decode,
+ "\xff", "strict", True)
+
class UTF16Test(ReadTest):
encoding = "utf-16"
@@ -1278,6 +1409,9 @@ class WithStmtTest(unittest.TestCase):
def test_main():
test_support.run_unittest(
+ UTF32Test,
+ UTF32LETest,
+ UTF32BETest,
UTF16Test,
UTF16LETest,
UTF16BETest,