diff options
Diffstat (limited to 'Lib/test')
| -rw-r--r-- | Lib/test/test_codeccallbacks.py | 6 |
| -rw-r--r-- | Lib/test/test_codecs.py | 134 |
2 files changed, 138 insertions, 2 deletions
| diff --git a/Lib/test/test_codeccallbacks.py b/Lib/test/test_codeccallbacks.py index 159c86d..656551d 100644 --- a/Lib/test/test_codeccallbacks.py +++ b/Lib/test/test_codeccallbacks.py @@ -285,7 +285,8 @@ class CodecCallbackTest(unittest.TestCase):      def test_longstrings(self):          # test long strings to check for memory overflow problems -        errors = [ "strict", "ignore", "replace", "xmlcharrefreplace", "backslashreplace"] +        errors = [ "strict", "ignore", "replace", "xmlcharrefreplace", +                   "backslashreplace"]          # register the handlers under different names,          # to prevent the codec from recognizing the name          for err in errors: @@ -293,7 +294,8 @@ class CodecCallbackTest(unittest.TestCase):          l = 1000          errors += [ "test." + err for err in errors ]          for uni in [ s*l for s in (u"x", u"\u3042", u"a\xe4") ]: -            for enc in ("ascii", "latin-1", "iso-8859-1", "iso-8859-15", "utf-8", "utf-7", "utf-16"): +            for enc in ("ascii", "latin-1", "iso-8859-1", "iso-8859-15", +                        "utf-8", "utf-7", "utf-16", "utf-32"):                  for err in errors:                      try:                          uni.encode(enc, err) diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py index 0389623..7c5eb57 100644 --- a/Lib/test/test_codecs.py +++ b/Lib/test/test_codecs.py @@ -244,6 +244,137 @@ class ReadTest(unittest.TestCase):          self.assertEqual(reader.readline(), s5)          self.assertEqual(reader.readline(), u"") +class UTF32Test(ReadTest): +    encoding = "utf-32" + +    spamle = ('\xff\xfe\x00\x00' +              's\x00\x00\x00p\x00\x00\x00a\x00\x00\x00m\x00\x00\x00' +              's\x00\x00\x00p\x00\x00\x00a\x00\x00\x00m\x00\x00\x00') +    spambe = ('\x00\x00\xfe\xff' +              '\x00\x00\x00s\x00\x00\x00p\x00\x00\x00a\x00\x00\x00m' +              '\x00\x00\x00s\x00\x00\x00p\x00\x00\x00a\x00\x00\x00m') + +    def test_only_one_bom(self): 
+        _,_,reader,writer = codecs.lookup(self.encoding) +        # encode some stream +        s = StringIO.StringIO() +        f = writer(s) +        f.write(u"spam") +        f.write(u"spam") +        d = s.getvalue() +        # check whether there is exactly one BOM in it +        self.assert_(d == self.spamle or d == self.spambe) +        # try to read it back +        s = StringIO.StringIO(d) +        f = reader(s) +        self.assertEquals(f.read(), u"spamspam") + +    def test_badbom(self): +        s = StringIO.StringIO(4*"\xff") +        f = codecs.getreader(self.encoding)(s) +        self.assertRaises(UnicodeError, f.read) + +        s = StringIO.StringIO(8*"\xff") +        f = codecs.getreader(self.encoding)(s) +        self.assertRaises(UnicodeError, f.read) + +    def test_partial(self): +        self.check_partial( +            u"\x00\xff\u0100\uffff", +            [ +                u"", # first byte of BOM read +                u"", # second byte of BOM read +                u"", # third byte of BOM read +                u"", # fourth byte of BOM read => byteorder known +                u"", +                u"", +                u"", +                u"\x00", +                u"\x00", +                u"\x00", +                u"\x00", +                u"\x00\xff", +                u"\x00\xff", +                u"\x00\xff", +                u"\x00\xff", +                u"\x00\xff\u0100", +                u"\x00\xff\u0100", +                u"\x00\xff\u0100", +                u"\x00\xff\u0100", +                u"\x00\xff\u0100\uffff", +            ] +        ) + +    def test_errors(self): +        self.assertRaises(UnicodeDecodeError, codecs.utf_32_decode, +                          "\xff", "strict", True) + +class UTF32LETest(ReadTest): +    encoding = "utf-32-le" + +    def test_partial(self): +        self.check_partial( +            u"\x00\xff\u0100\uffff", +            [ +                u"", +                u"", +                u"", +  
              u"\x00", +                u"\x00", +                u"\x00", +                u"\x00", +                u"\x00\xff", +                u"\x00\xff", +                u"\x00\xff", +                u"\x00\xff", +                u"\x00\xff\u0100", +                u"\x00\xff\u0100", +                u"\x00\xff\u0100", +                u"\x00\xff\u0100", +                u"\x00\xff\u0100\uffff", +            ] +        ) + +    def test_simple(self): +        self.assertEqual(u"\U00010203".encode(self.encoding), "\x03\x02\x01\x00") + +    def test_errors(self): +        self.assertRaises(UnicodeDecodeError, codecs.utf_32_le_decode, +                          "\xff", "strict", True) + +class UTF32BETest(ReadTest): +    encoding = "utf-32-be" + +    def test_partial(self): +        self.check_partial( +            u"\x00\xff\u0100\uffff", +            [ +                u"", +                u"", +                u"", +                u"\x00", +                u"\x00", +                u"\x00", +                u"\x00", +                u"\x00\xff", +                u"\x00\xff", +                u"\x00\xff", +                u"\x00\xff", +                u"\x00\xff\u0100", +                u"\x00\xff\u0100", +                u"\x00\xff\u0100", +                u"\x00\xff\u0100", +                u"\x00\xff\u0100\uffff", +            ] +        ) + +    def test_simple(self): +        self.assertEqual(u"\U00010203".encode(self.encoding), "\x00\x01\x02\x03") + +    def test_errors(self): +        self.assertRaises(UnicodeDecodeError, codecs.utf_32_be_decode, +                          "\xff", "strict", True) +  class UTF16Test(ReadTest):      encoding = "utf-16" @@ -1278,6 +1409,9 @@ class WithStmtTest(unittest.TestCase):  def test_main():      test_support.run_unittest( +        UTF32Test, +        UTF32LETest, +        UTF32BETest,          UTF16Test,          UTF16LETest,          UTF16BETest, | 
