diff options
author | Hye-Shik Chang <hyeshik@gmail.com> | 2004-01-17 14:29:29 (GMT) |
---|---|---|
committer | Hye-Shik Chang <hyeshik@gmail.com> | 2004-01-17 14:29:29 (GMT) |
commit | 3e2a30692085d32ac63f72b35da39158a471fc68 (patch) | |
tree | 4cbe735f61eae87ac56a13ca6bd32113b98bd03d /Lib/test/test_multibytecodec.py | |
parent | cd1f7430cb8f48de970021071d7683054c23b10f (diff) | |
download | cpython-3e2a30692085d32ac63f72b35da39158a471fc68.zip cpython-3e2a30692085d32ac63f72b35da39158a471fc68.tar.gz cpython-3e2a30692085d32ac63f72b35da39158a471fc68.tar.bz2 |
Add CJK codecs support as discussed on python-dev. (SF #873597)
Several style fixes are suggested by Martin v. Loewis and
Marc-Andre Lemburg. Thanks!
Diffstat (limited to 'Lib/test/test_multibytecodec.py')
-rw-r--r-- | Lib/test/test_multibytecodec.py | 79 |
1 files changed, 79 insertions, 0 deletions
#!/usr/bin/env python
#
# test_multibytecodec.py
#   Unit test for multibytecodec itself
#
# $CJKCodecs: test_multibytecodec.py,v 1.5 2004/01/06 02:26:28 perky Exp $

from test import test_support
from test import test_multibytecodec_support
import unittest
import StringIO
import codecs


class Test_StreamWriter(unittest.TestCase):
    """Checks on codec StreamWriters plus a few empty/ASCII string sanity tests."""

    # The surrogate-pair tests below are only defined when a non-BMP
    # character occupies two code units (UCS2 build).  On a UCS4 build
    # nothing is defined here.
    if len(u'\U00012345') == 2:

        def test_gb18030(self):
            """Feed a gb18030 StreamWriter text whose surrogate pair is split
            across write() calls and check the bytes reaching the stream."""
            astral = u'\U00012345'
            lead, trail = astral[0], astral[1]
            astral_gb = '\x907\x959'

            sink = StringIO.StringIO()
            writer_class = codecs.lookup('gb18030')[3]
            writer = writer_class(sink)

            writer.write(u'123')
            expected = '123'
            self.assertEqual(sink.getvalue(), expected)

            writer.write(astral)
            expected += astral_gb
            self.assertEqual(sink.getvalue(), expected)

            # A lone lead surrogate must not produce any output yet.
            writer.write(lead)
            self.assertEqual(sink.getvalue(), expected)

            # The trail surrogate completes the pending character; the rest
            # of the text is encoded in the same call.
            writer.write(trail + astral + u'\uac00\u00ac')
            expected += astral_gb + astral_gb + '\x827\xcf5' + '\x810\x851'
            self.assertEqual(sink.getvalue(), expected)

            writer.write(lead)
            self.assertEqual(sink.getvalue(), expected)

            # reset() with a dangling lead surrogate is expected to raise
            # and to leave the stream contents untouched.
            self.assertRaises(UnicodeError, writer.reset)
            self.assertEqual(sink.getvalue(), expected)

        # standard utf-8 codecs has broken StreamReader
        # NOTE(review): the object exercised below is a StreamWriter --
        # confirm whether "StreamReader" is what was meant.
        if test_multibytecodec_support.__cjkcodecs__:

            def test_utf_8(self):
                """Same split-surrogate scenario as the gb18030 test, using
                the utf-8 StreamWriter; here reset() is expected to succeed."""
                astral = u'\U00012345'
                lead, trail = astral[0], astral[1]
                astral_utf8 = '\xf0\x92\x8d\x85'

                sink = StringIO.StringIO()
                writer_class = codecs.lookup('utf-8')[3]
                writer = writer_class(sink)

                writer.write(u'123')
                expected = '123'
                self.assertEqual(sink.getvalue(), expected)

                writer.write(astral)
                expected += astral_utf8
                self.assertEqual(sink.getvalue(), expected)

                # A lone lead surrogate must not produce any output yet.
                writer.write(lead)
                self.assertEqual(sink.getvalue(), expected)

                writer.write(trail + astral + u'\uac00\u00ac')
                expected += astral_utf8 + astral_utf8 + '\xea\xb0\x80\xc2\xac'
                self.assertEqual(sink.getvalue(), expected)

                writer.write(lead)
                self.assertEqual(sink.getvalue(), expected)

                # After reset() the pending lead surrogate is expected to
                # appear in the output encoded on its own.
                writer.reset()
                expected += '\xed\xa0\x88'
                self.assertEqual(sink.getvalue(), expected)

                # A trail surrogate with no pending lead is likewise
                # expected to be encoded on its own.
                writer.write(trail)
                expected += '\xed\xbd\x85'
                self.assertEqual(sink.getvalue(), expected)

    def test_nullcoding(self):
        """Empty strings must decode and encode to empty strings."""
        empty_bytes = ''
        empty_text = u''
        self.assertEqual(empty_bytes.decode('utf-8'), empty_text)
        self.assertEqual(unicode(empty_bytes, 'utf-8'), empty_text)
        self.assertEqual(empty_text.encode('utf-8'), empty_bytes)

    def test_str_decode(self):
        """Calling encode() on a plain ASCII str must give the same bytes back."""
        ascii_bytes = 'abcd'
        self.assertEqual(ascii_bytes.encode('utf-8'), 'abcd')


def test_main():
    """Collect the Test_StreamWriter cases and run them via test_support."""
    cases = unittest.makeSuite(Test_StreamWriter)
    test_support.run_suite(unittest.TestSuite([cases]))


if __name__ == "__main__":
    test_main()