diff options
Diffstat (limited to 'Lib/test/test_multibytecodec.py')
| -rw-r--r-- | Lib/test/test_multibytecodec.py | 82 |
1 files changed, 30 insertions, 52 deletions
diff --git a/Lib/test/test_multibytecodec.py b/Lib/test/test_multibytecodec.py
index 6889184..2929f98 100644
--- a/Lib/test/test_multibytecodec.py
+++ b/Lib/test/test_multibytecodec.py
@@ -44,6 +44,13 @@ class Test_MultibyteCodec(unittest.TestCase):
         self.assertRaises(IndexError, dec,
                           b'apple\x92ham\x93spam', 'test.cjktest')
 
+    def test_errorcallback_custom_ignore(self):
+        # Issue #23215: MemoryError with custom error handlers and multibyte codecs
+        data = 100 * "\udc00"
+        codecs.register_error("test.ignore", codecs.ignore_errors)
+        for enc in ALL_CJKENCODINGS:
+            self.assertEqual(data.encode(enc, "test.ignore"), b'')
+
     def test_codingspec(self):
         try:
             for enc in ALL_CJKENCODINGS:
@@ -80,7 +87,7 @@ class Test_IncrementalEncoder(unittest.TestCase):
         self.assertEqual(encoder.reset(), None)
 
     def test_stateful(self):
-        # jisx0213 encoder is stateful for a few codepoints. eg)
+        # jisx0213 encoder is stateful for a few code points. eg)
         #   U+00E6 => A9DC
         #   U+00E6 U+0300 => ABC4
         #   U+0300 => ABDC
@@ -175,57 +182,28 @@ class Test_StreamReader(unittest.TestCase):
             support.unlink(TESTFN)
 
 class Test_StreamWriter(unittest.TestCase):
-    if len('\U00012345') == 2: # UCS2
-        def test_gb18030(self):
-            s= io.BytesIO()
-            c = codecs.getwriter('gb18030')(s)
-            c.write('123')
-            self.assertEqual(s.getvalue(), b'123')
-            c.write('\U00012345')
-            self.assertEqual(s.getvalue(), b'123\x907\x959')
-            c.write('\U00012345'[0])
-            self.assertEqual(s.getvalue(), b'123\x907\x959')
-            c.write('\U00012345'[1] + '\U00012345' + '\uac00\u00ac')
-            self.assertEqual(s.getvalue(),
-                    b'123\x907\x959\x907\x959\x907\x959\x827\xcf5\x810\x851')
-            c.write('\U00012345'[0])
-            self.assertEqual(s.getvalue(),
-                    b'123\x907\x959\x907\x959\x907\x959\x827\xcf5\x810\x851')
-            self.assertRaises(UnicodeError, c.reset)
-            self.assertEqual(s.getvalue(),
-                    b'123\x907\x959\x907\x959\x907\x959\x827\xcf5\x810\x851')
-
-        def test_utf_8(self):
-            s= io.BytesIO()
-            c = codecs.getwriter('utf-8')(s)
-            c.write('123')
-            self.assertEqual(s.getvalue(), b'123')
-            c.write('\U00012345')
-            self.assertEqual(s.getvalue(), b'123\xf0\x92\x8d\x85')
-
-            # Python utf-8 codec can't buffer surrogate pairs yet.
-            if 0:
-                c.write('\U00012345'[0])
-                self.assertEqual(s.getvalue(), b'123\xf0\x92\x8d\x85')
-                c.write('\U00012345'[1] + '\U00012345' + '\uac00\u00ac')
-                self.assertEqual(s.getvalue(),
-                    b'123\xf0\x92\x8d\x85\xf0\x92\x8d\x85\xf0\x92\x8d\x85'
-                    b'\xea\xb0\x80\xc2\xac')
-                c.write('\U00012345'[0])
-                self.assertEqual(s.getvalue(),
-                    b'123\xf0\x92\x8d\x85\xf0\x92\x8d\x85\xf0\x92\x8d\x85'
-                    b'\xea\xb0\x80\xc2\xac')
-                c.reset()
-                self.assertEqual(s.getvalue(),
-                    b'123\xf0\x92\x8d\x85\xf0\x92\x8d\x85\xf0\x92\x8d\x85'
-                    b'\xea\xb0\x80\xc2\xac\xed\xa0\x88')
-                c.write('\U00012345'[1])
-                self.assertEqual(s.getvalue(),
-                    b'123\xf0\x92\x8d\x85\xf0\x92\x8d\x85\xf0\x92\x8d\x85'
-                    b'\xea\xb0\x80\xc2\xac\xed\xa0\x88\xed\xbd\x85')
-
-    else: # UCS4
-        pass
+    def test_gb18030(self):
+        s= io.BytesIO()
+        c = codecs.getwriter('gb18030')(s)
+        c.write('123')
+        self.assertEqual(s.getvalue(), b'123')
+        c.write('\U00012345')
+        self.assertEqual(s.getvalue(), b'123\x907\x959')
+        c.write('\uac00\u00ac')
+        self.assertEqual(s.getvalue(),
+                b'123\x907\x959\x827\xcf5\x810\x851')
+
+    def test_utf_8(self):
+        s= io.BytesIO()
+        c = codecs.getwriter('utf-8')(s)
+        c.write('123')
+        self.assertEqual(s.getvalue(), b'123')
+        c.write('\U00012345')
+        self.assertEqual(s.getvalue(), b'123\xf0\x92\x8d\x85')
+        c.write('\uac00\u00ac')
+        self.assertEqual(s.getvalue(),
+            b'123\xf0\x92\x8d\x85'
+            b'\xea\xb0\x80\xc2\xac')
 
     def test_streamwriter_strwrite(self):
         s = io.BytesIO()
