summary | refs | log | tree | commit | diff | stats
path: root/Lib/test/test_multibytecodec.py
diff options
context:
space:
mode:
Diffstat (limited to 'Lib/test/test_multibytecodec.py')
-rw-r--r-- Lib/test/test_multibytecodec.py 82
1 files changed, 30 insertions, 52 deletions
diff --git a/Lib/test/test_multibytecodec.py b/Lib/test/test_multibytecodec.py
index 6889184..2929f98 100644
--- a/Lib/test/test_multibytecodec.py
+++ b/Lib/test/test_multibytecodec.py
@@ -44,6 +44,13 @@ class Test_MultibyteCodec(unittest.TestCase):
self.assertRaises(IndexError, dec,
b'apple\x92ham\x93spam', 'test.cjktest')
+ def test_errorcallback_custom_ignore(self):
+ # Issue #23215: MemoryError with custom error handlers and multibyte codecs
+ data = 100 * "\udc00"
+ codecs.register_error("test.ignore", codecs.ignore_errors)
+ for enc in ALL_CJKENCODINGS:
+ self.assertEqual(data.encode(enc, "test.ignore"), b'')
+
def test_codingspec(self):
try:
for enc in ALL_CJKENCODINGS:
@@ -80,7 +87,7 @@ class Test_IncrementalEncoder(unittest.TestCase):
self.assertEqual(encoder.reset(), None)
def test_stateful(self):
- # jisx0213 encoder is stateful for a few codepoints. eg)
+ # jisx0213 encoder is stateful for a few code points. eg)
# U+00E6 => A9DC
# U+00E6 U+0300 => ABC4
# U+0300 => ABDC
@@ -175,57 +182,28 @@ class Test_StreamReader(unittest.TestCase):
support.unlink(TESTFN)
class Test_StreamWriter(unittest.TestCase):
- if len('\U00012345') == 2: # UCS2
- def test_gb18030(self):
- s= io.BytesIO()
- c = codecs.getwriter('gb18030')(s)
- c.write('123')
- self.assertEqual(s.getvalue(), b'123')
- c.write('\U00012345')
- self.assertEqual(s.getvalue(), b'123\x907\x959')
- c.write('\U00012345'[0])
- self.assertEqual(s.getvalue(), b'123\x907\x959')
- c.write('\U00012345'[1] + '\U00012345' + '\uac00\u00ac')
- self.assertEqual(s.getvalue(),
- b'123\x907\x959\x907\x959\x907\x959\x827\xcf5\x810\x851')
- c.write('\U00012345'[0])
- self.assertEqual(s.getvalue(),
- b'123\x907\x959\x907\x959\x907\x959\x827\xcf5\x810\x851')
- self.assertRaises(UnicodeError, c.reset)
- self.assertEqual(s.getvalue(),
- b'123\x907\x959\x907\x959\x907\x959\x827\xcf5\x810\x851')
-
- def test_utf_8(self):
- s= io.BytesIO()
- c = codecs.getwriter('utf-8')(s)
- c.write('123')
- self.assertEqual(s.getvalue(), b'123')
- c.write('\U00012345')
- self.assertEqual(s.getvalue(), b'123\xf0\x92\x8d\x85')
-
- # Python utf-8 codec can't buffer surrogate pairs yet.
- if 0:
- c.write('\U00012345'[0])
- self.assertEqual(s.getvalue(), b'123\xf0\x92\x8d\x85')
- c.write('\U00012345'[1] + '\U00012345' + '\uac00\u00ac')
- self.assertEqual(s.getvalue(),
- b'123\xf0\x92\x8d\x85\xf0\x92\x8d\x85\xf0\x92\x8d\x85'
- b'\xea\xb0\x80\xc2\xac')
- c.write('\U00012345'[0])
- self.assertEqual(s.getvalue(),
- b'123\xf0\x92\x8d\x85\xf0\x92\x8d\x85\xf0\x92\x8d\x85'
- b'\xea\xb0\x80\xc2\xac')
- c.reset()
- self.assertEqual(s.getvalue(),
- b'123\xf0\x92\x8d\x85\xf0\x92\x8d\x85\xf0\x92\x8d\x85'
- b'\xea\xb0\x80\xc2\xac\xed\xa0\x88')
- c.write('\U00012345'[1])
- self.assertEqual(s.getvalue(),
- b'123\xf0\x92\x8d\x85\xf0\x92\x8d\x85\xf0\x92\x8d\x85'
- b'\xea\xb0\x80\xc2\xac\xed\xa0\x88\xed\xbd\x85')
-
- else: # UCS4
- pass
+ def test_gb18030(self):
+ s= io.BytesIO()
+ c = codecs.getwriter('gb18030')(s)
+ c.write('123')
+ self.assertEqual(s.getvalue(), b'123')
+ c.write('\U00012345')
+ self.assertEqual(s.getvalue(), b'123\x907\x959')
+ c.write('\uac00\u00ac')
+ self.assertEqual(s.getvalue(),
+ b'123\x907\x959\x827\xcf5\x810\x851')
+
+ def test_utf_8(self):
+ s= io.BytesIO()
+ c = codecs.getwriter('utf-8')(s)
+ c.write('123')
+ self.assertEqual(s.getvalue(), b'123')
+ c.write('\U00012345')
+ self.assertEqual(s.getvalue(), b'123\xf0\x92\x8d\x85')
+ c.write('\uac00\u00ac')
+ self.assertEqual(s.getvalue(),
+ b'123\xf0\x92\x8d\x85'
+ b'\xea\xb0\x80\xc2\xac')
def test_streamwriter_strwrite(self):
s = io.BytesIO()