diff options
-rw-r--r-- | Lib/test/test_multibytecodec.py | 4 | ||||
-rw-r--r-- | Misc/NEWS | 3 | ||||
-rw-r--r-- | Modules/cjkcodecs/multibytecodec.c | 8 |
3 files changed, 13 insertions, 2 deletions
diff --git a/Lib/test/test_multibytecodec.py b/Lib/test/test_multibytecodec.py index feb7bd5..7b47cb5 100644 --- a/Lib/test/test_multibytecodec.py +++ b/Lib/test/test_multibytecodec.py @@ -45,6 +45,10 @@ class Test_MultibyteCodec(unittest.TestCase): self.assertRaises(IndexError, dec, b'apple\x92ham\x93spam', 'test.cjktest') + def test_errorhandler_returns_bytes(self): + enc = "\u30fb\udc80".encode('gb18030', 'surrogateescape') + self.assertEqual(enc, b'\x819\xa79\x80') + def test_codingspec(self): try: for enc in ALL_CJKENCODINGS: @@ -98,6 +98,9 @@ Core and Builtins Library ------- +- Issue #16585: Make CJK encoders support error handlers that return bytes per + PEP 383. + - Issue #10182: The re module doesn't truncate indices to 32 bits anymore. Patch by Serhiy Storchaka. diff --git a/Modules/cjkcodecs/multibytecodec.c b/Modules/cjkcodecs/multibytecodec.c index 40717d8..c032cdb 100644 --- a/Modules/cjkcodecs/multibytecodec.c +++ b/Modules/cjkcodecs/multibytecodec.c @@ -316,7 +316,7 @@ multibytecodec_encerror(MultibyteCodec *codec, goto errorexit; if (!PyTuple_Check(retobj) || PyTuple_GET_SIZE(retobj) != 2 || - !PyUnicode_Check((tobj = PyTuple_GET_ITEM(retobj, 0))) || + (!PyUnicode_Check((tobj = PyTuple_GET_ITEM(retobj, 0))) && !PyBytes_Check(tobj)) || !PyLong_Check(PyTuple_GET_ITEM(retobj, 1))) { PyErr_SetString(PyExc_TypeError, "encoding error handler must return " @@ -324,7 +324,7 @@ multibytecodec_encerror(MultibyteCodec *codec, goto errorexit; } - { + if (PyUnicode_Check(tobj)) { const Py_UNICODE *uraw = PyUnicode_AS_UNICODE(tobj); retstr = multibytecodec_encode(codec, state, &uraw, @@ -333,6 +333,10 @@ multibytecodec_encerror(MultibyteCodec *codec, if (retstr == NULL) goto errorexit; } + else { + Py_INCREF(tobj); + retstr = tobj; + } assert(PyBytes_Check(retstr)); retstrsize = PyBytes_GET_SIZE(retstr); |