summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Benjamin Peterson <benjamin@python.org> 2012-12-02 16:20:28 (GMT)
committer: Benjamin Peterson <benjamin@python.org> 2012-12-02 16:20:28 (GMT)
commit: 47a00f3d1a1fe3774d92e5d9263ef3bff79dd4ac (patch)
tree: 4aaec1d69dd4b4a28e615ee90ef081264d100c4b
parent: aff472394cf29c7712854043fbac6a032195fe77 (diff)
download: cpython-47a00f3d1a1fe3774d92e5d9263ef3bff79dd4ac.zip
          cpython-47a00f3d1a1fe3774d92e5d9263ef3bff79dd4ac.tar.gz
          cpython-47a00f3d1a1fe3774d92e5d9263ef3bff79dd4ac.tar.bz2
support encoding error handlers that return bytes (closes #16585)
-rw-r--r-- Lib/test/test_multibytecodec.py 4
-rw-r--r-- Misc/NEWS 3
-rw-r--r-- Modules/cjkcodecs/multibytecodec.c 8
3 files changed, 13 insertions, 2 deletions
diff --git a/Lib/test/test_multibytecodec.py b/Lib/test/test_multibytecodec.py
index feb7bd5..7b47cb5 100644
--- a/Lib/test/test_multibytecodec.py
+++ b/Lib/test/test_multibytecodec.py
@@ -45,6 +45,10 @@ class Test_MultibyteCodec(unittest.TestCase):
self.assertRaises(IndexError, dec,
b'apple\x92ham\x93spam', 'test.cjktest')
+ def test_errorhandler_returns_bytes(self):
+ enc = "\u30fb\udc80".encode('gb18030', 'surrogateescape')
+ self.assertEqual(enc, b'\x819\xa79\x80')
+
def test_codingspec(self):
try:
for enc in ALL_CJKENCODINGS:
diff --git a/Misc/NEWS b/Misc/NEWS
index bdfe161..6eff12c 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -98,6 +98,9 @@ Core and Builtins
Library
-------
+- Issue #16585: Make CJK encoders support error handlers that return bytes per
+ PEP 383.
+
- Issue #10182: The re module doesn't truncate indices to 32 bits anymore.
Patch by Serhiy Storchaka.
diff --git a/Modules/cjkcodecs/multibytecodec.c b/Modules/cjkcodecs/multibytecodec.c
index 40717d8..c032cdb 100644
--- a/Modules/cjkcodecs/multibytecodec.c
+++ b/Modules/cjkcodecs/multibytecodec.c
@@ -316,7 +316,7 @@ multibytecodec_encerror(MultibyteCodec *codec,
goto errorexit;
if (!PyTuple_Check(retobj) || PyTuple_GET_SIZE(retobj) != 2 ||
- !PyUnicode_Check((tobj = PyTuple_GET_ITEM(retobj, 0))) ||
+ (!PyUnicode_Check((tobj = PyTuple_GET_ITEM(retobj, 0))) && !PyBytes_Check(tobj)) ||
!PyLong_Check(PyTuple_GET_ITEM(retobj, 1))) {
PyErr_SetString(PyExc_TypeError,
"encoding error handler must return "
@@ -324,7 +324,7 @@ multibytecodec_encerror(MultibyteCodec *codec,
goto errorexit;
}
- {
+ if (PyUnicode_Check(tobj)) {
const Py_UNICODE *uraw = PyUnicode_AS_UNICODE(tobj);
retstr = multibytecodec_encode(codec, state, &uraw,
@@ -333,6 +333,10 @@ multibytecodec_encerror(MultibyteCodec *codec,
if (retstr == NULL)
goto errorexit;
}
+ else {
+ Py_INCREF(tobj);
+ retstr = tobj;
+ }
assert(PyBytes_Check(retstr));
retstrsize = PyBytes_GET_SIZE(retstr);