summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Victor Stinner <victor.stinner@haypocalc.com> 2011-05-24 20:17:55 (GMT)
committer Victor Stinner <victor.stinner@haypocalc.com> 2011-05-24 20:17:55 (GMT)
commit 6bcbef7da0127272aa97cdd43ec529bfe92c3251 (patch)
tree 6649bf8138bd5830b8ac02cb9a9fdac41779b955
parent f8473933083cd045bdb0bf0dda8516e268846bd5 (diff)
download cpython-6bcbef7da0127272aa97cdd43ec529bfe92c3251.zip
cpython-6bcbef7da0127272aa97cdd43ec529bfe92c3251.tar.gz
cpython-6bcbef7da0127272aa97cdd43ec529bfe92c3251.tar.bz2
Issue #12100: Don't reset incremental encoders of CJK codecs at each call to
their encode() method anymore, but continue to call the reset() method if the final argument is True.
-rw-r--r-- Lib/test/test_multibytecodec.py 30
-rw-r--r-- Misc/NEWS 4
-rw-r--r-- Modules/cjkcodecs/multibytecodec.c 8
3 files changed, 38 insertions, 4 deletions
diff --git a/Lib/test/test_multibytecodec.py b/Lib/test/test_multibytecodec.py
index f3c8c61..069d090 100644
--- a/Lib/test/test_multibytecodec.py
+++ b/Lib/test/test_multibytecodec.py
@@ -257,6 +257,36 @@ class Test_ISO2022(unittest.TestCase):
# Any ISO 2022 codec will cause the segfault
myunichr(x).encode('iso_2022_jp', 'ignore')
+class TestStateful(unittest.TestCase):
+ text = '\u4E16\u4E16'
+ encoding = 'iso-2022-jp'
+ expected = b'\x1b$B@$@$'
+ expected_reset = b'\x1b$B@$@$\x1b(B'
+
+ def test_encode(self):
+ self.assertEqual(self.text.encode(self.encoding), self.expected_reset)
+
+ def test_incrementalencoder(self):
+ encoder = codecs.getincrementalencoder(self.encoding)()
+ output = b''.join(
+ encoder.encode(char)
+ for char in self.text)
+ self.assertEqual(output, self.expected)
+
+ def test_incrementalencoder_final(self):
+ encoder = codecs.getincrementalencoder(self.encoding)()
+ last_index = len(self.text) - 1
+ output = b''.join(
+ encoder.encode(char, index == last_index)
+ for index, char in enumerate(self.text))
+ self.assertEqual(output, self.expected_reset)
+
+class TestHZStateful(TestStateful):
+ text = '\u804a\u804a'
+ encoding = 'hz'
+ expected = b'~{ADAD'
+ expected_reset = b'~{ADAD~}'
+
def test_main():
support.run_unittest(__name__)
diff --git a/Misc/NEWS b/Misc/NEWS
index 17f933a..7862e21 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -75,6 +75,10 @@ Core and Builtins
Library
-------
+- Issue #12100: Don't reset incremental encoders of CJK codecs at each call to
+ their encode() method anymore, but continue to call the reset() method if the
+ final argument is True.
+
- Issue #5715: In socketserver, close the server socket in the child process.
- Issue #12124: zipimport doesn't keep a reference to zlib.decompress() anymore
diff --git a/Modules/cjkcodecs/multibytecodec.c b/Modules/cjkcodecs/multibytecodec.c
index af7ea5b..7b04f020 100644
--- a/Modules/cjkcodecs/multibytecodec.c
+++ b/Modules/cjkcodecs/multibytecodec.c
@@ -479,7 +479,7 @@ multibytecodec_encode(MultibyteCodec *codec,
MultibyteEncodeBuffer buf;
Py_ssize_t finalsize, r = 0;
- if (datalen == 0)
+ if (datalen == 0 && !(flags & MBENC_RESET))
return PyBytes_FromStringAndSize(NULL, 0);
buf.excobj = NULL;
@@ -514,7 +514,7 @@ multibytecodec_encode(MultibyteCodec *codec,
break;
}
- if (codec->encreset != NULL)
+ if (codec->encreset != NULL && (flags & MBENC_RESET))
for (;;) {
Py_ssize_t outleft;
@@ -784,8 +784,8 @@ encoder_encode_stateful(MultibyteStatefulEncoderContext *ctx,
inbuf_end = inbuf + datalen;
r = multibytecodec_encode(ctx->codec, &ctx->state,
- (const Py_UNICODE **)&inbuf,
- datalen, ctx->errors, final ? MBENC_FLUSH : 0);
+ (const Py_UNICODE **)&inbuf, datalen,
+ ctx->errors, final ? MBENC_FLUSH | MBENC_RESET : 0);
if (r == NULL) {
/* recover the original pending buffer */
if (origpending > 0)