From f4cfc8f6bb47e77ca954b58b436f2157b5b6f530 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Thu, 17 May 2007 21:52:23 +0000 Subject: Make test_codecs work. The CJK codecs now use bytes instead of str8 for their encoded input/output. --- Lib/test/test_codecs.py | 10 ++++---- Modules/cjkcodecs/multibytecodec.c | 51 ++++++++++++++++++++------------------ 2 files changed, 32 insertions(+), 29 deletions(-) diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py index dba9033..d28b357 100644 --- a/Lib/test/test_codecs.py +++ b/Lib/test/test_codecs.py @@ -492,7 +492,7 @@ class EscapeDecodeTest(unittest.TestCase): class RecodingTest(unittest.TestCase): def test_recoding(self): - f = io.StringIO() + f = io.BytesIO() f2 = codecs.EncodedFile(f, "unicode_internal", "utf-8") f2.write("a") f2.close() @@ -1205,7 +1205,7 @@ class BasicUnicodeTest(unittest.TestCase, MixInCheckStateHandling): decodedresult = "" for c in encodedresult: decodedresult += decoder.decode(bytes([c])) - decodedresult += decoder.decode("", True) + decodedresult += decoder.decode(b"", True) self.assertEqual(decodedresult, s, "%r != %r (encoding=%r)" % (decodedresult, s, encoding)) # check C API @@ -1217,7 +1217,7 @@ class BasicUnicodeTest(unittest.TestCase, MixInCheckStateHandling): decodedresult = "" for c in encodedresult: decodedresult += cdecoder.decode(bytes([c])) - decodedresult += cdecoder.decode("", True) + decodedresult += cdecoder.decode(b"", True) self.assertEqual(decodedresult, s, "%r != %r (encoding=%r)" % (decodedresult, s, encoding)) # check iterencode()/iterdecode() @@ -1258,8 +1258,8 @@ class BasicUnicodeTest(unittest.TestCase, MixInCheckStateHandling): for t in range(5): # Test that calling seek resets the internal codec state and buffers reader.seek(0, 0) - line = reader.readline() - self.assertEqual(s[:len(line)], line) + data = reader.read() + self.assertEqual(s, data) def test_bad_decode_args(self): for encoding in all_unicode_encodings: diff --git a/Modules/cjkcodecs/multibytecodec.c b/Modules/cjkcodecs/multibytecodec.c index 42178ff..b26d38e 100644 --- a/Modules/cjkcodecs/multibytecodec.c +++ b/Modules/cjkcodecs/multibytecodec.c @@ -166,15 +166,15 @@ expand_encodebuffer(MultibyteEncodeBuffer *buf, Py_ssize_t esize) Py_ssize_t orgpos, orgsize; orgpos = (Py_ssize_t)((char *)buf->outbuf - - PyString_AS_STRING(buf->outobj)); - orgsize = PyString_GET_SIZE(buf->outobj); - if (_PyString_Resize(&buf->outobj, orgsize + ( + PyBytes_AS_STRING(buf->outobj)); + orgsize = PyBytes_GET_SIZE(buf->outobj); + if (PyBytes_Resize(buf->outobj, orgsize + ( esize < (orgsize >> 1) ? (orgsize >> 1) | 1 : esize)) == -1) return -1; - buf->outbuf = (unsigned char *)PyString_AS_STRING(buf->outobj) +orgpos; - buf->outbuf_end = (unsigned char *)PyString_AS_STRING(buf->outobj) - + PyString_GET_SIZE(buf->outobj); + buf->outbuf = (unsigned char *)PyBytes_AS_STRING(buf->outobj) +orgpos; + buf->outbuf_end = (unsigned char *)PyBytes_AS_STRING(buf->outobj) + + PyBytes_GET_SIZE(buf->outobj); return 0; } @@ -322,6 +322,7 @@ multibytecodec_encerror(MultibyteCodec *codec, goto errorexit; } + assert(PyString_Check(retstr)); retstrsize = PyString_GET_SIZE(retstr); REQUIRE_ENCODEBUFFER(buf, retstrsize); @@ -468,16 +469,16 @@ multibytecodec_encode(MultibyteCodec *codec, Py_ssize_t finalsize, r = 0; if (datalen == 0) - return PyString_FromString(""); + return PyBytes_FromStringAndSize(NULL, 0); buf.excobj = NULL; buf.inbuf = buf.inbuf_top = *data; buf.inbuf_end = buf.inbuf_top + datalen; - buf.outobj = PyString_FromStringAndSize(NULL, datalen * 2 + 16); + buf.outobj = PyBytes_FromStringAndSize(NULL, datalen * 2 + 16); if (buf.outobj == NULL) goto errorexit; - buf.outbuf = (unsigned char *)PyString_AS_STRING(buf.outobj); - buf.outbuf_end = buf.outbuf + PyString_GET_SIZE(buf.outobj); + buf.outbuf = (unsigned char *)PyBytes_AS_STRING(buf.outobj); + buf.outbuf_end = buf.outbuf + PyBytes_GET_SIZE(buf.outobj); while (buf.inbuf < buf.inbuf_end) { Py_ssize_t inleft, outleft; @@ -512,10 +513,10 @@ multibytecodec_encode(MultibyteCodec *codec, } finalsize = (Py_ssize_t)((char *)buf.outbuf - - PyString_AS_STRING(buf.outobj)); + PyBytes_AS_STRING(buf.outobj)); - if (finalsize != PyString_GET_SIZE(buf.outobj)) - if (_PyString_Resize(&buf.outobj, finalsize) == -1) + if (finalsize != PyBytes_GET_SIZE(buf.outobj)) + if (PyBytes_Resize(buf.outobj, finalsize) == -1) goto errorexit; Py_XDECREF(buf.excobj); @@ -1223,10 +1224,11 @@ mbstreamreader_iread(MultibyteStreamReaderObject *self, if (cres == NULL) goto errorexit; - if (!PyString_Check(cres)) { - PyErr_SetString(PyExc_TypeError, - "stream function returned a " - "non-string object"); + if (!PyBytes_Check(cres)) { + PyErr_Format(PyExc_TypeError, + "stream function returned a " + "non-string object (%.100s)", + cres->ob_type->tp_name); goto errorexit; } @@ -1234,22 +1236,22 @@ mbstreamreader_iread(MultibyteStreamReaderObject *self, PyObject *ctr; char *ctrdata; - rsize = PyString_GET_SIZE(cres) + self->pendingsize; - ctr = PyString_FromStringAndSize(NULL, rsize); + rsize = PyBytes_GET_SIZE(cres) + self->pendingsize; + ctr = PyBytes_FromStringAndSize(NULL, rsize); if (ctr == NULL) goto errorexit; - ctrdata = PyString_AS_STRING(ctr); + ctrdata = PyBytes_AS_STRING(ctr); memcpy(ctrdata, self->pending, self->pendingsize); memcpy(ctrdata + self->pendingsize, - PyString_AS_STRING(cres), - PyString_GET_SIZE(cres)); + PyBytes_AS_STRING(cres), + PyBytes_GET_SIZE(cres)); Py_DECREF(cres); cres = ctr; self->pendingsize = 0; } - rsize = PyString_GET_SIZE(cres); - if (decoder_prepare_buffer(&buf, PyString_AS_STRING(cres), + rsize = PyBytes_GET_SIZE(cres); + if (decoder_prepare_buffer(&buf, PyBytes_AS_STRING(cres), rsize) != 0) goto errorexit; @@ -1594,6 +1596,7 @@ mbstreamwriter_reset(MultibyteStreamWriterObject *self) if (pwrt == NULL) return NULL; + assert(PyString_Check(pwrt)); if (PyString_Size(pwrt) > 0) { PyObject *wr; wr = PyObject_CallMethod(self->stream, "write", "O", pwrt); -- cgit v0.12