From ee7889dec321654d1c50448de7987e1841dd3ad5 Mon Sep 17 00:00:00 2001 From: Nadeem Vawda Date: Sun, 11 Nov 2012 02:14:36 +0100 Subject: Issue #16350, part 2: Set unused_data (and unconsumed_tail) correctly in decompressobj().flush(). Additionally, fix a bug where a MemoryError in allocating a bytes object could leave the decompressor object in an invalid state (with its unconsumed_tail member being NULL). Patch by Serhiy Storchaka. --- Lib/test/test_zlib.py | 30 ++++++++++----- Misc/NEWS | 6 ++- Modules/zlibmodule.c | 100 +++++++++++++++++++++++++++----------------------- 3 files changed, 80 insertions(+), 56 deletions(-) diff --git a/Lib/test/test_zlib.py b/Lib/test/test_zlib.py index c17b4d0..6d4b2c3 100644 --- a/Lib/test/test_zlib.py +++ b/Lib/test/test_zlib.py @@ -437,15 +437,27 @@ class CompressObjectTestCase(BaseCompressTestCase, unittest.TestCase): def test_decompress_unused_data(self): # Repeated calls to decompress() after EOF should accumulate data in # dco.unused_data, instead of just storing the arg to the last call. - x = zlib.compress(HAMLET_SCENE) + HAMLET_SCENE - for step in 1, 2, 100: - dco = zlib.decompressobj() - data = b''.join(dco.decompress(x[i : i + step]) - for i in range(0, len(x), step)) - data += dco.flush() - - self.assertEqual(data, HAMLET_SCENE) - self.assertEqual(dco.unused_data, HAMLET_SCENE) + source = b'abcdefghijklmnopqrstuvwxyz' + remainder = b'0123456789' + y = zlib.compress(source) + x = y + remainder + for maxlen in 0, 1000: + for step in 1, 2, len(y), len(x): + dco = zlib.decompressobj() + data = b'' + for i in range(0, len(x), step): + if i < len(y): + self.assertEqual(dco.unused_data, b'') + if maxlen == 0: + data += dco.decompress(x[i : i + step]) + self.assertEqual(dco.unconsumed_tail, b'') + else: + data += dco.decompress( + dco.unconsumed_tail + x[i : i + step], maxlen) + data += dco.flush() + self.assertEqual(data, source) + self.assertEqual(dco.unconsumed_tail, b'') + self.assertEqual(dco.unused_data, remainder) if hasattr(zlib.compressobj(), "copy"): def test_compresscopy(self): diff --git a/Misc/NEWS b/Misc/NEWS index b035e24..01517e1 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -162,9 +162,11 @@ Library - Issue #16357: fix calling accept() on a SSLSocket created through SSLContext.wrap_socket(). Original patch by Jeff McNeil. -- Issue #16350: zlib.Decompress.decompress() now accumulates data from +- Issue #16350: zlib.decompressobj().decompress() now accumulates data from successive calls after EOF in unused_data, instead of only saving the argument - to the last call. Patch by Serhiy Storchaka. + to the last call. decompressobj().flush() now correctly sets unused_data and + unconsumed_tail. A bug in the handling of MemoryError when setting the + unconsumed_tail attribute has also been fixed. Patch by Serhiy Storchaka. - Issue #12759: sre_parse now raises a proper error when the name of the group is missing. Initial patch by Serhiy Storchaka. diff --git a/Modules/zlibmodule.c b/Modules/zlibmodule.c index 7734ba6..6d4aa3a 100644 --- a/Modules/zlibmodule.c +++ b/Modules/zlibmodule.c @@ -499,6 +499,49 @@ PyZlib_objcompress(compobject *self, PyObject *args) return RetVal; } +/* Helper for objdecompress() and unflush(). Saves any unconsumed input data in + self->unused_data or self->unconsumed_tail, as appropriate. */ +static int +save_unconsumed_input(compobject *self, int err) +{ + if (err == Z_STREAM_END) { + /* The end of the compressed data has been reached. Store the leftover + input data in self->unused_data. */ + if (self->zst.avail_in > 0) { + Py_ssize_t old_size = PyBytes_GET_SIZE(self->unused_data); + Py_ssize_t new_size; + PyObject *new_data; + if (self->zst.avail_in > PY_SSIZE_T_MAX - old_size) { + PyErr_NoMemory(); + return -1; + } + new_size = old_size + self->zst.avail_in; + new_data = PyBytes_FromStringAndSize(NULL, new_size); + if (new_data == NULL) + return -1; + Py_MEMCPY(PyBytes_AS_STRING(new_data), + PyBytes_AS_STRING(self->unused_data), old_size); + Py_MEMCPY(PyBytes_AS_STRING(new_data) + old_size, + self->zst.next_in, self->zst.avail_in); + Py_DECREF(self->unused_data); + self->unused_data = new_data; + self->zst.avail_in = 0; + } + } + if (self->zst.avail_in > 0 || PyBytes_GET_SIZE(self->unconsumed_tail)) { + /* This code handles two distinct cases: + 1. Output limit was reached. Save leftover input in unconsumed_tail. + 2. All input data was consumed. Clear unconsumed_tail. */ + PyObject *new_data = PyBytes_FromStringAndSize( + (char *)self->zst.next_in, self->zst.avail_in); + if (new_data == NULL) + return -1; + Py_DECREF(self->unconsumed_tail); + self->unconsumed_tail = new_data; + } + return 0; +} + PyDoc_STRVAR(decomp_decompress__doc__, "decompress(data, max_length) -- Return a string containing the decompressed\n" "version of the data.\n" @@ -585,60 +628,20 @@ PyZlib_objdecompress(compobject *self, PyObject *args) Py_END_ALLOW_THREADS } - if(max_length) { - /* Not all of the compressed data could be accommodated in a buffer of - the specified size. Return the unconsumed tail in an attribute. */ - Py_DECREF(self->unconsumed_tail); - self->unconsumed_tail = PyBytes_FromStringAndSize((char *)self->zst.next_in, - self->zst.avail_in); - } - else if (PyBytes_GET_SIZE(self->unconsumed_tail) > 0) { - /* All of the compressed data was consumed. Clear unconsumed_tail. */ - Py_DECREF(self->unconsumed_tail); - self->unconsumed_tail = PyBytes_FromStringAndSize("", 0); - } - if (self->unconsumed_tail == NULL) { + if (save_unconsumed_input(self, err) < 0) { Py_DECREF(RetVal); RetVal = NULL; goto error; } - /* The end of the compressed data has been reached, so set the - unused_data attribute to a string containing the remainder of the - data in the string. Note that this is also a logical place to call - inflateEnd, but the old behaviour of only calling it on flush() is - preserved. - */ - if (err == Z_STREAM_END) { - if (self->zst.avail_in > 0) { - /* Append the leftover data to the existing value of unused_data. */ - Py_ssize_t old_size = PyBytes_GET_SIZE(self->unused_data); - Py_ssize_t new_size = old_size + self->zst.avail_in; - PyObject *new_data; - if (new_size <= old_size) { /* Check for overflow. */ - PyErr_NoMemory(); - Py_DECREF(RetVal); - RetVal = NULL; - goto error; - } - new_data = PyBytes_FromStringAndSize(NULL, new_size); - if (new_data == NULL) { - Py_DECREF(RetVal); - RetVal = NULL; - goto error; - } - Py_MEMCPY(PyBytes_AS_STRING(new_data), - PyBytes_AS_STRING(self->unused_data), old_size); - Py_MEMCPY(PyBytes_AS_STRING(new_data) + old_size, - self->zst.next_in, self->zst.avail_in); - Py_DECREF(self->unused_data); - self->unused_data = new_data; - } + /* This is the logical place to call inflateEnd, but the old behaviour of + only calling it on flush() is preserved. */ + + if (err != Z_STREAM_END && err != Z_OK && err != Z_BUF_ERROR) { /* We will only get Z_BUF_ERROR if the output buffer was full but there wasn't more output when we tried again, so it is not an error condition. */ - } else if (err != Z_OK && err != Z_BUF_ERROR) { zlib_error(self->zst, err, "while decompressing"); Py_DECREF(RetVal); RetVal = NULL; @@ -904,6 +907,12 @@ PyZlib_unflush(compobject *self, PyObject *args) Py_END_ALLOW_THREADS } + if (save_unconsumed_input(self, err) < 0) { + Py_DECREF(retval); + retval = NULL; + goto error; + } + /* If flushmode is Z_FINISH, we also have to call deflateEnd() to free various data structures. Note we should only get Z_STREAM_END when flushmode is Z_FINISH */ @@ -917,6 +926,7 @@ PyZlib_unflush(compobject *self, PyObject *args) goto error; } } + if (_PyBytes_Resize(&retval, self->zst.total_out - start_total_out) < 0) { Py_DECREF(retval); retval = NULL; -- cgit v0.12