summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Lib/test/test_zlib.py32
-rw-r--r--Misc/NEWS6
-rw-r--r--Modules/zlibmodule.c98
3 files changed, 80 insertions, 56 deletions
diff --git a/Lib/test/test_zlib.py b/Lib/test/test_zlib.py
index b6a60f4..f5180e0 100644
--- a/Lib/test/test_zlib.py
+++ b/Lib/test/test_zlib.py
@@ -490,16 +490,28 @@ class CompressObjectTestCase(BaseCompressTestCase, unittest.TestCase):
def test_decompress_unused_data(self):
# Repeated calls to decompress() after EOF should accumulate data in
# dco.unused_data, instead of just storing the arg to the last call.
- x = zlib.compress(HAMLET_SCENE) + HAMLET_SCENE
- for step in 1, 2, 100:
- dco = zlib.decompressobj()
- data = b''.join(dco.decompress(x[i : i + step])
- for i in range(0, len(x), step))
- data += dco.flush()
-
- self.assertTrue(dco.eof)
- self.assertEqual(data, HAMLET_SCENE)
- self.assertEqual(dco.unused_data, HAMLET_SCENE)
+ source = b'abcdefghijklmnopqrstuvwxyz'
+ remainder = b'0123456789'
+ y = zlib.compress(source)
+ x = y + remainder
+ for maxlen in 0, 1000:
+ for step in 1, 2, len(y), len(x):
+ dco = zlib.decompressobj()
+ data = b''
+ for i in range(0, len(x), step):
+ if i < len(y):
+ self.assertEqual(dco.unused_data, b'')
+ if maxlen == 0:
+ data += dco.decompress(x[i : i + step])
+ self.assertEqual(dco.unconsumed_tail, b'')
+ else:
+ data += dco.decompress(
+ dco.unconsumed_tail + x[i : i + step], maxlen)
+ data += dco.flush()
+ self.assertTrue(dco.eof)
+ self.assertEqual(data, source)
+ self.assertEqual(dco.unconsumed_tail, b'')
+ self.assertEqual(dco.unused_data, remainder)
if hasattr(zlib.compressobj(), "copy"):
def test_compresscopy(self):
diff --git a/Misc/NEWS b/Misc/NEWS
index 9c0ea03..de6ec4f 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -91,9 +91,11 @@ Library
- Issue #16431: Use the type information when constructing a Decimal subtype
from a Decimal argument.
-- Issue #16350: zlib.Decompress.decompress() now accumulates data from
+- Issue #16350: zlib.decompressobj().decompress() now accumulates data from
successive calls after EOF in unused_data, instead of only saving the argument
- to the last call. Patch by Serhiy Storchaka.
+ to the last call. decompressobj().flush() now correctly sets unused_data and
+ unconsumed_tail. A bug in the handling of MemoryError when setting the
+ unconsumed_tail attribute has also been fixed. Patch by Serhiy Storchaka.
- Issue #12759: sre_parse now raises a proper error when the name of the group
is missing. Initial patch by Serhiy Storchaka.
diff --git a/Modules/zlibmodule.c b/Modules/zlibmodule.c
index 888ef53..9fabb00 100644
--- a/Modules/zlibmodule.c
+++ b/Modules/zlibmodule.c
@@ -561,6 +561,49 @@ PyZlib_objcompress(compobject *self, PyObject *args)
return RetVal;
}
+/* Helper for objdecompress() and unflush(). Saves any unconsumed input data in
+ self->unused_data or self->unconsumed_tail, as appropriate. */
+static int
+save_unconsumed_input(compobject *self, int err)
+{
+ if (err == Z_STREAM_END) {
+ /* The end of the compressed data has been reached. Store the leftover
+ input data in self->unused_data. */
+ if (self->zst.avail_in > 0) {
+ Py_ssize_t old_size = PyBytes_GET_SIZE(self->unused_data);
+ Py_ssize_t new_size;
+ PyObject *new_data;
+ if (self->zst.avail_in > PY_SSIZE_T_MAX - old_size) {
+ PyErr_NoMemory();
+ return -1;
+ }
+ new_size = old_size + self->zst.avail_in;
+ new_data = PyBytes_FromStringAndSize(NULL, new_size);
+ if (new_data == NULL)
+ return -1;
+ Py_MEMCPY(PyBytes_AS_STRING(new_data),
+ PyBytes_AS_STRING(self->unused_data), old_size);
+ Py_MEMCPY(PyBytes_AS_STRING(new_data) + old_size,
+ self->zst.next_in, self->zst.avail_in);
+ Py_DECREF(self->unused_data);
+ self->unused_data = new_data;
+ self->zst.avail_in = 0;
+ }
+ }
+ if (self->zst.avail_in > 0 || PyBytes_GET_SIZE(self->unconsumed_tail)) {
+ /* This code handles two distinct cases:
+ 1. Output limit was reached. Save leftover input in unconsumed_tail.
+ 2. All input data was consumed. Clear unconsumed_tail. */
+ PyObject *new_data = PyBytes_FromStringAndSize(
+ (char *)self->zst.next_in, self->zst.avail_in);
+ if (new_data == NULL)
+ return -1;
+ Py_DECREF(self->unconsumed_tail);
+ self->unconsumed_tail = new_data;
+ }
+ return 0;
+}
+
PyDoc_STRVAR(decomp_decompress__doc__,
"decompress(data, max_length) -- Return a string containing the decompressed\n"
"version of the data.\n"
@@ -668,61 +711,21 @@ PyZlib_objdecompress(compobject *self, PyObject *args)
Py_END_ALLOW_THREADS
}
- if(max_length) {
- /* Not all of the compressed data could be accommodated in a buffer of
- the specified size. Return the unconsumed tail in an attribute. */
- Py_DECREF(self->unconsumed_tail);
- self->unconsumed_tail = PyBytes_FromStringAndSize((char *)self->zst.next_in,
- self->zst.avail_in);
- }
- else if (PyBytes_GET_SIZE(self->unconsumed_tail) > 0) {
- /* All of the compressed data was consumed. Clear unconsumed_tail. */
- Py_DECREF(self->unconsumed_tail);
- self->unconsumed_tail = PyBytes_FromStringAndSize("", 0);
- }
- if (self->unconsumed_tail == NULL) {
+ if (save_unconsumed_input(self, err) < 0) {
Py_DECREF(RetVal);
RetVal = NULL;
goto error;
}
- /* The end of the compressed data has been reached, so set the
- unused_data attribute to a string containing the remainder of the
- data in the string. Note that this is also a logical place to call
- inflateEnd, but the old behaviour of only calling it on flush() is
- preserved.
- */
if (err == Z_STREAM_END) {
- if (self->zst.avail_in > 0) {
- /* Append the leftover data to the existing value of unused_data. */
- Py_ssize_t old_size = PyBytes_GET_SIZE(self->unused_data);
- Py_ssize_t new_size = old_size + self->zst.avail_in;
- PyObject *new_data;
- if (new_size <= old_size) { /* Check for overflow. */
- PyErr_NoMemory();
- Py_DECREF(RetVal);
- RetVal = NULL;
- goto error;
- }
- new_data = PyBytes_FromStringAndSize(NULL, new_size);
- if (new_data == NULL) {
- Py_DECREF(RetVal);
- RetVal = NULL;
- goto error;
- }
- Py_MEMCPY(PyBytes_AS_STRING(new_data),
- PyBytes_AS_STRING(self->unused_data), old_size);
- Py_MEMCPY(PyBytes_AS_STRING(new_data) + old_size,
- self->zst.next_in, self->zst.avail_in);
- Py_DECREF(self->unused_data);
- self->unused_data = new_data;
- }
+ /* This is the logical place to call inflateEnd, but the old behaviour
+ of only calling it on flush() is preserved. */
self->eof = 1;
+ } else if (err != Z_OK && err != Z_BUF_ERROR) {
/* We will only get Z_BUF_ERROR if the output buffer was full
but there wasn't more output when we tried again, so it is
not an error condition.
*/
- } else if (err != Z_OK && err != Z_BUF_ERROR) {
zlib_error(self->zst, err, "while decompressing data");
Py_DECREF(RetVal);
RetVal = NULL;
@@ -996,6 +999,12 @@ PyZlib_unflush(compobject *self, PyObject *args)
Py_END_ALLOW_THREADS
}
+ if (save_unconsumed_input(self, err) < 0) {
+ Py_DECREF(retval);
+ retval = NULL;
+ goto error;
+ }
+
/* If at end of stream, clean up any memory allocated by zlib. */
if (err == Z_STREAM_END) {
self->eof = 1;
@@ -1008,6 +1017,7 @@ PyZlib_unflush(compobject *self, PyObject *args)
goto error;
}
}
+
if (_PyBytes_Resize(&retval, self->zst.total_out - start_total_out) < 0) {
Py_DECREF(retval);
retval = NULL;