diff options
-rw-r--r-- | Doc/library/zlib.rst | 20 | ||||
-rw-r--r-- | Lib/test/test_zlib.py | 20 | ||||
-rw-r--r-- | Misc/NEWS | 3 | ||||
-rw-r--r-- | Modules/zlibmodule.c | 19 |
4 files changed, 48 insertions, 14 deletions
diff --git a/Doc/library/zlib.rst b/Doc/library/zlib.rst index a7b8343..54835e7 100644 --- a/Doc/library/zlib.rst +++ b/Doc/library/zlib.rst @@ -152,7 +152,7 @@ Compression objects support the following methods: compress a set of data that share a common initial prefix. -Decompression objects support the following methods, and two attributes: +Decompression objects support the following methods and attributes: .. attribute:: Decompress.unused_data @@ -162,13 +162,6 @@ Decompression objects support the following methods, and two attributes: available. If the whole bytestring turned out to contain compressed data, this is ``b""``, an empty bytes object. - The only way to determine where a bytestring of compressed data ends is by actually - decompressing it. This means that when compressed data is contained part of a - larger file, you can only find the end of it by reading data and feeding it - followed by some non-empty bytestring into a decompression object's - :meth:`decompress` method until the :attr:`unused_data` attribute is no longer - empty. - .. attribute:: Decompress.unconsumed_tail @@ -179,6 +172,17 @@ Decompression objects support the following methods, and two attributes: :meth:`decompress` method call in order to get correct output. +.. attribute:: Decompress.eof + + A boolean indicating whether the end of the compressed data stream has been + reached. + + This makes it possible to distinguish between a properly-formed compressed + stream, and an incomplete or truncated one. + + .. versionadded:: 3.3 + + .. 
method:: Decompress.decompress(data[, max_length]) Decompress *data*, returning a bytes object containing the uncompressed data diff --git a/Lib/test/test_zlib.py b/Lib/test/test_zlib.py index 68dd3ea..dddde47 100644 --- a/Lib/test/test_zlib.py +++ b/Lib/test/test_zlib.py @@ -447,6 +447,26 @@ class CompressObjectTestCase(BaseCompressTestCase, unittest.TestCase): y += dco.flush() self.assertEqual(y, b'foo') + def test_decompress_eof(self): + x = b'x\x9cK\xcb\xcf\x07\x00\x02\x82\x01E' # 'foo' + dco = zlib.decompressobj() + self.assertFalse(dco.eof) + dco.decompress(x[:-5]) + self.assertFalse(dco.eof) + dco.decompress(x[-5:]) + self.assertTrue(dco.eof) + dco.flush() + self.assertTrue(dco.eof) + + def test_decompress_eof_incomplete_stream(self): + x = b'x\x9cK\xcb\xcf\x07\x00\x02\x82\x01E' # 'foo' + dco = zlib.decompressobj() + self.assertFalse(dco.eof) + dco.decompress(x[:-5]) + self.assertFalse(dco.eof) + dco.flush() + self.assertFalse(dco.eof) + if hasattr(zlib.compressobj(), "copy"): def test_compresscopy(self): # Test copying a compression object diff --git a/Misc/NEWS b/Misc/NEWS --- a/Misc/NEWS +++ b/Misc/NEWS @@ -254,6 +254,9 @@ Core and Builtins Library ------- +- Issue #12646: Add an 'eof' attribute to zlib.Decompress, to make it easier to + detect truncated input streams. + - Issue #11513: Fix exception handling ``tarfile.TarFile.gzopen()`` when the file cannot be opened. 
diff --git a/Modules/zlibmodule.c b/Modules/zlibmodule.c index ba0e59c..dc707ff 100644 --- a/Modules/zlibmodule.c +++ b/Modules/zlibmodule.c @@ -43,6 +43,7 @@ typedef struct z_stream zst; PyObject *unused_data; PyObject *unconsumed_tail; + char eof; int is_initialised; #ifdef WITH_THREAD PyThread_type_lock lock; @@ -89,6 +90,7 @@ newcompobject(PyTypeObject *type) self = PyObject_New(compobject, type); if (self == NULL) return NULL; + self->eof = 0; self->is_initialised = 0; self->unused_data = PyBytes_FromStringAndSize("", 0); if (self->unused_data == NULL) { @@ -291,7 +293,7 @@ PyZlib_decompress(PyObject *self, PyObject *args) err = inflateEnd(&zst); if (err != Z_OK) { - zlib_error(zst, err, "while finishing data decompression"); + zlib_error(zst, err, "while finishing decompression"); goto error; } @@ -476,7 +478,7 @@ PyZlib_objcompress(compobject *self, PyObject *args) */ if (err != Z_OK && err != Z_BUF_ERROR) { - zlib_error(self->zst, err, "while compressing"); + zlib_error(self->zst, err, "while compressing data"); Py_DECREF(RetVal); RetVal = NULL; goto error; @@ -611,12 +613,13 @@ PyZlib_objdecompress(compobject *self, PyObject *args) Py_DECREF(RetVal); goto error; } + self->eof = 1; /* We will only get Z_BUF_ERROR if the output buffer was full but there wasn't more output when we tried again, so it is not an error condition. 
*/ } else if (err != Z_OK && err != Z_BUF_ERROR) { - zlib_error(self->zst, err, "while decompressing"); + zlib_error(self->zst, err, "while decompressing data"); Py_DECREF(RetVal); RetVal = NULL; goto error; @@ -697,7 +700,7 @@ PyZlib_flush(compobject *self, PyObject *args) if (err == Z_STREAM_END && flushmode == Z_FINISH) { err = deflateEnd(&(self->zst)); if (err != Z_OK) { - zlib_error(self->zst, err, "from deflateEnd()"); + zlib_error(self->zst, err, "while finishing compression"); Py_DECREF(RetVal); RetVal = NULL; goto error; @@ -765,6 +768,7 @@ PyZlib_copy(compobject *self) Py_XDECREF(retval->unconsumed_tail); retval->unused_data = self->unused_data; retval->unconsumed_tail = self->unconsumed_tail; + retval->eof = self->eof; /* Mark it as being initialized */ retval->is_initialised = 1; @@ -816,6 +820,7 @@ PyZlib_uncopy(compobject *self) Py_XDECREF(retval->unconsumed_tail); retval->unused_data = self->unused_data; retval->unconsumed_tail = self->unconsumed_tail; + retval->eof = self->eof; /* Mark it as being initialized */ retval->is_initialised = 1; @@ -885,10 +890,11 @@ PyZlib_unflush(compobject *self, PyObject *args) various data structures. Note we should only get Z_STREAM_END when flushmode is Z_FINISH */ if (err == Z_STREAM_END) { - err = inflateEnd(&(self->zst)); + self->eof = 1; self->is_initialised = 0; + err = inflateEnd(&(self->zst)); if (err != Z_OK) { - zlib_error(self->zst, err, "from inflateEnd()"); + zlib_error(self->zst, err, "while finishing decompression"); Py_DECREF(retval); retval = NULL; goto error; @@ -936,6 +942,7 @@ static PyMethodDef Decomp_methods[] = static PyMemberDef Decomp_members[] = { {"unused_data", T_OBJECT, COMP_OFF(unused_data), READONLY}, {"unconsumed_tail", T_OBJECT, COMP_OFF(unconsumed_tail), READONLY}, + {"eof", T_BOOL, COMP_OFF(eof), READONLY}, {NULL}, }; |