From 511e2cacc4aa67a83ed7c0ae34355407bdd13c57 Mon Sep 17 00:00:00 2001 From: Jeremy Hylton Date: Tue, 16 Oct 2001 20:39:49 +0000 Subject: [ #403753 ] zlib decompress; uncontrollable memory usage Mostly by Toby Dickenson and Titus Brown. Add an optional argument to a decompression object's decompress() method. The argument specifies the maximum length of the return value. If the uncompressed data exceeds this length, the excess data is stored as the unconsumed_tail attribute. (Not to be confused with unused_data, which is a separate issue.) Difference from SF patch: Default value for unconsumed_tail is "" rather than None. It's simpler if the attribute is always a string. --- Doc/lib/libzlib.tex | 18 ++++++++++-- Lib/test/output/test_zlib | 3 ++ Lib/test/test_zlib.py | 30 +++++++++++++++++++ Modules/zlibmodule.c | 74 ++++++++++++++++++++++++++++++++++++++++------- 4 files changed, 113 insertions(+), 12 deletions(-) diff --git a/Doc/lib/libzlib.tex b/Doc/lib/libzlib.tex index e384b1f..b9726d7 100644 --- a/Doc/lib/libzlib.tex +++ b/Doc/lib/libzlib.tex @@ -120,7 +120,7 @@ prevents compressing any more data. After calling action is to delete the object. \end{methoddesc} -Decompression objects support the following methods, and a single attribute: +Decompression objects support the following methods, and two attributes: \begin{memberdesc}{unused_data} A string which contains any unused data from the last string fed to @@ -135,13 +135,27 @@ reading data and feeding it into a decompression object's no longer the empty string. \end{memberdesc} -\begin{methoddesc}[Decompress]{decompress}{string} +\begin{memberdesc}{unconsumed_tail} +A string that contains any data that was not consumed by the last +\method{decompress} call because it exceeded the limit for the +uncompressed data buffer. 
+\end{memberdesc} + +\begin{methoddesc}[Decompress]{decompress}{string}{\optional{max_length}} Decompress \var{string}, returning a string containing the uncompressed data corresponding to at least part of the data in \var{string}. This data should be concatenated to the output produced by any preceding calls to the \method{decompress()} method. Some of the input data may be preserved in internal buffers for later processing. + +If the optional parameter \var{max_length} is supplied then the return value +will be no longer than \var{max_length}. This may mean that not all of the +compressed input can be processed; and unconsumed data will be stored +in the attribute \member{unconsumed_tail}. This string must be passed +to a subsequent call to \method{decompress()} if decompression is to +continue. If \var{max_length} is not supplied then the whole input is +decompressed, and \member{unconsumed_tail} is an empty string. \end{methoddesc} \begin{methoddesc}[Decompress]{flush}{} diff --git a/Lib/test/output/test_zlib b/Lib/test/output/test_zlib index 61c33cf..1c2e2e9 100644 --- a/Lib/test/output/test_zlib +++ b/Lib/test/output/test_zlib @@ -8,4 +8,7 @@ normal compression/decompression succeeded compress/decompression obj succeeded decompress with init options succeeded decompressobj with init options succeeded +should be '': '' +max_length decompressobj succeeded +unconsumed_tail should be '': '' Testing on 17K of random data diff --git a/Lib/test/test_zlib.py b/Lib/test/test_zlib.py index 439db22..915f582 100644 --- a/Lib/test/test_zlib.py +++ b/Lib/test/test_zlib.py @@ -76,6 +76,36 @@ if decomp2 != buf: else: print "decompressobj with init options succeeded" +print "should be '':", `deco.unconsumed_tail` + +# Check a decompression object with max_length specified +deco = zlib.decompressobj(-12) +cb = combuf +bufs = [] +while cb: + max_length = 1 + len(cb)/10 + chunk = deco.decompress(cb, max_length) + if len(chunk) > max_length: + print 'chunk too big (%d>%d)' % 
(len(chunk),max_length) + bufs.append(chunk) + cb = deco.unconsumed_tail +bufs.append(deco.flush()) +decomp2 = ''.join(buf) +if decomp2 != buf: + print "max_length decompressobj failed" +else: + print "max_length decompressobj succeeded" + +# Misc tests of max_length +deco = zlib.decompressobj(-12) +try: + deco.decompress("", -1) +except ValueError: + pass +else: + print "failed to raise value error on bad max_length" +print "unconsumed_tail should be '':", `deco.unconsumed_tail` + # Test flush() with the various options, using all the different levels # in order to provide more variations. sync_opt = ['Z_NO_FLUSH', 'Z_SYNC_FLUSH', 'Z_FULL_FLUSH'] diff --git a/Modules/zlibmodule.c b/Modules/zlibmodule.c index a2e6aed..2d9e777 100644 --- a/Modules/zlibmodule.c +++ b/Modules/zlibmodule.c @@ -78,6 +78,7 @@ typedef struct PyObject_HEAD z_stream zst; PyObject *unused_data; + PyObject *unconsumed_tail; int is_initialised; } compobject; @@ -100,6 +101,15 @@ newcompobject(PyTypeObject *type) return NULL; self->is_initialised = 0; self->unused_data = PyString_FromString(""); + if (self->unused_data == NULL) { + Py_DECREF(self); + return NULL; + } + self->unconsumed_tail = PyString_FromString(""); + if (self->unconsumed_tail == NULL) { + Py_DECREF(self); + return NULL; + } return self; } @@ -485,6 +495,7 @@ Comp_dealloc(compobject *self) if (self->is_initialised) deflateEnd(&self->zst); Py_XDECREF(self->unused_data); + Py_XDECREF(self->unconsumed_tail); PyObject_Del(self); LEAVE_ZLIB @@ -498,6 +509,7 @@ Decomp_dealloc(compobject *self) if (self->is_initialised) inflateEnd(&self->zst); Py_XDECREF(self->unused_data); + Py_XDECREF(self->unconsumed_tail); PyObject_Del(self); LEAVE_ZLIB @@ -595,27 +607,41 @@ PyZlib_objcompress(compobject *self, PyObject *args) } static char decomp_decompress__doc__[] = -"decompress(data) -- Return a string containing the decompressed version of the data.\n\n" +"decompress(data, max_length) -- Return a string containing\n" +"the decompressed 
version of the data.\n\n" "After calling this function, some of the input data may still\n" "be stored in internal buffers for later processing.\n" -"Call the flush() method to clear these buffers." +"Call the flush() method to clear these buffers.\n" +"If the max_length parameter is specified then the return value will be\n" +"no longer than max_length. Unconsumed input data will be stored in\n" +"the unconsumed_tail attribute." ; static PyObject * PyZlib_objdecompress(compobject *self, PyObject *args) { - int err, inplen, length = DEFAULTALLOC; + int err, inplen, old_length, length = DEFAULTALLOC; + int max_length = 0; PyObject *RetVal; Byte *input; unsigned long start_total_out; int return_error; PyObject * inputString; - if (!PyArg_ParseTuple(args, "S:decompress", &inputString)) + if (!PyArg_ParseTuple(args, "S|i:decompress", &inputString, &max_length)) + return NULL; + if (max_length < 0) { + PyErr_SetString(PyExc_ValueError, + "max_length must be greater than zero"); return NULL; + } + if (PyString_AsStringAndSize(inputString, (char**)&input, &inplen) == -1) return NULL; + /* limit amount of data allocated to max_length */ + if (max_length && length > max_length) + length = max_length; if (!(RetVal = PyString_FromStringAndSize(NULL, length))) { PyErr_SetString(PyExc_MemoryError, "Can't allocate memory to compress data"); @@ -637,23 +663,46 @@ PyZlib_objdecompress(compobject *self, PyObject *args) err = inflate(&(self->zst), Z_SYNC_FLUSH); Py_END_ALLOW_THREADS - /* while Z_OK and the output buffer is full, there might be more output, - so extend the output buffer and try again */ + /* While Z_OK and the output buffer is full, there might be more output. + So extend the output buffer and try again. + */ while (err == Z_OK && self->zst.avail_out == 0) { - if (_PyString_Resize(&RetVal, length << 1) == -1) { + /* If max_length set, don't continue decompressing if we've already + reached the limit. 
+ */ + if (max_length && length >= max_length) + break; + + /* otherwise, ... */ + old_length = length; + length = length << 1; + if (max_length && length > max_length) + length = max_length; + + if (_PyString_Resize(&RetVal, length) == -1) { PyErr_SetString(PyExc_MemoryError, "Can't allocate memory to compress data"); return_error = 1; break; } - self->zst.next_out = (unsigned char *)PyString_AsString(RetVal) + length; - self->zst.avail_out = length; - length = length << 1; + self->zst.next_out = (unsigned char *)PyString_AsString(RetVal)+old_length; + self->zst.avail_out = length - old_length; + Py_BEGIN_ALLOW_THREADS err = inflate(&(self->zst), Z_SYNC_FLUSH); Py_END_ALLOW_THREADS } + /* Not all of the compressed data could be accommodated in the output buffer + of specified size. Return the unconsumed tail in an attribute.*/ + if(max_length) { + Py_DECREF(self->unconsumed_tail); + self->unconsumed_tail = PyString_FromStringAndSize(self->zst.next_in, + self->zst.avail_in); + if(!self->unconsumed_tail) + return_error = 1; + } + /* The end of the compressed data has been reached, so set the unused_data attribute to a string containing the remainder of the data in the string. Note that this is also a logical place to call inflateEnd, but the old @@ -885,6 +934,11 @@ Decomp_getattr(compobject *self, char *name) Py_INCREF(self->unused_data); retval = self->unused_data; } + else if (strcmp(name, "unconsumed_tail") == 0) + { + Py_INCREF(self->unconsumed_tail); + retval = self->unconsumed_tail; + } else retval = Py_FindMethod(Decomp_methods, (PyObject *)self, name); -- cgit v0.12