summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJeremy Hylton <jeremy@alum.mit.edu>2001-10-16 20:39:49 (GMT)
committerJeremy Hylton <jeremy@alum.mit.edu>2001-10-16 20:39:49 (GMT)
commit511e2cacc4aa67a83ed7c0ae34355407bdd13c57 (patch)
tree770b92726208bcae44db86fc8956348fe8d9a91e
parent7a59445e3783fc842bc4fd0181b18d6798883a3e (diff)
downloadcpython-511e2cacc4aa67a83ed7c0ae34355407bdd13c57.zip
cpython-511e2cacc4aa67a83ed7c0ae34355407bdd13c57.tar.gz
cpython-511e2cacc4aa67a83ed7c0ae34355407bdd13c57.tar.bz2
[ #403753 ] zlib decompress; uncontrollable memory usage
Mostly by Toby Dickenson and Titus Brown. Add an optional argument to a decompression object's decompress() method. The argument specifies the maximum length of the return value. If the uncompressed data exceeds this length, the excess data is stored as the unconsumed_tail attribute. (Not to be confused with unused_data, which is a separate issue.) Difference from SF patch: Default value for unconsumed_tail is "" rather than None. It's simpler if the attribute is always a string.
-rw-r--r--Doc/lib/libzlib.tex18
-rw-r--r--Lib/test/output/test_zlib3
-rw-r--r--Lib/test/test_zlib.py30
-rw-r--r--Modules/zlibmodule.c74
4 files changed, 113 insertions, 12 deletions
diff --git a/Doc/lib/libzlib.tex b/Doc/lib/libzlib.tex
index e384b1f..b9726d7 100644
--- a/Doc/lib/libzlib.tex
+++ b/Doc/lib/libzlib.tex
@@ -120,7 +120,7 @@ prevents compressing any more data. After calling
action is to delete the object.
\end{methoddesc}
-Decompression objects support the following methods, and a single attribute:
+Decompression objects support the following methods, and two attributes:
\begin{memberdesc}{unused_data}
A string which contains any unused data from the last string fed to
@@ -135,13 +135,27 @@ reading data and feeding it into a decompression object's
no longer the empty string.
\end{memberdesc}
-\begin{methoddesc}[Decompress]{decompress}{string}
+\begin{memberdesc}{unconsumed_tail}
+A string that contains any data that was not consumed by the last
+\method{decompress()} call because it exceeded the limit for the
+uncompressed data buffer.
+\end{memberdesc}
+
+\begin{methoddesc}[Decompress]{decompress}{string\optional{, max_length}}
Decompress \var{string}, returning a string containing the
uncompressed data corresponding to at least part of the data in
\var{string}. This data should be concatenated to the output produced
by any preceding calls to the
\method{decompress()} method. Some of the input data may be preserved
in internal buffers for later processing.
+
+If the optional parameter \var{max_length} is supplied, then the return value
+will be no longer than \var{max_length}. This may mean that not all of the
+compressed input can be processed; any unconsumed data will be stored
+in the attribute \member{unconsumed_tail}. This string must be passed
+to a subsequent call to \method{decompress()} if decompression is to
+continue. If \var{max_length} is not supplied, then the whole input is
+decompressed, and \member{unconsumed_tail} is an empty string.
\end{methoddesc}
\begin{methoddesc}[Decompress]{flush}{}
diff --git a/Lib/test/output/test_zlib b/Lib/test/output/test_zlib
index 61c33cf..1c2e2e9 100644
--- a/Lib/test/output/test_zlib
+++ b/Lib/test/output/test_zlib
@@ -8,4 +8,7 @@ normal compression/decompression succeeded
compress/decompression obj succeeded
decompress with init options succeeded
decompressobj with init options succeeded
+should be '': ''
+max_length decompressobj succeeded
+unconsumed_tail should be '': ''
Testing on 17K of random data
diff --git a/Lib/test/test_zlib.py b/Lib/test/test_zlib.py
index 439db22..915f582 100644
--- a/Lib/test/test_zlib.py
+++ b/Lib/test/test_zlib.py
@@ -76,6 +76,36 @@ if decomp2 != buf:
else:
print "decompressobj with init options succeeded"
+print "should be '':", `deco.unconsumed_tail`
+
+# Check a decompression object with max_length specified
+deco = zlib.decompressobj(-12)
+cb = combuf
+bufs = []
+while cb:
+ max_length = 1 + len(cb)/10
+ chunk = deco.decompress(cb, max_length)
+ if len(chunk) > max_length:
+ print 'chunk too big (%d>%d)' % (len(chunk),max_length)
+ bufs.append(chunk)
+ cb = deco.unconsumed_tail
+bufs.append(deco.flush())
+decomp2 = ''.join(buf)
+if decomp2 != buf:
+ print "max_length decompressobj failed"
+else:
+ print "max_length decompressobj succeeded"
+
+# Misc tests of max_length
+deco = zlib.decompressobj(-12)
+try:
+ deco.decompress("", -1)
+except ValueError:
+ pass
+else:
+ print "failed to raise value error on bad max_length"
+print "unconsumed_tail should be '':", `deco.unconsumed_tail`
+
# Test flush() with the various options, using all the different levels
# in order to provide more variations.
sync_opt = ['Z_NO_FLUSH', 'Z_SYNC_FLUSH', 'Z_FULL_FLUSH']
diff --git a/Modules/zlibmodule.c b/Modules/zlibmodule.c
index a2e6aed..2d9e777 100644
--- a/Modules/zlibmodule.c
+++ b/Modules/zlibmodule.c
@@ -78,6 +78,7 @@ typedef struct
PyObject_HEAD
z_stream zst;
PyObject *unused_data;
+ PyObject *unconsumed_tail;
int is_initialised;
} compobject;
@@ -100,6 +101,15 @@ newcompobject(PyTypeObject *type)
return NULL;
self->is_initialised = 0;
self->unused_data = PyString_FromString("");
+ if (self->unused_data == NULL) {
+ Py_DECREF(self);
+ return NULL;
+ }
+ self->unconsumed_tail = PyString_FromString("");
+ if (self->unconsumed_tail == NULL) {
+ Py_DECREF(self);
+ return NULL;
+ }
return self;
}
@@ -485,6 +495,7 @@ Comp_dealloc(compobject *self)
if (self->is_initialised)
deflateEnd(&self->zst);
Py_XDECREF(self->unused_data);
+ Py_XDECREF(self->unconsumed_tail);
PyObject_Del(self);
LEAVE_ZLIB
@@ -498,6 +509,7 @@ Decomp_dealloc(compobject *self)
if (self->is_initialised)
inflateEnd(&self->zst);
Py_XDECREF(self->unused_data);
+ Py_XDECREF(self->unconsumed_tail);
PyObject_Del(self);
LEAVE_ZLIB
@@ -595,27 +607,41 @@ PyZlib_objcompress(compobject *self, PyObject *args)
}
static char decomp_decompress__doc__[] =
-"decompress(data) -- Return a string containing the decompressed version of the data.\n\n"
+"decompress(data, max_length) -- Return a string containing\n"
+"the decompressed version of the data.\n\n"
"After calling this function, some of the input data may still\n"
"be stored in internal buffers for later processing.\n"
-"Call the flush() method to clear these buffers."
+"Call the flush() method to clear these buffers.\n"
+"If the max_length parameter is specified then the return value will be\n"
+"no longer than max_length. Unconsumed input data will be stored in\n"
+"the unconsumed_tail attribute."
;
static PyObject *
PyZlib_objdecompress(compobject *self, PyObject *args)
{
- int err, inplen, length = DEFAULTALLOC;
+ int err, inplen, old_length, length = DEFAULTALLOC;
+ int max_length = 0;
PyObject *RetVal;
Byte *input;
unsigned long start_total_out;
int return_error;
PyObject * inputString;
- if (!PyArg_ParseTuple(args, "S:decompress", &inputString))
+ if (!PyArg_ParseTuple(args, "S|i:decompress", &inputString, &max_length))
+ return NULL;
+ if (max_length < 0) {
+ PyErr_SetString(PyExc_ValueError,
+ "max_length must be greater than zero");
return NULL;
+ }
+
if (PyString_AsStringAndSize(inputString, (char**)&input, &inplen) == -1)
return NULL;
+ /* limit amount of data allocated to max_length */
+ if (max_length && length > max_length)
+ length = max_length;
if (!(RetVal = PyString_FromStringAndSize(NULL, length))) {
PyErr_SetString(PyExc_MemoryError,
"Can't allocate memory to compress data");
@@ -637,23 +663,46 @@ PyZlib_objdecompress(compobject *self, PyObject *args)
err = inflate(&(self->zst), Z_SYNC_FLUSH);
Py_END_ALLOW_THREADS
- /* while Z_OK and the output buffer is full, there might be more output,
- so extend the output buffer and try again */
+ /* While Z_OK and the output buffer is full, there might be more output.
+ So extend the output buffer and try again.
+ */
while (err == Z_OK && self->zst.avail_out == 0) {
- if (_PyString_Resize(&RetVal, length << 1) == -1) {
+ /* If max_length set, don't continue decompressing if we've already
+ reached the limit.
+ */
+ if (max_length && length >= max_length)
+ break;
+
+ /* otherwise, ... */
+ old_length = length;
+ length = length << 1;
+ if (max_length && length > max_length)
+ length = max_length;
+
+ if (_PyString_Resize(&RetVal, length) == -1) {
PyErr_SetString(PyExc_MemoryError,
"Can't allocate memory to compress data");
return_error = 1;
break;
}
- self->zst.next_out = (unsigned char *)PyString_AsString(RetVal) + length;
- self->zst.avail_out = length;
- length = length << 1;
+ self->zst.next_out = (unsigned char *)PyString_AsString(RetVal)+old_length;
+ self->zst.avail_out = length - old_length;
+
Py_BEGIN_ALLOW_THREADS
err = inflate(&(self->zst), Z_SYNC_FLUSH);
Py_END_ALLOW_THREADS
}
+ /* Not all of the compressed data could be accommodated in the output buffer
+ of specified size. Return the unconsumed tail in an attribute.*/
+ if(max_length) {
+ Py_DECREF(self->unconsumed_tail);
+ self->unconsumed_tail = PyString_FromStringAndSize(self->zst.next_in,
+ self->zst.avail_in);
+ if(!self->unconsumed_tail)
+ return_error = 1;
+ }
+
/* The end of the compressed data has been reached, so set the unused_data
attribute to a string containing the remainder of the data in the string.
Note that this is also a logical place to call inflateEnd, but the old
@@ -885,6 +934,11 @@ Decomp_getattr(compobject *self, char *name)
Py_INCREF(self->unused_data);
retval = self->unused_data;
}
+ else if (strcmp(name, "unconsumed_tail") == 0)
+ {
+ Py_INCREF(self->unconsumed_tail);
+ retval = self->unconsumed_tail;
+ }
else
retval = Py_FindMethod(Decomp_methods, (PyObject *)self, name);