summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorNadeem Vawda <nadeem.vawda@gmail.com>2012-06-21 00:13:12 (GMT)
committerNadeem Vawda <nadeem.vawda@gmail.com>2012-06-21 00:13:12 (GMT)
commitfd8a838d582e3495004036bc82e3e6fee91fa0ba (patch)
tree1857e51ac2ee013cdd896b712014b158cd47575b
parent50b0a365ba8ea87e0a7155f7fd92de0c7dc2f941 (diff)
downloadcpython-fd8a838d582e3495004036bc82e3e6fee91fa0ba.zip
cpython-fd8a838d582e3495004036bc82e3e6fee91fa0ba.tar.gz
cpython-fd8a838d582e3495004036bc82e3e6fee91fa0ba.tar.bz2
Issue #14684: Add support for predefined compression dictionaries to the zlib module.
Original patch by Sam Rushing.
-rw-r--r--Doc/library/zlib.rst31
-rw-r--r--Lib/test/test_zlib.py30
-rw-r--r--Misc/NEWS3
-rw-r--r--Modules/zlibmodule.c118
4 files changed, 153 insertions, 29 deletions
diff --git a/Doc/library/zlib.rst b/Doc/library/zlib.rst
index 1e9a2bc..705f734 100644
--- a/Doc/library/zlib.rst
+++ b/Doc/library/zlib.rst
@@ -58,12 +58,19 @@ The available exception and functions in this module are:
exception if any error occurs.
-.. function:: compressobj([level])
+.. function:: compressobj([level[, method[, wbits[, memlevel[, strategy[, zdict]]]]]])
Returns a compression object, to be used for compressing data streams that won't
- fit into memory at once. *level* is an integer from ``1`` to ``9`` controlling
- the level of compression; ``1`` is fastest and produces the least compression,
- ``9`` is slowest and produces the most. The default value is ``6``.
+ fit into memory at once.
+
+ *level* is an integer from ``1`` to ``9`` controlling the level of
+ compression; ``1`` is fastest and produces the least compression, ``9`` is
+ slowest and produces the most. The default value is ``6``.
+
+ *zdict* is a predefined compression dictionary. This is a sequence of bytes
+ (such as a :class:`bytes` object) containing subsequences that are expected
+ to occur frequently in the data that is to be compressed. Those subsequences
+ that are expected to be most common should come at the end of the dictionary.
.. function:: crc32(data[, value])
@@ -114,11 +121,21 @@ The available exception and functions in this module are:
to :c:func:`malloc`. The default size is 16384.
-.. function:: decompressobj([wbits])
+.. function:: decompressobj([wbits[, zdict]])
Returns a decompression object, to be used for decompressing data streams that
- won't fit into memory at once. The *wbits* parameter controls the size of the
- window buffer.
+ won't fit into memory at once.
+
+ The *wbits* parameter controls the size of the window buffer.
+
+ The *zdict* parameter specifies a predefined compression dictionary. If
+ provided, this must be the same dictionary as was used by the compressor that
+ produced the data that is to be decompressed.
+
+.. note::
+ If *zdict* is a mutable object (such as a :class:`bytearray`), you must not
+ modify its contents between the call to :func:`decompressobj` and the first
+ call to the decompressor's ``decompress()`` method.
Compression objects support the following methods:
diff --git a/Lib/test/test_zlib.py b/Lib/test/test_zlib.py
index 3c982c6..904ac4d 100644
--- a/Lib/test/test_zlib.py
+++ b/Lib/test/test_zlib.py
@@ -425,6 +425,36 @@ class CompressObjectTestCase(BaseCompressTestCase, unittest.TestCase):
dco = zlib.decompressobj()
self.assertEqual(dco.flush(), b"") # Returns nothing
+ def test_dictionary(self):
+ h = HAMLET_SCENE
+ # build a simulated dictionary out of the words in HAMLET
+ words = h.split()
+ random.shuffle(words)
+ zdict = b''.join(words)
+ # use it to compress HAMLET
+ co = zlib.compressobj(zdict=zdict)
+ cd = co.compress(h) + co.flush()
+ # verify that it will decompress with the dictionary
+ dco = zlib.decompressobj(zdict=zdict)
+ self.assertEqual(dco.decompress(cd) + dco.flush(), h)
+ # verify that it fails when not given the dictionary
+ dco = zlib.decompressobj()
+ self.assertRaises(zlib.error, dco.decompress, cd)
+
+ def test_dictionary_streaming(self):
+ # this is simulating the needs of SPDY to be able to reuse the same
+ # stream object (with its compression state) between sets of compressed
+ # headers.
+ co = zlib.compressobj(zdict=HAMLET_SCENE)
+ do = zlib.decompressobj(zdict=HAMLET_SCENE)
+ piece = HAMLET_SCENE[1000:1500]
+ d0 = co.compress(piece) + co.flush(zlib.Z_SYNC_FLUSH)
+ d1 = co.compress(piece[100:]) + co.flush(zlib.Z_SYNC_FLUSH)
+ d2 = co.compress(piece[:-100]) + co.flush(zlib.Z_SYNC_FLUSH)
+ self.assertEqual(do.decompress(d0), piece)
+ self.assertEqual(do.decompress(d1), piece[100:])
+ self.assertEqual(do.decompress(d2), piece[:-100])
+
def test_decompress_incomplete_stream(self):
# This is 'foo', deflated
x = b'x\x9cK\xcb\xcf\x07\x00\x02\x82\x01E'
diff --git a/Misc/NEWS b/Misc/NEWS
index 2f9236f..c919c69 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -34,6 +34,9 @@ Core and Builtins
Library
-------
+- Issue #14684: zlib.compressobj() and zlib.decompressobj() now support the use
+ of predefined compression dictionaries. Original patch by Sam Rushing.
+
- Fix GzipFile's handling of filenames given as bytes objects.
- Issue #14772: Return destination values from some shutil functions.
diff --git a/Modules/zlibmodule.c b/Modules/zlibmodule.c
index a6da056..102740b 100644
--- a/Modules/zlibmodule.c
+++ b/Modules/zlibmodule.c
@@ -45,6 +45,7 @@ typedef struct
PyObject *unconsumed_tail;
char eof;
int is_initialised;
+ PyObject *zdict;
#ifdef WITH_THREAD
PyThread_type_lock lock;
#endif
@@ -80,14 +81,21 @@ zlib_error(z_stream zst, int err, char *msg)
}
PyDoc_STRVAR(compressobj__doc__,
-"compressobj([level]) -- Return a compressor object.\n"
+"compressobj([level[, method[, wbits[, memlevel[, strategy[, zdict]]]]]])\n"
+" -- Return a compressor object.\n"
"\n"
-"Optional arg level is the compression level, in 1-9.");
+"Optional arg level is the compression level, in 1-9.\n"
+"\n"
+"Optional arg zdict is the predefined compression dictionary - a sequence of\n"
+"bytes containing subsequences that are likely to occur in the input data.");
PyDoc_STRVAR(decompressobj__doc__,
-"decompressobj([wbits]) -- Return a decompressor object.\n"
+"decompressobj([wbits[, zdict]]) -- Return a decompressor object.\n"
+"\n"
+"Optional arg wbits is the window buffer size.\n"
"\n"
-"Optional arg wbits is the window buffer size.");
+"Optional arg zdict is the predefined compression dictionary. This must be\n"
+"the same dictionary as used by the compressor that produced the input data.");
static compobject *
newcompobject(PyTypeObject *type)
@@ -98,6 +106,7 @@ newcompobject(PyTypeObject *type)
return NULL;
self->eof = 0;
self->is_initialised = 0;
+ self->zdict = NULL;
self->unused_data = PyBytes_FromStringAndSize("", 0);
if (self->unused_data == NULL) {
Py_DECREF(self);
@@ -316,19 +325,24 @@ PyZlib_decompress(PyObject *self, PyObject *args)
}
static PyObject *
-PyZlib_compressobj(PyObject *selfptr, PyObject *args)
+PyZlib_compressobj(PyObject *selfptr, PyObject *args, PyObject *kwargs)
{
compobject *self;
int level=Z_DEFAULT_COMPRESSION, method=DEFLATED;
int wbits=MAX_WBITS, memLevel=DEF_MEM_LEVEL, strategy=0, err;
-
- if (!PyArg_ParseTuple(args, "|iiiii:compressobj", &level, &method, &wbits,
- &memLevel, &strategy))
+ Py_buffer zdict;
+ static char *kwlist[] = {"level", "method", "wbits",
+ "memLevel", "strategy", "zdict", NULL};
+
+ zdict.buf = NULL; /* Sentinel, so we can tell whether zdict was supplied. */
+ if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|iiiiiy*:compressobj",
+ kwlist, &level, &method, &wbits,
+ &memLevel, &strategy, &zdict))
return NULL;
self = newcompobject(&Comptype);
if (self==NULL)
- return(NULL);
+ goto error;
self->zst.zalloc = (alloc_func)NULL;
self->zst.zfree = (free_func)Z_NULL;
self->zst.next_in = NULL;
@@ -337,30 +351,58 @@ PyZlib_compressobj(PyObject *selfptr, PyObject *args)
switch(err) {
case (Z_OK):
self->is_initialised = 1;
- return (PyObject*)self;
+ if (zdict.buf == NULL) {
+ goto success;
+ } else {
+ err = deflateSetDictionary(&self->zst, zdict.buf, zdict.len);
+ switch (err) {
+ case (Z_OK):
+ goto success;
+ case (Z_STREAM_ERROR):
+ PyErr_SetString(PyExc_ValueError, "Invalid dictionary");
+ goto error;
+ default:
+ PyErr_SetString(PyExc_ValueError, "deflateSetDictionary()");
+ goto error;
+ }
+ }
case (Z_MEM_ERROR):
- Py_DECREF(self);
PyErr_SetString(PyExc_MemoryError,
"Can't allocate memory for compression object");
- return NULL;
+ goto error;
case(Z_STREAM_ERROR):
- Py_DECREF(self);
PyErr_SetString(PyExc_ValueError, "Invalid initialization option");
- return NULL;
+ goto error;
default:
zlib_error(self->zst, err, "while creating compression object");
- Py_DECREF(self);
- return NULL;
+ goto error;
}
+
+ error:
+ Py_XDECREF(self);
+ self = NULL;
+ success:
+ if (zdict.buf != NULL)
+ PyBuffer_Release(&zdict);
+ return (PyObject*)self;
}
static PyObject *
-PyZlib_decompressobj(PyObject *selfptr, PyObject *args)
+PyZlib_decompressobj(PyObject *selfptr, PyObject *args, PyObject *kwargs)
{
+ static char *kwlist[] = {"wbits", "zdict", NULL};
int wbits=DEF_WBITS, err;
compobject *self;
- if (!PyArg_ParseTuple(args, "|i:decompressobj", &wbits))
+ PyObject *zdict=NULL;
+
+ if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|iO:decompressobj",
+ kwlist, &wbits, &zdict))
+ return NULL;
+ if (zdict != NULL && !PyObject_CheckBuffer(zdict)) {
+ PyErr_SetString(PyExc_TypeError,
+ "zdict argument must support the buffer protocol");
return NULL;
+ }
self = newcompobject(&Decomptype);
if (self == NULL)
@@ -369,6 +411,10 @@ PyZlib_decompressobj(PyObject *selfptr, PyObject *args)
self->zst.zfree = (free_func)Z_NULL;
self->zst.next_in = NULL;
self->zst.avail_in = 0;
+ if (zdict != NULL) {
+ Py_INCREF(zdict);
+ self->zdict = zdict;
+ }
err = inflateInit2(&self->zst, wbits);
switch(err) {
case (Z_OK):
@@ -398,6 +444,7 @@ Dealloc(compobject *self)
#endif
Py_XDECREF(self->unused_data);
Py_XDECREF(self->unconsumed_tail);
+ Py_XDECREF(self->zdict);
PyObject_Del(self);
}
@@ -557,6 +604,27 @@ PyZlib_objdecompress(compobject *self, PyObject *args)
err = inflate(&(self->zst), Z_SYNC_FLUSH);
Py_END_ALLOW_THREADS
+ if (err == Z_NEED_DICT && self->zdict != NULL) {
+ Py_buffer zdict_buf;
+ if (PyObject_GetBuffer(self->zdict, &zdict_buf, PyBUF_SIMPLE) == -1) {
+ Py_DECREF(RetVal);
+ RetVal = NULL;
+ goto error;
+ }
+ err = inflateSetDictionary(&(self->zst), zdict_buf.buf, zdict_buf.len);
+ PyBuffer_Release(&zdict_buf);
+ if (err != Z_OK) {
+ zlib_error(self->zst, err, "while decompressing data");
+ Py_DECREF(RetVal);
+ RetVal = NULL;
+ goto error;
+ }
+ /* repeat the call to inflate! */
+ Py_BEGIN_ALLOW_THREADS
+ err = inflate(&(self->zst), Z_SYNC_FLUSH);
+ Py_END_ALLOW_THREADS
+ }
+
/* While Z_OK and the output buffer is full, there might be more output.
So extend the output buffer and try again.
*/
@@ -770,10 +838,13 @@ PyZlib_copy(compobject *self)
}
Py_INCREF(self->unused_data);
Py_INCREF(self->unconsumed_tail);
+ Py_XINCREF(self->zdict);
Py_XDECREF(retval->unused_data);
Py_XDECREF(retval->unconsumed_tail);
+ Py_XDECREF(retval->zdict);
retval->unused_data = self->unused_data;
retval->unconsumed_tail = self->unconsumed_tail;
+ retval->zdict = self->zdict;
retval->eof = self->eof;
/* Mark it as being initialized */
@@ -822,10 +893,13 @@ PyZlib_uncopy(compobject *self)
Py_INCREF(self->unused_data);
Py_INCREF(self->unconsumed_tail);
+ Py_XINCREF(self->zdict);
Py_XDECREF(retval->unused_data);
Py_XDECREF(retval->unconsumed_tail);
+ Py_XDECREF(retval->zdict);
retval->unused_data = self->unused_data;
retval->unconsumed_tail = self->unconsumed_tail;
+ retval->zdict = self->zdict;
retval->eof = self->eof;
/* Mark it as being initialized */
@@ -1032,13 +1106,13 @@ static PyMethodDef zlib_methods[] =
adler32__doc__},
{"compress", (PyCFunction)PyZlib_compress, METH_VARARGS,
compress__doc__},
- {"compressobj", (PyCFunction)PyZlib_compressobj, METH_VARARGS,
+ {"compressobj", (PyCFunction)PyZlib_compressobj, METH_VARARGS|METH_KEYWORDS,
compressobj__doc__},
{"crc32", (PyCFunction)PyZlib_crc32, METH_VARARGS,
crc32__doc__},
{"decompress", (PyCFunction)PyZlib_decompress, METH_VARARGS,
decompress__doc__},
- {"decompressobj", (PyCFunction)PyZlib_decompressobj, METH_VARARGS,
+ {"decompressobj", (PyCFunction)PyZlib_decompressobj, METH_VARARGS|METH_KEYWORDS,
decompressobj__doc__},
{NULL, NULL}
};
@@ -1112,10 +1186,10 @@ PyDoc_STRVAR(zlib_module_documentation,
"\n"
"adler32(string[, start]) -- Compute an Adler-32 checksum.\n"
"compress(string[, level]) -- Compress string, with compression level in 1-9.\n"
-"compressobj([level]) -- Return a compressor object.\n"
+"compressobj([level[, ...]]) -- Return a compressor object.\n"
"crc32(string[, start]) -- Compute a CRC-32 checksum.\n"
"decompress(string,[wbits],[bufsize]) -- Decompresses a compressed string.\n"
-"decompressobj([wbits]) -- Return a decompressor object.\n"
+"decompressobj([wbits[, zdict]]]) -- Return a decompressor object.\n"
"\n"
"'wbits' is window buffer size.\n"
"Compressor objects support compress() and flush() methods; decompressor\n"