summaryrefslogtreecommitdiffstats
path: root/Modules/_lzmamodule.c
diff options
context:
space:
mode:
Diffstat (limited to 'Modules/_lzmamodule.c')
-rw-r--r--Modules/_lzmamodule.c226
1 files changed, 184 insertions, 42 deletions
diff --git a/Modules/_lzmamodule.c b/Modules/_lzmamodule.c
index c43676a..7e4e2df 100644
--- a/Modules/_lzmamodule.c
+++ b/Modules/_lzmamodule.c
@@ -66,6 +66,9 @@ typedef struct {
int check;
char eof;
PyObject *unused_data;
+ char needs_input;
+ uint8_t *input_buffer;
+ size_t input_buffer_size;
#ifdef WITH_THREAD
PyThread_type_lock lock;
#endif
@@ -142,10 +145,15 @@ PyLzma_Free(void *opaque, void *ptr)
#endif
static int
-grow_buffer(PyObject **buf)
+grow_buffer(PyObject **buf, Py_ssize_t max_length)
{
- size_t size = PyBytes_GET_SIZE(*buf);
- return _PyBytes_Resize(buf, size + (size >> 3) + 6);
+ Py_ssize_t size = PyBytes_GET_SIZE(*buf);
+ Py_ssize_t newsize = size + (size >> 3) + 6;
+
+ if (max_length > 0 && newsize > max_length)
+ newsize = max_length;
+
+ return _PyBytes_Resize(buf, newsize);
}
@@ -470,12 +478,11 @@ error:
/*[clinic input]
-output preset file
module _lzma
class _lzma.LZMACompressor "Compressor *" "&Compressor_type"
class _lzma.LZMADecompressor "Decompressor *" "&Decompressor_type"
[clinic start generated code]*/
-/*[clinic end generated code: output=da39a3ee5e6b4b0d input=f17afc786525d6c2]*/
+/*[clinic end generated code: output=da39a3ee5e6b4b0d input=2c14bbe05ff0c147]*/
#include "clinic/_lzmamodule.c.h"
@@ -504,7 +511,7 @@ class lzma_filter_converter(CConverter):
static PyObject *
compress(Compressor *c, uint8_t *data, size_t len, lzma_action action)
{
- size_t data_size = 0;
+ Py_ssize_t data_size = 0;
PyObject *result;
result = PyBytes_FromStringAndSize(NULL, INITIAL_BUFFER_SIZE);
@@ -527,7 +534,7 @@ compress(Compressor *c, uint8_t *data, size_t len, lzma_action action)
(action == LZMA_FINISH && lzret == LZMA_STREAM_END)) {
break;
} else if (c->lzs.avail_out == 0) {
- if (grow_buffer(&result) == -1)
+ if (grow_buffer(&result, -1) == -1)
goto error;
c->lzs.next_out = (uint8_t *)PyBytes_AS_STRING(result) + data_size;
c->lzs.avail_out = PyBytes_GET_SIZE(result) - data_size;
@@ -888,25 +895,33 @@ static PyTypeObject Compressor_type = {
/* LZMADecompressor class. */
-static PyObject *
-decompress(Decompressor *d, uint8_t *data, size_t len)
+/* Decompress data of length d->lzs.avail_in in d->lzs.next_in. The output
+ buffer is allocated dynamically and returned. At most max_length bytes are
+ returned, so some of the input may not be consumed. d->lzs.next_in and
+ d->lzs.avail_in are updated to reflect the consumed input. */
+static PyObject*
+decompress_buf(Decompressor *d, Py_ssize_t max_length)
{
- size_t data_size = 0;
+ Py_ssize_t data_size = 0;
PyObject *result;
+ lzma_stream *lzs = &d->lzs;
- result = PyBytes_FromStringAndSize(NULL, INITIAL_BUFFER_SIZE);
+ if (max_length < 0 || max_length >= INITIAL_BUFFER_SIZE)
+ result = PyBytes_FromStringAndSize(NULL, INITIAL_BUFFER_SIZE);
+ else
+ result = PyBytes_FromStringAndSize(NULL, max_length);
if (result == NULL)
return NULL;
- d->lzs.next_in = data;
- d->lzs.avail_in = len;
- d->lzs.next_out = (uint8_t *)PyBytes_AS_STRING(result);
- d->lzs.avail_out = PyBytes_GET_SIZE(result);
+
+ lzs->next_out = (uint8_t *)PyBytes_AS_STRING(result);
+ lzs->avail_out = PyBytes_GET_SIZE(result);
+
for (;;) {
lzma_ret lzret;
Py_BEGIN_ALLOW_THREADS
- lzret = lzma_code(&d->lzs, LZMA_RUN);
- data_size = (char *)d->lzs.next_out - PyBytes_AS_STRING(result);
+ lzret = lzma_code(lzs, LZMA_RUN);
+ data_size = (char *)lzs->next_out - PyBytes_AS_STRING(result);
Py_END_ALLOW_THREADS
if (catch_lzma_error(lzret))
goto error;
@@ -914,26 +929,131 @@ decompress(Decompressor *d, uint8_t *data, size_t len)
d->check = lzma_get_check(&d->lzs);
if (lzret == LZMA_STREAM_END) {
d->eof = 1;
- if (d->lzs.avail_in > 0) {
- Py_CLEAR(d->unused_data);
- d->unused_data = PyBytes_FromStringAndSize(
- (char *)d->lzs.next_in, d->lzs.avail_in);
- if (d->unused_data == NULL)
- goto error;
- }
break;
- } else if (d->lzs.avail_in == 0) {
+ } else if (lzs->avail_in == 0) {
break;
- } else if (d->lzs.avail_out == 0) {
- if (grow_buffer(&result) == -1)
+ } else if (lzs->avail_out == 0) {
+ if (data_size == max_length)
+ break;
+ if (grow_buffer(&result, max_length) == -1)
goto error;
- d->lzs.next_out = (uint8_t *)PyBytes_AS_STRING(result) + data_size;
- d->lzs.avail_out = PyBytes_GET_SIZE(result) - data_size;
+ lzs->next_out = (uint8_t *)PyBytes_AS_STRING(result) + data_size;
+ lzs->avail_out = PyBytes_GET_SIZE(result) - data_size;
}
}
if (data_size != PyBytes_GET_SIZE(result))
if (_PyBytes_Resize(&result, data_size) == -1)
goto error;
+
+ return result;
+
+error:
+ Py_XDECREF(result);
+ return NULL;
+}
+
+static PyObject *
+decompress(Decompressor *d, uint8_t *data, size_t len, Py_ssize_t max_length)
+{
+ char input_buffer_in_use;
+ PyObject *result;
+ lzma_stream *lzs = &d->lzs;
+
+ /* Prepend unconsumed input if necessary */
+ if (lzs->next_in != NULL) {
+ size_t avail_now, avail_total;
+
+ /* Number of bytes we can append to input buffer */
+ avail_now = (d->input_buffer + d->input_buffer_size)
+ - (lzs->next_in + lzs->avail_in);
+
+ /* Number of bytes we can append if we move existing
+ contents to beginning of buffer (overwriting
+ consumed input) */
+ avail_total = d->input_buffer_size - lzs->avail_in;
+
+ if (avail_total < len) {
+ size_t offset = lzs->next_in - d->input_buffer;
+ uint8_t *tmp;
+ size_t new_size = d->input_buffer_size + len - avail_now;
+
+ /* Assign to temporary variable first, so we don't
+ lose address of allocated buffer if realloc fails */
+ tmp = PyMem_Realloc(d->input_buffer, new_size);
+ if (tmp == NULL) {
+ PyErr_SetNone(PyExc_MemoryError);
+ return NULL;
+ }
+ d->input_buffer = tmp;
+ d->input_buffer_size = new_size;
+
+ lzs->next_in = d->input_buffer + offset;
+ }
+ else if (avail_now < len) {
+ memmove(d->input_buffer, lzs->next_in,
+ lzs->avail_in);
+ lzs->next_in = d->input_buffer;
+ }
+ memcpy((void*)(lzs->next_in + lzs->avail_in), data, len);
+ lzs->avail_in += len;
+ input_buffer_in_use = 1;
+ }
+ else {
+ lzs->next_in = data;
+ lzs->avail_in = len;
+ input_buffer_in_use = 0;
+ }
+
+ result = decompress_buf(d, max_length);
+ if(result == NULL)
+ return NULL;
+
+ if (d->eof) {
+ d->needs_input = 0;
+ if (lzs->avail_in > 0) {
+ Py_CLEAR(d->unused_data);
+ d->unused_data = PyBytes_FromStringAndSize(
+ (char *)lzs->next_in, lzs->avail_in);
+ if (d->unused_data == NULL)
+ goto error;
+ }
+ }
+ else if (lzs->avail_in == 0) {
+ lzs->next_in = NULL;
+ d->needs_input = 1;
+ }
+ else {
+ d->needs_input = 0;
+
+ /* If we did not use the input buffer, we now have
+ to copy the tail from the caller's buffer into the
+ input buffer */
+ if (!input_buffer_in_use) {
+
+ /* Discard buffer if it's too small
+ (resizing it may needlessly copy the current contents) */
+ if (d->input_buffer != NULL &&
+ d->input_buffer_size < lzs->avail_in) {
+ PyMem_Free(d->input_buffer);
+ d->input_buffer = NULL;
+ }
+
+ /* Allocate if necessary */
+ if (d->input_buffer == NULL) {
+ d->input_buffer = PyMem_Malloc(lzs->avail_in);
+ if (d->input_buffer == NULL) {
+ PyErr_SetNone(PyExc_MemoryError);
+ goto error;
+ }
+ d->input_buffer_size = lzs->avail_in;
+ }
+
+ /* Copy tail */
+ memcpy(d->input_buffer, lzs->next_in, lzs->avail_in);
+ lzs->next_in = d->input_buffer;
+ }
+ }
+
return result;
error:
@@ -946,20 +1066,28 @@ _lzma.LZMADecompressor.decompress
self: self(type="Decompressor *")
data: Py_buffer
- /
+ max_length: Py_ssize_t=-1
+
+Decompress *data*, returning uncompressed data as bytes.
-Provide data to the decompressor object.
+If *max_length* is nonnegative, returns at most *max_length* bytes of
+decompressed data. If this limit is reached and further output can be
+produced, *self.needs_input* will be set to ``False``. In this case, the next
+call to *decompress()* may provide *data* as b'' to obtain more of the output.
-Returns a chunk of decompressed data if possible, or b'' otherwise.
+If all of the input data was decompressed and returned (either because this
+was less than *max_length* bytes, or because *max_length* was negative),
+*self.needs_input* will be set to True.
-Attempting to decompress data after the end of stream is reached
-raises an EOFError. Any data found after the end of the stream
-is ignored and saved in the unused_data attribute.
+Attempting to decompress data after the end of stream is reached raises an
+EOFError. Any data found after the end of the stream is ignored and saved in
+the unused_data attribute.
[clinic start generated code]*/
static PyObject *
-_lzma_LZMADecompressor_decompress_impl(Decompressor *self, Py_buffer *data)
-/*[clinic end generated code: output=d86e78da7ff0ff21 input=50c4768b821bf0ef]*/
+_lzma_LZMADecompressor_decompress_impl(Decompressor *self, Py_buffer *data,
+ Py_ssize_t max_length)
+/*[clinic end generated code: output=ef4e20ec7122241d input=f2bb902cc1caf203]*/
{
PyObject *result = NULL;
@@ -967,7 +1095,7 @@ _lzma_LZMADecompressor_decompress_impl(Decompressor *self, Py_buffer *data)
if (self->eof)
PyErr_SetString(PyExc_EOFError, "Already at end of stream");
else
- result = decompress(self, data->buf, data->len);
+ result = decompress(self, data->buf, data->len, max_length);
RELEASE_LOCK(self);
return result;
}
@@ -1023,8 +1151,9 @@ For one-shot decompression, use the decompress() function instead.
[clinic start generated code]*/
static int
-_lzma_LZMADecompressor___init___impl(Decompressor *self, int format, PyObject *memlimit, PyObject *filters)
-/*[clinic end generated code: output=9b119f6f2cc2d7a8 input=458ca6132ef29801]*/
+_lzma_LZMADecompressor___init___impl(Decompressor *self, int format,
+ PyObject *memlimit, PyObject *filters)
+/*[clinic end generated code: output=3e1821f8aa36564c input=458ca6132ef29801]*/
{
const uint32_t decoder_flags = LZMA_TELL_ANY_CHECK | LZMA_TELL_NO_CHECK;
uint64_t memlimit_ = UINT64_MAX;
@@ -1055,6 +1184,7 @@ _lzma_LZMADecompressor___init___impl(Decompressor *self, int format, PyObject *m
self->alloc.alloc = PyLzma_Malloc;
self->alloc.free = PyLzma_Free;
self->lzs.allocator = &self->alloc;
+ self->lzs.next_in = NULL;
#ifdef WITH_THREAD
self->lock = PyThread_allocate_lock();
@@ -1065,6 +1195,9 @@ _lzma_LZMADecompressor___init___impl(Decompressor *self, int format, PyObject *m
#endif
self->check = LZMA_CHECK_UNKNOWN;
+ self->needs_input = 1;
+ self->input_buffer = NULL;
+ self->input_buffer_size = 0;
self->unused_data = PyBytes_FromStringAndSize(NULL, 0);
if (self->unused_data == NULL)
goto error;
@@ -1113,6 +1246,9 @@ error:
static void
Decompressor_dealloc(Decompressor *self)
{
+ if(self->input_buffer != NULL)
+ PyMem_Free(self->input_buffer);
+
lzma_end(&self->lzs);
Py_CLEAR(self->unused_data);
#ifdef WITH_THREAD
@@ -1134,6 +1270,9 @@ PyDoc_STRVAR(Decompressor_check_doc,
PyDoc_STRVAR(Decompressor_eof_doc,
"True if the end-of-stream marker has been reached.");
+PyDoc_STRVAR(Decompressor_needs_input_doc,
+"True if more input is needed before more decompressed data can be produced.");
+
PyDoc_STRVAR(Decompressor_unused_data_doc,
"Data found after the end of the compressed stream.");
@@ -1142,6 +1281,8 @@ static PyMemberDef Decompressor_members[] = {
Decompressor_check_doc},
{"eof", T_BOOL, offsetof(Decompressor, eof), READONLY,
Decompressor_eof_doc},
+ {"needs_input", T_BOOL, offsetof(Decompressor, needs_input), READONLY,
+ Decompressor_needs_input_doc},
{"unused_data", T_OBJECT_EX, offsetof(Decompressor, unused_data), READONLY,
Decompressor_unused_data_doc},
{NULL}
@@ -1260,8 +1401,9 @@ The result does not include the filter ID itself, only the options.
[clinic start generated code]*/
static PyObject *
-_lzma__decode_filter_properties_impl(PyModuleDef *module, lzma_vli filter_id, Py_buffer *encoded_props)
-/*[clinic end generated code: output=235f7f5345d48744 input=246410800782160c]*/
+_lzma__decode_filter_properties_impl(PyModuleDef *module, lzma_vli filter_id,
+ Py_buffer *encoded_props)
+/*[clinic end generated code: output=af248f570746668b input=246410800782160c]*/
{
lzma_filter filter;
lzma_ret lzret;