diff options
Diffstat (limited to 'Modules/_lzmamodule.c')
-rw-r--r-- | Modules/_lzmamodule.c | 1286 |
1 files changed, 1286 insertions, 0 deletions
diff --git a/Modules/_lzmamodule.c b/Modules/_lzmamodule.c new file mode 100644 index 0000000..b482a77 --- /dev/null +++ b/Modules/_lzmamodule.c @@ -0,0 +1,1286 @@ +/* _lzma - Low-level Python interface to liblzma. + + Initial implementation by Per Øyvind Karlsen. + Rewritten by Nadeem Vawda. + +*/ + +#define PY_SSIZE_T_CLEAN + +#include "Python.h" +#include "structmember.h" +#ifdef WITH_THREAD +#include "pythread.h" +#endif + +#include <stdarg.h> +#include <string.h> + +#include <lzma.h> + + +#ifndef PY_LONG_LONG +#error "This module requires PY_LONG_LONG to be defined" +#endif + + +#ifdef WITH_THREAD +#define ACQUIRE_LOCK(obj) do { \ + if (!PyThread_acquire_lock((obj)->lock, 0)) { \ + Py_BEGIN_ALLOW_THREADS \ + PyThread_acquire_lock((obj)->lock, 1); \ + Py_END_ALLOW_THREADS \ + } } while (0) +#define RELEASE_LOCK(obj) PyThread_release_lock((obj)->lock) +#else +#define ACQUIRE_LOCK(obj) +#define RELEASE_LOCK(obj) +#endif + + +/* Container formats: */ +enum { + FORMAT_AUTO, + FORMAT_XZ, + FORMAT_ALONE, + FORMAT_RAW, +}; + +#define LZMA_CHECK_UNKNOWN (LZMA_CHECK_ID_MAX + 1) + + +typedef struct { + PyObject_HEAD + lzma_stream lzs; + int flushed; +#ifdef WITH_THREAD + PyThread_type_lock lock; +#endif +} Compressor; + +typedef struct { + PyObject_HEAD + lzma_stream lzs; + int check; + char eof; + PyObject *unused_data; +#ifdef WITH_THREAD + PyThread_type_lock lock; +#endif +} Decompressor; + +/* LZMAError class object. */ +static PyObject *Error; + +/* An empty tuple, used by the filter specifier parsing code. */ +static PyObject *empty_tuple; + + +/* Helper functions. */ + +static int +catch_lzma_error(lzma_ret lzret) +{ + switch (lzret) { + case LZMA_OK: + case LZMA_GET_CHECK: + case LZMA_NO_CHECK: + case LZMA_STREAM_END: + return 0; + case LZMA_UNSUPPORTED_CHECK: + PyErr_SetString(Error, "Unsupported integrity check"); + return 1; + case LZMA_MEM_ERROR: + PyErr_NoMemory(); + return 1; + case LZMA_MEMLIMIT_ERROR: + PyErr_SetString(Error, "Memory usage limit exceeded"); + return 1; + case LZMA_FORMAT_ERROR: + PyErr_SetString(Error, "Input format not supported by decoder"); + return 1; + case LZMA_OPTIONS_ERROR: + PyErr_SetString(Error, "Invalid or unsupported options"); + return 1; + case LZMA_DATA_ERROR: + PyErr_SetString(Error, "Corrupt input data"); + return 1; + case LZMA_BUF_ERROR: + PyErr_SetString(Error, "Insufficient buffer space"); + return 1; + case LZMA_PROG_ERROR: + PyErr_SetString(Error, "Internal error"); + return 1; + default: + PyErr_Format(Error, "Unrecognized error from liblzma: %d", lzret); + return 1; + } +} + +#if BUFSIZ < 8192 +#define INITIAL_BUFFER_SIZE 8192 +#else +#define INITIAL_BUFFER_SIZE BUFSIZ +#endif + +static int +grow_buffer(PyObject **buf) +{ + size_t size = PyBytes_GET_SIZE(*buf); + return _PyBytes_Resize(buf, size + (size >> 3) + 6); +} + + +/* Some custom type conversions for PyArg_ParseTupleAndKeywords(), + since the predefined conversion specifiers do not suit our needs: + + uint32_t - the "I" (unsigned int) specifier is the right size, but + silently ignores overflows on conversion. + + lzma_vli - the "K" (unsigned PY_LONG_LONG) specifier is the right + size, but like "I" it silently ignores overflows on conversion. + + lzma_mode and lzma_match_finder - these are enumeration types, and + so the size of each is implementation-defined. Worse, different + enum types can be of different sizes within the same program, so + to be strictly correct, we need to define two separate converters. + */ + +#define INT_TYPE_CONVERTER_FUNC(TYPE, FUNCNAME) \ + static int \ + FUNCNAME(PyObject *obj, void *ptr) \ + { \ + unsigned PY_LONG_LONG val; \ + \ + val = PyLong_AsUnsignedLongLong(obj); \ + if (PyErr_Occurred()) \ + return 0; \ + if ((unsigned PY_LONG_LONG)(TYPE)val != val) { \ + PyErr_SetString(PyExc_OverflowError, \ + "Value too large for " #TYPE " type"); \ + return 0; \ + } \ + *(TYPE *)ptr = (TYPE)val; \ + return 1; \ + } + +INT_TYPE_CONVERTER_FUNC(uint32_t, uint32_converter) +INT_TYPE_CONVERTER_FUNC(lzma_vli, lzma_vli_converter) +INT_TYPE_CONVERTER_FUNC(lzma_mode, lzma_mode_converter) +INT_TYPE_CONVERTER_FUNC(lzma_match_finder, lzma_mf_converter) + +#undef INT_TYPE_CONVERTER_FUNC + + +/* Filter specifier parsing. + + This code handles converting filter specifiers (Python dicts) into + the C lzma_filter structs expected by liblzma. */ + +static void * +parse_filter_spec_lzma(PyObject *spec) +{ + static char *optnames[] = {"id", "preset", "dict_size", "lc", "lp", + "pb", "mode", "nice_len", "mf", "depth", NULL}; + PyObject *id; + PyObject *preset_obj; + uint32_t preset = LZMA_PRESET_DEFAULT; + lzma_options_lzma *options; + + /* First, fill in default values for all the options using a preset. + Then, override the defaults with any values given by the caller. */ + + preset_obj = PyMapping_GetItemString(spec, "preset"); + if (preset_obj == NULL) { + if (PyErr_ExceptionMatches(PyExc_KeyError)) + PyErr_Clear(); + else + return NULL; + } else { + int ok = uint32_converter(preset_obj, &preset); + Py_DECREF(preset_obj); + if (!ok) + return NULL; + } + + options = (lzma_options_lzma *)PyMem_Malloc(sizeof *options); + if (options == NULL) + return PyErr_NoMemory(); + memset(options, 0, sizeof *options); + + if (lzma_lzma_preset(options, preset)) { + PyMem_Free(options); + PyErr_Format(Error, "Invalid compression preset: %d", preset); + return NULL; + } + + if (!PyArg_ParseTupleAndKeywords(empty_tuple, spec, + "|OOO&O&O&O&O&O&O&O&", optnames, + &id, &preset_obj, + uint32_converter, &options->dict_size, + uint32_converter, &options->lc, + uint32_converter, &options->lp, + uint32_converter, &options->pb, + lzma_mode_converter, &options->mode, + uint32_converter, &options->nice_len, + lzma_mf_converter, &options->mf, + uint32_converter, &options->depth)) { + PyErr_SetString(PyExc_ValueError, + "Invalid filter specifier for LZMA filter"); + PyMem_Free(options); + options = NULL; + } + return options; +} + +static void * +parse_filter_spec_delta(PyObject *spec) +{ + static char *optnames[] = {"id", "dist", NULL}; + PyObject *id; + uint32_t dist = 1; + lzma_options_delta *options; + + if (!PyArg_ParseTupleAndKeywords(empty_tuple, spec, "|OO&", optnames, + &id, uint32_converter, &dist)) { + PyErr_SetString(PyExc_ValueError, + "Invalid filter specifier for delta filter"); + return NULL; + } + + options = (lzma_options_delta *)PyMem_Malloc(sizeof *options); + if (options == NULL) + return PyErr_NoMemory(); + memset(options, 0, sizeof *options); + options->type = LZMA_DELTA_TYPE_BYTE; + options->dist = dist; + return options; +} + +static void * +parse_filter_spec_bcj(PyObject *spec) +{ + static char *optnames[] = {"id", "start_offset", NULL}; + PyObject *id; + uint32_t start_offset = 0; + lzma_options_bcj *options; + + if (!PyArg_ParseTupleAndKeywords(empty_tuple, spec, "|OO&", optnames, + &id, uint32_converter, &start_offset)) { + PyErr_SetString(PyExc_ValueError, + "Invalid filter specifier for BCJ filter"); + return NULL; + } + + options = (lzma_options_bcj *)PyMem_Malloc(sizeof *options); + if (options == NULL) + return PyErr_NoMemory(); + memset(options, 0, sizeof *options); + options->start_offset = start_offset; + return options; +} + +static void * +parse_filter_spec(lzma_filter *f, PyObject *spec) +{ + PyObject *id_obj; + + if (!PyMapping_Check(spec)) { + PyErr_SetString(PyExc_TypeError, + "Filter specifier must be a dict or dict-like object"); + return NULL; + } + id_obj = PyMapping_GetItemString(spec, "id"); + if (id_obj == NULL) { + if (PyErr_ExceptionMatches(PyExc_KeyError)) + PyErr_SetString(PyExc_ValueError, + "Filter specifier must have an \"id\" entry"); + return NULL; + } + f->id = PyLong_AsUnsignedLongLong(id_obj); + Py_DECREF(id_obj); + if (PyErr_Occurred()) + return NULL; + + switch (f->id) { + case LZMA_FILTER_LZMA1: + case LZMA_FILTER_LZMA2: + f->options = parse_filter_spec_lzma(spec); + return f->options; + case LZMA_FILTER_DELTA: + f->options = parse_filter_spec_delta(spec); + return f->options; + case LZMA_FILTER_X86: + case LZMA_FILTER_POWERPC: + case LZMA_FILTER_IA64: + case LZMA_FILTER_ARM: + case LZMA_FILTER_ARMTHUMB: + case LZMA_FILTER_SPARC: + f->options = parse_filter_spec_bcj(spec); + return f->options; + default: + PyErr_Format(PyExc_ValueError, "Invalid filter ID: %llu", f->id); + return NULL; + } +} + +static void +free_filter_chain(lzma_filter filters[]) +{ + int i; + + for (i = 0; filters[i].id != LZMA_VLI_UNKNOWN; i++) + PyMem_Free(filters[i].options); +} + +static int +parse_filter_chain_spec(lzma_filter filters[], PyObject *filterspecs) +{ + Py_ssize_t i, num_filters; + + num_filters = PySequence_Length(filterspecs); + if (num_filters == -1) + return -1; + if (num_filters > LZMA_FILTERS_MAX) { + PyErr_Format(PyExc_ValueError, + "Too many filters - liblzma supports a maximum of %d", + LZMA_FILTERS_MAX); + return -1; + } + + for (i = 0; i < num_filters; i++) { + int ok = 1; + PyObject *spec = PySequence_GetItem(filterspecs, i); + if (spec == NULL || parse_filter_spec(&filters[i], spec) == NULL) + ok = 0; + Py_XDECREF(spec); + if (!ok) { + filters[i].id = LZMA_VLI_UNKNOWN; + free_filter_chain(filters); + return -1; + } + } + filters[num_filters].id = LZMA_VLI_UNKNOWN; + return 0; +} + + +/* Filter specifier construction. + + This code handles converting C lzma_filter structs into + Python-level filter specifiers (represented as dicts). */ + +static int +spec_add_field(PyObject *spec, _Py_Identifier *key, unsigned PY_LONG_LONG value) +{ + int status; + PyObject *value_object; + + value_object = PyLong_FromUnsignedLongLong(value); + if (value_object == NULL) + return -1; + + status = _PyDict_SetItemId(spec, key, value_object); + Py_DECREF(value_object); + return status; +} + +static PyObject * +build_filter_spec(const lzma_filter *f) +{ + PyObject *spec; + + spec = PyDict_New(); + if (spec == NULL) + return NULL; + +#define ADD_FIELD(SOURCE, FIELD) \ + do { \ + _Py_IDENTIFIER(FIELD); \ + if (spec_add_field(spec, &PyId_##FIELD, SOURCE->FIELD) == -1) \ + goto error;\ + } while (0) + + ADD_FIELD(f, id); + + switch (f->id) { + /* For LZMA1 filters, lzma_properties_{encode,decode}() only look at the + lc, lp, pb, and dict_size fields. For LZMA2 filters, only the + dict_size field is used. */ + case LZMA_FILTER_LZMA1: { + lzma_options_lzma *options = f->options; + ADD_FIELD(options, lc); + ADD_FIELD(options, lp); + ADD_FIELD(options, pb); + ADD_FIELD(options, dict_size); + break; + } + case LZMA_FILTER_LZMA2: { + lzma_options_lzma *options = f->options; + ADD_FIELD(options, dict_size); + break; + } + case LZMA_FILTER_DELTA: { + lzma_options_delta *options = f->options; + ADD_FIELD(options, dist); + break; + } + case LZMA_FILTER_X86: + case LZMA_FILTER_POWERPC: + case LZMA_FILTER_IA64: + case LZMA_FILTER_ARM: + case LZMA_FILTER_ARMTHUMB: + case LZMA_FILTER_SPARC: { + lzma_options_bcj *options = f->options; + ADD_FIELD(options, start_offset); + break; + } + default: + PyErr_Format(PyExc_ValueError, "Invalid filter ID: %llu", f->id); + goto error; + } + +#undef ADD_FIELD + + return spec; + +error: + Py_DECREF(spec); + return NULL; +} + + +/* LZMACompressor class. */ + +static PyObject * +compress(Compressor *c, uint8_t *data, size_t len, lzma_action action) +{ + size_t data_size = 0; + PyObject *result; + + result = PyBytes_FromStringAndSize(NULL, INITIAL_BUFFER_SIZE); + if (result == NULL) + return NULL; + c->lzs.next_in = data; + c->lzs.avail_in = len; + c->lzs.next_out = (uint8_t *)PyBytes_AS_STRING(result); + c->lzs.avail_out = PyBytes_GET_SIZE(result); + for (;;) { + lzma_ret lzret; + + Py_BEGIN_ALLOW_THREADS + lzret = lzma_code(&c->lzs, action); + data_size = (char *)c->lzs.next_out - PyBytes_AS_STRING(result); + Py_END_ALLOW_THREADS + if (catch_lzma_error(lzret)) + goto error; + if ((action == LZMA_RUN && c->lzs.avail_in == 0) || + (action == LZMA_FINISH && lzret == LZMA_STREAM_END)) { + break; + } else if (c->lzs.avail_out == 0) { + if (grow_buffer(&result) == -1) + goto error; + c->lzs.next_out = (uint8_t *)PyBytes_AS_STRING(result) + data_size; + c->lzs.avail_out = PyBytes_GET_SIZE(result) - data_size; + } + } + if (data_size != PyBytes_GET_SIZE(result)) + if (_PyBytes_Resize(&result, data_size) == -1) + goto error; + return result; + +error: + Py_XDECREF(result); + return NULL; +} + +PyDoc_STRVAR(Compressor_compress_doc, +"compress(data) -> bytes\n" +"\n" +"Provide data to the compressor object. Returns a chunk of\n" +"compressed data if possible, or b\"\" otherwise.\n" +"\n" +"When you have finished providing data to the compressor, call the\n" +"flush() method to finish the conversion process.\n"); + +static PyObject * +Compressor_compress(Compressor *self, PyObject *args) +{ + Py_buffer buffer; + PyObject *result = NULL; + + if (!PyArg_ParseTuple(args, "y*:compress", &buffer)) + return NULL; + + ACQUIRE_LOCK(self); + if (self->flushed) + PyErr_SetString(PyExc_ValueError, "Compressor has been flushed"); + else + result = compress(self, buffer.buf, buffer.len, LZMA_RUN); + RELEASE_LOCK(self); + PyBuffer_Release(&buffer); + return result; +} + +PyDoc_STRVAR(Compressor_flush_doc, +"flush() -> bytes\n" +"\n" +"Finish the compression process. Returns the compressed data left\n" +"in internal buffers.\n" +"\n" +"The compressor object cannot be used after this method is called.\n"); + +static PyObject * +Compressor_flush(Compressor *self, PyObject *noargs) +{ + PyObject *result = NULL; + + ACQUIRE_LOCK(self); + if (self->flushed) { + PyErr_SetString(PyExc_ValueError, "Repeated call to flush()"); + } else { + self->flushed = 1; + result = compress(self, NULL, 0, LZMA_FINISH); + } + RELEASE_LOCK(self); + return result; +} + +static int +Compressor_init_xz(lzma_stream *lzs, int check, uint32_t preset, + PyObject *filterspecs) +{ + lzma_ret lzret; + + if (filterspecs == Py_None) { + lzret = lzma_easy_encoder(lzs, preset, check); + } else { + lzma_filter filters[LZMA_FILTERS_MAX + 1]; + + if (parse_filter_chain_spec(filters, filterspecs) == -1) + return -1; + lzret = lzma_stream_encoder(lzs, filters, check); + free_filter_chain(filters); + } + if (catch_lzma_error(lzret)) + return -1; + else + return 0; +} + +static int +Compressor_init_alone(lzma_stream *lzs, uint32_t preset, PyObject *filterspecs) +{ + lzma_ret lzret; + + if (filterspecs == Py_None) { + lzma_options_lzma options; + + if (lzma_lzma_preset(&options, preset)) { + PyErr_Format(Error, "Invalid compression preset: %d", preset); + return -1; + } + lzret = lzma_alone_encoder(lzs, &options); + } else { + lzma_filter filters[LZMA_FILTERS_MAX + 1]; + + if (parse_filter_chain_spec(filters, filterspecs) == -1) + return -1; + if (filters[0].id == LZMA_FILTER_LZMA1 && + filters[1].id == LZMA_VLI_UNKNOWN) { + lzret = lzma_alone_encoder(lzs, filters[0].options); + } else { + PyErr_SetString(PyExc_ValueError, + "Invalid filter chain for FORMAT_ALONE - " + "must be a single LZMA1 filter"); + lzret = LZMA_PROG_ERROR; + } + free_filter_chain(filters); + } + if (PyErr_Occurred() || catch_lzma_error(lzret)) + return -1; + else + return 0; +} + +static int +Compressor_init_raw(lzma_stream *lzs, PyObject *filterspecs) +{ + lzma_filter filters[LZMA_FILTERS_MAX + 1]; + lzma_ret lzret; + + if (filterspecs == Py_None) { + PyErr_SetString(PyExc_ValueError, + "Must specify filters for FORMAT_RAW"); + return -1; + } + if (parse_filter_chain_spec(filters, filterspecs) == -1) + return -1; + lzret = lzma_raw_encoder(lzs, filters); + free_filter_chain(filters); + if (catch_lzma_error(lzret)) + return -1; + else + return 0; +} + +static int +Compressor_init(Compressor *self, PyObject *args, PyObject *kwargs) +{ + static char *arg_names[] = {"format", "check", "preset", "filters", NULL}; + int format = FORMAT_XZ; + int check = -1; + uint32_t preset = LZMA_PRESET_DEFAULT; + PyObject *preset_obj = Py_None; + PyObject *filterspecs = Py_None; + + if (!PyArg_ParseTupleAndKeywords(args, kwargs, + "|iiOO:LZMACompressor", arg_names, + &format, &check, &preset_obj, + &filterspecs)) + return -1; + + if (format != FORMAT_XZ && check != -1 && check != LZMA_CHECK_NONE) { + PyErr_SetString(PyExc_ValueError, + "Integrity checks are only supported by FORMAT_XZ"); + return -1; + } + + if (preset_obj != Py_None && filterspecs != Py_None) { + PyErr_SetString(PyExc_ValueError, + "Cannot specify both preset and filter chain"); + return -1; + } + + if (preset_obj != Py_None) + if (!uint32_converter(preset_obj, &preset)) + return -1; + +#ifdef WITH_THREAD + self->lock = PyThread_allocate_lock(); + if (self->lock == NULL) { + PyErr_SetString(PyExc_MemoryError, "Unable to allocate lock"); + return -1; + } +#endif + + self->flushed = 0; + switch (format) { + case FORMAT_XZ: + if (check == -1) + check = LZMA_CHECK_CRC64; + if (Compressor_init_xz(&self->lzs, check, preset, filterspecs) != 0) + break; + return 0; + + case FORMAT_ALONE: + if (Compressor_init_alone(&self->lzs, preset, filterspecs) != 0) + break; + return 0; + + case FORMAT_RAW: + if (Compressor_init_raw(&self->lzs, filterspecs) != 0) + break; + return 0; + + default: + PyErr_Format(PyExc_ValueError, + "Invalid container format: %d", format); + break; + } + +#ifdef WITH_THREAD + PyThread_free_lock(self->lock); + self->lock = NULL; +#endif + return -1; +} + +static void +Compressor_dealloc(Compressor *self) +{ + lzma_end(&self->lzs); +#ifdef WITH_THREAD + if (self->lock != NULL) + PyThread_free_lock(self->lock); +#endif + Py_TYPE(self)->tp_free((PyObject *)self); +} + +static PyMethodDef Compressor_methods[] = { + {"compress", (PyCFunction)Compressor_compress, METH_VARARGS, + Compressor_compress_doc}, + {"flush", (PyCFunction)Compressor_flush, METH_NOARGS, + Compressor_flush_doc}, + {NULL} +}; + +PyDoc_STRVAR(Compressor_doc, +"LZMACompressor(format=FORMAT_XZ, check=-1, preset=None, filters=None)\n" +"\n" +"Create a compressor object for compressing data incrementally.\n" +"\n" +"format specifies the container format to use for the output. This can\n" +"be FORMAT_XZ (default), FORMAT_ALONE, or FORMAT_RAW.\n" +"\n" +"check specifies the integrity check to use. For FORMAT_XZ, the default\n" +"is CHECK_CRC64. FORMAT_ALONE and FORMAT_RAW do not suport integrity\n" +"checks; for these formats, check must be omitted, or be CHECK_NONE.\n" +"\n" +"The settings used by the compressor can be specified either as a\n" +"preset compression level (with the 'preset' argument), or in detail\n" +"as a custom filter chain (with the 'filters' argument). For FORMAT_XZ\n" +"and FORMAT_ALONE, the default is to use the PRESET_DEFAULT preset\n" +"level. For FORMAT_RAW, the caller must always specify a filter chain;\n" +"the raw compressor does not support preset compression levels.\n" +"\n" +"preset (if provided) should be an integer in the range 0-9, optionally\n" +"OR-ed with the constant PRESET_EXTREME.\n" +"\n" +"filters (if provided) should be a sequence of dicts. Each dict should\n" +"have an entry for \"id\" indicating the ID of the filter, plus\n" +"additional entries for options to the filter.\n" +"\n" +"For one-shot compression, use the compress() function instead.\n"); + +static PyTypeObject Compressor_type = { + PyVarObject_HEAD_INIT(NULL, 0) + "_lzma.LZMACompressor", /* tp_name */ + sizeof(Compressor), /* tp_basicsize */ + 0, /* tp_itemsize */ + (destructor)Compressor_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_reserved */ + 0, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + 0, /* tp_str */ + 0, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT, /* tp_flags */ + Compressor_doc, /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + Compressor_methods, /* tp_methods */ + 0, /* tp_members */ + 0, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + (initproc)Compressor_init, /* tp_init */ + 0, /* tp_alloc */ + PyType_GenericNew, /* tp_new */ +}; + + +/* LZMADecompressor class. */ + +static PyObject * +decompress(Decompressor *d, uint8_t *data, size_t len) +{ + size_t data_size = 0; + PyObject *result; + + result = PyBytes_FromStringAndSize(NULL, INITIAL_BUFFER_SIZE); + if (result == NULL) + return NULL; + d->lzs.next_in = data; + d->lzs.avail_in = len; + d->lzs.next_out = (uint8_t *)PyBytes_AS_STRING(result); + d->lzs.avail_out = PyBytes_GET_SIZE(result); + for (;;) { + lzma_ret lzret; + + Py_BEGIN_ALLOW_THREADS + lzret = lzma_code(&d->lzs, LZMA_RUN); + data_size = (char *)d->lzs.next_out - PyBytes_AS_STRING(result); + Py_END_ALLOW_THREADS + if (catch_lzma_error(lzret)) + goto error; + if (lzret == LZMA_GET_CHECK || lzret == LZMA_NO_CHECK) + d->check = lzma_get_check(&d->lzs); + if (lzret == LZMA_STREAM_END) { + d->eof = 1; + if (d->lzs.avail_in > 0) { + Py_CLEAR(d->unused_data); + d->unused_data = PyBytes_FromStringAndSize( + (char *)d->lzs.next_in, d->lzs.avail_in); + if (d->unused_data == NULL) + goto error; + } + break; + } else if (d->lzs.avail_in == 0) { + break; + } else if (d->lzs.avail_out == 0) { + if (grow_buffer(&result) == -1) + goto error; + d->lzs.next_out = (uint8_t *)PyBytes_AS_STRING(result) + data_size; + d->lzs.avail_out = PyBytes_GET_SIZE(result) - data_size; + } + } + if (data_size != PyBytes_GET_SIZE(result)) + if (_PyBytes_Resize(&result, data_size) == -1) + goto error; + return result; + +error: + Py_XDECREF(result); + return NULL; +} + +PyDoc_STRVAR(Decompressor_decompress_doc, +"decompress(data) -> bytes\n" +"\n" +"Provide data to the decompressor object. Returns a chunk of\n" +"decompressed data if possible, or b\"\" otherwise.\n" +"\n" +"Attempting to decompress data after the end of the stream is\n" +"reached raises an EOFError. Any data found after the end of the\n" +"stream is ignored, and saved in the unused_data attribute.\n"); + +static PyObject * +Decompressor_decompress(Decompressor *self, PyObject *args) +{ + Py_buffer buffer; + PyObject *result = NULL; + + if (!PyArg_ParseTuple(args, "y*:decompress", &buffer)) + return NULL; + + ACQUIRE_LOCK(self); + if (self->eof) + PyErr_SetString(PyExc_EOFError, "Already at end of stream"); + else + result = decompress(self, buffer.buf, buffer.len); + RELEASE_LOCK(self); + PyBuffer_Release(&buffer); + return result; +} + +static int +Decompressor_init_raw(lzma_stream *lzs, PyObject *filterspecs) +{ + lzma_filter filters[LZMA_FILTERS_MAX + 1]; + lzma_ret lzret; + + if (parse_filter_chain_spec(filters, filterspecs) == -1) + return -1; + lzret = lzma_raw_decoder(lzs, filters); + free_filter_chain(filters); + if (catch_lzma_error(lzret)) + return -1; + else + return 0; +} + +static int +Decompressor_init(Decompressor *self, PyObject *args, PyObject *kwargs) +{ + static char *arg_names[] = {"format", "memlimit", "filters", NULL}; + const uint32_t decoder_flags = LZMA_TELL_ANY_CHECK | LZMA_TELL_NO_CHECK; + int format = FORMAT_AUTO; + uint64_t memlimit = UINT64_MAX; + PyObject *memlimit_obj = Py_None; + PyObject *filterspecs = Py_None; + lzma_ret lzret; + + if (!PyArg_ParseTupleAndKeywords(args, kwargs, + "|iOO:LZMADecompressor", arg_names, + &format, &memlimit_obj, &filterspecs)) + return -1; + + if (memlimit_obj != Py_None) { + if (format == FORMAT_RAW) { + PyErr_SetString(PyExc_ValueError, + "Cannot specify memory limit with FORMAT_RAW"); + return -1; + } + memlimit = PyLong_AsUnsignedLongLong(memlimit_obj); + if (PyErr_Occurred()) + return -1; + } + + if (format == FORMAT_RAW && filterspecs == Py_None) { + PyErr_SetString(PyExc_ValueError, + "Must specify filters for FORMAT_RAW"); + return -1; + } else if (format != FORMAT_RAW && filterspecs != Py_None) { + PyErr_SetString(PyExc_ValueError, + "Cannot specify filters except with FORMAT_RAW"); + return -1; + } + +#ifdef WITH_THREAD + self->lock = PyThread_allocate_lock(); + if (self->lock == NULL) { + PyErr_SetString(PyExc_MemoryError, "Unable to allocate lock"); + return -1; + } +#endif + + self->check = LZMA_CHECK_UNKNOWN; + self->unused_data = PyBytes_FromStringAndSize(NULL, 0); + if (self->unused_data == NULL) + goto error; + + switch (format) { + case FORMAT_AUTO: + lzret = lzma_auto_decoder(&self->lzs, memlimit, decoder_flags); + if (catch_lzma_error(lzret)) + break; + return 0; + + case FORMAT_XZ: + lzret = lzma_stream_decoder(&self->lzs, memlimit, decoder_flags); + if (catch_lzma_error(lzret)) + break; + return 0; + + case FORMAT_ALONE: + self->check = LZMA_CHECK_NONE; + lzret = lzma_alone_decoder(&self->lzs, memlimit); + if (catch_lzma_error(lzret)) + break; + return 0; + + case FORMAT_RAW: + self->check = LZMA_CHECK_NONE; + if (Decompressor_init_raw(&self->lzs, filterspecs) == -1) + break; + return 0; + + default: + PyErr_Format(PyExc_ValueError, + "Invalid container format: %d", format); + break; + } + +error: + Py_CLEAR(self->unused_data); +#ifdef WITH_THREAD + PyThread_free_lock(self->lock); + self->lock = NULL; +#endif + return -1; +} + +static void +Decompressor_dealloc(Decompressor *self) +{ + lzma_end(&self->lzs); + Py_CLEAR(self->unused_data); +#ifdef WITH_THREAD + if (self->lock != NULL) + PyThread_free_lock(self->lock); +#endif + Py_TYPE(self)->tp_free((PyObject *)self); +} + +static PyMethodDef Decompressor_methods[] = { + {"decompress", (PyCFunction)Decompressor_decompress, METH_VARARGS, + Decompressor_decompress_doc}, + {NULL} +}; + +PyDoc_STRVAR(Decompressor_check_doc, +"ID of the integrity check used by the input stream."); + +PyDoc_STRVAR(Decompressor_eof_doc, +"True if the end-of-stream marker has been reached."); + +PyDoc_STRVAR(Decompressor_unused_data_doc, +"Data found after the end of the compressed stream."); + +static PyMemberDef Decompressor_members[] = { + {"check", T_INT, offsetof(Decompressor, check), READONLY, + Decompressor_check_doc}, + {"eof", T_BOOL, offsetof(Decompressor, eof), READONLY, + Decompressor_eof_doc}, + {"unused_data", T_OBJECT_EX, offsetof(Decompressor, unused_data), READONLY, + Decompressor_unused_data_doc}, + {NULL} +}; + +PyDoc_STRVAR(Decompressor_doc, +"LZMADecompressor(format=FORMAT_AUTO, memlimit=None, filters=None)\n" +"\n" +"Create a decompressor object for decompressing data incrementally.\n" +"\n" +"format specifies the container format of the input stream. If this is\n" +"FORMAT_AUTO (the default), the decompressor will automatically detect\n" +"whether the input is FORMAT_XZ or FORMAT_ALONE. Streams created with\n" +"FORMAT_RAW cannot be autodetected.\n" +"\n" +"memlimit can be specified to limit the amount of memory used by the\n" +"decompressor. This will cause decompression to fail if the input\n" +"cannot be decompressed within the given limit.\n" +"\n" +"filters specifies a custom filter chain. This argument is required for\n" +"FORMAT_RAW, and not accepted with any other format. When provided,\n" +"this should be a sequence of dicts, each indicating the ID and options\n" +"for a single filter.\n" +"\n" +"For one-shot decompression, use the decompress() function instead.\n"); + +static PyTypeObject Decompressor_type = { + PyVarObject_HEAD_INIT(NULL, 0) + "_lzma.LZMADecompressor", /* tp_name */ + sizeof(Decompressor), /* tp_basicsize */ + 0, /* tp_itemsize */ + (destructor)Decompressor_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_reserved */ + 0, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + 0, /* tp_str */ + 0, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT, /* tp_flags */ + Decompressor_doc, /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + Decompressor_methods, /* tp_methods */ + Decompressor_members, /* tp_members */ + 0, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + (initproc)Decompressor_init, /* tp_init */ + 0, /* tp_alloc */ + PyType_GenericNew, /* tp_new */ +}; + + +/* Module-level functions. */ + +PyDoc_STRVAR(is_check_supported_doc, +"is_check_supported(check_id) -> bool\n" +"\n" +"Test whether the given integrity check is supported.\n" +"\n" +"Always returns True for CHECK_NONE and CHECK_CRC32.\n"); + +static PyObject * +is_check_supported(PyObject *self, PyObject *args) +{ + int check_id; + + if (!PyArg_ParseTuple(args, "i:is_check_supported", &check_id)) + return NULL; + + return PyBool_FromLong(lzma_check_is_supported(check_id)); +} + + +PyDoc_STRVAR(_encode_filter_properties_doc, +"_encode_filter_properties(filter) -> bytes\n" +"\n" +"Return a bytes object encoding the options (properties) of the filter\n" +"specified by *filter* (a dict).\n" +"\n" +"The result does not include the filter ID itself, only the options.\n"); + +static PyObject * +_encode_filter_properties(PyObject *self, PyObject *args) +{ + PyObject *filterspec; + lzma_filter filter; + lzma_ret lzret; + uint32_t encoded_size; + PyObject *result = NULL; + + if (!PyArg_ParseTuple(args, "O:_encode_filter_properties", &filterspec)) + return NULL; + + if (parse_filter_spec(&filter, filterspec) == NULL) + return NULL; + + lzret = lzma_properties_size(&encoded_size, &filter); + if (catch_lzma_error(lzret)) + goto error; + + result = PyBytes_FromStringAndSize(NULL, encoded_size); + if (result == NULL) + goto error; + + lzret = lzma_properties_encode( + &filter, (uint8_t *)PyBytes_AS_STRING(result)); + if (catch_lzma_error(lzret)) + goto error; + + PyMem_Free(filter.options); + return result; + +error: + Py_XDECREF(result); + PyMem_Free(filter.options); + return NULL; +} + + +PyDoc_STRVAR(_decode_filter_properties_doc, +"_decode_filter_properties(filter_id, encoded_props) -> dict\n" +"\n" +"Return a dict describing a filter with ID *filter_id*, and options\n" +"(properties) decoded from the bytes object *encoded_props*.\n"); + +static PyObject * +_decode_filter_properties(PyObject *self, PyObject *args) +{ + Py_buffer encoded_props; + lzma_filter filter; + lzma_ret lzret; + PyObject *result = NULL; + + if (!PyArg_ParseTuple(args, "O&y*:_decode_filter_properties", + lzma_vli_converter, &filter.id, &encoded_props)) + return NULL; + + lzret = lzma_properties_decode( + &filter, NULL, encoded_props.buf, encoded_props.len); + PyBuffer_Release(&encoded_props); + if (catch_lzma_error(lzret)) + return NULL; + + result = build_filter_spec(&filter); + + /* We use vanilla free() here instead of PyMem_Free() - filter.options was + allocated by lzma_properties_decode() using the default allocator. */ + free(filter.options); + return result; +} + + +/* Module initialization. */ + +static PyMethodDef module_methods[] = { + {"is_check_supported", (PyCFunction)is_check_supported, + METH_VARARGS, is_check_supported_doc}, + {"_encode_filter_properties", (PyCFunction)_encode_filter_properties, + METH_VARARGS, _encode_filter_properties_doc}, + {"_decode_filter_properties", (PyCFunction)_decode_filter_properties, + METH_VARARGS, _decode_filter_properties_doc}, + {NULL} +}; + +static PyModuleDef _lzmamodule = { + PyModuleDef_HEAD_INIT, + "_lzma", + NULL, + -1, + module_methods, + NULL, + NULL, + NULL, + NULL, +}; + +/* Some of our constants are more than 32 bits wide, so PyModule_AddIntConstant + would not work correctly on platforms with 32-bit longs. */ +static int +module_add_int_constant(PyObject *m, const char *name, PY_LONG_LONG value) +{ + PyObject *o = PyLong_FromLongLong(value); + if (o == NULL) + return -1; + if (PyModule_AddObject(m, name, o) == 0) + return 0; + Py_DECREF(o); + return -1; +} + +#define ADD_INT_PREFIX_MACRO(m, macro) \ + module_add_int_constant(m, #macro, LZMA_ ## macro) + +PyMODINIT_FUNC +PyInit__lzma(void) +{ + PyObject *m; + + empty_tuple = PyTuple_New(0); + if (empty_tuple == NULL) + return NULL; + + m = PyModule_Create(&_lzmamodule); + if (m == NULL) + return NULL; + + if (PyModule_AddIntMacro(m, FORMAT_AUTO) == -1 || + PyModule_AddIntMacro(m, FORMAT_XZ) == -1 || + PyModule_AddIntMacro(m, FORMAT_ALONE) == -1 || + PyModule_AddIntMacro(m, FORMAT_RAW) == -1 || + ADD_INT_PREFIX_MACRO(m, CHECK_NONE) == -1 || + ADD_INT_PREFIX_MACRO(m, CHECK_CRC32) == -1 || + ADD_INT_PREFIX_MACRO(m, CHECK_CRC64) == -1 || + ADD_INT_PREFIX_MACRO(m, CHECK_SHA256) == -1 || + ADD_INT_PREFIX_MACRO(m, CHECK_ID_MAX) == -1 || + ADD_INT_PREFIX_MACRO(m, CHECK_UNKNOWN) == -1 || + ADD_INT_PREFIX_MACRO(m, FILTER_LZMA1) == -1 || + ADD_INT_PREFIX_MACRO(m, FILTER_LZMA2) == -1 || + ADD_INT_PREFIX_MACRO(m, FILTER_DELTA) == -1 || + ADD_INT_PREFIX_MACRO(m, FILTER_X86) == -1 || + ADD_INT_PREFIX_MACRO(m, FILTER_IA64) == -1 || + ADD_INT_PREFIX_MACRO(m, FILTER_ARM) == -1 || + ADD_INT_PREFIX_MACRO(m, FILTER_ARMTHUMB) == -1 || + ADD_INT_PREFIX_MACRO(m, FILTER_SPARC) == -1 || + ADD_INT_PREFIX_MACRO(m, FILTER_POWERPC) == -1 || + ADD_INT_PREFIX_MACRO(m, MF_HC3) == -1 || + ADD_INT_PREFIX_MACRO(m, MF_HC4) == -1 || + ADD_INT_PREFIX_MACRO(m, MF_BT2) == -1 || + ADD_INT_PREFIX_MACRO(m, MF_BT3) == -1 || + ADD_INT_PREFIX_MACRO(m, MF_BT4) == -1 || + ADD_INT_PREFIX_MACRO(m, MODE_FAST) == -1 || + ADD_INT_PREFIX_MACRO(m, MODE_NORMAL) == -1 || + ADD_INT_PREFIX_MACRO(m, PRESET_DEFAULT) == -1 || + ADD_INT_PREFIX_MACRO(m, PRESET_EXTREME) == -1) + return NULL; + + Error = PyErr_NewExceptionWithDoc( + "_lzma.LZMAError", "Call to liblzma failed.", NULL, NULL); + if (Error == NULL) + return NULL; + Py_INCREF(Error); + if (PyModule_AddObject(m, "LZMAError", Error) == -1) + return NULL; + + if (PyType_Ready(&Compressor_type) == -1) + return NULL; + Py_INCREF(&Compressor_type); + if (PyModule_AddObject(m, "LZMACompressor", + (PyObject *)&Compressor_type) == -1) + return NULL; + + if (PyType_Ready(&Decompressor_type) == -1) + return NULL; + Py_INCREF(&Decompressor_type); + if (PyModule_AddObject(m, "LZMADecompressor", + (PyObject *)&Decompressor_type) == -1) + return NULL; + + return m; +} |