summaryrefslogtreecommitdiffstats
path: root/Modules/clinic/zlibmodule.c.h
diff options
context:
space:
mode:
authorRuben Vorderman <r.h.p.vorderman@lumc.nl>2022-10-17 02:10:58 (GMT)
committerGitHub <noreply@github.com>2022-10-17 02:10:58 (GMT)
commiteae7dad40255bad42e4abce53ff8143dcbc66af5 (patch)
tree7cea56066a6db7c451712f8375034c2d8b8914f4 /Modules/clinic/zlibmodule.c.h
parentbb38b39b339191c5fc001c8fbfbc3037c13bc7bb (diff)
downloadcpython-eae7dad40255bad42e4abce53ff8143dcbc66af5.zip
cpython-eae7dad40255bad42e4abce53ff8143dcbc66af5.tar.gz
cpython-eae7dad40255bad42e4abce53ff8143dcbc66af5.tar.bz2
gh-95534: Improve gzip reading speed by 10% (#97664)
Change summary: + There is now a `gzip.READ_BUFFER_SIZE` constant that is 128KB. Other programs that read in 128KB chunks: pigz and cat. So this seems best practice among good programs. Also it is faster than 8 kb chunks. + a zlib._ZlibDecompressor was added. This is the _bz2.BZ2Decompressor ported to zlib. Since the zlib.Decompress object is better for in-memory decompression, the _ZlibDecompressor is hidden. It only makes sense in file decompression, and that is already implemented now in the gzip library. No need to bother the users with this. + The ZlibDecompressor uses the older Cpython arrange_output_buffer functions, as those are faster and more appropriate for the use case. + GzipFile.read has been optimized. There is no longer a `unconsumed_tail` member to write back to padded file. This is instead handled by the ZlibDecompressor itself, which has an internal buffer. `_add_read_data` has been inlined, as it was just two calls. EDIT: While I am adding improvements anyway, I figured I could add another one-liner optimization now to the python -m gzip application. That read chunks in io.DEFAULT_BUFFER_SIZE previously, but has been updated now to use READ_BUFFER_SIZE chunks.
Diffstat (limited to 'Modules/clinic/zlibmodule.c.h')
-rw-r--r--Modules/clinic/zlibmodule.c.h100
1 files changed, 99 insertions, 1 deletions
diff --git a/Modules/clinic/zlibmodule.c.h b/Modules/clinic/zlibmodule.c.h
index a04b954..65412b2 100644
--- a/Modules/clinic/zlibmodule.c.h
+++ b/Modules/clinic/zlibmodule.c.h
@@ -897,6 +897,104 @@ exit:
return return_value;
}
+PyDoc_STRVAR(zlib_ZlibDecompressor_decompress__doc__,
+"decompress($self, /, data, max_length=-1)\n"
+"--\n"
+"\n"
+"Decompress *data*, returning uncompressed data as bytes.\n"
+"\n"
+"If *max_length* is nonnegative, returns at most *max_length* bytes of\n"
+"decompressed data. If this limit is reached and further output can be\n"
+"produced, *self.needs_input* will be set to ``False``. In this case, the next\n"
+"call to *decompress()* may provide *data* as b\'\' to obtain more of the output.\n"
+"\n"
+"If all of the input data was decompressed and returned (either because this\n"
+"was less than *max_length* bytes, or because *max_length* was negative),\n"
+"*self.needs_input* will be set to True.\n"
+"\n"
+"Attempting to decompress data after the end of stream is reached raises an\n"
+"EOFError. Any data found after the end of the stream is ignored and saved in\n"
+"the unused_data attribute.");
+
+#define ZLIB_ZLIBDECOMPRESSOR_DECOMPRESS_METHODDEF \
+ {"decompress", _PyCFunction_CAST(zlib_ZlibDecompressor_decompress), METH_FASTCALL|METH_KEYWORDS, zlib_ZlibDecompressor_decompress__doc__},
+
+static PyObject *
+zlib_ZlibDecompressor_decompress_impl(ZlibDecompressor *self,
+ Py_buffer *data, Py_ssize_t max_length);
+
+static PyObject *
+zlib_ZlibDecompressor_decompress(ZlibDecompressor *self, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
+{
+ PyObject *return_value = NULL;
+ #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE)
+
+ #define NUM_KEYWORDS 2
+ static struct {
+ PyGC_Head _this_is_not_used;
+ PyObject_VAR_HEAD
+ PyObject *ob_item[NUM_KEYWORDS];
+ } _kwtuple = {
+ .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS)
+ .ob_item = { &_Py_ID(data), &_Py_ID(max_length), },
+ };
+ #undef NUM_KEYWORDS
+ #define KWTUPLE (&_kwtuple.ob_base.ob_base)
+
+ #else // !Py_BUILD_CORE
+ # define KWTUPLE NULL
+ #endif // !Py_BUILD_CORE
+
+ static const char * const _keywords[] = {"data", "max_length", NULL};
+ static _PyArg_Parser _parser = {
+ .keywords = _keywords,
+ .fname = "decompress",
+ .kwtuple = KWTUPLE,
+ };
+ #undef KWTUPLE
+ PyObject *argsbuf[2];
+ Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 1;
+ Py_buffer data = {NULL, NULL};
+ Py_ssize_t max_length = -1;
+
+ args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 1, 2, 0, argsbuf);
+ if (!args) {
+ goto exit;
+ }
+ if (PyObject_GetBuffer(args[0], &data, PyBUF_SIMPLE) != 0) {
+ goto exit;
+ }
+ if (!PyBuffer_IsContiguous(&data, 'C')) {
+ _PyArg_BadArgument("decompress", "argument 'data'", "contiguous buffer", args[0]);
+ goto exit;
+ }
+ if (!noptargs) {
+ goto skip_optional_pos;
+ }
+ {
+ Py_ssize_t ival = -1;
+ PyObject *iobj = _PyNumber_Index(args[1]);
+ if (iobj != NULL) {
+ ival = PyLong_AsSsize_t(iobj);
+ Py_DECREF(iobj);
+ }
+ if (ival == -1 && PyErr_Occurred()) {
+ goto exit;
+ }
+ max_length = ival;
+ }
+skip_optional_pos:
+ return_value = zlib_ZlibDecompressor_decompress_impl(self, &data, max_length);
+
+exit:
+ /* Cleanup for data */
+ if (data.obj) {
+ PyBuffer_Release(&data);
+ }
+
+ return return_value;
+}
+
PyDoc_STRVAR(zlib_adler32__doc__,
"adler32($module, data, value=1, /)\n"
"--\n"
@@ -1031,4 +1129,4 @@ exit:
#ifndef ZLIB_DECOMPRESS___DEEPCOPY___METHODDEF
#define ZLIB_DECOMPRESS___DEEPCOPY___METHODDEF
#endif /* !defined(ZLIB_DECOMPRESS___DEEPCOPY___METHODDEF) */
-/*[clinic end generated code: output=9e5f9911d0c273e1 input=a9049054013a1b77]*/
+/*[clinic end generated code: output=57ff7b511ab23132 input=a9049054013a1b77]*/