diff options
author | Ma Lin <animalize@users.noreply.github.com> | 2021-04-28 06:58:54 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-04-28 06:58:54 (GMT) |
commit | f9bedb630e8a0b7d94e1c7e609b20dfaa2b22231 (patch) | |
tree | ca8e232aebdae960f8a55737897cd20766df71ca /Include/internal/pycore_blocks_output_buffer.h | |
parent | a5e64444e6df7d1d498576bab26deaddc288a7bd (diff) | |
download | cpython-f9bedb630e8a0b7d94e1c7e609b20dfaa2b22231.zip cpython-f9bedb630e8a0b7d94e1c7e609b20dfaa2b22231.tar.gz cpython-f9bedb630e8a0b7d94e1c7e609b20dfaa2b22231.tar.bz2 |
bpo-41486: Faster bz2/lzma/zlib via new output buffering (GH-21740)
Faster bz2/lzma/zlib via new output buffering.
Also adds .readall() function to _compression.DecompressReader class
to take best advantage of this in the consume-all-output at once scenario.
Often a 5-20% speedup in common scenarios due to less data copying.
Contributed by Ma Lin.
Diffstat (limited to 'Include/internal/pycore_blocks_output_buffer.h')
-rw-r--r-- | Include/internal/pycore_blocks_output_buffer.h | 317 |
1 files changed, 317 insertions, 0 deletions
diff --git a/Include/internal/pycore_blocks_output_buffer.h b/Include/internal/pycore_blocks_output_buffer.h new file mode 100644 index 0000000..22546e9 --- /dev/null +++ b/Include/internal/pycore_blocks_output_buffer.h @@ -0,0 +1,317 @@ +/* + _BlocksOutputBuffer is used to maintain an output buffer + that has unpredictable size. Suitable for compression/decompression + API (bz2/lzma/zlib) that has stream->next_out and stream->avail_out: + + stream->next_out: point to the next output position. + stream->avail_out: the number of available bytes left in the buffer. + + It maintains a list of bytes object, so there is no overhead of resizing + the buffer. + + Usage: + + 1, Initialize the struct instance like this: + _BlocksOutputBuffer buffer = {.list = NULL}; + Set .list to NULL for _BlocksOutputBuffer_OnError() + + 2, Initialize the buffer use one of these functions: + _BlocksOutputBuffer_InitAndGrow() + _BlocksOutputBuffer_InitWithSize() + + 3, If (avail_out == 0), grow the buffer: + _BlocksOutputBuffer_Grow() + + 4, Get the current outputted data size: + _BlocksOutputBuffer_GetDataSize() + + 5, Finish the buffer, and return a bytes object: + _BlocksOutputBuffer_Finish() + + 6, Clean up the buffer when an error occurred: + _BlocksOutputBuffer_OnError() +*/ + +#ifndef Py_INTERNAL_BLOCKS_OUTPUT_BUFFER_H +#define Py_INTERNAL_BLOCKS_OUTPUT_BUFFER_H +#ifdef __cplusplus +extern "C" { +#endif + +#include "Python.h" + +typedef struct { + // List of bytes objects + PyObject *list; + // Number of whole allocated size + Py_ssize_t allocated; + // Max length of the buffer, negative number means unlimited length. + Py_ssize_t max_length; +} _BlocksOutputBuffer; + +static const char unable_allocate_msg[] = "Unable to allocate output buffer."; + +/* In 32-bit build, the max block size should <= INT32_MAX. */ +#define OUTPUT_BUFFER_MAX_BLOCK_SIZE (256*1024*1024) + +/* Block size sequence */ +#define KB (1024) +#define MB (1024*1024) +const Py_ssize_t BUFFER_BLOCK_SIZE[] = + { 32*KB, 64*KB, 256*KB, 1*MB, 4*MB, 8*MB, 16*MB, 16*MB, + 32*MB, 32*MB, 32*MB, 32*MB, 64*MB, 64*MB, 128*MB, 128*MB, + OUTPUT_BUFFER_MAX_BLOCK_SIZE }; +#undef KB +#undef MB + +/* According to the block sizes defined by BUFFER_BLOCK_SIZE, the whole + allocated size growth step is: + 1 32 KB +32 KB + 2 96 KB +64 KB + 3 352 KB +256 KB + 4 1.34 MB +1 MB + 5 5.34 MB +4 MB + 6 13.34 MB +8 MB + 7 29.34 MB +16 MB + 8 45.34 MB +16 MB + 9 77.34 MB +32 MB + 10 109.34 MB +32 MB + 11 141.34 MB +32 MB + 12 173.34 MB +32 MB + 13 237.34 MB +64 MB + 14 301.34 MB +64 MB + 15 429.34 MB +128 MB + 16 557.34 MB +128 MB + 17 813.34 MB +256 MB + 18 1069.34 MB +256 MB + 19 1325.34 MB +256 MB + 20 1581.34 MB +256 MB + 21 1837.34 MB +256 MB + 22 2093.34 MB +256 MB + ... +*/ + +/* Initialize the buffer, and grow the buffer. + + max_length: Max length of the buffer, -1 for unlimited length. + + On success, return allocated size (>=0) + On failure, return -1 +*/ +static inline Py_ssize_t +_BlocksOutputBuffer_InitAndGrow(_BlocksOutputBuffer *buffer, + const Py_ssize_t max_length, + void **next_out) +{ + PyObject *b; + Py_ssize_t block_size; + + // ensure .list was set to NULL + assert(buffer->list == NULL); + + // get block size + if (0 <= max_length && max_length < BUFFER_BLOCK_SIZE[0]) { + block_size = max_length; + } else { + block_size = BUFFER_BLOCK_SIZE[0]; + } + + // the first block + b = PyBytes_FromStringAndSize(NULL, block_size); + if (b == NULL) { + return -1; + } + + // create the list + buffer->list = PyList_New(1); + if (buffer->list == NULL) { + Py_DECREF(b); + return -1; + } + PyList_SET_ITEM(buffer->list, 0, b); + + // set variables + buffer->allocated = block_size; + buffer->max_length = max_length; + + *next_out = PyBytes_AS_STRING(b); + return block_size; +} + +/* Initialize the buffer, with an initial size. + + Check block size limit in the outer wrapper function. For example, some libs + accept UINT32_MAX as the maximum block size, then init_size should <= it. + + On success, return allocated size (>=0) + On failure, return -1 +*/ +static inline Py_ssize_t +_BlocksOutputBuffer_InitWithSize(_BlocksOutputBuffer *buffer, + const Py_ssize_t init_size, + void **next_out) +{ + PyObject *b; + + // ensure .list was set to NULL + assert(buffer->list == NULL); + + // the first block + b = PyBytes_FromStringAndSize(NULL, init_size); + if (b == NULL) { + PyErr_SetString(PyExc_MemoryError, unable_allocate_msg); + return -1; + } + + // create the list + buffer->list = PyList_New(1); + if (buffer->list == NULL) { + Py_DECREF(b); + return -1; + } + PyList_SET_ITEM(buffer->list, 0, b); + + // set variables + buffer->allocated = init_size; + buffer->max_length = -1; + + *next_out = PyBytes_AS_STRING(b); + return init_size; +} + +/* Grow the buffer. The avail_out must be 0, please check it before calling. + + On success, return allocated size (>=0) + On failure, return -1 +*/ +static inline Py_ssize_t +_BlocksOutputBuffer_Grow(_BlocksOutputBuffer *buffer, + void **next_out, + const Py_ssize_t avail_out) +{ + PyObject *b; + const Py_ssize_t list_len = Py_SIZE(buffer->list); + Py_ssize_t block_size; + + // ensure no gaps in the data + if (avail_out != 0) { + PyErr_SetString(PyExc_SystemError, + "avail_out is non-zero in _BlocksOutputBuffer_Grow()."); + return -1; + } + + // get block size + if (list_len < (Py_ssize_t) Py_ARRAY_LENGTH(BUFFER_BLOCK_SIZE)) { + block_size = BUFFER_BLOCK_SIZE[list_len]; + } else { + block_size = BUFFER_BLOCK_SIZE[Py_ARRAY_LENGTH(BUFFER_BLOCK_SIZE) - 1]; + } + + // check max_length + if (buffer->max_length >= 0) { + // if (rest == 0), should not grow the buffer. + Py_ssize_t rest = buffer->max_length - buffer->allocated; + assert(rest > 0); + + // block_size of the last block + if (block_size > rest) { + block_size = rest; + } + } + + // check buffer->allocated overflow + if (block_size > PY_SSIZE_T_MAX - buffer->allocated) { + PyErr_SetString(PyExc_MemoryError, unable_allocate_msg); + return -1; + } + + // create the block + b = PyBytes_FromStringAndSize(NULL, block_size); + if (b == NULL) { + PyErr_SetString(PyExc_MemoryError, unable_allocate_msg); + return -1; + } + if (PyList_Append(buffer->list, b) < 0) { + Py_DECREF(b); + return -1; + } + Py_DECREF(b); + + // set variables + buffer->allocated += block_size; + + *next_out = PyBytes_AS_STRING(b); + return block_size; +} + +/* Return the current outputted data size. */ +static inline Py_ssize_t +_BlocksOutputBuffer_GetDataSize(_BlocksOutputBuffer *buffer, + const Py_ssize_t avail_out) +{ + return buffer->allocated - avail_out; +} + +/* Finish the buffer. + + Return a bytes object on success + Return NULL on failure +*/ +static inline PyObject * +_BlocksOutputBuffer_Finish(_BlocksOutputBuffer *buffer, + const Py_ssize_t avail_out) +{ + PyObject *result, *block; + const Py_ssize_t list_len = Py_SIZE(buffer->list); + + // fast path for single block + if ((list_len == 1 && avail_out == 0) || + (list_len == 2 && Py_SIZE(PyList_GET_ITEM(buffer->list, 1)) == avail_out)) + { + block = PyList_GET_ITEM(buffer->list, 0); + Py_INCREF(block); + + Py_CLEAR(buffer->list); + return block; + } + + // final bytes object + result = PyBytes_FromStringAndSize(NULL, buffer->allocated - avail_out); + if (result == NULL) { + PyErr_SetString(PyExc_MemoryError, unable_allocate_msg); + return NULL; + } + + // memory copy + if (list_len > 0) { + char *posi = PyBytes_AS_STRING(result); + + // blocks except the last one + Py_ssize_t i = 0; + for (; i < list_len-1; i++) { + block = PyList_GET_ITEM(buffer->list, i); + memcpy(posi, PyBytes_AS_STRING(block), Py_SIZE(block)); + posi += Py_SIZE(block); + } + // the last block + block = PyList_GET_ITEM(buffer->list, i); + memcpy(posi, PyBytes_AS_STRING(block), Py_SIZE(block) - avail_out); + } else { + assert(Py_SIZE(result) == 0); + } + + Py_CLEAR(buffer->list); + return result; +} + +/* Clean up the buffer when an error occurred. */ +static inline void +_BlocksOutputBuffer_OnError(_BlocksOutputBuffer *buffer) +{ + Py_CLEAR(buffer->list); +} + +#ifdef __cplusplus +} +#endif +#endif /* Py_INTERNAL_BLOCKS_OUTPUT_BUFFER_H */
\ No newline at end of file |