diff options
| author | Antoine Pitrou <solipsis@pitrou.net> | 2011-10-06 17:04:12 (GMT) |
|---|---|---|
| committer | Antoine Pitrou <solipsis@pitrou.net> | 2011-10-06 17:04:12 (GMT) |
| commit | c61c8d7a5e2884238494a678ecd22c812b062280 (patch) | |
| tree | 3fd366f94e25c5b3d5cc9430f8fc2b7de869e782 /Objects/accu.c | |
| parent | c6f0df7b2095eaf0a6d5914a043f9062f66d19f7 (diff) | |
| parent | eeb7eea1f95793437b3e251f47c98446e15fa680 (diff) | |
| download | cpython-c61c8d7a5e2884238494a678ecd22c812b062280.zip cpython-c61c8d7a5e2884238494a678ecd22c812b062280.tar.gz cpython-c61c8d7a5e2884238494a678ecd22c812b062280.tar.bz2 | |
Issue #12911: Fix memory consumption when calculating the repr() of huge tuples or lists.
This introduces a small private API for this common pattern.
The issue has been discovered thanks to Martin's huge-mem buildbot.
Diffstat (limited to 'Objects/accu.c')
| -rw-r--r-- | Objects/accu.c | 114 |
1 files changed, 114 insertions, 0 deletions
diff --git a/Objects/accu.c b/Objects/accu.c new file mode 100644 index 0000000..88e8f08 --- /dev/null +++ b/Objects/accu.c @@ -0,0 +1,114 @@ +/* Accumulator struct implementation */ + +#include "Python.h" + +static PyObject * +join_list_unicode(PyObject *lst) +{ + /* return ''.join(lst) */ + PyObject *sep, *ret; + sep = PyUnicode_FromStringAndSize("", 0); + ret = PyUnicode_Join(sep, lst); + Py_DECREF(sep); + return ret; +} + +int +_PyAccu_Init(_PyAccu *acc) +{ + /* Lazily allocated */ + acc->large = NULL; + acc->small = PyList_New(0); + if (acc->small == NULL) + return -1; + return 0; +} + +static int +flush_accumulator(_PyAccu *acc) +{ + Py_ssize_t nsmall = PyList_GET_SIZE(acc->small); + if (nsmall) { + int ret; + PyObject *joined; + if (acc->large == NULL) { + acc->large = PyList_New(0); + if (acc->large == NULL) + return -1; + } + joined = join_list_unicode(acc->small); + if (joined == NULL) + return -1; + if (PyList_SetSlice(acc->small, 0, nsmall, NULL)) { + Py_DECREF(joined); + return -1; + } + ret = PyList_Append(acc->large, joined); + Py_DECREF(joined); + return ret; + } + return 0; +} + +int +_PyAccu_Accumulate(_PyAccu *acc, PyObject *unicode) +{ + Py_ssize_t nsmall; + assert(PyUnicode_Check(unicode)); + + if (PyList_Append(acc->small, unicode)) + return -1; + nsmall = PyList_GET_SIZE(acc->small); + /* Each item in a list of unicode objects has an overhead (in 64-bit + * builds) of: + * - 8 bytes for the list slot + * - 56 bytes for the header of the unicode object + * that is, 64 bytes. 100000 such objects waste more than 6MB + * compared to a single concatenated string. + */ + if (nsmall < 100000) + return 0; + return flush_accumulator(acc); +} + +PyObject * +_PyAccu_FinishAsList(_PyAccu *acc) +{ + int ret; + PyObject *res; + + ret = flush_accumulator(acc); + Py_CLEAR(acc->small); + if (ret) { + Py_CLEAR(acc->large); + return NULL; + } + res = acc->large; + acc->large = NULL; + return res; +} + +PyObject * +_PyAccu_Finish(_PyAccu *acc) +{ + PyObject *list, *res; + if (acc->large == NULL) { + list = acc->small; + acc->small = NULL; + } + else { + list = _PyAccu_FinishAsList(acc); + if (!list) + return NULL; + } + res = join_list_unicode(list); + Py_DECREF(list); + return res; +} + +void +_PyAccu_Destroy(_PyAccu *acc) +{ + Py_CLEAR(acc->small); + Py_CLEAR(acc->large); +} |
