summaryrefslogtreecommitdiffstats
path: root/Objects/accu.c
diff options
context:
space:
mode:
authorAntoine Pitrou <solipsis@pitrou.net>2011-10-06 16:57:27 (GMT)
committerAntoine Pitrou <solipsis@pitrou.net>2011-10-06 16:57:27 (GMT)
commiteeb7eea1f95793437b3e251f47c98446e15fa680 (patch)
treee83a45f7eddbc424f375a89581a918baf2a42dcd /Objects/accu.c
parentbb2095f1e273d06f7dfaa8303c46ce6f01212c76 (diff)
downloadcpython-eeb7eea1f95793437b3e251f47c98446e15fa680.zip
cpython-eeb7eea1f95793437b3e251f47c98446e15fa680.tar.gz
cpython-eeb7eea1f95793437b3e251f47c98446e15fa680.tar.bz2
Issue #12911: Fix memory consumption when calculating the repr() of huge tuples or lists.
This introduces a small private API for this common pattern. The issue has been discovered thanks to Martin's huge-mem buildbot.
Diffstat (limited to 'Objects/accu.c')
-rw-r--r--Objects/accu.c114
1 files changed, 114 insertions, 0 deletions
diff --git a/Objects/accu.c b/Objects/accu.c
new file mode 100644
index 0000000..88e8f08
--- /dev/null
+++ b/Objects/accu.c
@@ -0,0 +1,114 @@
+/* Accumulator struct implementation */
+
+#include "Python.h"
+
+static PyObject *
+join_list_unicode(PyObject *lst)
+{
+ /* return ''.join(lst) */
+ PyObject *sep, *ret;
+ sep = PyUnicode_FromStringAndSize("", 0);
+ ret = PyUnicode_Join(sep, lst);
+ Py_DECREF(sep);
+ return ret;
+}
+
+int
+_PyAccu_Init(_PyAccu *acc)
+{
+ /* Lazily allocated */
+ acc->large = NULL;
+ acc->small = PyList_New(0);
+ if (acc->small == NULL)
+ return -1;
+ return 0;
+}
+
+static int
+flush_accumulator(_PyAccu *acc)
+{
+ Py_ssize_t nsmall = PyList_GET_SIZE(acc->small);
+ if (nsmall) {
+ int ret;
+ PyObject *joined;
+ if (acc->large == NULL) {
+ acc->large = PyList_New(0);
+ if (acc->large == NULL)
+ return -1;
+ }
+ joined = join_list_unicode(acc->small);
+ if (joined == NULL)
+ return -1;
+ if (PyList_SetSlice(acc->small, 0, nsmall, NULL)) {
+ Py_DECREF(joined);
+ return -1;
+ }
+ ret = PyList_Append(acc->large, joined);
+ Py_DECREF(joined);
+ return ret;
+ }
+ return 0;
+}
+
+int
+_PyAccu_Accumulate(_PyAccu *acc, PyObject *unicode)
+{
+ Py_ssize_t nsmall;
+ assert(PyUnicode_Check(unicode));
+
+ if (PyList_Append(acc->small, unicode))
+ return -1;
+ nsmall = PyList_GET_SIZE(acc->small);
+ /* Each item in a list of unicode objects has an overhead (in 64-bit
+ * builds) of:
+ * - 8 bytes for the list slot
+ * - 56 bytes for the header of the unicode object
+ * that is, 64 bytes. 100000 such objects waste more than 6MB
+ * compared to a single concatenated string.
+ */
+ if (nsmall < 100000)
+ return 0;
+ return flush_accumulator(acc);
+}
+
+PyObject *
+_PyAccu_FinishAsList(_PyAccu *acc)
+{
+ int ret;
+ PyObject *res;
+
+ ret = flush_accumulator(acc);
+ Py_CLEAR(acc->small);
+ if (ret) {
+ Py_CLEAR(acc->large);
+ return NULL;
+ }
+ res = acc->large;
+ acc->large = NULL;
+ return res;
+}
+
+PyObject *
+_PyAccu_Finish(_PyAccu *acc)
+{
+ PyObject *list, *res;
+ if (acc->large == NULL) {
+ list = acc->small;
+ acc->small = NULL;
+ }
+ else {
+ list = _PyAccu_FinishAsList(acc);
+ if (!list)
+ return NULL;
+ }
+ res = join_list_unicode(list);
+ Py_DECREF(list);
+ return res;
+}
+
+void
+_PyAccu_Destroy(_PyAccu *acc)
+{
+ Py_CLEAR(acc->small);
+ Py_CLEAR(acc->large);
+}