Issue #12911: Fix memory consumption when calculating the repr() of huge tuples or lists.

This introduces a small private API for this common pattern. The issue has been discovered thanks to Martin's huge-mem buildbot.
author: Antoine Pitrou <solipsis@pitrou.net> 2011-10-06 16:57:27 (GMT)
committer: Antoine Pitrou <solipsis@pitrou.net> 2011-10-06 16:57:27 (GMT)
commit: eeb7eea1f95793437b3e251f47c98446e15fa680 (patch)
tree: e83a45f7eddbc424f375a89581a918baf2a42dcd /Objects/accu.c
parent: bb2095f1e273d06f7dfaa8303c46ce6f01212c76 (diff)
download: cpython-eeb7eea1f95793437b3e251f47c98446e15fa680.zip
cpython-eeb7eea1f95793437b3e251f47c98446e15fa680.tar.gz
cpython-eeb7eea1f95793437b3e251f47c98446e15fa680.tar.bz2
1 files changed, 114 insertions, 0 deletions
diff --git a/Objects/accu.c b/Objects/accu.c
new file mode 100644
index 0000000..88e8f08
--- /dev/null
+++ b/Objects/accu.c
@@ -0,0 +1,114 @@
+/* Accumulator struct implementation */
+
+#include "Python.h"
+
+/* Concatenate the items of `lst` using an empty separator, i.e. the C
+ * equivalent of ''.join(lst).
+ *
+ * Returns the result of PyUnicode_Join() for the list, or NULL if the
+ * empty separator string could not be created.
+ */
+static PyObject *
+join_list_unicode(PyObject *lst)
+{
+    PyObject *sep, *ret;
+
+    sep = PyUnicode_FromStringAndSize("", 0);
+    /* Py_DECREF() must never be handed NULL, so stop before sep is used. */
+    if (sep == NULL)
+        return NULL;
+    ret = PyUnicode_Join(sep, lst);
+    Py_DECREF(sep);
+    return ret;
+}
+
+/* Prepare `acc` for use.
+ *
+ * The `large` list starts out as NULL and is only created once a flush
+ * needs it; the `small` list is created here, empty.
+ *
+ * Returns 0 on success, -1 if the `small` list could not be created.
+ */
+int
+_PyAccu_Init(_PyAccu *acc)
+{
+    acc->large = NULL;
+    acc->small = PyList_New(0);
+    return (acc->small != NULL) ? 0 : -1;
+}
+
+/* Collapse everything currently held in acc->small into one joined string
+ * and append that string to acc->large, creating acc->large on first use.
+ *
+ * Does nothing when acc->small is empty.  On success acc->small is left
+ * empty.  Returns 0 on success and -1 on failure.
+ */
+static int
+flush_accumulator(_PyAccu *acc)
+{
+    PyObject *chunk;
+    int status;
+    Py_ssize_t count = PyList_GET_SIZE(acc->small);
+
+    if (count == 0)
+        return 0;
+
+    if (acc->large == NULL) {
+        acc->large = PyList_New(0);
+        if (acc->large == NULL)
+            return -1;
+    }
+
+    chunk = join_list_unicode(acc->small);
+    if (chunk == NULL)
+        return -1;
+
+    /* Empty the small list only after its contents were joined. */
+    if (PyList_SetSlice(acc->small, 0, count, NULL) != 0) {
+        Py_DECREF(chunk);
+        return -1;
+    }
+
+    /* The list takes its own reference; drop ours whether or not the
+     * append succeeded. */
+    status = PyList_Append(acc->large, chunk);
+    Py_DECREF(chunk);
+    return status;
+}
+
+/* Append the string `unicode` to the accumulator.
+ *
+ * The string goes into acc->small; once that list holds 100000 items it is
+ * flushed into a single joined string.  Rationale for the threshold: in
+ * 64-bit builds every item in a list of unicode objects costs 8 bytes for
+ * the list slot plus 56 bytes for the unicode object header, 64 bytes in
+ * total, so 100000 separate items waste more than 6MB compared to one
+ * concatenated string.
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+int
+_PyAccu_Accumulate(_PyAccu *acc, PyObject *unicode)
+{
+    assert(PyUnicode_Check(unicode));
+
+    if (PyList_Append(acc->small, unicode) != 0)
+        return -1;
+
+    if (PyList_GET_SIZE(acc->small) >= 100000)
+        return flush_accumulator(acc);
+    return 0;
+}
+
+/* Finish the accumulation and hand back the list of joined chunks.
+ *
+ * Any strings still pending in acc->small are flushed first.  The `small`
+ * list is then released, and ownership of the `large` list moves to the
+ * caller (acc->large is reset to NULL without being released).
+ *
+ * Returns the list on success.  On failure both lists are released and
+ * NULL is returned.
+ */
+PyObject *
+_PyAccu_FinishAsList(_PyAccu *acc)
+{
+    int ret;
+    PyObject *res;
+
+    ret = flush_accumulator(acc);
+    Py_CLEAR(acc->small);
+    if (ret) {
+        Py_CLEAR(acc->large);
+        return NULL;
+    }
+    res = acc->large;
+    acc->large = NULL;
+    if (res == NULL) {
+        /* acc->large is only created by a flush that had something to
+         * join.  If nothing was ever accumulated it is still NULL here,
+         * and returning that would signal an error with no exception set.
+         * Hand back an empty list instead (NULL only if that fails). */
+        res = PyList_New(0);
+    }
+    return res;
+}
+
+/* Finish the accumulation and return all accumulated strings joined into
+ * one string.
+ *
+ * If nothing was flushed so far (acc->large is NULL), the `small` list is
+ * detached from `acc` and joined directly.  Otherwise the list from
+ * _PyAccu_FinishAsList() is joined.  The list that was joined is released
+ * before returning.
+ *
+ * Returns the joined string, or NULL on failure.
+ */
+PyObject *
+_PyAccu_Finish(_PyAccu *acc)
+{
+    PyObject *parts, *joined;
+
+    if (acc->large != NULL) {
+        parts = _PyAccu_FinishAsList(acc);
+        if (parts == NULL)
+            return NULL;
+    }
+    else {
+        /* Take over the small list; acc no longer references it. */
+        parts = acc->small;
+        acc->small = NULL;
+    }
+
+    joined = join_list_unicode(parts);
+    Py_DECREF(parts);
+    return joined;
+}
+
+/* Release whatever lists `acc` still references and reset both fields to
+ * NULL.  Fields that are already NULL are skipped.
+ */
+void
+_PyAccu_Destroy(_PyAccu *acc)
+{
+    PyObject *tmp;
+
+    /* Detach each field before dropping the reference it held. */
+    tmp = acc->small;
+    acc->small = NULL;
+    Py_XDECREF(tmp);
+
+    tmp = acc->large;
+    acc->large = NULL;
+    Py_XDECREF(tmp);
+}
author	Antoine Pitrou <solipsis@pitrou.net>	2011-10-06 16:57:27 (GMT)
committer	Antoine Pitrou <solipsis@pitrou.net>	2011-10-06 16:57:27 (GMT)
commit	eeb7eea1f95793437b3e251f47c98446e15fa680 (patch)
tree	e83a45f7eddbc424f375a89581a918baf2a42dcd /Objects/accu.c
parent	bb2095f1e273d06f7dfaa8303c46ce6f01212c76 (diff)
download	cpython-eeb7eea1f95793437b3e251f47c98446e15fa680.zip cpython-eeb7eea1f95793437b3e251f47c98446e15fa680.tar.gz cpython-eeb7eea1f95793437b3e251f47c98446e15fa680.tar.bz2