summaryrefslogtreecommitdiffstats
path: root/Objects
diff options
context:
space:
mode:
author Antoine Pitrou <solipsis@pitrou.net> 2011-10-06 16:57:27 (GMT)
committer Antoine Pitrou <solipsis@pitrou.net> 2011-10-06 16:57:27 (GMT)
commit eeb7eea1f95793437b3e251f47c98446e15fa680 (patch)
tree e83a45f7eddbc424f375a89581a918baf2a42dcd /Objects
parent bb2095f1e273d06f7dfaa8303c46ce6f01212c76 (diff)
downloadcpython-eeb7eea1f95793437b3e251f47c98446e15fa680.zip
cpython-eeb7eea1f95793437b3e251f47c98446e15fa680.tar.gz
cpython-eeb7eea1f95793437b3e251f47c98446e15fa680.tar.bz2
Issue #12911: Fix memory consumption when calculating the repr() of huge tuples or lists.
This introduces a small private API for this common pattern. The issue has been discovered thanks to Martin's huge-mem buildbot.
Diffstat (limited to 'Objects')
-rw-r--r-- Objects/accu.c 114
-rw-r--r-- Objects/listobject.c 81
-rw-r--r-- Objects/tupleobject.c 75
3 files changed, 185 insertions, 85 deletions
diff --git a/Objects/accu.c b/Objects/accu.c
new file mode 100644
index 0000000..88e8f08
--- /dev/null
+++ b/Objects/accu.c
@@ -0,0 +1,114 @@
+/* Accumulator struct implementation */
+
+#include "Python.h"
+
+/* Return ''.join(lst).
+ *
+ * `lst` is the sequence of unicode objects to concatenate.  Returns a new
+ * reference to the joined string, or NULL if the separator could not be
+ * created or the join itself failed.
+ */
+static PyObject *
+join_list_unicode(PyObject *lst)
+{
+    PyObject *sep, *ret;
+
+    sep = PyUnicode_FromStringAndSize("", 0);
+    /* Bail out before the Py_DECREF() below, which must never be handed
+     * a NULL pointer. */
+    if (sep == NULL)
+        return NULL;
+    ret = PyUnicode_Join(sep, lst);
+    Py_DECREF(sep);
+    return ret;
+}
+
+/* Prepare `acc` for use.
+ *
+ * `small` starts out as a fresh empty list; `large` is left NULL and is
+ * only created by the first flush.  Returns 0 on success and -1 when the
+ * `small` list could not be allocated.
+ */
+int
+_PyAccu_Init(_PyAccu *acc)
+{
+    acc->large = NULL;
+    acc->small = PyList_New(0);
+    return (acc->small != NULL) ? 0 : -1;
+}
+
+/* Collapse everything pending in acc->small into one string.
+ *
+ * The joined string is appended to acc->large (which is created here the
+ * first time it is needed) and acc->small is emptied.  Returns 0 on
+ * success, including when there was nothing pending, and -1 on error.
+ */
+static int
+flush_accumulator(_PyAccu *acc)
+{
+    Py_ssize_t pending = PyList_GET_SIZE(acc->small);
+    PyObject *chunk;
+    int status;
+
+    if (pending == 0)
+        return 0;
+
+    if (acc->large == NULL) {
+        acc->large = PyList_New(0);
+        if (acc->large == NULL)
+            return -1;
+    }
+
+    chunk = join_list_unicode(acc->small);
+    if (chunk == NULL)
+        return -1;
+
+    /* Empty the small list before the joined chunk is stored. */
+    if (PyList_SetSlice(acc->small, 0, pending, NULL) != 0) {
+        Py_DECREF(chunk);
+        return -1;
+    }
+
+    status = PyList_Append(acc->large, chunk);
+    /* Drop our own reference whether or not the append succeeded. */
+    Py_DECREF(chunk);
+    return status;
+}
+
+/* Add the string `unicode` to the accumulator.
+ *
+ * The string is appended to acc->small; once that list holds 100000
+ * items it is flushed.  Returns 0 on success, -1 on error.
+ */
+int
+_PyAccu_Accumulate(_PyAccu *acc, PyObject *unicode)
+{
+    assert(PyUnicode_Check(unicode));
+
+    if (PyList_Append(acc->small, unicode) != 0)
+        return -1;
+
+    /* Per-item overhead of a list of unicode objects (64-bit builds):
+     *   - 8 bytes for the list slot
+     *   - 56 bytes for the header of the unicode object
+     * i.e. 64 bytes in total.  100000 such objects waste more than 6MB
+     * compared to a single concatenated string, so flush at that point.
+     */
+    if (PyList_GET_SIZE(acc->small) >= 100000)
+        return flush_accumulator(acc);
+    return 0;
+}
+
+/* Finish the accumulator and hand back its contents as a list of strings.
+ *
+ * Pending items are flushed first, then ownership of the `large` list is
+ * transferred to the caller; both fields of `acc` are NULL afterwards.
+ * Returns a new reference, or NULL with the accumulator emptied when the
+ * flush failed.
+ */
+PyObject *
+_PyAccu_FinishAsList(_PyAccu *acc)
+{
+    int ret;
+    PyObject *res;
+
+    ret = flush_accumulator(acc);
+    Py_CLEAR(acc->small);
+    if (ret) {
+        Py_CLEAR(acc->large);
+        return NULL;
+    }
+    res = acc->large;
+    acc->large = NULL;
+    if (res == NULL) {
+        /* Nothing was ever accumulated, so no flush created `large`.
+         * Return an empty list rather than a bare NULL, which callers
+         * would take for an error although no exception is set. */
+        res = PyList_New(0);
+    }
+    return res;
+}
+
+/* Finish the accumulator and return everything accumulated as one string.
+ *
+ * Returns a new reference to the joined string, or NULL on error.
+ */
+PyObject *
+_PyAccu_Finish(_PyAccu *acc)
+{
+    PyObject *parts, *joined;
+
+    if (acc->large != NULL) {
+        parts = _PyAccu_FinishAsList(acc);
+        if (parts == NULL)
+            return NULL;
+    }
+    else {
+        /* No `large` list exists, so the small list holds everything:
+         * take it over directly. */
+        parts = acc->small;
+        acc->small = NULL;
+    }
+
+    joined = join_list_unicode(parts);
+    Py_DECREF(parts);
+    return joined;
+}
+
+/* Release both lists owned by `acc` and reset its fields to NULL.
+ *
+ * Py_CLEAR() tolerates NULL fields.
+ */
+void
+_PyAccu_Destroy(_PyAccu *acc)
+{
+    Py_CLEAR(acc->small);
+    Py_CLEAR(acc->large);
+}
diff --git a/Objects/listobject.c b/Objects/listobject.c
index 36f8b9d..00de597 100644
--- a/Objects/listobject.c
+++ b/Objects/listobject.c
@@ -321,70 +321,59 @@ static PyObject *
list_repr(PyListObject *v)
{
Py_ssize_t i;
- PyObject *s, *temp;
- PyObject *pieces = NULL, *result = NULL;
+ PyObject *s = NULL;
+ _PyAccu acc;
+ static PyObject *sep = NULL;
+
+ if (Py_SIZE(v) == 0) {
+ return PyUnicode_FromString("[]");
+ }
+
+ if (sep == NULL) {
+ sep = PyUnicode_FromString(", ");
+ if (sep == NULL)
+ return NULL;
+ }
i = Py_ReprEnter((PyObject*)v);
if (i != 0) {
return i > 0 ? PyUnicode_FromString("[...]") : NULL;
}
- if (Py_SIZE(v) == 0) {
- result = PyUnicode_FromString("[]");
- goto Done;
- }
+ if (_PyAccu_Init(&acc))
+ goto error;
- pieces = PyList_New(0);
- if (pieces == NULL)
- goto Done;
+ s = PyUnicode_FromString("[");
+ if (s == NULL || _PyAccu_Accumulate(&acc, s))
+ goto error;
+ Py_CLEAR(s);
/* Do repr() on each element. Note that this may mutate the list,
so must refetch the list size on each iteration. */
for (i = 0; i < Py_SIZE(v); ++i) {
- int status;
if (Py_EnterRecursiveCall(" while getting the repr of a list"))
- goto Done;
+ goto error;
s = PyObject_Repr(v->ob_item[i]);
Py_LeaveRecursiveCall();
- if (s == NULL)
- goto Done;
- status = PyList_Append(pieces, s);
- Py_DECREF(s); /* append created a new ref */
- if (status < 0)
- goto Done;
+ if (i > 0 && _PyAccu_Accumulate(&acc, sep))
+ goto error;
+ if (s == NULL || _PyAccu_Accumulate(&acc, s))
+ goto error;
+ Py_CLEAR(s);
}
+ s = PyUnicode_FromString("]");
+ if (s == NULL || _PyAccu_Accumulate(&acc, s))
+ goto error;
+ Py_CLEAR(s);
- /* Add "[]" decorations to the first and last items. */
- assert(PyList_GET_SIZE(pieces) > 0);
- s = PyUnicode_FromString("[");
- if (s == NULL)
- goto Done;
- temp = PyList_GET_ITEM(pieces, 0);
- PyUnicode_AppendAndDel(&s, temp);
- PyList_SET_ITEM(pieces, 0, s);
- if (s == NULL)
- goto Done;
+ Py_ReprLeave((PyObject *)v);
+ return _PyAccu_Finish(&acc);
- s = PyUnicode_FromString("]");
- if (s == NULL)
- goto Done;
- temp = PyList_GET_ITEM(pieces, PyList_GET_SIZE(pieces) - 1);
- PyUnicode_AppendAndDel(&temp, s);
- PyList_SET_ITEM(pieces, PyList_GET_SIZE(pieces) - 1, temp);
- if (temp == NULL)
- goto Done;
-
- /* Paste them all together with ", " between. */
- s = PyUnicode_FromString(", ");
- if (s == NULL)
- goto Done;
- result = PyUnicode_Join(s, pieces);
- Py_DECREF(s);
-
-Done:
- Py_XDECREF(pieces);
+error:
+ _PyAccu_Destroy(&acc);
+ Py_XDECREF(s);
Py_ReprLeave((PyObject *)v);
- return result;
+ return NULL;
}
static Py_ssize_t
diff --git a/Objects/tupleobject.c b/Objects/tupleobject.c
index 72b79c9..8aacd12 100644
--- a/Objects/tupleobject.c
+++ b/Objects/tupleobject.c
@@ -240,13 +240,20 @@ static PyObject *
tuplerepr(PyTupleObject *v)
{
Py_ssize_t i, n;
- PyObject *s, *temp;
- PyObject *pieces, *result = NULL;
+ PyObject *s = NULL;
+ _PyAccu acc;
+ static PyObject *sep = NULL;
n = Py_SIZE(v);
if (n == 0)
return PyUnicode_FromString("()");
+ if (sep == NULL) {
+ sep = PyUnicode_FromString(", ");
+ if (sep == NULL)
+ return NULL;
+ }
+
/* While not mutable, it is still possible to end up with a cycle in a
tuple through an object that stores itself within a tuple (and thus
infinitely asks for the repr of itself). This should only be
@@ -256,52 +263,42 @@ tuplerepr(PyTupleObject *v)
return i > 0 ? PyUnicode_FromString("(...)") : NULL;
}
- pieces = PyTuple_New(n);
- if (pieces == NULL)
- return NULL;
+ if (_PyAccu_Init(&acc))
+ goto error;
+
+ s = PyUnicode_FromString("(");
+ if (s == NULL || _PyAccu_Accumulate(&acc, s))
+ goto error;
+ Py_CLEAR(s);
/* Do repr() on each element. */
for (i = 0; i < n; ++i) {
if (Py_EnterRecursiveCall(" while getting the repr of a tuple"))
- goto Done;
+ goto error;
s = PyObject_Repr(v->ob_item[i]);
Py_LeaveRecursiveCall();
- if (s == NULL)
- goto Done;
- PyTuple_SET_ITEM(pieces, i, s);
+ if (i > 0 && _PyAccu_Accumulate(&acc, sep))
+ goto error;
+ if (s == NULL || _PyAccu_Accumulate(&acc, s))
+ goto error;
+ Py_CLEAR(s);
}
+ if (n > 1)
+ s = PyUnicode_FromString(")");
+ else
+ s = PyUnicode_FromString(",)");
+ if (s == NULL || _PyAccu_Accumulate(&acc, s))
+ goto error;
+ Py_CLEAR(s);
- /* Add "()" decorations to the first and last items. */
- assert(n > 0);
- s = PyUnicode_FromString("(");
- if (s == NULL)
- goto Done;
- temp = PyTuple_GET_ITEM(pieces, 0);
- PyUnicode_AppendAndDel(&s, temp);
- PyTuple_SET_ITEM(pieces, 0, s);
- if (s == NULL)
- goto Done;
-
- s = PyUnicode_FromString(n == 1 ? ",)" : ")");
- if (s == NULL)
- goto Done;
- temp = PyTuple_GET_ITEM(pieces, n-1);
- PyUnicode_AppendAndDel(&temp, s);
- PyTuple_SET_ITEM(pieces, n-1, temp);
- if (temp == NULL)
- goto Done;
-
- /* Paste them all together with ", " between. */
- s = PyUnicode_FromString(", ");
- if (s == NULL)
- goto Done;
- result = PyUnicode_Join(s, pieces);
- Py_DECREF(s);
-
-Done:
- Py_DECREF(pieces);
Py_ReprLeave((PyObject *)v);
- return result;
+ return _PyAccu_Finish(&acc);
+
+error:
+ _PyAccu_Destroy(&acc);
+ Py_XDECREF(s);
+ Py_ReprLeave((PyObject *)v);
+ return NULL;
}
/* The addend 82520, was selected from the range(0, 1000000) for