diff options
author | Antoine Pitrou <solipsis@pitrou.net> | 2011-10-06 16:57:27 (GMT) |
---|---|---|
committer | Antoine Pitrou <solipsis@pitrou.net> | 2011-10-06 16:57:27 (GMT) |
commit | eeb7eea1f95793437b3e251f47c98446e15fa680 (patch) | |
tree | e83a45f7eddbc424f375a89581a918baf2a42dcd /Objects | |
parent | bb2095f1e273d06f7dfaa8303c46ce6f01212c76 (diff) | |
download | cpython-eeb7eea1f95793437b3e251f47c98446e15fa680.zip cpython-eeb7eea1f95793437b3e251f47c98446e15fa680.tar.gz cpython-eeb7eea1f95793437b3e251f47c98446e15fa680.tar.bz2 |
Issue #12911: Fix memory consumption when calculating the repr() of huge tuples or lists.
This introduces a small private API for this common pattern.
The issue has been discovered thanks to Martin's huge-mem buildbot.
Diffstat (limited to 'Objects')
-rw-r--r-- | Objects/accu.c | 114 | ||||
-rw-r--r-- | Objects/listobject.c | 81 | ||||
-rw-r--r-- | Objects/tupleobject.c | 75 |
3 files changed, 185 insertions, 85 deletions
diff --git a/Objects/accu.c b/Objects/accu.c new file mode 100644 index 0000000..88e8f08 --- /dev/null +++ b/Objects/accu.c @@ -0,0 +1,114 @@ +/* Accumulator struct implementation */ + +#include "Python.h" + +static PyObject * +join_list_unicode(PyObject *lst) +{ + /* return ''.join(lst) */ + PyObject *sep, *ret; + sep = PyUnicode_FromStringAndSize("", 0); + ret = PyUnicode_Join(sep, lst); + Py_DECREF(sep); + return ret; +} + +int +_PyAccu_Init(_PyAccu *acc) +{ + /* Lazily allocated */ + acc->large = NULL; + acc->small = PyList_New(0); + if (acc->small == NULL) + return -1; + return 0; +} + +static int +flush_accumulator(_PyAccu *acc) +{ + Py_ssize_t nsmall = PyList_GET_SIZE(acc->small); + if (nsmall) { + int ret; + PyObject *joined; + if (acc->large == NULL) { + acc->large = PyList_New(0); + if (acc->large == NULL) + return -1; + } + joined = join_list_unicode(acc->small); + if (joined == NULL) + return -1; + if (PyList_SetSlice(acc->small, 0, nsmall, NULL)) { + Py_DECREF(joined); + return -1; + } + ret = PyList_Append(acc->large, joined); + Py_DECREF(joined); + return ret; + } + return 0; +} + +int +_PyAccu_Accumulate(_PyAccu *acc, PyObject *unicode) +{ + Py_ssize_t nsmall; + assert(PyUnicode_Check(unicode)); + + if (PyList_Append(acc->small, unicode)) + return -1; + nsmall = PyList_GET_SIZE(acc->small); + /* Each item in a list of unicode objects has an overhead (in 64-bit + * builds) of: + * - 8 bytes for the list slot + * - 56 bytes for the header of the unicode object + * that is, 64 bytes. 100000 such objects waste more than 6MB + * compared to a single concatenated string. + */ + if (nsmall < 100000) + return 0; + return flush_accumulator(acc); +} + +PyObject * +_PyAccu_FinishAsList(_PyAccu *acc) +{ + int ret; + PyObject *res; + + ret = flush_accumulator(acc); + Py_CLEAR(acc->small); + if (ret) { + Py_CLEAR(acc->large); + return NULL; + } + res = acc->large; + acc->large = NULL; + return res; +} + +PyObject * +_PyAccu_Finish(_PyAccu *acc) +{ + PyObject *list, *res; + if (acc->large == NULL) { + list = acc->small; + acc->small = NULL; + } + else { + list = _PyAccu_FinishAsList(acc); + if (!list) + return NULL; + } + res = join_list_unicode(list); + Py_DECREF(list); + return res; +} + +void +_PyAccu_Destroy(_PyAccu *acc) +{ + Py_CLEAR(acc->small); + Py_CLEAR(acc->large); +} diff --git a/Objects/listobject.c b/Objects/listobject.c index 36f8b9d..00de597 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -321,70 +321,59 @@ static PyObject * list_repr(PyListObject *v) { Py_ssize_t i; - PyObject *s, *temp; - PyObject *pieces = NULL, *result = NULL; + PyObject *s = NULL; + _PyAccu acc; + static PyObject *sep = NULL; + + if (Py_SIZE(v) == 0) { + return PyUnicode_FromString("[]"); + } + + if (sep == NULL) { + sep = PyUnicode_FromString(", "); + if (sep == NULL) + return NULL; + } i = Py_ReprEnter((PyObject*)v); if (i != 0) { return i > 0 ? PyUnicode_FromString("[...]") : NULL; } - if (Py_SIZE(v) == 0) { - result = PyUnicode_FromString("[]"); - goto Done; - } + if (_PyAccu_Init(&acc)) + goto error; - pieces = PyList_New(0); - if (pieces == NULL) - goto Done; + s = PyUnicode_FromString("["); + if (s == NULL || _PyAccu_Accumulate(&acc, s)) + goto error; + Py_CLEAR(s); /* Do repr() on each element. Note that this may mutate the list, so must refetch the list size on each iteration. */ for (i = 0; i < Py_SIZE(v); ++i) { - int status; if (Py_EnterRecursiveCall(" while getting the repr of a list")) - goto Done; + goto error; s = PyObject_Repr(v->ob_item[i]); Py_LeaveRecursiveCall(); - if (s == NULL) - goto Done; - status = PyList_Append(pieces, s); - Py_DECREF(s); /* append created a new ref */ - if (status < 0) - goto Done; + if (i > 0 && _PyAccu_Accumulate(&acc, sep)) + goto error; + if (s == NULL || _PyAccu_Accumulate(&acc, s)) + goto error; + Py_CLEAR(s); } + s = PyUnicode_FromString("]"); + if (s == NULL || _PyAccu_Accumulate(&acc, s)) + goto error; + Py_CLEAR(s); - /* Add "[]" decorations to the first and last items. */ - assert(PyList_GET_SIZE(pieces) > 0); - s = PyUnicode_FromString("["); - if (s == NULL) - goto Done; - temp = PyList_GET_ITEM(pieces, 0); - PyUnicode_AppendAndDel(&s, temp); - PyList_SET_ITEM(pieces, 0, s); - if (s == NULL) - goto Done; + Py_ReprLeave((PyObject *)v); + return _PyAccu_Finish(&acc); - s = PyUnicode_FromString("]"); - if (s == NULL) - goto Done; - temp = PyList_GET_ITEM(pieces, PyList_GET_SIZE(pieces) - 1); - PyUnicode_AppendAndDel(&temp, s); - PyList_SET_ITEM(pieces, PyList_GET_SIZE(pieces) - 1, temp); - if (temp == NULL) - goto Done; - - /* Paste them all together with ", " between. */ - s = PyUnicode_FromString(", "); - if (s == NULL) - goto Done; - result = PyUnicode_Join(s, pieces); - Py_DECREF(s); - -Done: - Py_XDECREF(pieces); +error: + _PyAccu_Destroy(&acc); + Py_XDECREF(s); Py_ReprLeave((PyObject *)v); - return result; + return NULL; } static Py_ssize_t diff --git a/Objects/tupleobject.c b/Objects/tupleobject.c index 72b79c9..8aacd12 100644 --- a/Objects/tupleobject.c +++ b/Objects/tupleobject.c @@ -240,13 +240,20 @@ static PyObject * tuplerepr(PyTupleObject *v) { Py_ssize_t i, n; - PyObject *s, *temp; - PyObject *pieces, *result = NULL; + PyObject *s = NULL; + _PyAccu acc; + static PyObject *sep = NULL; n = Py_SIZE(v); if (n == 0) return PyUnicode_FromString("()"); + if (sep == NULL) { + sep = PyUnicode_FromString(", "); + if (sep == NULL) + return NULL; + } + /* While not mutable, it is still possible to end up with a cycle in a tuple through an object that stores itself within a tuple (and thus infinitely asks for the repr of itself). This should only be @@ -256,52 +263,42 @@ tuplerepr(PyTupleObject *v) return i > 0 ? PyUnicode_FromString("(...)") : NULL; } - pieces = PyTuple_New(n); - if (pieces == NULL) - return NULL; + if (_PyAccu_Init(&acc)) + goto error; + + s = PyUnicode_FromString("("); + if (s == NULL || _PyAccu_Accumulate(&acc, s)) + goto error; + Py_CLEAR(s); /* Do repr() on each element. */ for (i = 0; i < n; ++i) { if (Py_EnterRecursiveCall(" while getting the repr of a tuple")) - goto Done; + goto error; s = PyObject_Repr(v->ob_item[i]); Py_LeaveRecursiveCall(); - if (s == NULL) - goto Done; - PyTuple_SET_ITEM(pieces, i, s); + if (i > 0 && _PyAccu_Accumulate(&acc, sep)) + goto error; + if (s == NULL || _PyAccu_Accumulate(&acc, s)) + goto error; + Py_CLEAR(s); } + if (n > 1) + s = PyUnicode_FromString(")"); + else + s = PyUnicode_FromString(",)"); + if (s == NULL || _PyAccu_Accumulate(&acc, s)) + goto error; + Py_CLEAR(s); - /* Add "()" decorations to the first and last items. */ - assert(n > 0); - s = PyUnicode_FromString("("); - if (s == NULL) - goto Done; - temp = PyTuple_GET_ITEM(pieces, 0); - PyUnicode_AppendAndDel(&s, temp); - PyTuple_SET_ITEM(pieces, 0, s); - if (s == NULL) - goto Done; - - s = PyUnicode_FromString(n == 1 ? ",)" : ")"); - if (s == NULL) - goto Done; - temp = PyTuple_GET_ITEM(pieces, n-1); - PyUnicode_AppendAndDel(&temp, s); - PyTuple_SET_ITEM(pieces, n-1, temp); - if (temp == NULL) - goto Done; - - /* Paste them all together with ", " between. */ - s = PyUnicode_FromString(", "); - if (s == NULL) - goto Done; - result = PyUnicode_Join(s, pieces); - Py_DECREF(s); - -Done: - Py_DECREF(pieces); Py_ReprLeave((PyObject *)v); - return result; + return _PyAccu_Finish(&acc); + +error: + _PyAccu_Destroy(&acc); + Py_XDECREF(s); + Py_ReprLeave((PyObject *)v); + return NULL; } /* The addend 82520, was selected from the range(0, 1000000) for |