diff options
author | Antoine Pitrou <solipsis@pitrou.net> | 2011-10-06 17:04:12 (GMT) |
---|---|---|
committer | Antoine Pitrou <solipsis@pitrou.net> | 2011-10-06 17:04:12 (GMT) |
commit | c61c8d7a5e2884238494a678ecd22c812b062280 (patch) | |
tree | 3fd366f94e25c5b3d5cc9430f8fc2b7de869e782 | |
parent | c6f0df7b2095eaf0a6d5914a043f9062f66d19f7 (diff) | |
parent | eeb7eea1f95793437b3e251f47c98446e15fa680 (diff) | |
download | cpython-c61c8d7a5e2884238494a678ecd22c812b062280.zip cpython-c61c8d7a5e2884238494a678ecd22c812b062280.tar.gz cpython-c61c8d7a5e2884238494a678ecd22c812b062280.tar.bz2 |
Issue #12911: Fix memory consumption when calculating the repr() of huge tuples or lists.
This introduces a small private API for this common pattern.
The issue has been discovered thanks to Martin's huge-mem buildbot.
-rw-r--r-- | Include/Python.h | 2 | ||||
-rw-r--r-- | Include/accu.h | 35 | ||||
-rw-r--r-- | Lib/test/test_list.py | 11 | ||||
-rw-r--r-- | Lib/test/test_tuple.py | 10 | ||||
-rw-r--r-- | Makefile.pre.in | 2 | ||||
-rw-r--r-- | Misc/NEWS | 3 | ||||
-rw-r--r-- | Objects/accu.c | 114 | ||||
-rw-r--r-- | Objects/listobject.c | 81 | ||||
-rw-r--r-- | Objects/tupleobject.c | 75 | ||||
-rw-r--r-- | PC/VC6/pythoncore.dsp | 4 | ||||
-rw-r--r-- | PC/VS7.1/pythoncore.vcproj | 3 | ||||
-rw-r--r-- | PC/VS8.0/pythoncore.vcproj | 8 | ||||
-rw-r--r-- | PCbuild/pythoncore.vcproj | 8 |
13 files changed, 270 insertions, 86 deletions
diff --git a/Include/Python.h b/Include/Python.h index ae384ee..01b98f9 100644 --- a/Include/Python.h +++ b/Include/Python.h @@ -101,7 +101,7 @@ #include "warnings.h" #include "weakrefobject.h" #include "structseq.h" - +#include "accu.h" #include "codecs.h" #include "pyerrors.h" diff --git a/Include/accu.h b/Include/accu.h new file mode 100644 index 0000000..9655d37 --- /dev/null +++ b/Include/accu.h @@ -0,0 +1,35 @@ +#ifndef Py_LIMITED_API +#ifndef Py_ACCU_H +#define Py_ACCU_H + +/*** This is a private API for use by the interpreter and the stdlib. + *** Its definition may be changed or removed at any moment. + ***/ + +/* + * A two-level accumulator of unicode objects that avoids both the overhead + * of keeping a huge number of small separate objects, and the quadratic + * behaviour of using a naive repeated concatenation scheme. + */ + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct { + PyObject *large; /* A list of previously accumulated large strings */ + PyObject *small; /* Pending small strings */ +} _PyAccu; + +PyAPI_FUNC(int) _PyAccu_Init(_PyAccu *acc); +PyAPI_FUNC(int) _PyAccu_Accumulate(_PyAccu *acc, PyObject *unicode); +PyAPI_FUNC(PyObject *) _PyAccu_FinishAsList(_PyAccu *acc); +PyAPI_FUNC(PyObject *) _PyAccu_Finish(_PyAccu *acc); +PyAPI_FUNC(void) _PyAccu_Destroy(_PyAccu *acc); + +#ifdef __cplusplus +} +#endif + +#endif /* Py_ACCU_H */ +#endif /* Py_LIMITED_API */ diff --git a/Lib/test/test_list.py b/Lib/test/test_list.py index 3510bd5..d7e6629 100644 --- a/Lib/test/test_list.py +++ b/Lib/test/test_list.py @@ -59,6 +59,17 @@ class ListTest(list_tests.CommonTest): self.assertRaises((MemoryError, OverflowError), mul, lst, n) self.assertRaises((MemoryError, OverflowError), imul, lst, n) + def test_repr_large(self): + # Check the repr of large list objects + def check(n): + l = [0] * n + s = repr(l) + self.assertEqual(s, + '[' + ', '.join(['0'] * n) + ']') + check(10) # check our checking code + check(1000000) + + def test_main(verbose=None): support.run_unittest(ListTest) diff --git a/Lib/test/test_tuple.py b/Lib/test/test_tuple.py index 75fbe45..1464a0e 100644 --- a/Lib/test/test_tuple.py +++ b/Lib/test/test_tuple.py @@ -154,6 +154,16 @@ class TupleTest(seq_tests.CommonTest): # Trying to untrack an unfinished tuple could crash Python self._not_tracked(tuple(gc.collect() for i in range(101))) + def test_repr_large(self): + # Check the repr of large list objects + def check(n): + l = (0,) * n + s = repr(l) + self.assertEqual(s, + '(' + ', '.join(['0'] * n) + ')') + check(10) # check our checking code + check(1000000) + def test_main(): support.run_unittest(TupleTest) diff --git a/Makefile.pre.in b/Makefile.pre.in index 52743e4..51da7a5 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -342,6 +342,7 @@ PYTHON_OBJS= \ # Objects OBJECT_OBJS= \ Objects/abstract.o \ + Objects/accu.o \ Objects/boolobject.o \ Objects/bytes_methods.o \ Objects/bytearrayobject.o \ @@ -661,6 +662,7 @@ $(srcdir)/Objects/typeslots.inc: $(srcdir)/Include/typeslots.h $(srcdir)/Objects PYTHON_HEADERS= \ Include/Python.h \ Include/abstract.h \ + Include/accu.h \ Include/asdl.h \ Include/ast.h \ Include/bltinmodule.h \ @@ -10,6 +10,9 @@ What's New in Python 3.3 Alpha 1? Core and Builtins ----------------- +- Issue #12911: Fix memory consumption when calculating the repr() of huge + tuples or lists. + - PEP 393: flexible string representation. Thanks to Torsten Becker for the initial implementation, and Victor Stinner for various bug fixes. diff --git a/Objects/accu.c b/Objects/accu.c new file mode 100644 index 0000000..88e8f08 --- /dev/null +++ b/Objects/accu.c @@ -0,0 +1,114 @@ +/* Accumulator struct implementation */ + +#include "Python.h" + +static PyObject * +join_list_unicode(PyObject *lst) +{ + /* return ''.join(lst) */ + PyObject *sep, *ret; + sep = PyUnicode_FromStringAndSize("", 0); + ret = PyUnicode_Join(sep, lst); + Py_DECREF(sep); + return ret; +} + +int +_PyAccu_Init(_PyAccu *acc) +{ + /* Lazily allocated */ + acc->large = NULL; + acc->small = PyList_New(0); + if (acc->small == NULL) + return -1; + return 0; +} + +static int +flush_accumulator(_PyAccu *acc) +{ + Py_ssize_t nsmall = PyList_GET_SIZE(acc->small); + if (nsmall) { + int ret; + PyObject *joined; + if (acc->large == NULL) { + acc->large = PyList_New(0); + if (acc->large == NULL) + return -1; + } + joined = join_list_unicode(acc->small); + if (joined == NULL) + return -1; + if (PyList_SetSlice(acc->small, 0, nsmall, NULL)) { + Py_DECREF(joined); + return -1; + } + ret = PyList_Append(acc->large, joined); + Py_DECREF(joined); + return ret; + } + return 0; +} + +int +_PyAccu_Accumulate(_PyAccu *acc, PyObject *unicode) +{ + Py_ssize_t nsmall; + assert(PyUnicode_Check(unicode)); + + if (PyList_Append(acc->small, unicode)) + return -1; + nsmall = PyList_GET_SIZE(acc->small); + /* Each item in a list of unicode objects has an overhead (in 64-bit + * builds) of: + * - 8 bytes for the list slot + * - 56 bytes for the header of the unicode object + * that is, 64 bytes. 100000 such objects waste more than 6MB + * compared to a single concatenated string. + */ + if (nsmall < 100000) + return 0; + return flush_accumulator(acc); +} + +PyObject * +_PyAccu_FinishAsList(_PyAccu *acc) +{ + int ret; + PyObject *res; + + ret = flush_accumulator(acc); + Py_CLEAR(acc->small); + if (ret) { + Py_CLEAR(acc->large); + return NULL; + } + res = acc->large; + acc->large = NULL; + return res; +} + +PyObject * +_PyAccu_Finish(_PyAccu *acc) +{ + PyObject *list, *res; + if (acc->large == NULL) { + list = acc->small; + acc->small = NULL; + } + else { + list = _PyAccu_FinishAsList(acc); + if (!list) + return NULL; + } + res = join_list_unicode(list); + Py_DECREF(list); + return res; +} + +void +_PyAccu_Destroy(_PyAccu *acc) +{ + Py_CLEAR(acc->small); + Py_CLEAR(acc->large); +} diff --git a/Objects/listobject.c b/Objects/listobject.c index 1c516e9..28d94e7 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -321,70 +321,59 @@ static PyObject * list_repr(PyListObject *v) { Py_ssize_t i; - PyObject *s, *temp; - PyObject *pieces = NULL, *result = NULL; + PyObject *s = NULL; + _PyAccu acc; + static PyObject *sep = NULL; + + if (Py_SIZE(v) == 0) { + return PyUnicode_FromString("[]"); + } + + if (sep == NULL) { + sep = PyUnicode_FromString(", "); + if (sep == NULL) + return NULL; + } i = Py_ReprEnter((PyObject*)v); if (i != 0) { return i > 0 ? PyUnicode_FromString("[...]") : NULL; } - if (Py_SIZE(v) == 0) { - result = PyUnicode_FromString("[]"); - goto Done; - } + if (_PyAccu_Init(&acc)) + goto error; - pieces = PyList_New(0); - if (pieces == NULL) - goto Done; + s = PyUnicode_FromString("["); + if (s == NULL || _PyAccu_Accumulate(&acc, s)) + goto error; + Py_CLEAR(s); /* Do repr() on each element. Note that this may mutate the list, so must refetch the list size on each iteration. */ for (i = 0; i < Py_SIZE(v); ++i) { - int status; if (Py_EnterRecursiveCall(" while getting the repr of a list")) - goto Done; + goto error; s = PyObject_Repr(v->ob_item[i]); Py_LeaveRecursiveCall(); - if (s == NULL) - goto Done; - status = PyList_Append(pieces, s); - Py_DECREF(s); /* append created a new ref */ - if (status < 0) - goto Done; + if (i > 0 && _PyAccu_Accumulate(&acc, sep)) + goto error; + if (s == NULL || _PyAccu_Accumulate(&acc, s)) + goto error; + Py_CLEAR(s); } + s = PyUnicode_FromString("]"); + if (s == NULL || _PyAccu_Accumulate(&acc, s)) + goto error; + Py_CLEAR(s); - /* Add "[]" decorations to the first and last items. */ - assert(PyList_GET_SIZE(pieces) > 0); - s = PyUnicode_FromString("["); - if (s == NULL) - goto Done; - temp = PyList_GET_ITEM(pieces, 0); - PyUnicode_AppendAndDel(&s, temp); - PyList_SET_ITEM(pieces, 0, s); - if (s == NULL) - goto Done; + Py_ReprLeave((PyObject *)v); + return _PyAccu_Finish(&acc); - s = PyUnicode_FromString("]"); - if (s == NULL) - goto Done; - temp = PyList_GET_ITEM(pieces, PyList_GET_SIZE(pieces) - 1); - PyUnicode_AppendAndDel(&temp, s); - PyList_SET_ITEM(pieces, PyList_GET_SIZE(pieces) - 1, temp); - if (temp == NULL) - goto Done; - - /* Paste them all together with ", " between. */ - s = PyUnicode_FromString(", "); - if (s == NULL) - goto Done; - result = PyUnicode_Join(s, pieces); - Py_DECREF(s); - -Done: - Py_XDECREF(pieces); +error: + _PyAccu_Destroy(&acc); + Py_XDECREF(s); Py_ReprLeave((PyObject *)v); - return result; + return NULL; } static Py_ssize_t diff --git a/Objects/tupleobject.c b/Objects/tupleobject.c index ddb69e4..54a580d 100644 --- a/Objects/tupleobject.c +++ b/Objects/tupleobject.c @@ -240,13 +240,20 @@ static PyObject * tuplerepr(PyTupleObject *v) { Py_ssize_t i, n; - PyObject *s, *temp; - PyObject *pieces, *result = NULL; + PyObject *s = NULL; + _PyAccu acc; + static PyObject *sep = NULL; n = Py_SIZE(v); if (n == 0) return PyUnicode_FromString("()"); + if (sep == NULL) { + sep = PyUnicode_FromString(", "); + if (sep == NULL) + return NULL; + } + /* While not mutable, it is still possible to end up with a cycle in a tuple through an object that stores itself within a tuple (and thus infinitely asks for the repr of itself). This should only be @@ -256,52 +263,42 @@ tuplerepr(PyTupleObject *v) return i > 0 ? PyUnicode_FromString("(...)") : NULL; } - pieces = PyTuple_New(n); - if (pieces == NULL) - return NULL; + if (_PyAccu_Init(&acc)) + goto error; + + s = PyUnicode_FromString("("); + if (s == NULL || _PyAccu_Accumulate(&acc, s)) + goto error; + Py_CLEAR(s); /* Do repr() on each element. */ for (i = 0; i < n; ++i) { if (Py_EnterRecursiveCall(" while getting the repr of a tuple")) - goto Done; + goto error; s = PyObject_Repr(v->ob_item[i]); Py_LeaveRecursiveCall(); - if (s == NULL) - goto Done; - PyTuple_SET_ITEM(pieces, i, s); + if (i > 0 && _PyAccu_Accumulate(&acc, sep)) + goto error; + if (s == NULL || _PyAccu_Accumulate(&acc, s)) + goto error; + Py_CLEAR(s); } + if (n > 1) + s = PyUnicode_FromString(")"); + else + s = PyUnicode_FromString(",)"); + if (s == NULL || _PyAccu_Accumulate(&acc, s)) + goto error; + Py_CLEAR(s); - /* Add "()" decorations to the first and last items. */ - assert(n > 0); - s = PyUnicode_FromString("("); - if (s == NULL) - goto Done; - temp = PyTuple_GET_ITEM(pieces, 0); - PyUnicode_AppendAndDel(&s, temp); - PyTuple_SET_ITEM(pieces, 0, s); - if (s == NULL) - goto Done; - - s = PyUnicode_FromString(n == 1 ? ",)" : ")"); - if (s == NULL) - goto Done; - temp = PyTuple_GET_ITEM(pieces, n-1); - PyUnicode_AppendAndDel(&temp, s); - PyTuple_SET_ITEM(pieces, n-1, temp); - if (temp == NULL) - goto Done; - - /* Paste them all together with ", " between. */ - s = PyUnicode_FromString(", "); - if (s == NULL) - goto Done; - result = PyUnicode_Join(s, pieces); - Py_DECREF(s); - -Done: - Py_DECREF(pieces); Py_ReprLeave((PyObject *)v); - return result; + return _PyAccu_Finish(&acc); + +error: + _PyAccu_Destroy(&acc); + Py_XDECREF(s); + Py_ReprLeave((PyObject *)v); + return NULL; } /* The addend 82520, was selected from the range(0, 1000000) for diff --git a/PC/VC6/pythoncore.dsp b/PC/VC6/pythoncore.dsp index 60074ed..61247b7 100644 --- a/PC/VC6/pythoncore.dsp +++ b/PC/VC6/pythoncore.dsp @@ -205,6 +205,10 @@ SOURCE=..\..\Objects\abstract.c # End Source File
# Begin Source File
+SOURCE=..\..\Objects\accu.c
+# End Source File
+# Begin Source File
+
SOURCE=..\..\Parser\acceler.c
# End Source File
# Begin Source File
diff --git a/PC/VS7.1/pythoncore.vcproj b/PC/VS7.1/pythoncore.vcproj index 205dd8f..d882911 100644 --- a/PC/VS7.1/pythoncore.vcproj +++ b/PC/VS7.1/pythoncore.vcproj @@ -445,6 +445,9 @@ RelativePath="..\..\Objects\abstract.c">
</File>
<File
+ RelativePath="..\..\Objects\accu.c">
+ </File>
+ <File
RelativePath="..\..\Parser\acceler.c">
</File>
<File
diff --git a/PC/VS8.0/pythoncore.vcproj b/PC/VS8.0/pythoncore.vcproj index 7252bcd..5e1ef1a 100644 --- a/PC/VS8.0/pythoncore.vcproj +++ b/PC/VS8.0/pythoncore.vcproj @@ -635,6 +635,10 @@ >
</File>
<File
+ RelativePath="..\..\Include\accu.h"
+ >
+ </File>
+ <File
RelativePath="..\..\Include\asdl.h"
>
</File>
@@ -1447,6 +1451,10 @@ >
</File>
<File
+ RelativePath="..\..\Objects\accu.c"
+ >
+ </File>
+ <File
RelativePath="..\..\Objects\boolobject.c"
>
</File>
diff --git a/PCbuild/pythoncore.vcproj b/PCbuild/pythoncore.vcproj index 7e9f472..69a8ca8 100644 --- a/PCbuild/pythoncore.vcproj +++ b/PCbuild/pythoncore.vcproj @@ -635,6 +635,10 @@ >
</File>
<File
+ RelativePath="..\Include\accu.h"
+ >
+ </File>
+ <File
RelativePath="..\Include\asdl.h"
>
</File>
@@ -1455,6 +1459,10 @@ >
</File>
<File
+ RelativePath="..\Objects\accu.c"
+ >
+ </File>
+ <File
RelativePath="..\Objects\boolobject.c"
>
</File>
|