summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAntoine Pitrou <solipsis@pitrou.net>2011-10-06 17:04:12 (GMT)
committerAntoine Pitrou <solipsis@pitrou.net>2011-10-06 17:04:12 (GMT)
commitc61c8d7a5e2884238494a678ecd22c812b062280 (patch)
tree3fd366f94e25c5b3d5cc9430f8fc2b7de869e782
parentc6f0df7b2095eaf0a6d5914a043f9062f66d19f7 (diff)
parenteeb7eea1f95793437b3e251f47c98446e15fa680 (diff)
downloadcpython-c61c8d7a5e2884238494a678ecd22c812b062280.zip
cpython-c61c8d7a5e2884238494a678ecd22c812b062280.tar.gz
cpython-c61c8d7a5e2884238494a678ecd22c812b062280.tar.bz2
Issue #12911: Fix memory consumption when calculating the repr() of huge tuples or lists.
This introduces a small private API for this common pattern. The issue has been discovered thanks to Martin's huge-mem buildbot.
-rw-r--r--Include/Python.h2
-rw-r--r--Include/accu.h35
-rw-r--r--Lib/test/test_list.py11
-rw-r--r--Lib/test/test_tuple.py10
-rw-r--r--Makefile.pre.in2
-rw-r--r--Misc/NEWS3
-rw-r--r--Objects/accu.c114
-rw-r--r--Objects/listobject.c81
-rw-r--r--Objects/tupleobject.c75
-rw-r--r--PC/VC6/pythoncore.dsp4
-rw-r--r--PC/VS7.1/pythoncore.vcproj3
-rw-r--r--PC/VS8.0/pythoncore.vcproj8
-rw-r--r--PCbuild/pythoncore.vcproj8
13 files changed, 270 insertions, 86 deletions
diff --git a/Include/Python.h b/Include/Python.h
index ae384ee..01b98f9 100644
--- a/Include/Python.h
+++ b/Include/Python.h
@@ -101,7 +101,7 @@
#include "warnings.h"
#include "weakrefobject.h"
#include "structseq.h"
-
+#include "accu.h"
#include "codecs.h"
#include "pyerrors.h"
diff --git a/Include/accu.h b/Include/accu.h
new file mode 100644
index 0000000..9655d37
--- /dev/null
+++ b/Include/accu.h
@@ -0,0 +1,35 @@
+#ifndef Py_LIMITED_API
+#ifndef Py_ACCU_H
+#define Py_ACCU_H
+
+/*** This is a private API for use by the interpreter and the stdlib.
+ *** Its definition may be changed or removed at any moment.
+ ***/
+
+/*
+ * A two-level accumulator of unicode objects that avoids both the overhead
+ * of keeping a huge number of small separate objects, and the quadratic
+ * behaviour of using a naive repeated concatenation scheme.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct {
+ PyObject *large; /* A list of previously accumulated large strings */
+ PyObject *small; /* Pending small strings */
+} _PyAccu;
+
+PyAPI_FUNC(int) _PyAccu_Init(_PyAccu *acc);
+PyAPI_FUNC(int) _PyAccu_Accumulate(_PyAccu *acc, PyObject *unicode);
+PyAPI_FUNC(PyObject *) _PyAccu_FinishAsList(_PyAccu *acc);
+PyAPI_FUNC(PyObject *) _PyAccu_Finish(_PyAccu *acc);
+PyAPI_FUNC(void) _PyAccu_Destroy(_PyAccu *acc);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* Py_ACCU_H */
+#endif /* Py_LIMITED_API */
diff --git a/Lib/test/test_list.py b/Lib/test/test_list.py
index 3510bd5..d7e6629 100644
--- a/Lib/test/test_list.py
+++ b/Lib/test/test_list.py
@@ -59,6 +59,17 @@ class ListTest(list_tests.CommonTest):
self.assertRaises((MemoryError, OverflowError), mul, lst, n)
self.assertRaises((MemoryError, OverflowError), imul, lst, n)
+ def test_repr_large(self):
+ # Check the repr of large list objects
+ def check(n):
+ l = [0] * n
+ s = repr(l)
+ self.assertEqual(s,
+ '[' + ', '.join(['0'] * n) + ']')
+ check(10) # check our checking code
+ check(1000000)
+
+
def test_main(verbose=None):
support.run_unittest(ListTest)
diff --git a/Lib/test/test_tuple.py b/Lib/test/test_tuple.py
index 75fbe45..1464a0e 100644
--- a/Lib/test/test_tuple.py
+++ b/Lib/test/test_tuple.py
@@ -154,6 +154,16 @@ class TupleTest(seq_tests.CommonTest):
# Trying to untrack an unfinished tuple could crash Python
self._not_tracked(tuple(gc.collect() for i in range(101)))
+ def test_repr_large(self):
+ # Check the repr of large list objects
+ def check(n):
+ l = (0,) * n
+ s = repr(l)
+ self.assertEqual(s,
+ '(' + ', '.join(['0'] * n) + ')')
+ check(10) # check our checking code
+ check(1000000)
+
def test_main():
support.run_unittest(TupleTest)
diff --git a/Makefile.pre.in b/Makefile.pre.in
index 52743e4..51da7a5 100644
--- a/Makefile.pre.in
+++ b/Makefile.pre.in
@@ -342,6 +342,7 @@ PYTHON_OBJS= \
# Objects
OBJECT_OBJS= \
Objects/abstract.o \
+ Objects/accu.o \
Objects/boolobject.o \
Objects/bytes_methods.o \
Objects/bytearrayobject.o \
@@ -661,6 +662,7 @@ $(srcdir)/Objects/typeslots.inc: $(srcdir)/Include/typeslots.h $(srcdir)/Objects
PYTHON_HEADERS= \
Include/Python.h \
Include/abstract.h \
+ Include/accu.h \
Include/asdl.h \
Include/ast.h \
Include/bltinmodule.h \
diff --git a/Misc/NEWS b/Misc/NEWS
index 9564891..af31f9f 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -10,6 +10,9 @@ What's New in Python 3.3 Alpha 1?
Core and Builtins
-----------------
+- Issue #12911: Fix memory consumption when calculating the repr() of huge
+ tuples or lists.
+
- PEP 393: flexible string representation. Thanks to Torsten Becker for the
initial implementation, and Victor Stinner for various bug fixes.
diff --git a/Objects/accu.c b/Objects/accu.c
new file mode 100644
index 0000000..88e8f08
--- /dev/null
+++ b/Objects/accu.c
@@ -0,0 +1,114 @@
+/* Accumulator struct implementation */
+
+#include "Python.h"
+
+static PyObject *
+join_list_unicode(PyObject *lst)
+{
+ /* return ''.join(lst) */
+ PyObject *sep, *ret;
+ sep = PyUnicode_FromStringAndSize("", 0);
+ ret = PyUnicode_Join(sep, lst);
+ Py_DECREF(sep);
+ return ret;
+}
+
+int
+_PyAccu_Init(_PyAccu *acc)
+{
+ /* Lazily allocated */
+ acc->large = NULL;
+ acc->small = PyList_New(0);
+ if (acc->small == NULL)
+ return -1;
+ return 0;
+}
+
+static int
+flush_accumulator(_PyAccu *acc)
+{
+ Py_ssize_t nsmall = PyList_GET_SIZE(acc->small);
+ if (nsmall) {
+ int ret;
+ PyObject *joined;
+ if (acc->large == NULL) {
+ acc->large = PyList_New(0);
+ if (acc->large == NULL)
+ return -1;
+ }
+ joined = join_list_unicode(acc->small);
+ if (joined == NULL)
+ return -1;
+ if (PyList_SetSlice(acc->small, 0, nsmall, NULL)) {
+ Py_DECREF(joined);
+ return -1;
+ }
+ ret = PyList_Append(acc->large, joined);
+ Py_DECREF(joined);
+ return ret;
+ }
+ return 0;
+}
+
+int
+_PyAccu_Accumulate(_PyAccu *acc, PyObject *unicode)
+{
+ Py_ssize_t nsmall;
+ assert(PyUnicode_Check(unicode));
+
+ if (PyList_Append(acc->small, unicode))
+ return -1;
+ nsmall = PyList_GET_SIZE(acc->small);
+ /* Each item in a list of unicode objects has an overhead (in 64-bit
+ * builds) of:
+ * - 8 bytes for the list slot
+ * - 56 bytes for the header of the unicode object
+ * that is, 64 bytes. 100000 such objects waste more than 6MB
+ * compared to a single concatenated string.
+ */
+ if (nsmall < 100000)
+ return 0;
+ return flush_accumulator(acc);
+}
+
+PyObject *
+_PyAccu_FinishAsList(_PyAccu *acc)
+{
+ int ret;
+ PyObject *res;
+
+ ret = flush_accumulator(acc);
+ Py_CLEAR(acc->small);
+ if (ret) {
+ Py_CLEAR(acc->large);
+ return NULL;
+ }
+ res = acc->large;
+ acc->large = NULL;
+ return res;
+}
+
+PyObject *
+_PyAccu_Finish(_PyAccu *acc)
+{
+ PyObject *list, *res;
+ if (acc->large == NULL) {
+ list = acc->small;
+ acc->small = NULL;
+ }
+ else {
+ list = _PyAccu_FinishAsList(acc);
+ if (!list)
+ return NULL;
+ }
+ res = join_list_unicode(list);
+ Py_DECREF(list);
+ return res;
+}
+
+void
+_PyAccu_Destroy(_PyAccu *acc)
+{
+ Py_CLEAR(acc->small);
+ Py_CLEAR(acc->large);
+}
diff --git a/Objects/listobject.c b/Objects/listobject.c
index 1c516e9..28d94e7 100644
--- a/Objects/listobject.c
+++ b/Objects/listobject.c
@@ -321,70 +321,59 @@ static PyObject *
list_repr(PyListObject *v)
{
Py_ssize_t i;
- PyObject *s, *temp;
- PyObject *pieces = NULL, *result = NULL;
+ PyObject *s = NULL;
+ _PyAccu acc;
+ static PyObject *sep = NULL;
+
+ if (Py_SIZE(v) == 0) {
+ return PyUnicode_FromString("[]");
+ }
+
+ if (sep == NULL) {
+ sep = PyUnicode_FromString(", ");
+ if (sep == NULL)
+ return NULL;
+ }
i = Py_ReprEnter((PyObject*)v);
if (i != 0) {
return i > 0 ? PyUnicode_FromString("[...]") : NULL;
}
- if (Py_SIZE(v) == 0) {
- result = PyUnicode_FromString("[]");
- goto Done;
- }
+ if (_PyAccu_Init(&acc))
+ goto error;
- pieces = PyList_New(0);
- if (pieces == NULL)
- goto Done;
+ s = PyUnicode_FromString("[");
+ if (s == NULL || _PyAccu_Accumulate(&acc, s))
+ goto error;
+ Py_CLEAR(s);
/* Do repr() on each element. Note that this may mutate the list,
so must refetch the list size on each iteration. */
for (i = 0; i < Py_SIZE(v); ++i) {
- int status;
if (Py_EnterRecursiveCall(" while getting the repr of a list"))
- goto Done;
+ goto error;
s = PyObject_Repr(v->ob_item[i]);
Py_LeaveRecursiveCall();
- if (s == NULL)
- goto Done;
- status = PyList_Append(pieces, s);
- Py_DECREF(s); /* append created a new ref */
- if (status < 0)
- goto Done;
+ if (i > 0 && _PyAccu_Accumulate(&acc, sep))
+ goto error;
+ if (s == NULL || _PyAccu_Accumulate(&acc, s))
+ goto error;
+ Py_CLEAR(s);
}
+ s = PyUnicode_FromString("]");
+ if (s == NULL || _PyAccu_Accumulate(&acc, s))
+ goto error;
+ Py_CLEAR(s);
- /* Add "[]" decorations to the first and last items. */
- assert(PyList_GET_SIZE(pieces) > 0);
- s = PyUnicode_FromString("[");
- if (s == NULL)
- goto Done;
- temp = PyList_GET_ITEM(pieces, 0);
- PyUnicode_AppendAndDel(&s, temp);
- PyList_SET_ITEM(pieces, 0, s);
- if (s == NULL)
- goto Done;
+ Py_ReprLeave((PyObject *)v);
+ return _PyAccu_Finish(&acc);
- s = PyUnicode_FromString("]");
- if (s == NULL)
- goto Done;
- temp = PyList_GET_ITEM(pieces, PyList_GET_SIZE(pieces) - 1);
- PyUnicode_AppendAndDel(&temp, s);
- PyList_SET_ITEM(pieces, PyList_GET_SIZE(pieces) - 1, temp);
- if (temp == NULL)
- goto Done;
-
- /* Paste them all together with ", " between. */
- s = PyUnicode_FromString(", ");
- if (s == NULL)
- goto Done;
- result = PyUnicode_Join(s, pieces);
- Py_DECREF(s);
-
-Done:
- Py_XDECREF(pieces);
+error:
+ _PyAccu_Destroy(&acc);
+ Py_XDECREF(s);
Py_ReprLeave((PyObject *)v);
- return result;
+ return NULL;
}
static Py_ssize_t
diff --git a/Objects/tupleobject.c b/Objects/tupleobject.c
index ddb69e4..54a580d 100644
--- a/Objects/tupleobject.c
+++ b/Objects/tupleobject.c
@@ -240,13 +240,20 @@ static PyObject *
tuplerepr(PyTupleObject *v)
{
Py_ssize_t i, n;
- PyObject *s, *temp;
- PyObject *pieces, *result = NULL;
+ PyObject *s = NULL;
+ _PyAccu acc;
+ static PyObject *sep = NULL;
n = Py_SIZE(v);
if (n == 0)
return PyUnicode_FromString("()");
+ if (sep == NULL) {
+ sep = PyUnicode_FromString(", ");
+ if (sep == NULL)
+ return NULL;
+ }
+
/* While not mutable, it is still possible to end up with a cycle in a
tuple through an object that stores itself within a tuple (and thus
infinitely asks for the repr of itself). This should only be
@@ -256,52 +263,42 @@ tuplerepr(PyTupleObject *v)
return i > 0 ? PyUnicode_FromString("(...)") : NULL;
}
- pieces = PyTuple_New(n);
- if (pieces == NULL)
- return NULL;
+ if (_PyAccu_Init(&acc))
+ goto error;
+
+ s = PyUnicode_FromString("(");
+ if (s == NULL || _PyAccu_Accumulate(&acc, s))
+ goto error;
+ Py_CLEAR(s);
/* Do repr() on each element. */
for (i = 0; i < n; ++i) {
if (Py_EnterRecursiveCall(" while getting the repr of a tuple"))
- goto Done;
+ goto error;
s = PyObject_Repr(v->ob_item[i]);
Py_LeaveRecursiveCall();
- if (s == NULL)
- goto Done;
- PyTuple_SET_ITEM(pieces, i, s);
+ if (i > 0 && _PyAccu_Accumulate(&acc, sep))
+ goto error;
+ if (s == NULL || _PyAccu_Accumulate(&acc, s))
+ goto error;
+ Py_CLEAR(s);
}
+ if (n > 1)
+ s = PyUnicode_FromString(")");
+ else
+ s = PyUnicode_FromString(",)");
+ if (s == NULL || _PyAccu_Accumulate(&acc, s))
+ goto error;
+ Py_CLEAR(s);
- /* Add "()" decorations to the first and last items. */
- assert(n > 0);
- s = PyUnicode_FromString("(");
- if (s == NULL)
- goto Done;
- temp = PyTuple_GET_ITEM(pieces, 0);
- PyUnicode_AppendAndDel(&s, temp);
- PyTuple_SET_ITEM(pieces, 0, s);
- if (s == NULL)
- goto Done;
-
- s = PyUnicode_FromString(n == 1 ? ",)" : ")");
- if (s == NULL)
- goto Done;
- temp = PyTuple_GET_ITEM(pieces, n-1);
- PyUnicode_AppendAndDel(&temp, s);
- PyTuple_SET_ITEM(pieces, n-1, temp);
- if (temp == NULL)
- goto Done;
-
- /* Paste them all together with ", " between. */
- s = PyUnicode_FromString(", ");
- if (s == NULL)
- goto Done;
- result = PyUnicode_Join(s, pieces);
- Py_DECREF(s);
-
-Done:
- Py_DECREF(pieces);
Py_ReprLeave((PyObject *)v);
- return result;
+ return _PyAccu_Finish(&acc);
+
+error:
+ _PyAccu_Destroy(&acc);
+ Py_XDECREF(s);
+ Py_ReprLeave((PyObject *)v);
+ return NULL;
}
/* The addend 82520, was selected from the range(0, 1000000) for
diff --git a/PC/VC6/pythoncore.dsp b/PC/VC6/pythoncore.dsp
index 60074ed..61247b7 100644
--- a/PC/VC6/pythoncore.dsp
+++ b/PC/VC6/pythoncore.dsp
@@ -205,6 +205,10 @@ SOURCE=..\..\Objects\abstract.c
# End Source File
# Begin Source File
+SOURCE=..\..\Objects\accu.c
+# End Source File
+# Begin Source File
+
SOURCE=..\..\Parser\acceler.c
# End Source File
# Begin Source File
diff --git a/PC/VS7.1/pythoncore.vcproj b/PC/VS7.1/pythoncore.vcproj
index 205dd8f..d882911 100644
--- a/PC/VS7.1/pythoncore.vcproj
+++ b/PC/VS7.1/pythoncore.vcproj
@@ -445,6 +445,9 @@
RelativePath="..\..\Objects\abstract.c">
</File>
<File
+ RelativePath="..\..\Objects\accu.c">
+ </File>
+ <File
RelativePath="..\..\Parser\acceler.c">
</File>
<File
diff --git a/PC/VS8.0/pythoncore.vcproj b/PC/VS8.0/pythoncore.vcproj
index 7252bcd..5e1ef1a 100644
--- a/PC/VS8.0/pythoncore.vcproj
+++ b/PC/VS8.0/pythoncore.vcproj
@@ -635,6 +635,10 @@
>
</File>
<File
+ RelativePath="..\..\Include\accu.h"
+ >
+ </File>
+ <File
RelativePath="..\..\Include\asdl.h"
>
</File>
@@ -1447,6 +1451,10 @@
>
</File>
<File
+ RelativePath="..\..\Objects\accu.c"
+ >
+ </File>
+ <File
RelativePath="..\..\Objects\boolobject.c"
>
</File>
diff --git a/PCbuild/pythoncore.vcproj b/PCbuild/pythoncore.vcproj
index 7e9f472..69a8ca8 100644
--- a/PCbuild/pythoncore.vcproj
+++ b/PCbuild/pythoncore.vcproj
@@ -635,6 +635,10 @@
>
</File>
<File
+ RelativePath="..\Include\accu.h"
+ >
+ </File>
+ <File
RelativePath="..\Include\asdl.h"
>
</File>
@@ -1455,6 +1459,10 @@
>
</File>
<File
+ RelativePath="..\Objects\accu.c"
+ >
+ </File>
+ <File
RelativePath="..\Objects\boolobject.c"
>
</File>