summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPablo Galindo <Pablogsal@gmail.com>2018-10-28 20:16:26 (GMT)
committerGitHub <noreply@github.com>2018-10-28 20:16:26 (GMT)
commit372d705d958964289d762953d0a61622755f5386 (patch)
treef6a01b7f97739c53a8a99812ce690cbd7058588c
parent569d12f44847f18fc5b514b24e8ab901b0d96895 (diff)
downloadcpython-372d705d958964289d762953d0a61622755f5386.zip
cpython-372d705d958964289d762953d0a61622755f5386.tar.gz
cpython-372d705d958964289d762953d0a61622755f5386.tar.bz2
bpo-33234 Improve list() pre-sizing for inputs with known lengths (GH-9846)
The list() constructor isn't taking full advantage of known input lengths or length hints. This commit makes the constructor pre-size and not over-allocate when the input size is known (the input collection implements __len__). One on the main advantages is that this provides 12% difference in memory savings due to the difference between overallocating and allocating exactly the input size. For efficiency purposes and to avoid a performance regression for small generators and collections, the size of the input object is calculated using __len__ and not __length_hint__, as the later is considerably slower.
-rw-r--r--Lib/test/test_list.py9
-rw-r--r--Misc/NEWS.d/next/Core and Builtins/2018-04-17-01-24-51.bpo-33234.l9IDtp.rst2
-rw-r--r--Objects/listobject.c40
3 files changed, 51 insertions, 0 deletions
diff --git a/Lib/test/test_list.py b/Lib/test/test_list.py
index def4bad..c5002b1 100644
--- a/Lib/test/test_list.py
+++ b/Lib/test/test_list.py
@@ -1,5 +1,6 @@
import sys
from test import list_tests
+from test.support import cpython_only
import pickle
import unittest
@@ -157,5 +158,13 @@ class ListTest(list_tests.CommonTest):
with self.assertRaises(TypeError):
(3,) + L([1,2])
+ @cpython_only
+ def test_preallocation(self):
+ iterable = [0] * 10
+ iter_size = sys.getsizeof(iterable)
+
+ self.assertEqual(iter_size, sys.getsizeof(list([0] * 10)))
+ self.assertEqual(iter_size, sys.getsizeof(list(range(10))))
+
if __name__ == "__main__":
unittest.main()
diff --git a/Misc/NEWS.d/next/Core and Builtins/2018-04-17-01-24-51.bpo-33234.l9IDtp.rst b/Misc/NEWS.d/next/Core and Builtins/2018-04-17-01-24-51.bpo-33234.l9IDtp.rst
new file mode 100644
index 0000000..2f9cd62
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2018-04-17-01-24-51.bpo-33234.l9IDtp.rst
@@ -0,0 +1,2 @@
+The list constructor will pre-size and not over-allocate when
+the input lenght is known.
diff --git a/Objects/listobject.c b/Objects/listobject.c
index fa26444..e85fa5c 100644
--- a/Objects/listobject.c
+++ b/Objects/listobject.c
@@ -76,6 +76,33 @@ list_resize(PyListObject *self, Py_ssize_t newsize)
return 0;
}
+static int
+list_preallocate_exact(PyListObject *self, Py_ssize_t size)
+{
+ assert(self->ob_item == NULL);
+
+ PyObject **items;
+ size_t allocated;
+
+ allocated = (size_t)size;
+ if (allocated > (size_t)PY_SSIZE_T_MAX / sizeof(PyObject *)) {
+ PyErr_NoMemory();
+ return -1;
+ }
+
+ if (size == 0) {
+ allocated = 0;
+ }
+ items = (PyObject **)PyMem_New(PyObject*, allocated);
+ if (items == NULL) {
+ PyErr_NoMemory();
+ return -1;
+ }
+ self->ob_item = items;
+ self->allocated = allocated;
+ return 0;
+}
+
/* Debug statistic to compare allocations with reuse through the free list */
#undef SHOW_ALLOC_COUNT
#ifdef SHOW_ALLOC_COUNT
@@ -2683,6 +2710,19 @@ list___init___impl(PyListObject *self, PyObject *iterable)
(void)_list_clear(self);
}
if (iterable != NULL) {
+ if (_PyObject_HasLen(iterable)) {
+ Py_ssize_t iter_len = PyObject_Size(iterable);
+ if (iter_len == -1) {
+ if (!PyErr_ExceptionMatches(PyExc_TypeError)) {
+ return -1;
+ }
+ PyErr_Clear();
+ }
+ if (iter_len > 0 && self->ob_item == NULL
+ && list_preallocate_exact(self, iter_len)) {
+ return -1;
+ }
+ }
PyObject *rv = list_extend(self, iterable);
if (rv == NULL)
return -1;