summaryrefslogtreecommitdiffstats
path: root/Objects/dictobject.c
diff options
context:
space:
mode:
authorInada Naoki <songofacandy@gmail.com>2020-08-07 05:08:55 (GMT)
committerGitHub <noreply@github.com>2020-08-07 05:08:55 (GMT)
commitd9323a8c6e07071a59dc4c910661db33236c01b2 (patch)
tree113cc103733582d9bc58875aba732f886481fa62 /Objects/dictobject.c
parent5f0769a7529fcbc28190864f098f192053a10747 (diff)
downloadcpython-d9323a8c6e07071a59dc4c910661db33236c01b2.zip
cpython-d9323a8c6e07071a59dc4c910661db33236c01b2.tar.gz
cpython-d9323a8c6e07071a59dc4c910661db33236c01b2.tar.bz2
bpo-41493: Refactoring dictresize (GH-21751)
Split newsize calculation into new function. dictresize() now accepts exact newsize.
Diffstat (limited to 'Objects/dictobject.c')
-rw-r--r--Objects/dictobject.c67
1 files changed, 41 insertions, 26 deletions
diff --git a/Objects/dictobject.c b/Objects/dictobject.c
index 1b7ae06..6c3fc62 100644
--- a/Objects/dictobject.c
+++ b/Objects/dictobject.c
@@ -111,6 +111,7 @@ converting the dict to the combined table.
#define PyDict_MINSIZE 8
#include "Python.h"
+#include "pycore_bitutils.h" // _Py_bit_length
#include "pycore_gc.h" // _PyObject_GC_IS_TRACKED()
#include "pycore_object.h" // _PyObject_GC_TRACK()
#include "pycore_pyerrors.h" // _PyErr_Fetch()
@@ -236,7 +237,7 @@ lookdict_unicode_nodummy(PyDictObject *mp, PyObject *key,
static Py_ssize_t lookdict_split(PyDictObject *mp, PyObject *key,
Py_hash_t hash, PyObject **value_addr);
-static int dictresize(PyDictObject *mp, Py_ssize_t minused);
+static int dictresize(PyDictObject *mp, Py_ssize_t newsize);
static PyObject* dict_iter(PyDictObject *dict);
@@ -411,18 +412,40 @@ dictkeys_set_index(PyDictKeysObject *keys, Py_ssize_t i, Py_ssize_t ix)
*/
#define USABLE_FRACTION(n) (((n) << 1)/3)
-/* ESTIMATE_SIZE is reverse function of USABLE_FRACTION.
+/* Find the smallest dk_size >= minsize. */
+static inline Py_ssize_t
+calculate_keysize(Py_ssize_t minsize)
+{
+#if SIZEOF_LONG == SIZEOF_SIZE_T
+ minsize = (minsize | PyDict_MINSIZE) - 1;
+ return 1LL << _Py_bit_length(minsize | (PyDict_MINSIZE-1));
+#elif defined(_MSC_VER)
+ // On 64bit Windows, sizeof(long) == 4.
+ minsize = (minsize | PyDict_MINSIZE) - 1;
+ unsigned long msb;
+ _BitScanReverse64(&msb, (uint64_t)minsize);
+ return 1LL << (msb + 1);
+#else
+ Py_ssize_t size;
+ for (size = PyDict_MINSIZE;
+ size < minsize && size > 0;
+ size <<= 1)
+ ;
+ return size;
+#endif
+}
+
+/* estimate_keysize is reverse function of USABLE_FRACTION.
+ *
* This can be used to reserve enough size to insert n entries without
* resizing.
*/
-#define ESTIMATE_SIZE(n) (((n)*3+1) >> 1)
+static inline Py_ssize_t
+estimate_keysize(Py_ssize_t n)
+{
+ return calculate_keysize((n*3 + 1) / 2);
+}
-/* Alternative fraction that is otherwise close enough to 2n/3 to make
- * little difference. 8 * 2/3 == 8 * 5/8 == 5. 16 * 2/3 == 16 * 5/8 == 10.
- * 32 * 2/3 = 21, 32 * 5/8 = 20.
- * Its advantage is that it is faster to compute on machines with slow division.
- * #define USABLE_FRACTION(n) (((n) >> 1) + ((n) >> 2) - ((n) >> 3))
- */
/* GROWTH_RATE. Growth rate upon hitting maximum load.
* Currently set to used*3.
@@ -1036,7 +1059,7 @@ find_empty_slot(PyDictKeysObject *keys, Py_hash_t hash)
static int
insertion_resize(PyDictObject *mp)
{
- return dictresize(mp, GROWTH_RATE(mp));
+ return dictresize(mp, calculate_keysize(GROWTH_RATE(mp)));
}
/*
@@ -1194,22 +1217,19 @@ After resizing a table is always combined,
but can be resplit by make_keys_shared().
*/
static int
-dictresize(PyDictObject *mp, Py_ssize_t minsize)
+dictresize(PyDictObject *mp, Py_ssize_t newsize)
{
- Py_ssize_t newsize, numentries;
+ Py_ssize_t numentries;
PyDictKeysObject *oldkeys;
PyObject **oldvalues;
PyDictKeyEntry *oldentries, *newentries;
- /* Find the smallest table size > minused. */
- for (newsize = PyDict_MINSIZE;
- newsize < minsize && newsize > 0;
- newsize <<= 1)
- ;
if (newsize <= 0) {
PyErr_NoMemory();
return -1;
}
+ assert(IS_POWER_OF_2(newsize));
+ assert(newsize >= PyDict_MINSIZE);
oldkeys = mp->ma_keys;
@@ -1355,13 +1375,8 @@ _PyDict_NewPresized(Py_ssize_t minused)
newsize = max_presize;
}
else {
- Py_ssize_t minsize = ESTIMATE_SIZE(minused);
- newsize = PyDict_MINSIZE*2;
- while (newsize < minsize) {
- newsize <<= 1;
- }
+ newsize = estimate_keysize(minused);
}
- assert(IS_POWER_OF_2(newsize));
new_keys = new_keys_object(newsize);
if (new_keys == NULL)
@@ -1930,7 +1945,7 @@ _PyDict_FromKeys(PyObject *cls, PyObject *iterable, PyObject *value)
PyObject *key;
Py_hash_t hash;
- if (dictresize(mp, ESTIMATE_SIZE(PyDict_GET_SIZE(iterable)))) {
+ if (dictresize(mp, estimate_keysize(PyDict_GET_SIZE(iterable)))) {
Py_DECREF(d);
return NULL;
}
@@ -1949,7 +1964,7 @@ _PyDict_FromKeys(PyObject *cls, PyObject *iterable, PyObject *value)
PyObject *key;
Py_hash_t hash;
- if (dictresize(mp, ESTIMATE_SIZE(PySet_GET_SIZE(iterable)))) {
+ if (dictresize(mp, estimate_keysize(PySet_GET_SIZE(iterable)))) {
Py_DECREF(d);
return NULL;
}
@@ -2558,7 +2573,7 @@ dict_merge(PyObject *a, PyObject *b, int override)
* that there will be no (or few) overlapping keys.
*/
if (USABLE_FRACTION(mp->ma_keys->dk_size) < other->ma_used) {
- if (dictresize(mp, ESTIMATE_SIZE(mp->ma_used + other->ma_used))) {
+ if (dictresize(mp, estimate_keysize(mp->ma_used + other->ma_used))) {
return -1;
}
}