summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- Objects/dictnotes.txt 38
-rw-r--r-- Objects/dictobject.c 24
2 files changed, 18 insertions, 44 deletions
diff --git a/Objects/dictnotes.txt b/Objects/dictnotes.txt
index a38b052..f89720c 100644
--- a/Objects/dictnotes.txt
+++ b/Objects/dictnotes.txt
@@ -70,42 +70,8 @@ A values array
Tunable Dictionary Parameters
-----------------------------
-* PyDict_STARTSIZE. Starting size of dict (unless an instance dict).
- Currently set to 8. Must be a power of two.
- New dicts have to zero-out every cell.
- Increasing improves the sparseness of small dictionaries but costs
- time to read in the additional cache lines if they are not already
- in cache. That case is common when keyword arguments are passed.
- Prior to version 3.3, PyDict_MINSIZE was used as the starting size
- of a new dict.
-
-* PyDict_MINSIZE. Minimum size of a dict.
- Currently set to 4 (to keep instance dicts small).
- Must be a power of two. Prior to version 3.3, PyDict_MINSIZE was
- set to 8.
-
-* USABLE_FRACTION. Maximum dictionary load in PyDict_SetItem.
- Currently set to 2/3. Increasing this ratio makes dictionaries more
- dense resulting in more collisions. Decreasing it improves sparseness
- at the expense of spreading entries over more cache lines and at the
- cost of total memory consumed.
-
-* Growth rate upon hitting maximum load. Currently set to *2.
- Raising this to *4 results in half the number of resizes, less
- effort to resize, better sparseness for some (but not all dict sizes),
- and potentially doubles memory consumption depending on the size of
- the dictionary. Setting to *4 eliminates every other resize step.
-
-* Maximum sparseness (minimum dictionary load). What percentage
- of entries can be unused before the dictionary shrinks to
- free up memory and speed up iteration? (The current CPython
- code does not represent this parameter directly.)
-
-* Shrinkage rate upon exceeding maximum sparseness. The current
- CPython code never even checks sparseness when deleting a
- key. When a new key is added, it resizes based on the number
- of active keys, so that the addition may trigger shrinkage
- rather than growth.
+See the comments for PyDict_MINSIZE_SPLIT, PyDict_MINSIZE_COMBINED,
+USABLE_FRACTION and GROWTH_RATE in Objects/dictobject.c.
Tune-ups should be measured across a broad range of applications and
use cases. A change to any parameter will help in some situations and
diff --git a/Objects/dictobject.c b/Objects/dictobject.c
index 4af5c49..aef8d10 100644
--- a/Objects/dictobject.c
+++ b/Objects/dictobject.c
@@ -279,7 +279,13 @@ PyDict_Fini(void)
#define DK_MASK(dk) (((dk)->dk_size)-1)
#define IS_POWER_OF_2(x) (((x) & (x-1)) == 0)
-/* USABLE_FRACTION must obey the following:
+/* USABLE_FRACTION is the maximum dictionary load.
+ * Currently set to (2n+1)/3. Increasing this ratio makes dictionaries
+ * denser, resulting in more collisions. Decreasing it improves sparseness
+ * at the expense of spreading entries over more cache lines and at the
+ * cost of total memory consumed.
+ *
+ * USABLE_FRACTION must obey the following:
* (0 < USABLE_FRACTION(n) < n) for all n >= 2
*
* USABLE_FRACTION should be very quick to calculate.
@@ -299,6 +305,14 @@ PyDict_Fini(void)
* #define USABLE_FRACTION(n) (((n) >> 1) + ((n) >> 2) - ((n) >> 3))
*/
+/* GROWTH_RATE. Growth rate upon hitting maximum load. Currently set to *2.
+ * Raising this to *4 potentially doubles memory consumption, depending on
+ * the size of the dictionary, but results in half the number of resizes,
+ * less effort to resize, and better sparseness for some (but not all)
+ * dict sizes. Setting to *4 eliminates every other resize step.
+ * GROWTH_RATE was set to *4 up to version 3.2.
+ */
+#define GROWTH_RATE(x) ((x) * 2)
#define ENSURE_ALLOWS_DELETIONS(d) \
if ((d)->ma_keys->dk_lookup == lookdict_unicode_nodummy) { \
@@ -776,13 +790,7 @@ find_empty_slot(PyDictObject *mp, PyObject *key, Py_hash_t hash,
static int
insertion_resize(PyDictObject *mp)
{
- /*
- * Double the size of the dict,
- * Previous versions quadrupled size, but doing so may result in excessive
- * memory use. Doubling keeps the number of resizes low without wasting
- * too much memory.
- */
- return dictresize(mp, 2 * mp->ma_used);
+ return dictresize(mp, GROWTH_RATE(mp->ma_used));
}
/*