diff options
-rw-r--r-- | Objects/dictnotes.txt | 38 | ||||
-rw-r--r-- | Objects/dictobject.c | 24 |
2 files changed, 18 insertions, 44 deletions
diff --git a/Objects/dictnotes.txt b/Objects/dictnotes.txt index a38b052..f89720c 100644 --- a/Objects/dictnotes.txt +++ b/Objects/dictnotes.txt @@ -70,42 +70,8 @@ A values array Tunable Dictionary Parameters ----------------------------- -* PyDict_STARTSIZE. Starting size of dict (unless an instance dict). - Currently set to 8. Must be a power of two. - New dicts have to zero-out every cell. - Increasing improves the sparseness of small dictionaries but costs - time to read in the additional cache lines if they are not already - in cache. That case is common when keyword arguments are passed. - Prior to version 3.3, PyDict_MINSIZE was used as the starting size - of a new dict. - -* PyDict_MINSIZE. Minimum size of a dict. - Currently set to 4 (to keep instance dicts small). - Must be a power of two. Prior to version 3.3, PyDict_MINSIZE was - set to 8. - -* USABLE_FRACTION. Maximum dictionary load in PyDict_SetItem. - Currently set to 2/3. Increasing this ratio makes dictionaries more - dense resulting in more collisions. Decreasing it improves sparseness - at the expense of spreading entries over more cache lines and at the - cost of total memory consumed. - -* Growth rate upon hitting maximum load. Currently set to *2. - Raising this to *4 results in half the number of resizes, less - effort to resize, better sparseness for some (but not all dict sizes), - and potentially doubles memory consumption depending on the size of - the dictionary. Setting to *4 eliminates every other resize step. - -* Maximum sparseness (minimum dictionary load). What percentage - of entries can be unused before the dictionary shrinks to - free up memory and speed up iteration? (The current CPython - code does not represent this parameter directly.) - -* Shrinkage rate upon exceeding maximum sparseness. The current - CPython code never even checks sparseness when deleting a - key. When a new key is added, it resizes based on the number - of active keys, so that the addition may trigger shrinkage - rather than growth. +See comments for PyDict_MINSIZE_SPLIT, PyDict_MINSIZE_COMBINED, +USABLE_FRACTION and GROWTH_RATE in dictobject.c Tune-ups should be measured across a broad range of applications and use cases. A change to any parameter will help in some situations and diff --git a/Objects/dictobject.c b/Objects/dictobject.c index 4af5c49..aef8d10 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -279,7 +279,13 @@ PyDict_Fini(void) #define DK_MASK(dk) (((dk)->dk_size)-1) #define IS_POWER_OF_2(x) (((x) & (x-1)) == 0) -/* USABLE_FRACTION must obey the following: +/* USABLE_FRACTION is the maximum dictionary load. + * Currently set to (2n+1)/3. Increasing this ratio makes dictionaries more + * dense resulting in more collisions. Decreasing it improves sparseness + * at the expense of spreading entries over more cache lines and at the + * cost of total memory consumed. + * + * USABLE_FRACTION must obey the following: * (0 < USABLE_FRACTION(n) < n) for all n >= 2 * * USABLE_FRACTION should be very quick to calculate. @@ -299,6 +305,14 @@ PyDict_Fini(void) * #define USABLE_FRACTION(n) (((n) >> 1) + ((n) >> 2) - ((n) >> 3)) */ +/* GROWTH_RATE. Growth rate upon hitting maximum load. Currently set to *2. + * Raising this to *4 doubles memory consumption depending on the size of + * the dictionary, but results in half the number of resizes, less effort to + * resize and better sparseness for some (but not all dict sizes). + * Setting to *4 eliminates every other resize step. + * GROWTH_RATE was set to *4 up to version 3.2. + */ +#define GROWTH_RATE(x) ((x) * 2) #define ENSURE_ALLOWS_DELETIONS(d) \ if ((d)->ma_keys->dk_lookup == lookdict_unicode_nodummy) { \ @@ -776,13 +790,7 @@ find_empty_slot(PyDictObject *mp, PyObject *key, Py_hash_t hash, static int insertion_resize(PyDictObject *mp) { - /* - * Double the size of the dict, - * Previous versions quadrupled size, but doing so may result in excessive - * memory use. Doubling keeps the number of resizes low without wasting - * too much memory. - */ - return dictresize(mp, 2 * mp->ma_used); + return dictresize(mp, GROWTH_RATE(mp->ma_used)); } /* |