summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Misc/NEWS8
-rw-r--r--Objects/dictobject.c90
2 files changed, 80 insertions, 18 deletions
diff --git a/Misc/NEWS b/Misc/NEWS
index 3a15837..b960117 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -116,6 +116,14 @@ Core
to crash if the element comparison routines for the dict keys and/or
values mutated the dicts. Making the code bulletproof slowed it down.
+- Collisions in dicts now use polynomial division instead of multiplication
+ to generate the probe sequence, following an idea of Christian Tismer's.
+ This allows all bits of the hash code to come into play. It should have
+ little or no effect on speed in ordinary cases, but can help dramatically
+ in bad cases. For example, looking up every key in a dict d with
+ d.keys() = [i << 16 for i in range(20000)] is approximately 500x faster
+ now.
+
Library
- calendar.py uses month and day names based on the current locale.
diff --git a/Objects/dictobject.c b/Objects/dictobject.c
index d5700c9..5944b6e 100644
--- a/Objects/dictobject.c
+++ b/Objects/dictobject.c
@@ -31,6 +31,58 @@ k a second time. Theory can be used to find such polys efficiently, but the
operational defn. of "works" is sufficient to find them in reasonable time
via brute force program (hint: any poly that has an even number of 1 bits
cannot work; ditto any poly with low bit 0; exploit those).
+
+Some major subtleties: Most hash schemes depend on having a "good" hash
+function, in the sense of simulating randomness. Python doesn't: some of
+its hash functions are trivial, such as hash(i) == i for ints i (excepting
+i == -1, because -1 is the "error occurred" return value from tp_hash).
+
+This isn't necessarily bad! To the contrary, that our hash tables are powers
+of 2 in size, and that we take the low-order bits as the initial table index,
+means that there are no collisions at all for dicts indexed by a contiguous
+range of ints. This is "better than random" behavior, and that's very
+desirable.
+
+On the other hand, when collisions occur, the tendency to fill contiguous
+slices of the hash table makes a good collision resolution strategy crucial;
+e.g., linear probing is right out.
+
+Reimer Behrends contributed the idea of using a polynomial-based approach,
+using repeated multiplication by x in GF(2**n) where a polynomial is chosen
+such that x is a primitive root. This visits every table location exactly
+once, and the sequence of locations probed is highly non-linear.
+
+The same is also largely true of quadratic probing for power-of-2 tables, of
+the specific
+
+ (i + comb(1, 2)) mod size
+ (i + comb(2, 2)) mod size
+ (i + comb(3, 2)) mod size
+ (i + comb(4, 2)) mod size
+ ...
+ (i + comb(j, 2)) mod size
+
+flavor. The polynomial approach "scrambles" the probe indices better, but
+more importantly allows to get *some* additional bits of the hash code into
+play via computing the initial increment, thus giving a weak form of double
+hashing. Quadratic probing cannot be extended that way (the first probe
+offset must be 1, the second 3, the third 6, etc).
+
+Christian Tismer later contributed the idea of using polynomial division
+instead of multiplication. The problem is that the multiplicative method
+can't get *all* the bits of the hash code into play without expensive
+computations that slow down the initial index and/or initial increment
+computation. For a set of keys like [i << 16 for i in range(20000)], under
+the multiplicative method the initial index and increment were the same for
+all keys, so every key followed exactly the same probe sequence, and so
+this degenerated into a (very slow) linear search. The division method uses
+all the bits of the hash code naturally in the increment, although it *may*
+visit locations more than once until such time as all the high bits of the
+increment have been shifted away. It's also impossible to tell in advance
+whether incr is congruent to 0 modulo poly, so each iteration of the loop has
+to guard against incr becoming 0. These are minor costs, as we usually don't
+get into the probe loop, and when we do we usually get out on its first
+iteration.
*/
static long polys[] = {
@@ -204,7 +256,7 @@ static dictentry *
lookdict(dictobject *mp, PyObject *key, register long hash)
{
register int i;
- register unsigned incr;
+ register unsigned int incr;
register dictentry *freeslot;
register unsigned int mask = mp->ma_size-1;
dictentry *ep0 = mp->ma_table;
@@ -244,13 +296,14 @@ lookdict(dictobject *mp, PyObject *key, register long hash)
}
/* Derive incr from hash, just to make it more arbitrary. Note that
incr must not be 0, or we will get into an infinite loop.*/
- incr = (hash ^ ((unsigned long)hash >> 3)) & mask;
- if (!incr)
- incr = mask;
+ incr = hash ^ ((unsigned long)hash >> 3);
+
/* In the loop, me_key == dummy is by far (factor of 100s) the
least likely outcome, so test for that last. */
for (;;) {
- ep = &ep0[(i+incr)&mask];
+ if (!incr)
+ incr = 1; /* and incr will never be 0 again */
+ ep = &ep0[(i + incr) & mask];
if (ep->me_key == NULL) {
if (restore_error)
PyErr_Restore(err_type, err_value, err_tb);
@@ -282,10 +335,10 @@ lookdict(dictobject *mp, PyObject *key, register long hash)
}
else if (ep->me_key == dummy && freeslot == NULL)
freeslot = ep;
- /* Cycle through GF(2^n)-{0} */
- incr <<= 1;
- if (incr > mask)
- incr ^= mp->ma_poly; /* clears the highest bit */
+ /* Cycle through GF(2**n). */
+ if (incr & 1)
+ incr ^= mp->ma_poly; /* clears the lowest bit */
+ incr >>= 1;
}
}
@@ -303,7 +356,7 @@ static dictentry *
lookdict_string(dictobject *mp, PyObject *key, register long hash)
{
register int i;
- register unsigned incr;
+ register unsigned int incr;
register dictentry *freeslot;
register unsigned int mask = mp->ma_size-1;
dictentry *ep0 = mp->ma_table;
@@ -334,13 +387,14 @@ lookdict_string(dictobject *mp, PyObject *key, register long hash)
}
/* Derive incr from hash, just to make it more arbitrary. Note that
incr must not be 0, or we will get into an infinite loop.*/
- incr = (hash ^ ((unsigned long)hash >> 3)) & mask;
- if (!incr)
- incr = mask;
+ incr = hash ^ ((unsigned long)hash >> 3);
+
/* In the loop, me_key == dummy is by far (factor of 100s) the
least likely outcome, so test for that last. */
for (;;) {
- ep = &ep0[(i+incr)&mask];
+ if (!incr)
+ incr = 1; /* and incr will never be 0 again */
+ ep = &ep0[(i + incr) & mask];
if (ep->me_key == NULL)
return freeslot == NULL ? ep : freeslot;
if (ep->me_key == key
@@ -350,10 +404,10 @@ lookdict_string(dictobject *mp, PyObject *key, register long hash)
return ep;
if (ep->me_key == dummy && freeslot == NULL)
freeslot = ep;
- /* Cycle through GF(2^n)-{0} */
- incr <<= 1;
- if (incr > mask)
- incr ^= mp->ma_poly; /* clears the highest bit */
+ /* Cycle through GF(2**n). */
+ if (incr & 1)
+ incr ^= mp->ma_poly; /* clears the lowest bit */
+ incr >>= 1;
}
}