diff options
author | Tim Peters <tim.peters@gmail.com> | 2001-05-13 06:43:53 (GMT) |
---|---|---|
committer | Tim Peters <tim.peters@gmail.com> | 2001-05-13 06:43:53 (GMT) |
commit | 342c65e19ac0cc47bf2b21026c76e63440b23748 (patch) | |
tree | 55dc4a4a5302dbd46e54f0e17db376d028653aa3 | |
parent | 2f228e75e4d5ac8c3eb4a6334dbc43243bff1095 (diff) | |
download | cpython-342c65e19ac0cc47bf2b21026c76e63440b23748.zip cpython-342c65e19ac0cc47bf2b21026c76e63440b23748.tar.gz cpython-342c65e19ac0cc47bf2b21026c76e63440b23748.tar.bz2 |
Aggressive reordering of dict comparisons. In case of collision, it stands
to reason that me_key is much more likely to match the key we're looking
for than to match dummy, and if the key is absent me_key is much more
likely to be NULL than dummy: most dicts don't even have a dummy entry.
Running instrumented dict code over the test suite and some apps confirmed
that matching dummy was 200-300x less frequent than matching key in
practice. So this reorders the tests to try the common case first.
It can lose if a large dict with many collisions is mostly deleted, not
resized, and then frequently searched, but that's hardly a case we
should be favoring.
-rw-r--r-- | Objects/dictobject.c | 51 |
1 files changed, 21 insertions, 30 deletions
diff --git a/Objects/dictobject.c b/Objects/dictobject.c index 65c09d6..b0121ed 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -219,26 +219,21 @@ lookdict(dictobject *mp, PyObject *key, register long hash) incr = (hash ^ ((unsigned long)hash >> 3)) & mask; if (!incr) incr = mask; + /* In the loop, me_key == dummy is by far (factor of 100s) the + least likely outcome, so test for that last. */ for (;;) { ep = &ep0[(i+incr)&mask]; if (ep->me_key == NULL) { if (restore_error) PyErr_Restore(err_type, err_value, err_tb); - if (freeslot != NULL) - return freeslot; - else - return ep; - } - if (ep->me_key == dummy) { - if (freeslot == NULL) - freeslot = ep; + return freeslot == NULL ? ep : freeslot; } - else if (ep->me_key == key) { + if (ep->me_key == key) { if (restore_error) PyErr_Restore(err_type, err_value, err_tb); return ep; } - else if (ep->me_hash == hash) { + else if (ep->me_hash == hash && ep->me_key != dummy) { if (!checked_error) { checked_error = 1; if (PyErr_Occurred()) { @@ -257,11 +252,12 @@ lookdict(dictobject *mp, PyObject *key, register long hash) else if (cmp < 0) PyErr_Clear(); } + else if (ep->me_key == dummy && freeslot == NULL) + freeslot = ep; /* Cycle through GF(2^n)-{0} */ - incr = incr << 1; + incr <<= 1; if (incr > mask) - incr ^= mp->ma_poly; /* This will implicitly clear - the highest bit */ + incr ^= mp->ma_poly; /* clears the highest bit */ } } @@ -314,28 +310,23 @@ lookdict_string(dictobject *mp, PyObject *key, register long hash) incr = (hash ^ ((unsigned long)hash >> 3)) & mask; if (!incr) incr = mask; + /* In the loop, me_key == dummy is by far (factor of 100s) the + least likely outcome, so test for that last. */ for (;;) { ep = &ep0[(i+incr)&mask]; - if (ep->me_key == NULL) { - if (freeslot != NULL) - return freeslot; - else - return ep; - } - if (ep->me_key == dummy) { - if (freeslot == NULL) - freeslot = ep; - } - else if (ep->me_key == key - || (ep->me_hash == hash - && compare(ep->me_key, key) == 0)) { + if (ep->me_key == NULL) + return freeslot == NULL ? ep : freeslot; + if (ep->me_key == key + || (ep->me_hash == hash + && ep->me_key != dummy + && compare(ep->me_key, key) == 0)) return ep; - } + else if (ep->me_key == dummy && freeslot == NULL) + freeslot = ep; /* Cycle through GF(2^n)-{0} */ - incr = incr << 1; + incr <<= 1; if (incr > mask) - incr ^= mp->ma_poly; /* This will implicitly clear - the highest bit */ + incr ^= mp->ma_poly; /* clears the highest bit */ } } |