From 342c65e19ac0cc47bf2b21026c76e63440b23748 Mon Sep 17 00:00:00 2001 From: Tim Peters Date: Sun, 13 May 2001 06:43:53 +0000 Subject: Aggressive reordering of dict comparisons. In case of collision, it stands to reason that me_key is much more likely to match the key we're looking for than to match dummy, and if the key is absent me_key is much more likely to be NULL than dummy: most dicts don't even have a dummy entry. Running instrumented dict code over the test suite and some apps confirmed that matching dummy was 200-300x less frequent than matching key in practice. So this reorders the tests to try the common case first. It can lose if a large dict with many collisions is mostly deleted, not resized, and then frequently searched, but that's hardly a case we should be favoring. --- Objects/dictobject.c | 51 +++++++++++++++++++++------------------------------ 1 file changed, 21 insertions(+), 30 deletions(-) diff --git a/Objects/dictobject.c b/Objects/dictobject.c index 65c09d6..b0121ed 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -219,26 +219,21 @@ lookdict(dictobject *mp, PyObject *key, register long hash) incr = (hash ^ ((unsigned long)hash >> 3)) & mask; if (!incr) incr = mask; + /* In the loop, me_key == dummy is by far (factor of 100s) the + least likely outcome, so test for that last. */ for (;;) { ep = &ep0[(i+incr)&mask]; if (ep->me_key == NULL) { if (restore_error) PyErr_Restore(err_type, err_value, err_tb); - if (freeslot != NULL) - return freeslot; - else - return ep; - } - if (ep->me_key == dummy) { - if (freeslot == NULL) - freeslot = ep; + return freeslot == NULL ? ep : freeslot; } - else if (ep->me_key == key) { + if (ep->me_key == key) { if (restore_error) PyErr_Restore(err_type, err_value, err_tb); return ep; } - else if (ep->me_hash == hash) { + else if (ep->me_hash == hash && ep->me_key != dummy) { if (!checked_error) { checked_error = 1; if (PyErr_Occurred()) { @@ -257,11 +252,12 @@ lookdict(dictobject *mp, PyObject *key, register long hash) else if (cmp < 0) PyErr_Clear(); } + else if (ep->me_key == dummy && freeslot == NULL) + freeslot = ep; /* Cycle through GF(2^n)-{0} */ - incr = incr << 1; + incr <<= 1; if (incr > mask) - incr ^= mp->ma_poly; /* This will implicitly clear - the highest bit */ + incr ^= mp->ma_poly; /* clears the highest bit */ } } @@ -314,28 +310,23 @@ lookdict_string(dictobject *mp, PyObject *key, register long hash) incr = (hash ^ ((unsigned long)hash >> 3)) & mask; if (!incr) incr = mask; + /* In the loop, me_key == dummy is by far (factor of 100s) the + least likely outcome, so test for that last. */ for (;;) { ep = &ep0[(i+incr)&mask]; - if (ep->me_key == NULL) { - if (freeslot != NULL) - return freeslot; - else - return ep; - } - if (ep->me_key == dummy) { - if (freeslot == NULL) - freeslot = ep; - } - else if (ep->me_key == key - || (ep->me_hash == hash - && compare(ep->me_key, key) == 0)) { + if (ep->me_key == NULL) + return freeslot == NULL ? ep : freeslot; + if (ep->me_key == key + || (ep->me_hash == hash + && ep->me_key != dummy + && compare(ep->me_key, key) == 0)) return ep; - } + else if (ep->me_key == dummy && freeslot == NULL) + freeslot = ep; /* Cycle through GF(2^n)-{0} */ - incr = incr << 1; + incr <<= 1; if (incr > mask) - incr ^= mp->ma_poly; /* This will implicitly clear - the highest bit */ + incr ^= mp->ma_poly; /* clears the highest bit */ } } -- cgit v0.12