summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTim Peters <tim.peters@gmail.com>2001-05-13 06:43:53 (GMT)
committerTim Peters <tim.peters@gmail.com>2001-05-13 06:43:53 (GMT)
commit342c65e19ac0cc47bf2b21026c76e63440b23748 (patch)
tree55dc4a4a5302dbd46e54f0e17db376d028653aa3
parent2f228e75e4d5ac8c3eb4a6334dbc43243bff1095 (diff)
downloadcpython-342c65e19ac0cc47bf2b21026c76e63440b23748.zip
cpython-342c65e19ac0cc47bf2b21026c76e63440b23748.tar.gz
cpython-342c65e19ac0cc47bf2b21026c76e63440b23748.tar.bz2
Aggressive reordering of dict comparisons. In case of collision, it stands
to reason that me_key is much more likely to match the key we're looking for than to match dummy, and if the key is absent me_key is much more likely to be NULL than dummy: most dicts don't even have a dummy entry. Running instrumented dict code over the test suite and some apps confirmed that matching dummy was 200-300x less frequent than matching key in practice. So this reorders the tests to try the common case first. It can lose if a large dict with many collisions is mostly deleted, not resized, and then frequently searched, but that's hardly a case we should be favoring.
-rw-r--r--Objects/dictobject.c51
1 files changed, 21 insertions, 30 deletions
diff --git a/Objects/dictobject.c b/Objects/dictobject.c
index 65c09d6..b0121ed 100644
--- a/Objects/dictobject.c
+++ b/Objects/dictobject.c
@@ -219,26 +219,21 @@ lookdict(dictobject *mp, PyObject *key, register long hash)
incr = (hash ^ ((unsigned long)hash >> 3)) & mask;
if (!incr)
incr = mask;
+ /* In the loop, me_key == dummy is by far (factor of 100s) the
+ least likely outcome, so test for that last. */
for (;;) {
ep = &ep0[(i+incr)&mask];
if (ep->me_key == NULL) {
if (restore_error)
PyErr_Restore(err_type, err_value, err_tb);
- if (freeslot != NULL)
- return freeslot;
- else
- return ep;
- }
- if (ep->me_key == dummy) {
- if (freeslot == NULL)
- freeslot = ep;
+ return freeslot == NULL ? ep : freeslot;
}
- else if (ep->me_key == key) {
+ if (ep->me_key == key) {
if (restore_error)
PyErr_Restore(err_type, err_value, err_tb);
return ep;
}
- else if (ep->me_hash == hash) {
+ else if (ep->me_hash == hash && ep->me_key != dummy) {
if (!checked_error) {
checked_error = 1;
if (PyErr_Occurred()) {
@@ -257,11 +252,12 @@ lookdict(dictobject *mp, PyObject *key, register long hash)
else if (cmp < 0)
PyErr_Clear();
}
+ else if (ep->me_key == dummy && freeslot == NULL)
+ freeslot = ep;
/* Cycle through GF(2^n)-{0} */
- incr = incr << 1;
+ incr <<= 1;
if (incr > mask)
- incr ^= mp->ma_poly; /* This will implicitly clear
- the highest bit */
+ incr ^= mp->ma_poly; /* clears the highest bit */
}
}
@@ -314,28 +310,23 @@ lookdict_string(dictobject *mp, PyObject *key, register long hash)
incr = (hash ^ ((unsigned long)hash >> 3)) & mask;
if (!incr)
incr = mask;
+ /* In the loop, me_key == dummy is by far (factor of 100s) the
+ least likely outcome, so test for that last. */
for (;;) {
ep = &ep0[(i+incr)&mask];
- if (ep->me_key == NULL) {
- if (freeslot != NULL)
- return freeslot;
- else
- return ep;
- }
- if (ep->me_key == dummy) {
- if (freeslot == NULL)
- freeslot = ep;
- }
- else if (ep->me_key == key
- || (ep->me_hash == hash
- && compare(ep->me_key, key) == 0)) {
+ if (ep->me_key == NULL)
+ return freeslot == NULL ? ep : freeslot;
+ if (ep->me_key == key
+ || (ep->me_hash == hash
+ && ep->me_key != dummy
+ && compare(ep->me_key, key) == 0))
return ep;
- }
+ else if (ep->me_key == dummy && freeslot == NULL)
+ freeslot = ep;
/* Cycle through GF(2^n)-{0} */
- incr = incr << 1;
+ incr <<= 1;
if (incr > mask)
- incr ^= mp->ma_poly; /* This will implicitly clear
- the highest bit */
+ incr ^= mp->ma_poly; /* clears the highest bit */
}
}