diff options
author | Raymond Hettinger <python@rcn.com> | 2013-09-15 21:57:15 (GMT) |
---|---|---|
committer | Raymond Hettinger <python@rcn.com> | 2013-09-15 21:57:15 (GMT) |
commit | 8408dc581e2baaa306b57f14486cfa013fd68c68 (patch) | |
tree | da3f4c127dbdd665422f11b5ed1c661f3b8dabca | |
parent | e6d35dba3cee44aa2fc64b7866f0b3327151a7c4 (diff) | |
download | cpython-8408dc581e2baaa306b57f14486cfa013fd68c68.zip cpython-8408dc581e2baaa306b57f14486cfa013fd68c68.tar.gz cpython-8408dc581e2baaa306b57f14486cfa013fd68c68.tar.bz2 |
Issue 18771: Make it possible to set the number linear probes at compile-time.
-rw-r--r-- | Doc/whatsnew/3.4.rst | 11 | ||||
-rw-r--r-- | Objects/setobject.c | 24 |
2 files changed, 28 insertions, 7 deletions
diff --git a/Doc/whatsnew/3.4.rst b/Doc/whatsnew/3.4.rst index 0749f03..e91ce2a 100644 --- a/Doc/whatsnew/3.4.rst +++ b/Doc/whatsnew/3.4.rst @@ -444,8 +444,15 @@ Major performance enhancements have been added: * The UTF-32 decoder is now 3x to 4x faster. * The cost of hash collisions for sets is now reduced. Each hash table - probe now checks a second key/hash pair for each cache line retrieved. - This exploits cache locality to make collision resolution less expensive. + probe now checks a series of consecutive, adjacent key/hash pairs before + continuing to make random probes through the hash table. This exploits + cache locality to make collision resolution less expensive. + + The collision resolution scheme can be described as a hybrid of linear + probing and open addressing. The number of additional linear probes + defaults to nine. This can be changed at compile-time by defining + LINEAR_PROBES to be any value. Set LINEAR_PROBES=0 to turn-off + linear probing entirely. (Contributed by Raymond Hettinger in :issue"`18771`.) diff --git a/Objects/setobject.c b/Objects/setobject.c index 23d624f..ece76bf 100644 --- a/Objects/setobject.c +++ b/Objects/setobject.c @@ -44,7 +44,9 @@ PyObject *_PySet_Dummy = dummy; /* ======= Begin logic for probing the hash table ========================= */ /* This should be >= PySet_MINSIZE - 1 */ +#ifndef LINEAR_PROBES #define LINEAR_PROBES 9 +#endif /* This must be >= 1 */ #define PERTURB_SHIFT 5 @@ -55,12 +57,14 @@ set_lookkey(PySetObject *so, PyObject *key, Py_hash_t hash) setentry *table = so->table; setentry *freeslot = NULL; setentry *entry; - setentry *limit; size_t perturb = hash; size_t mask = so->mask; size_t i = (size_t)hash; /* Unsigned for defined overflow behavior. */ - size_t j; int cmp; +#if LINEAR_PROBES + setentry *limit; + size_t j; +#endif entry = &table[i & mask]; if (entry->key == NULL) @@ -84,6 +88,7 @@ set_lookkey(PySetObject *so, PyObject *key, Py_hash_t hash) if (entry->key == dummy && freeslot == NULL) freeslot = entry; +#if LINEAR_PROBES limit = &table[mask]; for (j = 0 ; j < LINEAR_PROBES ; j++) { entry = (entry == limit) ? &table[0] : entry + 1; @@ -106,13 +111,14 @@ set_lookkey(PySetObject *so, PyObject *key, Py_hash_t hash) if (entry->key == dummy && freeslot == NULL) freeslot = entry; } +#endif perturb >>= PERTURB_SHIFT; i = i * 5 + 1 + perturb; entry = &table[i & mask]; if (entry->key == NULL) - break; + goto found_null; } found_null: return freeslot == NULL ? entry : freeslot; @@ -129,11 +135,13 @@ set_lookkey_unicode(PySetObject *so, PyObject *key, Py_hash_t hash) setentry *table = so->table; setentry *freeslot = NULL; setentry *entry; - setentry *limit; size_t perturb = hash; size_t mask = so->mask; size_t i = (size_t)hash; +#if LINEAR_PROBES + setentry *limit; size_t j; +#endif /* Make sure this function doesn't have to handle non-unicode keys, including subclasses of str; e.g., one reason to subclass @@ -157,6 +165,7 @@ set_lookkey_unicode(PySetObject *so, PyObject *key, Py_hash_t hash) if (entry->key == dummy && freeslot == NULL) freeslot = entry; +#if LINEAR_PROBES limit = &table[mask]; for (j = 0 ; j < LINEAR_PROBES ; j++) { entry = (entry == limit) ? &table[0] : entry + 1; @@ -170,13 +179,14 @@ set_lookkey_unicode(PySetObject *so, PyObject *key, Py_hash_t hash) if (entry->key == dummy && freeslot == NULL) freeslot = entry; } +#endif perturb >>= PERTURB_SHIFT; i = i * 5 + 1 + perturb; entry = &table[i & mask]; if (entry->key == NULL) - break; + goto found_null; } found_null: return freeslot == NULL ? entry : freeslot; @@ -198,17 +208,21 @@ set_insert_clean(PySetObject *so, PyObject *key, Py_hash_t hash) size_t perturb = hash; size_t mask = (size_t)so->mask; size_t i = (size_t)hash; +#if LINEAR_PROBES size_t j; +#endif while (1) { entry = &table[i & mask]; if (entry->key == NULL) goto found_null; +#if LINEAR_PROBES for (j = 1 ; j <= LINEAR_PROBES ; j++) { entry = &table[(i + j) & mask]; if (entry->key == NULL) goto found_null; } +#endif perturb >>= PERTURB_SHIFT; i = i * 5 + 1 + perturb; } |