summaryrefslogtreecommitdiffstats
path: root/Objects/setobject.c
diff options
context:
space:
mode:
Diffstat (limited to 'Objects/setobject.c')
-rw-r--r--Objects/setobject.c324
1 files changed, 193 insertions, 131 deletions
diff --git a/Objects/setobject.c b/Objects/setobject.c
index ea5a24c..dabcc25 100644
--- a/Objects/setobject.c
+++ b/Objects/setobject.c
@@ -3,7 +3,7 @@
Written and maintained by Raymond D. Hettinger <python@rcn.com>
Derived from Lib/sets.py and Objects/dictobject.c.
- Copyright (c) 2003-2008 Python Software Foundation.
+ Copyright (c) 2003-2013 Python Software Foundation.
All rights reserved.
*/
@@ -29,15 +29,12 @@ set_key_error(PyObject *arg)
#define PERTURB_SHIFT 5
/* Object used as dummy key to fill deleted entries */
-static PyObject *dummy = NULL; /* Initialized by first call to make_new_set() */
+static PyObject _dummy_struct;
-#ifdef Py_REF_DEBUG
-PyObject *
-_PySet_Dummy(void)
-{
- return dummy;
-}
-#endif
+#define dummy (&_dummy_struct)
+
+/* Exported for the gdb plugin's benefit. */
+PyObject *_PySet_Dummy = dummy;
#define INIT_NONZERO_SET_SLOTS(so) do { \
(so)->table = (so)->smalltable; \
@@ -68,6 +65,12 @@ chaining would be substantial (100% with typical malloc overhead).
The initial probe index is computed as hash mod the table size. Subsequent
probe indices are computed as explained in Objects/dictobject.c.
+To improve cache locality, each probe inspects nearby entries before
+moving on to probes elsewhere in memory. Depending on alignment and the
+size of a cache line, the nearby entries are cheaper to inspect than
+other probes elsewhere in memory. This probe strategy reduces the cost
+of hash collisions.
+
All arithmetic on hash should ignore overflow.
Unlike the dictionary implementation, the lookkey functions can return
@@ -75,80 +78,88 @@ NULL if the rich comparison returns an error.
*/
static setentry *
-set_lookkey(PySetObject *so, PyObject *key, register Py_hash_t hash)
+set_lookkey(PySetObject *so, PyObject *key, Py_hash_t hash)
{
- register size_t i; /* Unsigned for defined overflow behavior. */
- register size_t perturb;
- register setentry *freeslot;
- register size_t mask = so->mask;
setentry *table = so->table;
- register setentry *entry;
- register int cmp;
- PyObject *startkey;
+ setentry *freeslot = NULL;
+ setentry *entry;
+ size_t perturb = hash;
+ size_t mask = so->mask;
+ size_t i = (size_t)hash & mask; /* Unsigned for defined overflow behavior. */
+ size_t j = i;
+ int cmp;
- i = (size_t)hash & mask;
entry = &table[i];
- if (entry->key == NULL || entry->key == key)
+ if (entry->key == NULL)
return entry;
- if (entry->key == dummy)
- freeslot = entry;
- else {
- if (entry->hash == hash) {
- startkey = entry->key;
+ while (1) {
+ if (entry->key == key)
+ return entry;
+ if (entry->hash == hash && entry->key != dummy) {
+ PyObject *startkey = entry->key;
Py_INCREF(startkey);
cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
Py_DECREF(startkey);
if (cmp < 0)
return NULL;
- if (table == so->table && entry->key == startkey) {
- if (cmp > 0)
- return entry;
- }
- else {
- /* The compare did major nasty stuff to the
- * set: start over.
- */
+ if (table != so->table || entry->key != startkey)
return set_lookkey(so, key, hash);
- }
+ if (cmp > 0)
+ return entry;
}
- freeslot = NULL;
- }
+ if (entry->key == dummy && freeslot == NULL)
+ freeslot = entry;
- /* In the loop, key == dummy is by far (factor of 100s) the
- least likely outcome, so test for that last. */
- for (perturb = hash; ; perturb >>= PERTURB_SHIFT) {
- i = (i << 2) + i + perturb + 1;
- entry = &table[i & mask];
- if (entry->key == NULL) {
- if (freeslot != NULL)
- entry = freeslot;
+ entry = &table[j ^ 1];
+ if (entry->key == NULL)
break;
- }
if (entry->key == key)
+ return entry;
+ if (entry->hash == hash && entry->key != dummy) {
+ PyObject *startkey = entry->key;
+ Py_INCREF(startkey);
+ cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
+ Py_DECREF(startkey);
+ if (cmp < 0)
+ return NULL;
+ if (table != so->table || entry->key != startkey)
+ return set_lookkey(so, key, hash);
+ if (cmp > 0)
+ return entry;
+ }
+ if (entry->key == dummy && freeslot == NULL)
+ freeslot = entry;
+
+ entry = &table[j ^ 2];
+ if (entry->key == NULL)
break;
+ if (entry->key == key)
+ return entry;
if (entry->hash == hash && entry->key != dummy) {
- startkey = entry->key;
+ PyObject *startkey = entry->key;
Py_INCREF(startkey);
cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
Py_DECREF(startkey);
if (cmp < 0)
return NULL;
- if (table == so->table && entry->key == startkey) {
- if (cmp > 0)
- break;
- }
- else {
- /* The compare did major nasty stuff to the
- * set: start over.
- */
+ if (table != so->table || entry->key != startkey)
return set_lookkey(so, key, hash);
- }
+ if (cmp > 0)
+ return entry;
}
- else if (entry->key == dummy && freeslot == NULL)
+ if (entry->key == dummy && freeslot == NULL)
freeslot = entry;
+
+ i = i * 5 + perturb + 1;
+ j = i & mask;
+ perturb >>= PERTURB_SHIFT;
+
+ entry = &table[j];
+ if (entry->key == NULL)
+ break;
}
- return entry;
+ return freeslot == NULL ? entry : freeslot;
}
/*
@@ -157,14 +168,15 @@ set_lookkey(PySetObject *so, PyObject *key, register Py_hash_t hash)
* see if the comparison altered the table.
*/
static setentry *
-set_lookkey_unicode(PySetObject *so, PyObject *key, register Py_hash_t hash)
+set_lookkey_unicode(PySetObject *so, PyObject *key, Py_hash_t hash)
{
- register size_t i; /* Unsigned for defined overflow behavior. */
- register size_t perturb;
- register setentry *freeslot;
- register size_t mask = so->mask;
setentry *table = so->table;
- register setentry *entry;
+ setentry *freeslot = NULL;
+ setentry *entry;
+ size_t perturb = hash;
+ size_t mask = so->mask;
+ size_t i = (size_t)hash & mask;
+ size_t j = i;
/* Make sure this function doesn't have to handle non-unicode keys,
including subclasses of str; e.g., one reason to subclass
@@ -174,25 +186,23 @@ set_lookkey_unicode(PySetObject *so, PyObject *key, register Py_hash_t hash)
so->lookup = set_lookkey;
return set_lookkey(so, key, hash);
}
- i = (size_t)hash & mask;
+
entry = &table[i];
- if (entry->key == NULL || entry->key == key)
+ if (entry->key == NULL)
return entry;
- if (entry->key == dummy)
- freeslot = entry;
- else {
- if (entry->hash == hash && unicode_eq(entry->key, key))
+
+ while (1) {
+ if (entry->key == key
+ || (entry->hash == hash
+ && entry->key != dummy
+ && unicode_eq(entry->key, key)))
return entry;
- freeslot = NULL;
- }
+ if (entry->key == dummy && freeslot == NULL)
+ freeslot = entry;
- /* In the loop, key == dummy is by far (factor of 100s) the
- least likely outcome, so test for that last. */
- for (perturb = hash; ; perturb >>= PERTURB_SHIFT) {
- i = (i << 2) + i + perturb + 1;
- entry = &table[i & mask];
+ entry = &table[j ^ 1];
if (entry->key == NULL)
- return freeslot == NULL ? entry : freeslot;
+ break;
if (entry->key == key
|| (entry->hash == hash
&& entry->key != dummy
@@ -200,9 +210,27 @@ set_lookkey_unicode(PySetObject *so, PyObject *key, register Py_hash_t hash)
return entry;
if (entry->key == dummy && freeslot == NULL)
freeslot = entry;
+
+ entry = &table[j ^ 2];
+ if (entry->key == NULL)
+ break;
+ if (entry->key == key
+ || (entry->hash == hash
+ && entry->key != dummy
+ && unicode_eq(entry->key, key)))
+ return entry;
+ if (entry->key == dummy && freeslot == NULL)
+ freeslot = entry;
+
+ i = i * 5 + perturb + 1;
+ j = i & mask;
+ perturb >>= PERTURB_SHIFT;
+
+ entry = &table[j];
+ if (entry->key == NULL)
+ break;
}
- assert(0); /* NOT REACHED */
- return 0;
+ return freeslot == NULL ? entry : freeslot;
}
/*
@@ -211,9 +239,9 @@ Used by the public insert routine.
Eats a reference to key.
*/
static int
-set_insert_key(register PySetObject *so, PyObject *key, Py_hash_t hash)
+set_insert_key(PySetObject *so, PyObject *key, Py_hash_t hash)
{
- register setentry *entry;
+ setentry *entry;
assert(so->lookup != NULL);
entry = so->lookup(so, key, hash);
@@ -230,7 +258,6 @@ set_insert_key(register PySetObject *so, PyObject *key, Py_hash_t hash)
entry->key = key;
entry->hash = hash;
so->used++;
- Py_DECREF(dummy);
} else {
/* ACTIVE */
Py_DECREF(key);
@@ -247,19 +274,28 @@ Note that no refcounts are changed by this routine; if needed, the caller
is responsible for incref'ing `key`.
*/
static void
-set_insert_clean(register PySetObject *so, PyObject *key, Py_hash_t hash)
+set_insert_clean(PySetObject *so, PyObject *key, Py_hash_t hash)
{
- register size_t i;
- register size_t perturb;
- register size_t mask = (size_t)so->mask;
setentry *table = so->table;
- register setentry *entry;
+ setentry *entry;
+ size_t perturb = hash;
+ size_t mask = (size_t)so->mask;
+ size_t i, j;
- i = (size_t)hash & mask;
- entry = &table[i];
- for (perturb = hash; entry->key != NULL; perturb >>= PERTURB_SHIFT) {
- i = (i << 2) + i + perturb + 1;
- entry = &table[i & mask];
+ i = j = (size_t)hash & mask;
+ while (1) {
+ entry = &table[j];
+ if (entry->key == NULL)
+ break;
+ entry = &table[j ^ 1];
+ if (entry->key == NULL)
+ break;
+ entry = &table[j ^ 2];
+ if (entry->key == NULL)
+ break;
+ i = i * 5 + perturb + 1;
+ j = i & mask;
+ perturb >>= PERTURB_SHIFT;
}
so->fill++;
entry->key = key;
@@ -330,23 +366,14 @@ set_table_resize(PySetObject *so, Py_ssize_t minused)
so->table = newtable;
so->mask = newsize - 1;
memset(newtable, 0, sizeof(setentry) * newsize);
+ i = so->used;
so->used = 0;
- i = so->fill;
so->fill = 0;
/* Copy the data over; this is refcount-neutral for active entries;
dummy entries aren't copied over, of course */
for (entry = oldtable; i > 0; entry++) {
- if (entry->key == NULL) {
- /* UNUSED */
- ;
- } else if (entry->key == dummy) {
- /* DUMMY */
- --i;
- assert(entry->key == dummy);
- Py_DECREF(entry->key);
- } else {
- /* ACTIVE */
+ if (entry->key != NULL && entry->key != dummy) {
--i;
set_insert_clean(so, entry->key, entry->hash);
}
@@ -360,9 +387,9 @@ set_table_resize(PySetObject *so, Py_ssize_t minused)
/* CAUTION: set_add_key/entry() must guarantee it won't resize the table */
static int
-set_add_entry(register PySetObject *so, setentry *entry)
+set_add_entry(PySetObject *so, setentry *entry)
{
- register Py_ssize_t n_used;
+ Py_ssize_t n_used;
PyObject *key = entry->key;
Py_hash_t hash = entry->hash;
@@ -379,10 +406,10 @@ set_add_entry(register PySetObject *so, setentry *entry)
}
static int
-set_add_key(register PySetObject *so, PyObject *key)
+set_add_key(PySetObject *so, PyObject *key)
{
- register Py_hash_t hash;
- register Py_ssize_t n_used;
+ Py_hash_t hash;
+ Py_ssize_t n_used;
if (!PyUnicode_CheckExact(key) ||
(hash = ((PyASCIIObject *) key)->hash) == -1) {
@@ -407,7 +434,7 @@ set_add_key(register PySetObject *so, PyObject *key)
static int
set_discard_entry(PySetObject *so, setentry *oldentry)
-{ register setentry *entry;
+{ setentry *entry;
PyObject *old_key;
entry = (so->lookup)(so, oldentry->key, oldentry->hash);
@@ -416,7 +443,6 @@ set_discard_entry(PySetObject *so, setentry *oldentry)
if (entry->key == NULL || entry->key == dummy)
return DISCARD_NOTFOUND;
old_key = entry->key;
- Py_INCREF(dummy);
entry->key = dummy;
so->used--;
Py_DECREF(old_key);
@@ -426,8 +452,8 @@ set_discard_entry(PySetObject *so, setentry *oldentry)
static int
set_discard_key(PySetObject *so, PyObject *key)
{
- register Py_hash_t hash;
- register setentry *entry;
+ Py_hash_t hash;
+ setentry *entry;
PyObject *old_key;
assert (PyAnySet_Check(so));
@@ -444,7 +470,6 @@ set_discard_key(PySetObject *so, PyObject *key)
if (entry->key == NULL || entry->key == dummy)
return DISCARD_NOTFOUND;
old_key = entry->key;
- Py_INCREF(dummy);
entry->key = dummy;
so->used--;
Py_DECREF(old_key);
@@ -502,7 +527,8 @@ set_clear_internal(PySetObject *so)
#endif
if (entry->key) {
--fill;
- Py_DECREF(entry->key);
+ if (entry->key != dummy)
+ Py_DECREF(entry->key);
}
#ifdef Py_DEBUG
else
@@ -533,7 +559,7 @@ set_next(PySetObject *so, Py_ssize_t *pos_ptr, setentry **entry_ptr)
{
Py_ssize_t i;
Py_ssize_t mask;
- register setentry *table;
+ setentry *table;
assert (PyAnySet_Check(so));
i = *pos_ptr;
@@ -553,7 +579,7 @@ set_next(PySetObject *so, Py_ssize_t *pos_ptr, setentry **entry_ptr)
static void
set_dealloc(PySetObject *so)
{
- register setentry *entry;
+ setentry *entry;
Py_ssize_t fill = so->fill;
PyObject_GC_UnTrack(so);
Py_TRASHCAN_SAFE_BEGIN(so)
@@ -563,7 +589,8 @@ set_dealloc(PySetObject *so)
for (entry = so->table; fill > 0; entry++) {
if (entry->key) {
--fill;
- Py_DECREF(entry->key);
+ if (entry->key != dummy)
+ Py_DECREF(entry->key);
}
}
if (so->table != so->smalltable)
@@ -632,8 +659,8 @@ set_merge(PySetObject *so, PyObject *otherset)
PySetObject *other;
PyObject *key;
Py_hash_t hash;
- register Py_ssize_t i;
- register setentry *entry;
+ Py_ssize_t i;
+ setentry *entry;
assert (PyAnySet_Check(so));
assert (PyAnySet_Check(otherset));
@@ -701,8 +728,8 @@ set_contains_entry(PySetObject *so, setentry *entry)
static PyObject *
set_pop(PySetObject *so)
{
- register Py_ssize_t i = 0;
- register setentry *entry;
+ Py_ssize_t i = 0;
+ setentry *entry;
PyObject *key;
assert (PyAnySet_Check(so));
@@ -734,7 +761,6 @@ set_pop(PySetObject *so)
}
}
key = entry->key;
- Py_INCREF(dummy);
entry->key = dummy;
so->used--;
so->table[0].hash = i + 1; /* next place to start */
@@ -869,8 +895,8 @@ static PyMethodDef setiter_methods[] = {
static PyObject *setiter_iternext(setiterobject *si)
{
PyObject *key;
- register Py_ssize_t i, mask;
- register setentry *entry;
+ Py_ssize_t i, mask;
+ setentry *entry;
PySetObject *so = si->si_set;
if (so == NULL)
@@ -1024,13 +1050,7 @@ PyDoc_STRVAR(update_doc,
static PyObject *
make_new_set(PyTypeObject *type, PyObject *iterable)
{
- register PySetObject *so = NULL;
-
- if (dummy == NULL) { /* Auto-initialize dummy */
- dummy = PyUnicode_FromString("<dummy key>");
- if (dummy == NULL)
- return NULL;
- }
+ PySetObject *so = NULL;
/* create PySetObject structure */
if (numfree &&
@@ -1128,7 +1148,6 @@ void
PySet_Fini(void)
{
PySet_ClearFreeList();
- Py_CLEAR(dummy);
Py_CLEAR(emptyfrozenset);
}
@@ -2537,3 +2556,46 @@ test_c_api(PySetObject *so)
#undef assertRaises
#endif
+
+/***** Dummy Struct *************************************************/
+
+static PyObject *
+dummy_repr(PyObject *op)
+{
+ return PyUnicode_FromString("<dummy key>");
+}
+
+static void
+dummy_dealloc(PyObject* ignore)
+{
+ Py_FatalError("deallocating <dummy key>");
+}
+
+static PyTypeObject _PySetDummy_Type = {
+ PyVarObject_HEAD_INIT(&PyType_Type, 0)
+ "<dummy key> type",
+ 0,
+ 0,
+ dummy_dealloc, /*tp_dealloc*/ /*never called*/
+ 0, /*tp_print*/
+ 0, /*tp_getattr*/
+ 0, /*tp_setattr*/
+ 0, /*tp_reserved*/
+ dummy_repr, /*tp_repr*/
+ 0, /*tp_as_number*/
+ 0, /*tp_as_sequence*/
+ 0, /*tp_as_mapping*/
+ 0, /*tp_hash */
+ 0, /*tp_call */
+ 0, /*tp_str */
+ 0, /*tp_getattro */
+ 0, /*tp_setattro */
+ 0, /*tp_as_buffer */
+ Py_TPFLAGS_DEFAULT, /*tp_flags */
+};
+
+static PyObject _dummy_struct = {
+ _PyObject_EXTRA_INIT
+ 2, &_PySetDummy_Type
+};
+