diff options
Diffstat (limited to 'Objects/setobject.c')
-rw-r--r-- | Objects/setobject.c | 615 |
1 files changed, 337 insertions, 278 deletions
diff --git a/Objects/setobject.c b/Objects/setobject.c index 4ef692d..6dd403f 100644 --- a/Objects/setobject.c +++ b/Objects/setobject.c @@ -26,7 +26,6 @@ #include "Python.h" #include "structmember.h" -#include "stringlib/eq.h" /* Object used as dummy key to fill deleted entries */ static PyObject _dummy_struct; @@ -48,19 +47,20 @@ static PyObject _dummy_struct; static setentry * set_lookkey(PySetObject *so, PyObject *key, Py_hash_t hash) { - setentry *table = so->table; - setentry *freeslot = NULL; + setentry *table; setentry *entry; - size_t perturb = hash; + size_t perturb; size_t mask = so->mask; size_t i = (size_t)hash & mask; /* Unsigned for defined overflow behavior */ size_t j; int cmp; - entry = &table[i]; + entry = &so->table[i]; if (entry->key == NULL) return entry; + perturb = hash; + while (1) { if (entry->hash == hash) { PyObject *startkey = entry->key; @@ -70,8 +70,9 @@ set_lookkey(PySetObject *so, PyObject *key, Py_hash_t hash) return entry; if (PyUnicode_CheckExact(startkey) && PyUnicode_CheckExact(key) - && unicode_eq(startkey, key)) + && _PyUnicode_EQ(startkey, key)) return entry; + table = so->table; Py_INCREF(startkey); cmp = PyObject_RichCompareBool(startkey, key, Py_EQ); Py_DECREF(startkey); @@ -83,14 +84,12 @@ set_lookkey(PySetObject *so, PyObject *key, Py_hash_t hash) return entry; mask = so->mask; /* help avoid a register spill */ } - if (entry->hash == -1 && freeslot == NULL) - freeslot = entry; if (i + LINEAR_PROBES <= mask) { for (j = 0 ; j < LINEAR_PROBES ; j++) { entry++; - if (entry->key == NULL) - goto found_null; + if (entry->hash == 0 && entry->key == NULL) + return entry; if (entry->hash == hash) { PyObject *startkey = entry->key; assert(startkey != dummy); @@ -98,8 +97,9 @@ set_lookkey(PySetObject *so, PyObject *key, Py_hash_t hash) return entry; if (PyUnicode_CheckExact(startkey) && PyUnicode_CheckExact(key) - && unicode_eq(startkey, key)) + && _PyUnicode_EQ(startkey, key)) return entry; + table = so->table; Py_INCREF(startkey); cmp = PyObject_RichCompareBool(startkey, key, Py_EQ); Py_DECREF(startkey); @@ -111,7 +111,104 @@ set_lookkey(PySetObject *so, PyObject *key, Py_hash_t hash) return entry; mask = so->mask; } - if (entry->hash == -1 && freeslot == NULL) + } + } + + perturb >>= PERTURB_SHIFT; + i = (i * 5 + 1 + perturb) & mask; + + entry = &so->table[i]; + if (entry->key == NULL) + return entry; + } +} + +static int set_table_resize(PySetObject *, Py_ssize_t); + +static int +set_add_entry(PySetObject *so, PyObject *key, Py_hash_t hash) +{ + setentry *table; + setentry *freeslot; + setentry *entry; + size_t perturb; + size_t mask; + size_t i; /* Unsigned for defined overflow behavior */ + size_t j; + int cmp; + + /* Pre-increment is necessary to prevent arbitrary code in the rich + comparison from deallocating the key just before the insertion. */ + Py_INCREF(key); + + restart: + + mask = so->mask; + i = (size_t)hash & mask; + + entry = &so->table[i]; + if (entry->key == NULL) + goto found_unused; + + freeslot = NULL; + perturb = hash; + + while (1) { + if (entry->hash == hash) { + PyObject *startkey = entry->key; + /* startkey cannot be a dummy because the dummy hash field is -1 */ + assert(startkey != dummy); + if (startkey == key) + goto found_active; + if (PyUnicode_CheckExact(startkey) + && PyUnicode_CheckExact(key) + && _PyUnicode_EQ(startkey, key)) + goto found_active; + table = so->table; + Py_INCREF(startkey); + cmp = PyObject_RichCompareBool(startkey, key, Py_EQ); + Py_DECREF(startkey); + if (cmp > 0) /* likely */ + goto found_active; + if (cmp < 0) + goto comparison_error; + /* Continuing the search from the current entry only makes + sense if the table and entry are unchanged; otherwise, + we have to restart from the beginning */ + if (table != so->table || entry->key != startkey) + goto restart; + mask = so->mask; /* help avoid a register spill */ + } + else if (entry->hash == -1 && freeslot == NULL) + freeslot = entry; + + if (i + LINEAR_PROBES <= mask) { + for (j = 0 ; j < LINEAR_PROBES ; j++) { + entry++; + if (entry->hash == 0 && entry->key == NULL) + goto found_unused_or_dummy; + if (entry->hash == hash) { + PyObject *startkey = entry->key; + assert(startkey != dummy); + if (startkey == key) + goto found_active; + if (PyUnicode_CheckExact(startkey) + && PyUnicode_CheckExact(key) + && _PyUnicode_EQ(startkey, key)) + goto found_active; + table = so->table; + Py_INCREF(startkey); + cmp = PyObject_RichCompareBool(startkey, key, Py_EQ); + Py_DECREF(startkey); + if (cmp > 0) + goto found_active; + if (cmp < 0) + goto comparison_error; + if (table != so->table || entry->key != startkey) + goto restart; + mask = so->mask; + } + else if (entry->hash == -1 && freeslot == NULL) freeslot = entry; } } @@ -119,29 +216,51 @@ set_lookkey(PySetObject *so, PyObject *key, Py_hash_t hash) perturb >>= PERTURB_SHIFT; i = (i * 5 + 1 + perturb) & mask; - entry = &table[i]; + entry = &so->table[i]; if (entry->key == NULL) - goto found_null; + goto found_unused_or_dummy; } - found_null: - return freeslot == NULL ? entry : freeslot; + + found_unused_or_dummy: + if (freeslot == NULL) + goto found_unused; + so->used++; + freeslot->key = key; + freeslot->hash = hash; + return 0; + + found_unused: + so->fill++; + so->used++; + entry->key = key; + entry->hash = hash; + if ((size_t)so->fill*3 < mask*2) + return 0; + return set_table_resize(so, so->used); + + found_active: + Py_DECREF(key); + return 0; + + comparison_error: + Py_DECREF(key); + return -1; } /* Internal routine used by set_table_resize() to insert an item which is known to be absent from the set. This routine also assumes that the set contains no deleted entries. Besides the performance benefit, -using set_insert_clean() in set_table_resize() is dangerous (SF bug #1456209). -Note that no refcounts are changed by this routine; if needed, the caller -is responsible for incref'ing `key`. +there is also safety benefit since using set_add_entry() risks making +a callback in the middle of a set_table_resize(), see issue 1456209. +The caller is responsible for updating the key's reference count and +the setobject's fill and used fields. */ static void -set_insert_clean(PySetObject *so, PyObject *key, Py_hash_t hash) +set_insert_clean(setentry *table, size_t mask, PyObject *key, Py_hash_t hash) { - setentry *table = so->table; setentry *entry; size_t perturb = hash; - size_t mask = (size_t)so->mask; size_t i = (size_t)hash & mask; size_t j; @@ -162,45 +281,11 @@ set_insert_clean(PySetObject *so, PyObject *key, Py_hash_t hash) found_null: entry->key = key; entry->hash = hash; - so->fill++; - so->used++; } /* ======== End logic for probing the hash table ========================== */ /* ======================================================================== */ - -/* -Internal routine to insert a new key into the table. -Used by the public insert routine. -Eats a reference to key. -*/ -static int -set_insert_key(PySetObject *so, PyObject *key, Py_hash_t hash) -{ - setentry *entry; - - entry = set_lookkey(so, key, hash); - if (entry == NULL) - return -1; - if (entry->key == NULL) { - /* UNUSED */ - entry->key = key; - entry->hash = hash; - so->fill++; - so->used++; - } else if (entry->key == dummy) { - /* DUMMY */ - entry->key = key; - entry->hash = hash; - so->used++; - } else { - /* ACTIVE */ - Py_DECREF(key); - } - return 0; -} - /* Restructure the table by allocating a new table and reinserting all keys again. When entries have been deleted, the new table may @@ -213,10 +298,13 @@ set_table_resize(PySetObject *so, Py_ssize_t minused) setentry *oldtable, *newtable, *entry; Py_ssize_t oldfill = so->fill; Py_ssize_t oldused = so->used; + Py_ssize_t oldmask = so->mask; + size_t newmask; int is_oldtable_malloced; setentry small_copy[PySet_MINSIZE]; assert(minused >= 0); + minused = (minused > 50000) ? minused * 2 : minused * 4; /* Find the smallest table size > minused. */ /* XXX speed-up with intrinsics */ @@ -264,25 +352,24 @@ set_table_resize(PySetObject *so, Py_ssize_t minused) /* Make the set empty, using the new table. */ assert(newtable != oldtable); memset(newtable, 0, sizeof(setentry) * newsize); - so->fill = 0; - so->used = 0; + so->fill = oldused; + so->used = oldused; so->mask = newsize - 1; so->table = newtable; /* Copy the data over; this is refcount-neutral for active entries; dummy entries aren't copied over, of course */ + newmask = (size_t)so->mask; if (oldfill == oldused) { - for (entry = oldtable; oldused > 0; entry++) { + for (entry = oldtable; entry <= oldtable + oldmask; entry++) { if (entry->key != NULL) { - oldused--; - set_insert_clean(so, entry->key, entry->hash); + set_insert_clean(newtable, newmask, entry->key, entry->hash); } } } else { - for (entry = oldtable; oldused > 0; entry++) { + for (entry = oldtable; entry <= oldtable + oldmask; entry++) { if (entry->key != NULL && entry->key != dummy) { - oldused--; - set_insert_clean(so, entry->key, entry->hash); + set_insert_clean(newtable, newmask, entry->key, entry->hash); } } } @@ -292,31 +379,42 @@ set_table_resize(PySetObject *so, Py_ssize_t minused) return 0; } -/* CAUTION: set_add_key/entry() must guarantee it won't resize the table */ +static int +set_contains_entry(PySetObject *so, PyObject *key, Py_hash_t hash) +{ + setentry *entry; + + entry = set_lookkey(so, key, hash); + if (entry != NULL) + return entry->key != NULL; + return -1; +} + +#define DISCARD_NOTFOUND 0 +#define DISCARD_FOUND 1 static int -set_add_entry(PySetObject *so, setentry *entry) +set_discard_entry(PySetObject *so, PyObject *key, Py_hash_t hash) { - Py_ssize_t n_used; - PyObject *key = entry->key; - Py_hash_t hash = entry->hash; + setentry *entry; + PyObject *old_key; - assert(so->fill <= so->mask); /* at least one empty slot */ - n_used = so->used; - Py_INCREF(key); - if (set_insert_key(so, key, hash)) { - Py_DECREF(key); + entry = set_lookkey(so, key, hash); + if (entry == NULL) return -1; - } - if (!(so->used > n_used && so->fill*3 >= (so->mask+1)*2)) - return 0; - return set_table_resize(so, so->used>50000 ? so->used*2 : so->used*4); + if (entry->key == NULL) + return DISCARD_NOTFOUND; + old_key = entry->key; + entry->key = dummy; + entry->hash = -1; + so->used--; + Py_DECREF(old_key); + return DISCARD_FOUND; } static int set_add_key(PySetObject *so, PyObject *key) { - setentry entry; Py_hash_t hash; if (!PyUnicode_CheckExact(key) || @@ -325,50 +423,35 @@ set_add_key(PySetObject *so, PyObject *key) if (hash == -1) return -1; } - entry.key = key; - entry.hash = hash; - return set_add_entry(so, &entry); + return set_add_entry(so, key, hash); } -#define DISCARD_NOTFOUND 0 -#define DISCARD_FOUND 1 - static int -set_discard_entry(PySetObject *so, setentry *oldentry) +set_contains_key(PySetObject *so, PyObject *key) { - setentry *entry; - PyObject *old_key; + Py_hash_t hash; - entry = set_lookkey(so, oldentry->key, oldentry->hash); - if (entry == NULL) - return -1; - if (entry->key == NULL || entry->key == dummy) - return DISCARD_NOTFOUND; - old_key = entry->key; - entry->key = dummy; - entry->hash = -1; - so->used--; - Py_DECREF(old_key); - return DISCARD_FOUND; + if (!PyUnicode_CheckExact(key) || + (hash = ((PyASCIIObject *) key)->hash) == -1) { + hash = PyObject_Hash(key); + if (hash == -1) + return -1; + } + return set_contains_entry(so, key, hash); } static int set_discard_key(PySetObject *so, PyObject *key) { - setentry entry; Py_hash_t hash; - assert (PyAnySet_Check(so)); - if (!PyUnicode_CheckExact(key) || (hash = ((PyASCIIObject *) key)->hash) == -1) { hash = PyObject_Hash(key); if (hash == -1) return -1; } - entry.key = key; - entry.hash = hash; - return set_discard_entry(so, &entry); + return set_discard_entry(so, key, hash); } static void @@ -449,20 +532,22 @@ set_next(PySetObject *so, Py_ssize_t *pos_ptr, setentry **entry_ptr) { Py_ssize_t i; Py_ssize_t mask; - setentry *table; + setentry *entry; assert (PyAnySet_Check(so)); i = *pos_ptr; assert(i >= 0); - table = so->table; mask = so->mask; - while (i <= mask && (table[i].key == NULL || table[i].key == dummy)) + entry = &so->table[i]; + while (i <= mask && (entry->key == NULL || entry->key == dummy)) { i++; + entry++; + } *pos_ptr = i+1; if (i > mask) return 0; - assert(table[i].key != NULL); - *entry_ptr = &table[i]; + assert(entry != NULL); + *entry_ptr = entry; return 1; } @@ -560,8 +645,8 @@ set_merge(PySetObject *so, PyObject *otherset) * incrementally resizing as we insert new keys. Expect * that there will be no (or few) overlapping keys. */ - if ((so->fill + other->used)*3 >= (so->mask+1)*2) { - if (set_table_resize(so, (so->used + other->used)*2) != 0) + if ((so->fill + other->used)*3 >= so->mask*2) { + if (set_table_resize(so, so->used + other->used) != 0) return -1; } so_entry = so->table; @@ -586,11 +671,15 @@ set_merge(PySetObject *so, PyObject *otherset) /* If our table is empty, we can use set_insert_clean() */ if (so->fill == 0) { - for (i = 0; i <= other->mask; i++, other_entry++) { + setentry *newtable = so->table; + size_t newmask = (size_t)so->mask; + so->fill = other->used; + so->used = other->used; + for (i = other->mask + 1; i > 0 ; i--, other_entry++) { key = other_entry->key; if (key != NULL && key != dummy) { Py_INCREF(key); - set_insert_clean(so, key, other_entry->hash); + set_insert_clean(newtable, newmask, key, other_entry->hash); } } return 0; @@ -601,46 +690,13 @@ set_merge(PySetObject *so, PyObject *otherset) other_entry = &other->table[i]; key = other_entry->key; if (key != NULL && key != dummy) { - Py_INCREF(key); - if (set_insert_key(so, key, other_entry->hash)) { - Py_DECREF(key); + if (set_add_entry(so, key, other_entry->hash)) return -1; - } } } return 0; } -static int -set_contains_entry(PySetObject *so, setentry *entry) -{ - PyObject *key; - setentry *lu_entry; - - lu_entry = set_lookkey(so, entry->key, entry->hash); - if (lu_entry == NULL) - return -1; - key = lu_entry->key; - return key != NULL && key != dummy; -} - -static int -set_contains_key(PySetObject *so, PyObject *key) -{ - setentry entry; - Py_hash_t hash; - - if (!PyUnicode_CheckExact(key) || - (hash = ((PyASCIIObject *) key)->hash) == -1) { - hash = PyObject_Hash(key); - if (hash == -1) - return -1; - } - entry.key = key; - entry.hash = hash; - return set_contains_entry(so, &entry); -} - static PyObject * set_pop(PySetObject *so) { @@ -682,43 +738,64 @@ set_traverse(PySetObject *so, visitproc visit, void *arg) return 0; } -static Py_hash_t -frozenset_hash(PyObject *self) +/* Work to increase the bit dispersion for closely spaced hash values. + This is important because some use cases have many combinations of a + small number of elements with nearby hashes so that many distinct + combinations collapse to only a handful of distinct hash values. */ + +static Py_uhash_t +_shuffle_bits(Py_uhash_t h) { - /* Most of the constants in this hash algorithm are randomly choosen - large primes with "interesting bit patterns" and that passed - tests for good collision statistics on a variety of problematic - datasets such as: + return ((h ^ 89869747UL) ^ (h << 16)) * 3644798167UL; +} - ps = [] - for r in range(21): - ps += itertools.combinations(range(20), r) - num_distinct_hashes = len({hash(frozenset(s)) for s in ps}) +/* Most of the constants in this hash algorithm are randomly chosen + large primes with "interesting bit patterns" and that passed tests + for good collision statistics on a variety of problematic datasets + including powersets and graph structures (such as David Eppstein's + graph recipes in Lib/test/test_set.py) */ - */ +static Py_hash_t +frozenset_hash(PyObject *self) +{ PySetObject *so = (PySetObject *)self; - Py_uhash_t h, hash = 1927868237UL; + Py_uhash_t hash = 0; setentry *entry; - Py_ssize_t pos = 0; if (so->hash != -1) return so->hash; - hash *= (Py_uhash_t)PySet_GET_SIZE(self) + 1; - while (set_next(so, &pos, &entry)) { - /* Work to increase the bit dispersion for closely spaced hash - values. This is important because some use cases have many - combinations of a small number of elements with nearby - hashes so that many distinct combinations collapse to only - a handful of distinct hash values. */ - h = entry->hash; - hash ^= ((h ^ 89869747UL) ^ (h << 16)) * 3644798167UL; - } - /* Make the final result spread-out in a different pattern - than the algorithm for tuples or other python objects. */ + /* Xor-in shuffled bits from every entry's hash field because xor is + commutative and a frozenset hash should be independent of order. + + For speed, include null entries and dummy entries and then + subtract out their effect afterwards so that the final hash + depends only on active entries. This allows the code to be + vectorized by the compiler and it saves the unpredictable + branches that would arise when trying to exclude null and dummy + entries on every iteration. */ + + for (entry = so->table; entry <= &so->table[so->mask]; entry++) + hash ^= _shuffle_bits(entry->hash); + + /* Remove the effect of an odd number of NULL entries */ + if ((so->mask + 1 - so->fill) & 1) + hash ^= _shuffle_bits(0); + + /* Remove the effect of an odd number of dummy entries */ + if ((so->fill - so->used) & 1) + hash ^= _shuffle_bits(-1); + + /* Factor in the number of active entries */ + hash ^= ((Py_uhash_t)PySet_GET_SIZE(self) + 1) * 1927868237UL; + + /* Disperse patterns arising in nested frozensets */ hash = hash * 69069U + 907133923UL; + + /* -1 is reserved as an error code */ if (hash == (Py_uhash_t)-1) hash = 590923713UL; + so->hash = hash; return hash; } @@ -865,7 +942,7 @@ PyTypeObject PySetIter_Type = { PyObject_GenericGetAttr, /* tp_getattro */ 0, /* tp_setattro */ 0, /* tp_as_buffer */ - Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */ 0, /* tp_doc */ (traverseproc)setiter_traverse, /* tp_traverse */ 0, /* tp_clear */ @@ -910,18 +987,14 @@ set_update_internal(PySetObject *so, PyObject *other) * incrementally resizing as we insert new keys. Expect * that there will be no (or few) overlapping keys. */ - if (dictsize == -1) + if (dictsize < 0) return -1; - if ((so->fill + dictsize)*3 >= (so->mask+1)*2) { - if (set_table_resize(so, (so->used + dictsize)*2) != 0) + if ((so->fill + dictsize)*3 >= so->mask*2) { + if (set_table_resize(so, so->used + dictsize) != 0) return -1; } while (_PyDict_Next(other, &pos, &key, &value, &hash)) { - setentry an_entry; - - an_entry.hash = hash; - an_entry.key = key; - if (set_add_entry(so, &an_entry)) + if (set_add_entry(so, key, hash)) return -1; } return 0; @@ -970,9 +1043,8 @@ PyDoc_STRVAR(update_doc, static PyObject * make_new_set(PyTypeObject *type, PyObject *iterable) { - PySetObject *so = NULL; + PySetObject *so; - /* create PySetObject structure */ so = (PySetObject *)type->tp_alloc(type, 0); if (so == NULL) return NULL; @@ -1015,7 +1087,8 @@ frozenset_new(PyTypeObject *type, PyObject *args, PyObject *kwds) { PyObject *iterable = NULL, *result; - if (type == &PyFrozenSet_Type && !_PyArg_NoKeywords("frozenset()", kwds)) + if (kwds != NULL && type == &PyFrozenSet_Type + && !_PyArg_NoKeywords("frozenset()", kwds)) return NULL; if (!PyArg_UnpackTuple(args, type->tp_name, 0, 1, &iterable)) @@ -1042,24 +1115,9 @@ frozenset_new(PyTypeObject *type, PyObject *args, PyObject *kwds) return emptyfrozenset; } -int -PySet_ClearFreeList(void) -{ - return 0; -} - -void -PySet_Fini(void) -{ - Py_CLEAR(emptyfrozenset); -} - static PyObject * set_new(PyTypeObject *type, PyObject *args, PyObject *kwds) { - if (type == &PySet_Type && !_PyArg_NoKeywords("set()", kwds)) - return NULL; - return make_new_set(type, NULL); } @@ -1201,6 +1259,8 @@ set_intersection(PySetObject *so, PyObject *other) { PySetObject *result; PyObject *key, *it, *tmp; + Py_hash_t hash; + int rv; if ((PyObject *)so == other) return set_copy(so); @@ -1220,13 +1280,15 @@ set_intersection(PySetObject *so, PyObject *other) } while (set_next((PySetObject *)other, &pos, &entry)) { - int rv = set_contains_entry(so, entry); - if (rv == -1) { + key = entry->key; + hash = entry->hash; + rv = set_contains_entry(so, key, hash); + if (rv < 0) { Py_DECREF(result); return NULL; } if (rv) { - if (set_add_entry(result, entry)) { + if (set_add_entry(result, key, hash)) { Py_DECREF(result); return NULL; } @@ -1242,32 +1304,15 @@ set_intersection(PySetObject *so, PyObject *other) } while ((key = PyIter_Next(it)) != NULL) { - int rv; - setentry entry; - Py_hash_t hash = PyObject_Hash(key); - - if (hash == -1) { - Py_DECREF(it); - Py_DECREF(result); - Py_DECREF(key); - return NULL; - } - entry.hash = hash; - entry.key = key; - rv = set_contains_entry(so, &entry); - if (rv == -1) { - Py_DECREF(it); - Py_DECREF(result); - Py_DECREF(key); - return NULL; - } + hash = PyObject_Hash(key); + if (hash == -1) + goto error; + rv = set_contains_entry(so, key, hash); + if (rv < 0) + goto error; if (rv) { - if (set_add_entry(result, &entry)) { - Py_DECREF(it); - Py_DECREF(result); - Py_DECREF(key); - return NULL; - } + if (set_add_entry(result, key, hash)) + goto error; } Py_DECREF(key); } @@ -1277,6 +1322,11 @@ set_intersection(PySetObject *so, PyObject *other) return NULL; } return (PyObject *)result; + error: + Py_DECREF(it); + Py_DECREF(result); + Py_DECREF(key); + return NULL; } static PyObject * @@ -1363,6 +1413,7 @@ static PyObject * set_isdisjoint(PySetObject *so, PyObject *other) { PyObject *key, *it, *tmp; + int rv; if ((PyObject *)so == other) { if (PySet_GET_SIZE(so) == 0) @@ -1381,8 +1432,8 @@ set_isdisjoint(PySetObject *so, PyObject *other) other = tmp; } while (set_next((PySetObject *)other, &pos, &entry)) { - int rv = set_contains_entry(so, entry); - if (rv == -1) + rv = set_contains_entry(so, entry->key, entry->hash); + if (rv < 0) return NULL; if (rv) Py_RETURN_FALSE; @@ -1395,8 +1446,6 @@ set_isdisjoint(PySetObject *so, PyObject *other) return NULL; while ((key = PyIter_Next(it)) != NULL) { - int rv; - setentry entry; Py_hash_t hash = PyObject_Hash(key); if (hash == -1) { @@ -1404,11 +1453,9 @@ set_isdisjoint(PySetObject *so, PyObject *other) Py_DECREF(it); return NULL; } - entry.hash = hash; - entry.key = key; - rv = set_contains_entry(so, &entry); + rv = set_contains_entry(so, key, hash); Py_DECREF(key); - if (rv == -1) { + if (rv < 0) { Py_DECREF(it); return NULL; } @@ -1437,7 +1484,7 @@ set_difference_update_internal(PySetObject *so, PyObject *other) Py_ssize_t pos = 0; while (set_next((PySetObject *)other, &pos, &entry)) - if (set_discard_entry(so, entry) == -1) + if (set_discard_entry(so, entry->key, entry->hash) < 0) return -1; } else { PyObject *key, *it; @@ -1446,7 +1493,7 @@ set_difference_update_internal(PySetObject *so, PyObject *other) return -1; while ((key = PyIter_Next(it)) != NULL) { - if (set_discard_key(so, key) == -1) { + if (set_discard_key(so, key) < 0) { Py_DECREF(it); Py_DECREF(key); return -1; @@ -1457,10 +1504,10 @@ set_difference_update_internal(PySetObject *so, PyObject *other) if (PyErr_Occurred()) return -1; } - /* If more than 1/5 are dummies, then resize them away. */ - if ((so->fill - so->used) * 5 < so->mask) + /* If more than 1/4th are dummies, then resize them away. */ + if ((size_t)(so->fill - so->used) <= (size_t)so->mask / 4) return 0; - return set_table_resize(so, so->used>50000 ? so->used*2 : so->used*4); + return set_table_resize(so, so->used); } static PyObject * @@ -1487,7 +1534,7 @@ set_copy_and_difference(PySetObject *so, PyObject *other) result = set_copy(so); if (result == NULL) return NULL; - if (set_difference_update_internal((PySetObject *) result, other) != -1) + if (set_difference_update_internal((PySetObject *) result, other) == 0) return result; Py_DECREF(result); return NULL; @@ -1497,8 +1544,11 @@ static PyObject * set_difference(PySetObject *so, PyObject *other) { PyObject *result; + PyObject *key; + Py_hash_t hash; setentry *entry; Py_ssize_t pos = 0; + int rv; if (!PyAnySet_Check(other) && !PyDict_CheckExact(other)) { return set_copy_and_difference(so, other); @@ -1516,17 +1566,15 @@ set_difference(PySetObject *so, PyObject *other) if (PyDict_CheckExact(other)) { while (set_next(so, &pos, &entry)) { - setentry entrycopy; - int rv; - entrycopy.hash = entry->hash; - entrycopy.key = entry->key; - rv = _PyDict_Contains(other, entry->key, entry->hash); + key = entry->key; + hash = entry->hash; + rv = _PyDict_Contains(other, key, hash); if (rv < 0) { Py_DECREF(result); return NULL; } if (!rv) { - if (set_add_entry((PySetObject *)result, &entrycopy)) { + if (set_add_entry((PySetObject *)result, key, hash)) { Py_DECREF(result); return NULL; } @@ -1537,13 +1585,15 @@ set_difference(PySetObject *so, PyObject *other) /* Iterate over so, checking for common elements in other. */ while (set_next(so, &pos, &entry)) { - int rv = set_contains_entry((PySetObject *)other, entry); - if (rv == -1) { + key = entry->key; + hash = entry->hash; + rv = set_contains_entry((PySetObject *)other, key, hash); + if (rv < 0) { Py_DECREF(result); return NULL; } if (!rv) { - if (set_add_entry((PySetObject *)result, entry)) { + if (set_add_entry((PySetObject *)result, key, hash)) { Py_DECREF(result); return NULL; } @@ -1605,29 +1655,24 @@ set_symmetric_difference_update(PySetObject *so, PyObject *other) PySetObject *otherset; PyObject *key; Py_ssize_t pos = 0; + Py_hash_t hash; setentry *entry; + int rv; if ((PyObject *)so == other) return set_clear(so); if (PyDict_CheckExact(other)) { PyObject *value; - int rv; - Py_hash_t hash; while (_PyDict_Next(other, &pos, &key, &value, &hash)) { - setentry an_entry; - Py_INCREF(key); - an_entry.hash = hash; - an_entry.key = key; - - rv = set_discard_entry(so, &an_entry); - if (rv == -1) { + rv = set_discard_entry(so, key, hash); + if (rv < 0) { Py_DECREF(key); return NULL; } if (rv == DISCARD_NOTFOUND) { - if (set_add_entry(so, &an_entry)) { + if (set_add_entry(so, key, hash)) { Py_DECREF(key); return NULL; } @@ -1647,13 +1692,15 @@ set_symmetric_difference_update(PySetObject *so, PyObject *other) } while (set_next(otherset, &pos, &entry)) { - int rv = set_discard_entry(so, entry); - if (rv == -1) { + key = entry->key; + hash = entry->hash; + rv = set_discard_entry(so, key, hash); + if (rv < 0) { Py_DECREF(otherset); return NULL; } if (rv == DISCARD_NOTFOUND) { - if (set_add_entry(so, entry)) { + if (set_add_entry(so, key, hash)) { Py_DECREF(otherset); return NULL; } @@ -1715,6 +1762,7 @@ set_issubset(PySetObject *so, PyObject *other) { setentry *entry; Py_ssize_t pos = 0; + int rv; if (!PyAnySet_Check(other)) { PyObject *tmp, *result; @@ -1729,8 +1777,8 @@ set_issubset(PySetObject *so, PyObject *other) Py_RETURN_FALSE; while (set_next(so, &pos, &entry)) { - int rv = set_contains_entry((PySetObject *)other, entry); - if (rv == -1) + rv = set_contains_entry((PySetObject *)other, entry->key, entry->hash); + if (rv < 0) return NULL; if (!rv) Py_RETURN_FALSE; @@ -1821,7 +1869,7 @@ set_contains(PySetObject *so, PyObject *key) int rv; rv = set_contains_key(so, key); - if (rv == -1) { + if (rv < 0) { if (!PySet_Check(key) || !PyErr_ExceptionMatches(PyExc_TypeError)) return -1; PyErr_Clear(); @@ -1840,7 +1888,7 @@ set_direct_contains(PySetObject *so, PyObject *key) long result; result = set_contains(so, key); - if (result == -1) + if (result < 0) return NULL; return PyBool_FromLong(result); } @@ -1854,7 +1902,7 @@ set_remove(PySetObject *so, PyObject *key) int rv; rv = set_discard_key(so, key); - if (rv == -1) { + if (rv < 0) { if (!PySet_Check(key) || !PyErr_ExceptionMatches(PyExc_TypeError)) return NULL; PyErr_Clear(); @@ -1863,7 +1911,7 @@ set_remove(PySetObject *so, PyObject *key) return NULL; rv = set_discard_key(so, tmpkey); Py_DECREF(tmpkey); - if (rv == -1) + if (rv < 0) return NULL; } @@ -1886,7 +1934,7 @@ set_discard(PySetObject *so, PyObject *key) int rv; rv = set_discard_key(so, key); - if (rv == -1) { + if (rv < 0) { if (!PySet_Check(key) || !PyErr_ExceptionMatches(PyExc_TypeError)) return NULL; PyErr_Clear(); @@ -1895,7 +1943,7 @@ set_discard(PySetObject *so, PyObject *key) return NULL; rv = set_discard_key(so, tmpkey); Py_DECREF(tmpkey); - if (rv == -1) + if (rv < 0) return NULL; } Py_RETURN_NONE; @@ -1949,13 +1997,12 @@ set_init(PySetObject *self, PyObject *args, PyObject *kwds) { PyObject *iterable = NULL; - if (!PyAnySet_Check(self)) - return -1; - if (PySet_Check(self) && !_PyArg_NoKeywords("set()", kwds)) + if (kwds != NULL && !_PyArg_NoKeywords("set()", kwds)) return -1; if (!PyArg_UnpackTuple(args, Py_TYPE(self)->tp_name, 0, 1, &iterable)) return -1; - set_clear_internal(self); + if (self->fill) + set_clear_internal(self); self->hash = -1; if (iterable == NULL) return 0; @@ -2122,7 +2169,7 @@ static PyMethodDef frozenset_methods[] = { copy_doc}, {"difference", (PyCFunction)set_difference_multi, METH_VARARGS, difference_doc}, - {"intersection",(PyCFunction)set_intersection_multi, METH_VARARGS, + {"intersection", (PyCFunction)set_intersection_multi, METH_VARARGS, intersection_doc}, {"isdisjoint", (PyCFunction)set_isdisjoint, METH_O, isdisjoint_doc}, @@ -2193,7 +2240,7 @@ PyTypeObject PyFrozenSet_Type = { (traverseproc)set_traverse, /* tp_traverse */ (inquiry)set_clear_internal, /* tp_clear */ (richcmpfunc)set_richcompare, /* tp_richcompare */ - offsetof(PySetObject, weakreflist), /* tp_weaklistoffset */ + offsetof(PySetObject, weakreflist), /* tp_weaklistoffset */ (getiterfunc)set_iter, /* tp_iter */ 0, /* tp_iternext */ frozenset_methods, /* tp_methods */ @@ -2277,6 +2324,18 @@ PySet_Add(PyObject *anyset, PyObject *key) } int +PySet_ClearFreeList(void) +{ + return 0; +} + +void +PySet_Fini(void) +{ + Py_CLEAR(emptyfrozenset); +} + +int _PySet_NextEntry(PyObject *set, Py_ssize_t *pos, PyObject **key, Py_hash_t *hash) { setentry *entry; |