diff options
author | Raymond Hettinger <python@rcn.com> | 2005-07-31 01:16:36 (GMT) |
---|---|---|
committer | Raymond Hettinger <python@rcn.com> | 2005-07-31 01:16:36 (GMT) |
commit | 9f1a6796eb83a2884df5fd93487634e46d8830a7 (patch) | |
tree | 53e9ef211666eb6f534d0d5040969146fdaeee0e | |
parent | fe256431922c44375fa1e6c21bca69f5cb65481d (diff) | |
download | cpython-9f1a6796eb83a2884df5fd93487634e46d8830a7.zip cpython-9f1a6796eb83a2884df5fd93487634e46d8830a7.tar.gz cpython-9f1a6796eb83a2884df5fd93487634e46d8830a7.tar.bz2 |
Revised the set() and frozenset() implementaion to use its own internal
data structure instead of using dictionaries. Reduces memory consumption
by 1/3 and provides modest speed-ups for most set operations.
-rw-r--r-- | Include/setobject.h | 58 | ||||
-rw-r--r-- | Misc/NEWS | 4 | ||||
-rw-r--r-- | Objects/setobject.c | 1074 |
3 files changed, 912 insertions, 224 deletions
diff --git a/Include/setobject.h b/Include/setobject.h index cc2d683..8569ed1 100644 --- a/Include/setobject.h +++ b/Include/setobject.h @@ -1,4 +1,3 @@ - /* Set object interface */ #ifndef Py_SETOBJECT_H @@ -7,28 +6,61 @@ extern "C" { #endif + /* -This data structure is shared by set and frozenset objects. +There are three kinds of slots in the table: + +1. Unused: key == NULL +2. Active: key != NULL and key != dummy +3. Dummy: key == dummy */ +#define PySet_MINSIZE 8 + typedef struct { + long hash; /* cached hash code for the entry key */ + PyObject *key; +} setentry; + + +/* +This data structure is shared by set and frozenset objects. +*/ + +typedef struct _setobject PySetObject; +struct _setobject { PyObject_HEAD - PyObject *data; - long hash; /* only used by frozenset objects */ - PyObject *weakreflist; /* List of weak references */ - - /* Invariants: - * data is a dictionary whose values are all True. - * data points to the same dict for the whole life of the set. - * For frozensets only: - * data is immutable. - * hash is the hash of the frozenset or -1 if not computed yet. + + int fill; /* # Active + # Dummy */ + int used; /* # Active */ + + /* The table contains mask + 1 slots, and that's a power of 2. + * We store the mask instead of the size because the mask is more + * frequently needed. */ -} PySetObject; + int mask; + + /* table points to smalltable for small tables, else to + * additional malloc'ed memory. table is never NULL! This rule + * saves repeated runtime null-tests in the workhorse getitem and + * setitem calls. + */ + setentry *table; + setentry *(*lookup)(PySetObject *so, PyObject *key, long hash); + setentry smalltable[PySet_MINSIZE]; + + long hash; /* only used by frozenset objects */ + PyObject *weakreflist; /* List of weak references */ +}; PyAPI_DATA(PyTypeObject) PySet_Type; PyAPI_DATA(PyTypeObject) PyFrozenSet_Type; +/* Invariants for frozensets only: + * data is immutable. + * hash is the hash of the frozenset or -1 if not computed yet. + */ + #define PyFrozenSet_CheckExact(ob) ((ob)->ob_type == &PyFrozenSet_Type) #define PyAnySet_Check(ob) \ ((ob)->ob_type == &PySet_Type || (ob)->ob_type == &PyFrozenSet_Type || \ @@ -12,6 +12,10 @@ What's New in Python 2.5 alpha 1? Core and builtins ----------------- +- The implementation of set() and frozenset() was revised to use its + own internal data structure. Memory consumption is reduced by 1/3 + and there are modest speed-ups as well. The API is unchanged. + - SF bug #1238681: freed pointer is used in longobject.c:long_pow(). - SF bug #1229429: PyObject_CallMethod failed to decrement some diff --git a/Objects/setobject.c b/Objects/setobject.c index f9448c9..e922b6e 100644 --- a/Objects/setobject.c +++ b/Objects/setobject.c @@ -1,4 +1,641 @@ + +/* Set object implementation using a hash table + Functions adapted from dictobject.c +*/ + #include "Python.h" + +/* This must be >= 1. */ +#define PERTURB_SHIFT 5 + +/* Object used as dummy key to fill deleted entries */ +static PyObject *dummy; /* Initialized by first call to make_new_set() */ + +#define EMPTY_TO_MINSIZE(so) do { \ + memset((so)->smalltable, 0, sizeof((so)->smalltable)); \ + (so)->used = (so)->fill = 0; \ + (so)->table = (so)->smalltable; \ + (so)->mask = PySet_MINSIZE - 1; \ + } while(0) + + +/* +The basic lookup function used by all operations. +This is based on Algorithm D from Knuth Vol. 3, Sec. 6.4. +Open addressing is preferred over chaining since the link overhead for +chaining would be substantial (100% with typical malloc overhead). + +The initial probe index is computed as hash mod the table size. Subsequent +probe indices are computed as explained earlier. + +All arithmetic on hash should ignore overflow. + +This function must never return NULL; failures are indicated by returning +a setentry* for which the value field is NULL. Exceptions are never +reported by this function, and outstanding exceptions are maintained. +*/ + +static setentry * +set_lookkey(PySetObject *so, PyObject *key, register long hash) +{ + register int i; + register unsigned int perturb; + register setentry *freeslot; + register unsigned int mask = so->mask; + setentry *ep0 = so->table; + register setentry *ep; + register int restore_error; + register int checked_error; + register int cmp; + PyObject *err_type, *err_value, *err_tb; + PyObject *startkey; + + i = hash & mask; + ep = &ep0[i]; + if (ep->key == NULL || ep->key == key) + return ep; + + restore_error = checked_error = 0; + if (ep->key == dummy) + freeslot = ep; + else { + if (ep->hash == hash) { + /* error can't have been checked yet */ + checked_error = 1; + if (PyErr_Occurred()) { + restore_error = 1; + PyErr_Fetch(&err_type, &err_value, &err_tb); + } + startkey = ep->key; + cmp = PyObject_RichCompareBool(startkey, key, Py_EQ); + if (cmp < 0) + PyErr_Clear(); + if (ep0 == so->table && ep->key == startkey) { + if (cmp > 0) + goto Done; + } + else { + /* The compare did major nasty stuff to the + * set: start over. + */ + ep = set_lookkey(so, key, hash); + goto Done; + } + } + freeslot = NULL; + } + + /* In the loop, key == dummy is by far (factor of 100s) the + least likely outcome, so test for that last. */ + for (perturb = hash; ; perturb >>= PERTURB_SHIFT) { + i = (i << 2) + i + perturb + 1; + ep = &ep0[i & mask]; + if (ep->key == NULL) { + if (freeslot != NULL) + ep = freeslot; + break; + } + if (ep->key == key) + break; + if (ep->hash == hash && ep->key != dummy) { + if (!checked_error) { + checked_error = 1; + if (PyErr_Occurred()) { + restore_error = 1; + PyErr_Fetch(&err_type, &err_value, + &err_tb); + } + } + startkey = ep->key; + cmp = PyObject_RichCompareBool(startkey, key, Py_EQ); + if (cmp < 0) + PyErr_Clear(); + if (ep0 == so->table && ep->key == startkey) { + if (cmp > 0) + break; + } + else { + /* The compare did major nasty stuff to the + * set: start over. + */ + ep = set_lookkey(so, key, hash); + break; + } + } + else if (ep->key == dummy && freeslot == NULL) + freeslot = ep; + } + +Done: + if (restore_error) + PyErr_Restore(err_type, err_value, err_tb); + return ep; +} + +/* + * Hacked up version of set_lookkey which can assume keys are always strings; + * this assumption allows testing for errors during PyObject_Compare() to + * be dropped; string-string comparisons never raise exceptions. This also + * means we don't need to go through PyObject_Compare(); we can always use + * _PyString_Eq directly. + * + * This is valuable because the general-case error handling in set_lookkey() is + * expensive, and sets with pure-string keys may be very common. + */ +static setentry * +set_lookkey_string(PySetObject *so, PyObject *key, register long hash) +{ + register int i; + register unsigned int perturb; + register setentry *freeslot; + register unsigned int mask = so->mask; + setentry *ep0 = so->table; + register setentry *ep; + + /* Make sure this function doesn't have to handle non-string keys, + including subclasses of str; e.g., one reason to subclass + strings is to override __eq__, and for speed we don't cater to + that here. */ + if (!PyString_CheckExact(key)) { + so->lookup = set_lookkey; + return set_lookkey(so, key, hash); + } + i = hash & mask; + ep = &ep0[i]; + if (ep->key == NULL || ep->key == key) + return ep; + if (ep->key == dummy) + freeslot = ep; + else { + if (ep->hash == hash && _PyString_Eq(ep->key, key)) + return ep; + freeslot = NULL; + } + + /* In the loop, key == dummy is by far (factor of 100s) the + least likely outcome, so test for that last. */ + for (perturb = hash; ; perturb >>= PERTURB_SHIFT) { + i = (i << 2) + i + perturb + 1; + ep = &ep0[i & mask]; + if (ep->key == NULL) + return freeslot == NULL ? ep : freeslot; + if (ep->key == key + || (ep->hash == hash + && ep->key != dummy + && _PyString_Eq(ep->key, key))) + return ep; + if (ep->key == dummy && freeslot == NULL) + freeslot = ep; + } +} + +/* +Internal routine to insert a new item into the table. +Used both by the internal resize routine and by the public insert routine. +Eats a reference to key. +*/ +static void +set_insert_key(register PySetObject *so, PyObject *key, long hash) +{ + register setentry *ep; + typedef setentry *(*lookupfunc)(PySetObject *, PyObject *, long); + + assert(so->lookup != NULL); + + ep = so->lookup(so, key, hash); + if (ep->key == NULL) { + /* UNUSED */ + so->fill++; + ep->key = key; + ep->hash = hash; + so->used++; + } else if (ep->key == dummy) { + /* DUMMY */ + ep->key = key; + ep->hash = hash; + so->used++; + Py_DECREF(dummy); + } else { + /* ACTIVE */ + Py_DECREF(key); + } +} + +/* +Restructure the table by allocating a new table and reinserting all +items again. When entries have been deleted, the new table may +actually be smaller than the old one. +*/ +static int +set_table_resize(PySetObject *so, int minused) +{ + int newsize; + setentry *oldtable, *newtable, *ep; + int i; + int is_oldtable_malloced; + setentry small_copy[PySet_MINSIZE]; + + assert(minused >= 0); + + /* Find the smallest table size > minused. */ + for (newsize = PySet_MINSIZE; + newsize <= minused && newsize > 0; + newsize <<= 1) + ; + if (newsize <= 0) { + PyErr_NoMemory(); + return -1; + } + + /* Get space for a new table. */ + oldtable = so->table; + assert(oldtable != NULL); + is_oldtable_malloced = oldtable != so->smalltable; + + if (newsize == PySet_MINSIZE) { + /* A large table is shrinking, or we can't get any smaller. */ + newtable = so->smalltable; + if (newtable == oldtable) { + if (so->fill == so->used) { + /* No dummies, so no point doing anything. */ + return 0; + } + /* We're not going to resize it, but rebuild the + table anyway to purge old dummy entries. + Subtle: This is *necessary* if fill==size, + as set_lookkey needs at least one virgin slot to + terminate failing searches. If fill < size, it's + merely desirable, as dummies slow searches. */ + assert(so->fill > so->used); + memcpy(small_copy, oldtable, sizeof(small_copy)); + oldtable = small_copy; + } + } + else { + newtable = PyMem_NEW(setentry, newsize); + if (newtable == NULL) { + PyErr_NoMemory(); + return -1; + } + } + + /* Make the set empty, using the new table. */ + assert(newtable != oldtable); + so->table = newtable; + so->mask = newsize - 1; + memset(newtable, 0, sizeof(setentry) * newsize); + so->used = 0; + i = so->fill; + so->fill = 0; + + /* Copy the data over; this is refcount-neutral for active entries; + dummy entries aren't copied over, of course */ + for (ep = oldtable; i > 0; ep++) { + if (ep->key == NULL) { + /* UNUSED */ + ; + } else if (ep->key == dummy) { + /* DUMMY */ + --i; + assert(ep->key == dummy); + Py_DECREF(ep->key); + } else { + /* ACTIVE */ + --i; + set_insert_key(so, ep->key, ep->hash); + } + } + + if (is_oldtable_malloced) + PyMem_DEL(oldtable); + return 0; +} + +/*** Internal functions (derived from public dictionary api functions ) ***/ + + +/* CAUTION: set_add_internal() must guarantee that it won't resize the table */ +static int +set_add_internal(register PySetObject *so, PyObject *key) +{ + register long hash; + register int n_used; + + if (PyString_CheckExact(key)) { + hash = ((PyStringObject *)key)->ob_shash; + if (hash == -1) + hash = PyObject_Hash(key); + } else { + hash = PyObject_Hash(key); + if (hash == -1) + return -1; + } + assert(so->fill <= so->mask); /* at least one empty slot */ + n_used = so->used; + Py_INCREF(key); + set_insert_key(so, key, hash); + if (!(so->used > n_used && so->fill*3 >= (so->mask+1)*2)) + return 0; + return set_table_resize(so, so->used*(so->used>50000 ? 2 : 4)); +} + +#define DISCARD_NOTFOUND 0 +#define DISCARD_FOUND 1 + +static int +set_discard_internal(PySetObject *so, PyObject *key) +{ + register long hash; + register setentry *ep; + PyObject *old_key; + + assert (PyAnySet_Check(so)); + if (!PyString_CheckExact(key) || + (hash = ((PyStringObject *) key)->ob_shash) == -1) { + hash = PyObject_Hash(key); + if (hash == -1) + return -1; + } + ep = (so->lookup)(so, key, hash); + if (ep->key == NULL || ep->key == dummy) + return DISCARD_NOTFOUND; + old_key = ep->key; + Py_INCREF(dummy); + ep->key = dummy; + so->used--; + Py_DECREF(old_key); + return DISCARD_FOUND; +} + +static void +set_clear_internal(PySetObject *so) +{ + setentry *ep, *table; + int table_is_malloced; + int fill; + setentry small_copy[PySet_MINSIZE]; +#ifdef Py_DEBUG + int i, n; +#endif + + assert (PyAnySet_Check(so)); +#ifdef Py_DEBUG + n = so->mask + 1; + i = 0; +#endif + + table = so->table; + assert(table != NULL); + table_is_malloced = table != so->smalltable; + + /* This is delicate. During the process of clearing the set, + * decrefs can cause the set to mutate. To avoid fatal confusion + * (voice of experience), we have to make the set empty before + * clearing the slots, and never refer to anything via mp->ref while + * clearing. + */ + fill = so->fill; + if (table_is_malloced) + EMPTY_TO_MINSIZE(so); + + else if (fill > 0) { + /* It's a small table with something that needs to be cleared. + * Afraid the only safe way is to copy the set entries into + * another small table first. + */ + memcpy(small_copy, table, sizeof(small_copy)); + table = small_copy; + EMPTY_TO_MINSIZE(so); + } + /* else it's a small table that's already empty */ + + /* Now we can finally clear things. If C had refcounts, we could + * assert that the refcount on table is 1 now, i.e. that this function + * has unique access to it, so decref side-effects can't alter it. + */ + for (ep = table; fill > 0; ++ep) { +#ifdef Py_DEBUG + assert(i < n); + ++i; +#endif + if (ep->key) { + --fill; + Py_DECREF(ep->key); + } +#ifdef Py_DEBUG + else + assert(ep->key == NULL || ep->key == dummy); +#endif + } + + if (table_is_malloced) + PyMem_DEL(table); +} + +/* + * Iterate over a set table. Use like so: + * + * int i; + * PyObject *key; + * i = 0; # important! i should not otherwise be changed by you + * while (set_next_internal(yourset, &i, &key)) { + * Refer to borrowed reference in key. + * } + * + * CAUTION: In general, it isn't safe to use set_next_internal in a loop that + * mutates the table. + */ +static int +set_next_internal(PySetObject *so, int *ppos, PyObject **pkey) +{ + register int i, mask; + register setentry *ep; + + assert (PyAnySet_Check(so)); + i = *ppos; + if (i < 0) + return 0; + ep = so->table; + mask = so->mask; + while (i <= mask && (ep[i].key == NULL || ep[i].key == dummy)) + i++; + *ppos = i+1; + if (i > mask) + return 0; + if (pkey) + *pkey = ep[i].key; + return 1; +} + +/* Methods */ + +static int +set_merge_internal(PySetObject *so, PyObject *b) +{ + register PySetObject *other; + register int i; + setentry *entry; + + assert (PyAnySet_Check(so)); + assert (PyAnySet_Check(b)); + + other = (PySetObject*)b; + if (other == so || other->used == 0) + /* a.update(a) or a.update({}); nothing to do */ + return 0; + /* Do one big resize at the start, rather than + * incrementally resizing as we insert new items. Expect + * that there will be no (or few) overlapping keys. + */ + if ((so->fill + other->used)*3 >= (so->mask+1)*2) { + if (set_table_resize(so, (so->used + other->used)*2) != 0) + return -1; + } + for (i = 0; i <= other->mask; i++) { + entry = &other->table[i]; + if (entry->key != NULL && + entry->key != dummy) { + Py_INCREF(entry->key); + set_insert_key(so, entry->key, entry->hash); + } + } + return 0; +} + +static int +set_contains_internal(PySetObject *so, PyObject *key) +{ + long hash; + + if (!PyString_CheckExact(key) || + (hash = ((PyStringObject *) key)->ob_shash) == -1) { + hash = PyObject_Hash(key); + if (hash == -1) + return -1; + } + key = (so->lookup)(so, key, hash)->key; + return key != NULL && key != dummy; +} + +static PyTypeObject PySetIter_Type; /* Forward */ + +/* Set iterator types */ + +typedef struct { + PyObject_HEAD + PySetObject *si_set; /* Set to NULL when iterator is exhausted */ + int si_used; + int si_pos; + long len; +} setiterobject; + +static PyObject * +set_iter(PySetObject *so) +{ + setiterobject *si = PyObject_New(setiterobject, &PySetIter_Type); + if (si == NULL) + return NULL; + Py_INCREF(so); + si->si_set = so; + si->si_used = so->used; + si->si_pos = 0; + si->len = so->used; + return (PyObject *)si; +} + +static void +setiter_dealloc(setiterobject *si) +{ + Py_XDECREF(si->si_set); + PyObject_Del(si); +} + +static int +setiter_len(setiterobject *si) +{ + if (si->si_set != NULL && si->si_used == si->si_set->used) + return si->len; + return 0; +} + +static PySequenceMethods setiter_as_sequence = { + (inquiry)setiter_len, /* sq_length */ + 0, /* sq_concat */ +}; + +static PyObject *setiter_iternextkey(setiterobject *si) +{ + PyObject *key; + register int i, mask; + register setentry *ep; + PySetObject *d = si->si_set; + + if (d == NULL) + return NULL; + assert (PyAnySet_Check(d)); + + if (si->si_used != d->used) { + PyErr_SetString(PyExc_RuntimeError, + "Set changed size during iteration"); + si->si_used = -1; /* Make this state sticky */ + return NULL; + } + + i = si->si_pos; + if (i < 0) + goto fail; + ep = d->table; + mask = d->mask; + while (i <= mask && (ep[i].key == NULL || ep[i].key == dummy)) + i++; + si->si_pos = i+1; + if (i > mask) + goto fail; + si->len--; + key = ep[i].key; + Py_INCREF(key); + return key; + +fail: + Py_DECREF(d); + si->si_set = NULL; + return NULL; +} + +PyTypeObject PySetIter_Type = { + PyObject_HEAD_INIT(&PyType_Type) + 0, /* ob_size */ + "Set-keyiterator", /* tp_name */ + sizeof(setiterobject), /* tp_basicsize */ + 0, /* tp_itemsize */ + /* methods */ + (destructor)setiter_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_compare */ + 0, /* tp_repr */ + 0, /* tp_as_number */ + &setiter_as_sequence, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + 0, /* tp_str */ + PyObject_GenericGetAttr, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT, /* tp_flags */ + 0, /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + PyObject_SelfIter, /* tp_iter */ + (iternextfunc)setiter_iternextkey, /* tp_iternext */ +}; + +/***** Derived functions (table accesses only done with above primitives *****/ + #include "structmember.h" /* set object implementation @@ -6,28 +643,37 @@ derived from sets.py written by Greg V. Wilson, Alex Martelli, Guido van Rossum, Raymond Hettinger, and Tim Peters. - Copyright (c) 2003 Python Software Foundation. + Copyright (c) 2003-5 Python Software Foundation. All rights reserved. */ static PyObject * set_update(PySetObject *so, PyObject *other) { - PyObject *item, *data, *it; + PyObject *item, *it; if (PyAnySet_Check(other)) { - if (PyDict_Merge(so->data, ((PySetObject *)other)->data, 1) == -1) + if (set_merge_internal(so, other) == -1) return NULL; Py_RETURN_NONE; } + if (PyDict_Check(other)) { + PyObject *value, *item; + int pos = 0; + while (PyDict_Next(other, &pos, &item, &value)) { + if (set_add_internal(so, item) == -1) + return NULL; + } + Py_RETURN_NONE; + } + it = PyObject_GetIter(other); if (it == NULL) return NULL; - data = so->data; while ((item = PyIter_Next(it)) != NULL) { - if (PyDict_SetItem(data, item, Py_True) == -1) { + if (set_add_internal(so, item) == -1) { Py_DECREF(it); Py_DECREF(item); return NULL; @@ -46,21 +692,22 @@ PyDoc_STRVAR(update_doc, static PyObject * make_new_set(PyTypeObject *type, PyObject *iterable) { - PyObject *data = NULL; PyObject *tmp; - PySetObject *so = NULL; + register PySetObject *so = NULL; - data = PyDict_New(); - if (data == NULL) - return NULL; + if (dummy == NULL) { /* Auto-initialize dummy */ + dummy = PyString_FromString("<dummy key>"); + if (dummy == NULL) + return NULL; + } /* create PySetObject structure */ so = (PySetObject *)type->tp_alloc(type, 0); - if (so == NULL) { - Py_DECREF(data); + if (so == NULL) return NULL; - } - so->data = data; + + EMPTY_TO_MINSIZE(so); + so->lookup = set_lookkey_string; so->hash = -1; so->weakreflist = NULL; @@ -80,10 +727,19 @@ static PyObject * frozenset_new(PyTypeObject *type, PyObject *args, PyObject *kwds) { PyObject *iterable = NULL; + static PyObject *emptyfrozenset = NULL; if (!PyArg_UnpackTuple(args, type->tp_name, 0, 1, &iterable)) return NULL; - if (iterable != NULL && PyFrozenSet_CheckExact(iterable)) { + if (iterable == NULL) { + if (type == &PyFrozenSet_Type) { + if (emptyfrozenset == NULL) + emptyfrozenset = make_new_set(type, NULL); + else + Py_INCREF(emptyfrozenset); + return emptyfrozenset; + } + } else if (PyFrozenSet_CheckExact(iterable)) { Py_INCREF(iterable); return iterable; } @@ -96,49 +752,77 @@ set_new(PyTypeObject *type, PyObject *args, PyObject *kwds) return make_new_set(type, NULL); } -static PyObject * -frozenset_dict_wrapper(PyObject *d) -{ - PySetObject *w; - - assert(PyDict_Check(d)); - w = (PySetObject *)make_new_set(&PyFrozenSet_Type, NULL); - if (w == NULL) - return NULL; - Py_CLEAR(w->data); - Py_INCREF(d); - w->data = d; - return (PyObject *)w; -} - static void set_dealloc(PySetObject *so) { + register setentry *ep; + int fill = so->fill; + PyObject_GC_UnTrack(so); + Py_TRASHCAN_SAFE_BEGIN(so) if (so->weakreflist != NULL) PyObject_ClearWeakRefs((PyObject *) so); - Py_XDECREF(so->data); + + for (ep = so->table; fill > 0; ep++) { + if (ep->key) { + --fill; + Py_DECREF(ep->key); + } + } + if (so->table != so->smalltable) + PyMem_DEL(so->table); + so->ob_type->tp_free(so); + Py_TRASHCAN_SAFE_END(so) } static int set_traverse(PySetObject *so, visitproc visit, void *arg) { - if (so->data) - return visit(so->data, arg); + int i = 0; + PyObject *pk; + + while (set_next_internal(so, &i, &pk)) + Py_VISIT(pk); return 0; } -static PyObject * -set_iter(PySetObject *so) +static int +set_len(PyObject *so) { - return PyObject_GetIter(so->data); + return ((PySetObject *)so)->used; } -static int -set_len(PySetObject *so) +static void +set_swap_bodies(PySetObject *a, PySetObject *b) { - return PyDict_Size(so->data); + int t; + setentry *u; + setentry *(*f)(PySetObject *so, PyObject *key, long hash); + setentry tab[PySet_MINSIZE]; + long h; + + t = a->fill; a->fill = b->fill; b->fill = t; + t = a->used; a->used = b->used; b->used = t; + t = a->mask; a->mask = b->mask; b->mask = t; + + u = a->table; + if (a->table == a->smalltable) + u = b->smalltable; + a->table = b->table; + if (b->table == b->smalltable) + a->table = a->smalltable; + b->table = u; + + f = a->lookup; a->lookup = b->lookup; b->lookup = f; + + if (a->table == a->smalltable || b->table == b->smalltable) { + memcpy(tab, a->smalltable, sizeof(tab)); + memcpy(a->smalltable, b->smalltable, sizeof(tab)); + memcpy(b->smalltable, tab, sizeof(tab)); + } + + h = a->hash; a->hash = b->hash; b->hash = h; } static int @@ -147,13 +831,15 @@ set_contains(PySetObject *so, PyObject *key) PyObject *tmp; int result; - result = PyDict_Contains(so->data, key); + result = set_contains_internal(so, key); if (result == -1 && PyAnySet_Check(key)) { PyErr_Clear(); - tmp = frozenset_dict_wrapper(((PySetObject *)(key))->data); + tmp = make_new_set(&PyFrozenSet_Type, NULL); if (tmp == NULL) return -1; - result = PyDict_Contains(so->data, tmp); + set_swap_bodies((PySetObject *)tmp, (PySetObject *)key); + result = set_contains_internal(so, tmp); + set_swap_bodies((PySetObject *)tmp, (PySetObject *)key); Py_DECREF(tmp); } return result; @@ -244,29 +930,23 @@ static PyObject * set_intersection(PySetObject *so, PyObject *other) { PySetObject *result; - PyObject *item, *selfdata, *tgtdata, *it, *tmp; + PyObject *item, *it, *tmp; result = (PySetObject *)make_new_set(so->ob_type, NULL); if (result == NULL) return NULL; - tgtdata = result->data; - selfdata = so->data; - if (PyAnySet_Check(other)) - other = ((PySetObject *)other)->data; - - if (PyDict_Check(other) && PyDict_Size(other) > PyDict_Size(selfdata)) { - tmp = selfdata; - selfdata = other; + if (PyAnySet_Check(other) && set_len(other) > set_len((PyObject *)so)) { + tmp = (PyObject *)so; + so = (PySetObject *)other; other = tmp; } - if (PyDict_CheckExact(other)) { - PyObject *value; + if (PyAnySet_Check(other)) { int pos = 0; - while (PyDict_Next(other, &pos, &item, &value)) { - if (PyDict_Contains(selfdata, item)) { - if (PyDict_SetItem(tgtdata, item, Py_True) == -1) { + while (set_next_internal((PySetObject *)other, &pos, &item)) { + if (set_contains_internal(so, item)) { + if (set_add_internal(result, item) == -1) { Py_DECREF(result); return NULL; } @@ -282,8 +962,8 @@ set_intersection(PySetObject *so, PyObject *other) } while ((item = PyIter_Next(it)) != NULL) { - if (PyDict_Contains(selfdata, item)) { - if (PyDict_SetItem(tgtdata, item, Py_True) == -1) { + if (set_contains_internal(so, item)) { + if (set_add_internal(result, item) == -1) { Py_DECREF(it); Py_DECREF(result); Py_DECREF(item); @@ -308,37 +988,12 @@ PyDoc_STRVAR(intersection_doc, static PyObject * set_intersection_update(PySetObject *so, PyObject *other) { - PyObject *item, *selfdata, *it, *newdict, *tmp; - - newdict = PyDict_New(); - if (newdict == NULL) - return newdict; - - it = PyObject_GetIter(other); - if (it == NULL) { - Py_DECREF(newdict); - return NULL; - } + PyObject *tmp; - selfdata = so->data; - while ((item = PyIter_Next(it)) != NULL) { - if (PyDict_Contains(selfdata, item)) { - if (PyDict_SetItem(newdict, item, Py_True) == -1) { - Py_DECREF(newdict); - Py_DECREF(it); - Py_DECREF(item); - return NULL; - } - } - Py_DECREF(item); - } - Py_DECREF(it); - if (PyErr_Occurred()) { - Py_DECREF(newdict); + tmp = set_intersection(so, other); + if (tmp == NULL) return NULL; - } - tmp = so->data; - so->data = newdict; + set_swap_bodies(so, (PySetObject *)tmp); Py_DECREF(tmp); Py_RETURN_NONE; } @@ -376,22 +1031,17 @@ set_iand(PySetObject *so, PyObject *other) static PyObject * set_difference_update(PySetObject *so, PyObject *other) { - PyObject *item, *tgtdata, *it; + PyObject *item, *it; it = PyObject_GetIter(other); if (it == NULL) return NULL; - tgtdata = so->data; while ((item = PyIter_Next(it)) != NULL) { - if (PyDict_DelItem(tgtdata, item) == -1) { - if (PyErr_ExceptionMatches(PyExc_KeyError)) - PyErr_Clear(); - else { - Py_DECREF(it); - Py_DECREF(item); - return NULL; - } + if (set_discard_internal(so, item) == -1) { + Py_DECREF(it); + Py_DECREF(item); + return NULL; } Py_DECREF(item); } @@ -407,16 +1057,10 @@ PyDoc_STRVAR(difference_update_doc, static PyObject * set_difference(PySetObject *so, PyObject *other) { - PyObject *result, *tmp; - PyObject *otherdata, *tgtdata; - PyObject *key, *value; + PyObject *tmp, *key, *result; int pos = 0; - if (PyDict_Check(other)) - otherdata = other; - else if (PyAnySet_Check(other)) - otherdata = ((PySetObject *)other)->data; - else { + if (!PyAnySet_Check(other) && !PyDict_Check(other)) { result = set_copy(so); if (result == NULL) return result; @@ -432,11 +1076,20 @@ set_difference(PySetObject *so, PyObject *other) result = make_new_set(so->ob_type, NULL); if (result == NULL) return NULL; - tgtdata = ((PySetObject *)result)->data; - while (PyDict_Next(so->data, &pos, &key, &value)) { - if (!PyDict_Contains(otherdata, key)) { - if (PyDict_SetItem(tgtdata, key, Py_True) == -1) + if (PyDict_Check(other)) { + while (set_next_internal(so, &pos, &key)) { + if (!PyDict_Contains(other, key)) { + if (set_add_internal((PySetObject *)result, key) == -1) + return NULL; + } + } + return result; + } + + while (set_next_internal(so, &pos, &key)) { + if (!set_contains_internal((PySetObject *)other, key)) { + if (set_add_internal((PySetObject *)result, key) == -1) return NULL; } } @@ -477,37 +1130,48 @@ set_isub(PySetObject *so, PyObject *other) static PyObject * set_symmetric_difference_update(PySetObject *so, PyObject *other) { - PyObject *selfdata, *otherdata; - PySetObject *otherset = NULL; - PyObject *key, *value; + PySetObject *otherset; + PyObject *key; int pos = 0; - selfdata = so->data; - if (PyDict_Check(other)) - otherdata = other; - else if (PyAnySet_Check(other)) - otherdata = ((PySetObject *)other)->data; - else { + if (PyDict_Check(other)) { + PyObject *value; + int rv; + while (PyDict_Next(other, &pos, &key, &value)) { + rv = set_discard_internal(so, key); + if (rv == -1) + return NULL; + if (rv == DISCARD_NOTFOUND) { + if (set_add_internal(so, key) == -1) + return NULL; + } + } + Py_RETURN_NONE; + } + + if (PyAnySet_Check(other)) { + Py_INCREF(other); + otherset = (PySetObject *)other; + } else { otherset = (PySetObject *)make_new_set(so->ob_type, other); if (otherset == NULL) return NULL; - otherdata = otherset->data; } - while (PyDict_Next(otherdata, &pos, &key, &value)) { - if (PyDict_Contains(selfdata, key)) { - if (PyDict_DelItem(selfdata, key) == -1) { - Py_XDECREF(otherset); - return NULL; - } - } else { - if (PyDict_SetItem(selfdata, key, Py_True) == -1) { + while (set_next_internal(otherset, &pos, &key)) { + int rv = set_discard_internal(so, key); + if (rv == -1) { + Py_XDECREF(otherset); + return NULL; + } + if (rv == DISCARD_NOTFOUND) { + if (set_add_internal(so, key) == -1) { Py_XDECREF(otherset); return NULL; } } } - Py_XDECREF(otherset); + Py_DECREF(otherset); Py_RETURN_NONE; } @@ -517,52 +1181,17 @@ PyDoc_STRVAR(symmetric_difference_update_doc, static PyObject * set_symmetric_difference(PySetObject *so, PyObject *other) { - PySetObject *result; - PyObject *selfdata, *otherdata, *tgtdata, *rv, *otherset; - PyObject *key, *value; - int pos = 0; - - if (PyDict_Check(other)) - otherdata = other; - else if (PyAnySet_Check(other)) - otherdata = ((PySetObject *)other)->data; - else { - otherset = make_new_set(so->ob_type, other); - if (otherset == NULL) - return NULL; - rv = set_symmetric_difference_update((PySetObject *)otherset, (PyObject *)so); - if (rv == NULL) - return NULL; - Py_DECREF(rv); - return otherset; - } + PyObject *rv; + PySetObject *otherset; - result = (PySetObject *)make_new_set(so->ob_type, NULL); - if (result == NULL) + otherset = (PySetObject *)make_new_set(so->ob_type, other); + if (otherset == NULL) return NULL; - tgtdata = result->data; - selfdata = so->data; - - while (PyDict_Next(otherdata, &pos, &key, &value)) { - if (!PyDict_Contains(selfdata, key)) { - if (PyDict_SetItem(tgtdata, key, Py_True) == -1) { - Py_DECREF(result); - return NULL; - } - } - } - - pos = 0; - while (PyDict_Next(selfdata, &pos, &key, &value)) { - if (!PyDict_Contains(otherdata, key)) { - if (PyDict_SetItem(tgtdata, key, Py_True) == -1) { - Py_DECREF(result); - return NULL; - } - } - } - - return (PyObject *)result; + rv = set_symmetric_difference_update(otherset, (PyObject *)so); + if (rv == NULL) + return NULL; + Py_DECREF(rv); + return (PyObject *)otherset; } PyDoc_STRVAR(symmetric_difference_doc, @@ -600,8 +1229,8 @@ set_ixor(PySetObject *so, PyObject *other) static PyObject * set_issubset(PySetObject *so, PyObject *other) { - PyObject *otherdata, *tmp, *result; - PyObject *key, *value; + PyObject *tmp, *result; + PyObject *key; int pos = 0; if (!PyAnySet_Check(other)) { @@ -612,12 +1241,11 @@ set_issubset(PySetObject *so, PyObject *other) Py_DECREF(tmp); return result; } - if (set_len(so) > set_len((PySetObject *)other)) + if (set_len((PyObject *)so) > set_len(other)) Py_RETURN_FALSE; - otherdata = ((PySetObject *)other)->data; - while (PyDict_Next(((PySetObject *)so)->data, &pos, &key, &value)) { - if (!PyDict_Contains(otherdata, key)) + while (set_next_internal(so, &pos, &key)) { + if (!set_contains_internal((PySetObject *)other, key)) Py_RETURN_FALSE; } Py_RETURN_TRUE; @@ -660,16 +1288,16 @@ set_nocmp(PyObject *self) static long frozenset_hash(PyObject *self) { + PyObject *key; PySetObject *so = (PySetObject *)self; - PyObject *key, *value; int pos = 0; long hash = 1927868237L; if (so->hash != -1) return so->hash; - hash *= (PyDict_Size(so->data) + 1); - while (PyDict_Next(so->data, &pos, &key, &value)) { + hash *= set_len(self) + 1; + while (set_next_internal(so, &pos, &key)) { /* Work to increase the bit dispersion for closely spaced hash values. The is important because some use cases have many combinations of a small number of elements with nearby @@ -688,6 +1316,8 @@ frozenset_hash(PyObject *self) static PyObject * set_richcompare(PySetObject *v, PyObject *w, int op) { + PyObject *r1, *r2; + if(!PyAnySet_Check(w)) { if (op == Py_EQ) Py_RETURN_FALSE; @@ -698,21 +1328,29 @@ set_richcompare(PySetObject *v, PyObject *w, int op) } switch (op) { case Py_EQ: + if (set_len((PyObject *)v) != set_len(w)) + Py_RETURN_FALSE; + return set_issubset(v, w); case Py_NE: - return PyObject_RichCompare(((PySetObject *)v)->data, - ((PySetObject *)w)->data, op); + if (set_len((PyObject *)v) != set_len(w)) + Py_RETURN_TRUE; + r1 = set_issubset(v, w); + assert (r1 != NULL); + r2 = PyBool_FromLong(PyObject_Not(r1)); + Py_DECREF(r1); + return r2; case Py_LE: - return set_issubset((PySetObject *)v, w); + return set_issubset(v, w); case Py_GE: - return set_issuperset((PySetObject *)v, w); + return set_issuperset(v, w); case Py_LT: - if (set_len(v) >= set_len((PySetObject *)w)) + if (set_len((PyObject *)v) >= set_len(w)) Py_RETURN_FALSE; - return set_issubset((PySetObject *)v, w); + return set_issubset(v, w); case Py_GT: - if (set_len(v) <= set_len((PySetObject *)w)) + if (set_len((PyObject *)v) <= set_len(w)) Py_RETURN_FALSE; - return set_issuperset((PySetObject *)v, w); + return set_issuperset(v, w); } Py_INCREF(Py_NotImplemented); return Py_NotImplemented; @@ -723,7 +1361,7 @@ set_repr(PySetObject *so) { PyObject *keys, *result, *listrepr; - keys = PyDict_Keys(so->data); + keys = PySequence_List((PyObject *)so); if (keys == NULL) return NULL; listrepr = PyObject_Repr(keys); @@ -740,13 +1378,13 @@ set_repr(PySetObject *so) static int set_tp_print(PySetObject *so, FILE *fp, int flags) { - PyObject *key, *value; + PyObject *key; int pos=0; char *emit = ""; /* No separator emitted on first pass */ char *separator = ", "; fprintf(fp, "%s([", so->ob_type->tp_name); - while (PyDict_Next(so->data, &pos, &key, &value)) { + while (set_next_internal(so, &pos, &key)) { fputs(emit, fp); emit = separator; if (PyObject_Print(key, fp, 0) != 0) @@ -759,7 +1397,7 @@ set_tp_print(PySetObject *so, FILE *fp, int flags) static PyObject * set_clear(PySetObject *so) { - PyDict_Clear(so->data); + set_clear_internal(so); so->hash = -1; Py_RETURN_NONE; } @@ -769,7 +1407,7 @@ PyDoc_STRVAR(clear_doc, "Remove all elements from this set."); static int set_tp_clear(PySetObject *so) { - PyDict_Clear(so->data); + set_clear_internal(so); so->hash = -1; return 0; } @@ -777,7 +1415,7 @@ set_tp_clear(PySetObject *so) static PyObject * set_add(PySetObject *so, PyObject *item) { - if (PyDict_SetItem(so->data, item, Py_True) == -1) + if (set_add_internal(so, item) == -1) return NULL; Py_RETURN_NONE; } @@ -791,18 +1429,26 @@ static PyObject * set_remove(PySetObject *so, PyObject *item) { PyObject *tmp, *result; + int rv; if (PyType_IsSubtype(item->ob_type, &PySet_Type)) { - tmp = frozenset_dict_wrapper(((PySetObject *)(item))->data); + tmp = make_new_set(&PyFrozenSet_Type, NULL); if (tmp == NULL) return NULL; + set_swap_bodies((PySetObject *)item, (PySetObject *)tmp); result = set_remove(so, tmp); + set_swap_bodies((PySetObject *)item, (PySetObject *)tmp); Py_DECREF(tmp); return result; } - if (PyDict_DelItem(so->data, item) == -1) + rv = set_discard_internal(so, item); + if (rv == -1) return NULL; + else if (rv == DISCARD_NOTFOUND) { + PyErr_SetObject(PyExc_KeyError, item); + return NULL; + } Py_RETURN_NONE; } @@ -817,19 +1463,18 @@ set_discard(PySetObject *so, PyObject *item) PyObject *tmp, *result; if (PyType_IsSubtype(item->ob_type, &PySet_Type)) { - tmp = frozenset_dict_wrapper(((PySetObject *)(item))->data); + tmp = make_new_set(&PyFrozenSet_Type, NULL); if (tmp == NULL) return NULL; + set_swap_bodies((PySetObject *)item, (PySetObject *)tmp); result = set_discard(so, tmp); + set_swap_bodies((PySetObject *)item, (PySetObject *)tmp); Py_DECREF(tmp); return result; } - if (PyDict_DelItem(so->data, item) == -1) { - if (!PyErr_ExceptionMatches(PyExc_KeyError)) - return NULL; - PyErr_Clear(); - } + if (set_discard_internal(so, item) == -1) + return NULL; Py_RETURN_NONE; } @@ -841,16 +1486,23 @@ If the element is not a member, do nothing."); static PyObject * set_pop(PySetObject *so) { - PyObject *key, *value; + PyObject *key; int pos = 0; + int rv; - if (!PyDict_Next(so->data, &pos, &key, &value)) { + if (!set_next_internal(so, &pos, &key)) { PyErr_SetString(PyExc_KeyError, "pop from an empty set"); return NULL; } Py_INCREF(key); - if (PyDict_DelItem(so->data, key) == -1) { + + rv = set_discard_internal(so, key); + if (rv == -1) { + Py_DECREF(key); + return NULL; + } else if (rv == DISCARD_NOTFOUND) { Py_DECREF(key); + PyErr_SetObject(PyExc_KeyError, key); return NULL; } return key; @@ -863,7 +1515,7 @@ set_reduce(PySetObject *so) { PyObject *keys=NULL, *args=NULL, *result=NULL, *dict=NULL; - keys = PyDict_Keys(so->data); + keys = PySequence_List((PyObject *)so); if (keys == NULL) goto done; args = PyTuple_Pack(1, keys); @@ -895,7 +1547,7 @@ set_init(PySetObject *self, PyObject *args, PyObject *kwds) return -1; if (!PyArg_UnpackTuple(args, self->ob_type->tp_name, 0, 1, &iterable)) return -1; - PyDict_Clear(self->data); + set_clear_internal(self); self->hash = -1; if (iterable == NULL) return 0; @@ -1025,13 +1677,13 @@ PyTypeObject PySet_Type = { 0, /* tp_setattro */ 0, /* tp_as_buffer */ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_CHECKTYPES | - Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_WEAKREFS, /* tp_flags */ + Py_TPFLAGS_BASETYPE, /* tp_flags */ set_doc, /* tp_doc */ (traverseproc)set_traverse, /* tp_traverse */ (inquiry)set_tp_clear, /* tp_clear */ (richcmpfunc)set_richcompare, /* tp_richcompare */ offsetof(PySetObject, weakreflist), /* tp_weaklistoffset */ - (getiterfunc)set_iter, /* tp_iter */ + (getiterfunc)set_iter, /* tp_iter */ 0, /* tp_iternext */ set_methods, /* tp_methods */ 0, /* tp_members */ @@ -1120,7 +1772,7 @@ PyTypeObject PyFrozenSet_Type = { 0, /* tp_setattro */ 0, /* tp_as_buffer */ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_CHECKTYPES | - Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_WEAKREFS, /* tp_flags */ + Py_TPFLAGS_BASETYPE, /* tp_flags */ frozenset_doc, /* tp_doc */ (traverseproc)set_traverse, /* tp_traverse */ 0, /* tp_clear */ |