diff options
author | Raymond Hettinger <python@rcn.com> | 2005-08-11 07:58:45 (GMT) |
---|---|---|
committer | Raymond Hettinger <python@rcn.com> | 2005-08-11 07:58:45 (GMT) |
commit | c991db240ca8ea838c6acb0c8dc5300dca23c39e (patch) | |
tree | b9b4faf8eb589db00390913a6f6d9af56f5df84a | |
parent | 9f3ae3e69d436f474bf46b802d514013807bca18 (diff) | |
download | cpython-c991db240ca8ea838c6acb0c8dc5300dca23c39e.zip cpython-c991db240ca8ea838c6acb0c8dc5300dca23c39e.tar.gz cpython-c991db240ca8ea838c6acb0c8dc5300dca23c39e.tar.bz2 |
* Add short-circuit code for in-place operations with self (such as
s|=s, s&=s, s-=s, or s^=s). Add related tests.
* Improve names for several variables and functions.
* Provide alternate table access functions (next, contains, add, and discard)
that work with an entry argument instead of just a key. This improves
set-vs-set operations because we already have a hash value for each key
and can avoid unnecessary calls to PyObject_Hash(). Provides a 5% to 20%
speed-up for quick hashing elements like strings and integers. Provides
much more substantial improvements for slow hashing elements like tuples
or objects defining a custom __hash__() function.
* Have difference operations resize() when 1/5 of the elements are dummies.
Formerly, it was 1/6. The new ratio triggers less frequently and only
in cases that it can resize quicker and with greater benefit. The right
answer is probably either 1/4, 1/5, or 1/6. Picked the middle value for
an even trade-off between resize time and the space/time costs of dummy
entries.
-rw-r--r-- | Lib/test/test_set.py | 12 | ||||
-rw-r--r-- | Objects/setobject.c | 242 |
2 files changed, 165 insertions, 89 deletions
diff --git a/Lib/test/test_set.py b/Lib/test/test_set.py index cfb17b0..a21d53c 100644 --- a/Lib/test/test_set.py +++ b/Lib/test/test_set.py @@ -370,6 +370,18 @@ class TestSet(TestJointOps): else: self.assert_(c not in self.s) + def test_inplace_on_self(self): + t = self.s.copy() + t |= t + self.assertEqual(t, self.s) + t &= t + self.assertEqual(t, self.s) + t -= t + self.assertEqual(t, self.thetype()) + t = self.s.copy() + t ^= t + self.assertEqual(t, self.thetype()) + def test_weakref(self): s = self.thetype('gallahad') p = proxy(s) diff --git a/Objects/setobject.c b/Objects/setobject.c index 54b1c28..4fd3672 100644 --- a/Objects/setobject.c +++ b/Objects/setobject.c @@ -1,3 +1,4 @@ + /* set object implementation Written and maintained by Raymond D. Hettinger <python@rcn.com> Derived from Lib/sets.py and Objects/dictobject.c. @@ -226,7 +227,6 @@ set_insert_key(register PySetObject *so, PyObject *key, long hash) typedef setentry *(*lookupfunc)(PySetObject *, PyObject *, long); assert(so->lookup != NULL); - entry = so->lookup(so, key, hash); if (entry->key == NULL) { /* UNUSED */ @@ -336,18 +336,30 @@ set_table_resize(PySetObject *so, int minused) return 0; } -/* CAUTION: set_add_internal() must guarantee that it won't resize the table */ +/* CAUTION: set_add_key/entry() must guarantee it won't resize the table */ + +static int +set_add_entry(register PySetObject *so, setentry *entry) +{ + register int n_used; + + assert(so->fill <= so->mask); /* at least one empty slot */ + n_used = so->used; + Py_INCREF(entry->key); + set_insert_key(so, entry->key, entry->hash); + if (!(so->used > n_used && so->fill*3 >= (so->mask+1)*2)) + return 0; + return set_table_resize(so, so->used>50000 ? so->used*2 : so->used*4); +} + static int -set_add_internal(register PySetObject *so, PyObject *key) +set_add_key(register PySetObject *so, PyObject *key) { register long hash; register int n_used; - if (PyString_CheckExact(key)) { - hash = ((PyStringObject *)key)->ob_shash; - if (hash == -1) - hash = PyObject_Hash(key); - } else { + if (!PyString_CheckExact(key) || + (hash = ((PyStringObject *) key)->ob_shash) == -1) { hash = PyObject_Hash(key); if (hash == -1) return -1; @@ -365,7 +377,23 @@ set_add_internal(register PySetObject *so, PyObject *key) #define DISCARD_FOUND 1 static int -set_discard_internal(PySetObject *so, PyObject *key) +set_discard_entry(PySetObject *so, setentry *oldentry) +{ register setentry *entry; + PyObject *old_key; + + entry = (so->lookup)(so, oldentry->key, oldentry->hash); + if (entry->key == NULL || entry->key == dummy) + return DISCARD_NOTFOUND; + old_key = entry->key; + Py_INCREF(dummy); + entry->key = dummy; + so->used--; + Py_DECREF(old_key); + return DISCARD_FOUND; +} + +static int +set_discard_key(PySetObject *so, PyObject *key) { register long hash; register setentry *entry; @@ -457,39 +485,39 @@ set_clear_internal(PySetObject *so) * Iterate over a set table. Use like so: * * int pos; - * PyObject *key; + * setentry *entry; * pos = 0; # important! pos should not otherwise be changed by you - * while (set_next_internal(yourset, &pos, &key)) { - * Refer to borrowed reference in key. + * while (set_next(yourset, &pos, &entry)) { + * Refer to borrowed reference in entry->key. * } * - * CAUTION: In general, it isn't safe to use set_next_internal in a loop that + * CAUTION: In general, it isn't safe to use set_next in a loop that * mutates the table. */ static int -set_next_internal(PySetObject *so, int *pos, PyObject **key) +set_next(PySetObject *so, int *pos_ptr, setentry **entry_ptr) { register int i, mask; - register setentry *entry; + register setentry *table; assert (PyAnySet_Check(so)); - i = *pos; + i = *pos_ptr; if (i < 0) return 0; - entry = so->table; + table = so->table; mask = so->mask; - while (i <= mask && (entry[i].key == NULL || entry[i].key == dummy)) + while (i <= mask && (table[i].key == NULL || table[i].key == dummy)) i++; - *pos = i+1; + *pos_ptr = i+1; if (i > mask) return 0; - if (key) - *key = entry[i].key; + if (table[i].key) + *entry_ptr = &table[i]; return 1; } static int -set_merge_internal(PySetObject *so, PyObject *otherset) +set_merge(PySetObject *so, PyObject *otherset) { PySetObject *other; register int i; @@ -525,7 +553,7 @@ set_merge_internal(PySetObject *so, PyObject *otherset) } static int -set_contains_internal(PySetObject *so, PyObject *key) +set_contains_key(PySetObject *so, PyObject *key) { long hash; @@ -539,6 +567,15 @@ set_contains_internal(PySetObject *so, PyObject *key) return key != NULL && key != dummy; } +static int +set_contains_entry(PySetObject *so, setentry *entry) +{ + PyObject *key; + + key = (so->lookup)(so, entry->key, entry->hash)->key; + return key != NULL && key != dummy; +} + /***** Set iterator type ***********************************************/ static PyTypeObject PySetIter_Type; /* Forward */ @@ -667,13 +704,13 @@ set_update_internal(PySetObject *so, PyObject *other) PyObject *key, *it; if (PyAnySet_Check(other)) - return set_merge_internal(so, other); + return set_merge(so, other); if (PyDict_Check(other)) { PyObject *key, *value; int pos = 0; while (PyDict_Next(other, &pos, &key, &value)) { - if (set_add_internal(so, key) == -1) + if (set_add_key(so, key) == -1) return -1; } return 0; @@ -684,7 +721,7 @@ set_update_internal(PySetObject *so, PyObject *other) return -1; while ((key = PyIter_Next(it)) != NULL) { - if (set_add_internal(so, key) == -1) { + if (set_add_key(so, key) == -1) { Py_DECREF(it); Py_DECREF(key); return -1; @@ -833,10 +870,10 @@ static int set_traverse(PySetObject *so, visitproc visit, void *arg) { int pos = 0; - PyObject *key; + setentry *entry; - while (set_next_internal(so, &pos, &key)) - Py_VISIT(key); + while (set_next(so, &pos, &entry)) + Py_VISIT(entry->key); return 0; } @@ -897,14 +934,14 @@ set_contains(PySetObject *so, PyObject *key) PyObject *tmpkey; int result; - result = set_contains_internal(so, key); + result = set_contains_key(so, key); if (result == -1 && PyAnySet_Check(key)) { PyErr_Clear(); tmpkey = make_new_set(&PyFrozenSet_Type, NULL); if (tmpkey == NULL) return -1; set_swap_bodies((PySetObject *)tmpkey, (PySetObject *)key); - result = set_contains_internal(so, tmpkey); + result = set_contains_key(so, tmpkey); set_swap_bodies((PySetObject *)tmpkey, (PySetObject *)key); Py_DECREF(tmpkey); } @@ -943,6 +980,15 @@ frozenset_copy(PySetObject *so) PyDoc_STRVAR(copy_doc, "Return a shallow copy of a set."); static PyObject * +set_clear(PySetObject *so) +{ + set_clear_internal(so); + Py_RETURN_NONE; +} + +PyDoc_STRVAR(clear_doc, "Remove all elements from this set."); + +static PyObject * set_union(PySetObject *so, PyObject *other) { PySetObject *result; @@ -991,6 +1037,11 @@ set_intersection(PySetObject *so, PyObject *other) PySetObject *result; PyObject *key, *it, *tmp; + if ((PyObject *)so == other) { + Py_INCREF(other); + return other; + } + result = (PySetObject *)make_new_set(so->ob_type, NULL); if (result == NULL) return NULL; @@ -1001,11 +1052,12 @@ set_intersection(PySetObject *so, PyObject *other) other = tmp; } - if (PyAnySet_Check(other)) { + if (PyAnySet_Check(other)) { int pos = 0; - while (set_next_internal((PySetObject *)other, &pos, &key)) { - if (set_contains_internal(so, key)) { - if (set_add_internal(result, key) == -1) { + setentry *entry; + while (set_next((PySetObject *)other, &pos, &entry)) { + if (set_contains_entry(so, entry)) { + if (set_add_entry(result, entry) == -1) { Py_DECREF(result); return NULL; } @@ -1021,8 +1073,8 @@ set_intersection(PySetObject *so, PyObject *other) } while ((key = PyIter_Next(it)) != NULL) { - if (set_contains_internal(so, key)) { - if (set_add_internal(result, key) == -1) { + if (set_contains_key(so, key)) { + if (set_add_key(result, key) == -1) { Py_DECREF(it); Py_DECREF(result); Py_DECREF(key); @@ -1087,32 +1139,48 @@ set_iand(PySetObject *so, PyObject *other) return (PyObject *)so; } -static PyObject * -set_difference_update(PySetObject *so, PyObject *other) +int +set_difference_update_internal(PySetObject *so, PyObject *other) { - PyObject *key, *it; + if ((PyObject *)so == other) + return set_clear_internal(so); - it = PyObject_GetIter(other); - if (it == NULL) - return NULL; + if (PyAnySet_Check(other)) { + setentry *entry; + int pos = 0; - while ((key = PyIter_Next(it)) != NULL) { - if (set_discard_internal(so, key) == -1) { - Py_DECREF(it); + while (set_next((PySetObject *)other, &pos, &entry)) + set_discard_entry(so, entry); + } else { + PyObject *key, *it; + it = PyObject_GetIter(other); + if (it == NULL) + return -1; + + while ((key = PyIter_Next(it)) != NULL) { + if (set_discard_key(so, key) == -1) { + Py_DECREF(it); + Py_DECREF(key); + return -1; + } Py_DECREF(key); - return NULL; } - Py_DECREF(key); + Py_DECREF(it); + if (PyErr_Occurred()) + return -1; } - Py_DECREF(it); - if (PyErr_Occurred()) - return NULL; - /* If more than 1/6 are dummies, then resize them away. */ - if ((so->fill - so->used) * 6 < so->mask) + /* If more than 1/5 are dummies, then resize them away. */ + if ((so->fill - so->used) * 5 < so->mask) + return 0; + return set_table_resize(so, so->used>50000 ? so->used*2 : so->used*4); +} + +static PyObject * +set_difference_update(PySetObject *so, PyObject *other) +{ + if (set_difference_update_internal(so, other) != -1) Py_RETURN_NONE; - if (set_table_resize(so, so->used>50000 ? so->used*2 : so->used*4) == -1) - return NULL; - Py_RETURN_NONE; + return NULL; } PyDoc_STRVAR(difference_update_doc, @@ -1121,18 +1189,16 @@ PyDoc_STRVAR(difference_update_doc, static PyObject * set_difference(PySetObject *so, PyObject *other) { - PyObject *tmp, *key, *result; + PyObject *result; + setentry *entry; int pos = 0; if (!PyAnySet_Check(other) && !PyDict_Check(other)) { result = set_copy(so); if (result == NULL) + return NULL; + if (set_difference_update_internal((PySetObject *)result, other) != -1) return result; - tmp = set_difference_update((PySetObject *)result, other); - if (tmp != NULL) { - Py_DECREF(tmp); - return result; - } Py_DECREF(result); return NULL; } @@ -1142,18 +1208,21 @@ set_difference(PySetObject *so, PyObject *other) return NULL; if (PyDict_Check(other)) { - while (set_next_internal(so, &pos, &key)) { - if (!PyDict_Contains(other, key)) { - if (set_add_internal((PySetObject *)result, key) == -1) + while (set_next(so, &pos, &entry)) { + setentry entrycopy; + entrycopy.hash = entry->hash; + entrycopy.key = entry->key; + if (!PyDict_Contains(other, entry->key)) { + if (set_add_entry((PySetObject *)result, &entrycopy) == -1) return NULL; } } return result; } - while (set_next_internal(so, &pos, &key)) { - if (!set_contains_internal((PySetObject *)other, key)) { - if (set_add_internal((PySetObject *)result, key) == -1) + while (set_next(so, &pos, &entry)) { + if (!set_contains_entry((PySetObject *)other, entry)) { + if (set_add_entry((PySetObject *)result, entry) == -1) return NULL; } } @@ -1197,16 +1266,20 @@ set_symmetric_difference_update(PySetObject *so, PyObject *other) PySetObject *otherset; PyObject *key; int pos = 0; + setentry *entry; + + if ((PyObject *)so == other) + return set_clear(so); if (PyDict_Check(other)) { PyObject *value; int rv; while (PyDict_Next(other, &pos, &key, &value)) { - rv = set_discard_internal(so, key); + rv = set_discard_key(so, key); if (rv == -1) return NULL; if (rv == DISCARD_NOTFOUND) { - if (set_add_internal(so, key) == -1) + if (set_add_key(so, key) == -1) return NULL; } } @@ -1222,14 +1295,14 @@ set_symmetric_difference_update(PySetObject *so, PyObject *other) return NULL; } - while (set_next_internal(otherset, &pos, &key)) { - int rv = set_discard_internal(so, key); + while (set_next(otherset, &pos, &entry)) { + int rv = set_discard_entry(so, entry); if (rv == -1) { Py_XDECREF(otherset); return NULL; } if (rv == DISCARD_NOTFOUND) { - if (set_add_internal(so, key) == -1) { + if (set_add_entry(so, entry) == -1) { Py_XDECREF(otherset); return NULL; } @@ -1312,7 +1385,7 @@ set_issubset(PySetObject *so, PyObject *other) for (i=so->used ; i ; entry++, i--) { while (entry->key == NULL || entry->key==dummy) entry++; - if (!set_contains_internal((PySetObject *)other, entry->key)) + if (!set_contains_entry((PySetObject *)other, entry)) Py_RETURN_FALSE; } Py_RETURN_TRUE; @@ -1448,16 +1521,16 @@ set_repr(PySetObject *so) static int set_tp_print(PySetObject *so, FILE *fp, int flags) { - PyObject *key; + setentry *entry; int pos=0; char *emit = ""; /* No separator emitted on first pass */ char *separator = ", "; fprintf(fp, "%s([", so->ob_type->tp_name); - while (set_next_internal(so, &pos, &key)) { + while (set_next(so, &pos, &entry)) { fputs(emit, fp); emit = separator; - if (PyObject_Print(key, fp, 0) != 0) + if (PyObject_Print(entry->key, fp, 0) != 0) return -1; } fputs("])", fp); @@ -1465,18 +1538,9 @@ set_tp_print(PySetObject *so, FILE *fp, int flags) } static PyObject * -set_clear(PySetObject *so) -{ - set_clear_internal(so); - Py_RETURN_NONE; -} - -PyDoc_STRVAR(clear_doc, "Remove all elements from this set."); - -static PyObject * set_add(PySetObject *so, PyObject *key) { - if (set_add_internal(so, key) == -1) + if (set_add_key(so, key) == -1) return NULL; Py_RETURN_NONE; } @@ -1503,7 +1567,7 @@ set_remove(PySetObject *so, PyObject *key) return result; } - rv = set_discard_internal(so, key); + rv = set_discard_key(so, key); if (rv == -1) return NULL; else if (rv == DISCARD_NOTFOUND) { @@ -1534,7 +1598,7 @@ set_discard(PySetObject *so, PyObject *key) return result; } - if (set_discard_internal(so, key) == -1) + if (set_discard_key(so, key) == -1) return NULL; Py_RETURN_NONE; } |