summaryrefslogtreecommitdiffstats
path: root/Objects
diff options
context:
space:
mode:
authorRaymond Hettinger <python@rcn.com>2003-11-20 22:54:33 (GMT)
committerRaymond Hettinger <python@rcn.com>2003-11-20 22:54:33 (GMT)
commit82d73dd459d911dcb12a48db12b7f8b3eb0f6b1b (patch)
treeb217ea12af549fcf9c1ab085884797a22d77331f /Objects
parentdff9dbdb38b38a94ac14eefc16e17437b118dfd6 (diff)
downloadcpython-82d73dd459d911dcb12a48db12b7f8b3eb0f6b1b.zip
cpython-82d73dd459d911dcb12a48db12b7f8b3eb0f6b1b.tar.gz
cpython-82d73dd459d911dcb12a48db12b7f8b3eb0f6b1b.tar.bz2
Three minor performance improvements:
* Improve the hash function to increase the chance that distinct sets will have distinct xor'd hash totals. * Use PyDict_Merge where possible (it is faster than an equivalent iter/set pair). * Don't rebuild dictionaries where the input already has one.
Diffstat (limited to 'Objects')
-rw-r--r--Objects/setobject.c53
1 files changed, 41 insertions, 12 deletions
diff --git a/Objects/setobject.c b/Objects/setobject.c
index 2a53cba..d7190b5 100644
--- a/Objects/setobject.c
+++ b/Objects/setobject.c
@@ -138,6 +138,15 @@ set_union(PySetObject *so, PyObject *other)
result = (PySetObject *)set_copy(so);
if (result == NULL)
return NULL;
+
+ if (PyAnySet_Check(other)) {
+ if (PyDict_Merge(result->data, ((PySetObject *)other)->data, 0) == -1) {
+ Py_DECREF(result);
+ return NULL;
+ }
+ return (PyObject *)result;
+ }
+
it = PyObject_GetIter(other);
if (it == NULL) {
Py_DECREF(result);
@@ -171,6 +180,13 @@ set_union_update(PySetObject *so, PyObject *other)
{
PyObject *item, *data, *it;
+ if (PyAnySet_Check(other)) {
+ if (PyDict_Merge(so->data, ((PySetObject *)other)->data, 0) == -1)
+ return NULL;
+ Py_INCREF(so);
+ return (PyObject *)so;
+ }
+
it = PyObject_GetIter(other);
if (it == NULL)
return NULL;
@@ -434,7 +450,7 @@ set_isub(PySetObject *so, PyObject *other)
static PyObject *
set_symmetric_difference(PySetObject *so, PyObject *other)
{
- PySetObject *result, *otherset;
+ PySetObject *result, *otherset=NULL;
PyObject *item, *selfdata, *otherdata, *tgtdata, *it;
selfdata = so->data;
@@ -444,16 +460,22 @@ set_symmetric_difference(PySetObject *so, PyObject *other)
return NULL;
tgtdata = result->data;
- otherset = (PySetObject *)make_new_set(so->ob_type, other);
- if (otherset == NULL) {
- Py_DECREF(result);
- return NULL;
- }
- otherdata = otherset->data;
+ if (PyDict_Check(other))
+ otherdata = other;
+ else if (PyAnySet_Check(other))
+ otherdata = ((PySetObject *)other)->data;
+ else {
+ otherset = (PySetObject *)make_new_set(so->ob_type, other);
+ if (otherset == NULL) {
+ Py_DECREF(result);
+ return NULL;
+ }
+ otherdata = otherset->data;
+ }
it = PyObject_GetIter(otherdata);
if (it == NULL) {
- Py_DECREF(otherset);
+ Py_XDECREF(otherset);
Py_DECREF(result);
return NULL;
}
@@ -463,7 +485,7 @@ set_symmetric_difference(PySetObject *so, PyObject *other)
PyErr_Clear();
if (PyDict_SetItem(tgtdata, item, Py_True) == -1) {
Py_DECREF(it);
- Py_DECREF(otherset);
+ Py_XDECREF(otherset);
Py_DECREF(result);
Py_DECREF(item);
return NULL;
@@ -472,7 +494,7 @@ set_symmetric_difference(PySetObject *so, PyObject *other)
Py_DECREF(item);
}
Py_DECREF(it);
- Py_DECREF(otherset);
+ Py_XDECREF(otherset);
if (PyErr_Occurred()) {
Py_DECREF(result);
return NULL;
@@ -627,7 +649,7 @@ frozenset_hash(PyObject *self)
{
PyObject *it, *item;
PySetObject *so = (PySetObject *)self;
- long hash = 0;
+ long hash = 0, x;
if (so->hash != -1)
return so->hash;
@@ -637,7 +659,14 @@ frozenset_hash(PyObject *self)
return -1;
while ((item = PyIter_Next(it)) != NULL) {
- hash ^= PyObject_Hash(item);
+ x = PyObject_Hash(item);
+ /* Applying x*(x+1) breaks-up linear relationships so that
+ h(1) ^ h(2) will be less likely to coincide with hash(3).
+ Multiplying by a large prime increases the dispersion
+ between consecutive hashes. Adding one bit from the
+ original restores the one bit lost during the multiply
+ (all the products are even numbers). */
+ hash ^= (x * (x+1) * 3644798167) | (x&1);
Py_DECREF(item);
}
Py_DECREF(it);