summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorRaymond Hettinger <python@rcn.com>2003-11-23 02:49:05 (GMT)
committerRaymond Hettinger <python@rcn.com>2003-11-23 02:49:05 (GMT)
commit49ba4c39c49ff6b1db666e6c90365fc1361c4a64 (patch)
tree442b73b24c7d6590eb24f4b4d3b1a95465a23c23
parentbaf0f8f24da7a541a403cd9848cebc451beb069d (diff)
downloadcpython-49ba4c39c49ff6b1db666e6c90365fc1361c4a64.zip
cpython-49ba4c39c49ff6b1db666e6c90365fc1361c4a64.tar.gz
cpython-49ba4c39c49ff6b1db666e6c90365fc1361c4a64.tar.bz2
* Simplify hash function and add test to show effectiveness of the hash
function. * Add a better test for deepcopying. * Add tests to show the __init__() function works like it does for list and tuple. Add related test. * Have shallow copies of frozensets return self. Add related test. * Have frozenset(f) return f if f is already a frozenset. Add related test. * Beefed-up some existing tests.
-rw-r--r--Lib/test/test_set.py85
-rw-r--r--Objects/setobject.c40
2 files changed, 98 insertions, 27 deletions
diff --git a/Lib/test/test_set.py b/Lib/test/test_set.py
index 85f87f7..939e490 100644
--- a/Lib/test/test_set.py
+++ b/Lib/test/test_set.py
@@ -38,15 +38,11 @@ class TestJointOps(unittest.TestCase):
s = self.thetype([frozenset(self.letters)])
self.assert_(self.thetype(self.letters) in s)
- def test_copy(self):
- dup = self.s.copy()
- self.assertEqual(self.s, dup)
- self.assertNotEqual(id(self.s), id(dup))
-
def test_union(self):
u = self.s.union(self.otherword)
for c in self.letters:
self.assertEqual(c in u, c in self.d or c in self.otherword)
+ self.assertEqual(self.s, self.thetype(self.word))
self.assertEqual(type(u), self.thetype)
self.assertRaises(PassThru, self.s.union, check_pass_thru())
self.assertRaises(TypeError, self.s.union, [[]])
@@ -66,6 +62,7 @@ class TestJointOps(unittest.TestCase):
i = self.s.intersection(self.otherword)
for c in self.letters:
self.assertEqual(c in i, c in self.d and c in self.otherword)
+ self.assertEqual(self.s, self.thetype(self.word))
self.assertEqual(type(i), self.thetype)
self.assertRaises(PassThru, self.s.intersection, check_pass_thru())
@@ -84,6 +81,7 @@ class TestJointOps(unittest.TestCase):
i = self.s.difference(self.otherword)
for c in self.letters:
self.assertEqual(c in i, c in self.d and c not in self.otherword)
+ self.assertEqual(self.s, self.thetype(self.word))
self.assertEqual(type(i), self.thetype)
self.assertRaises(PassThru, self.s.difference, check_pass_thru())
self.assertRaises(TypeError, self.s.difference, [[]])
@@ -103,6 +101,7 @@ class TestJointOps(unittest.TestCase):
i = self.s.symmetric_difference(self.otherword)
for c in self.letters:
self.assertEqual(c in i, (c in self.d) ^ (c in self.otherword))
+ self.assertEqual(self.s, self.thetype(self.word))
self.assertEqual(type(i), self.thetype)
self.assertRaises(PassThru, self.s.symmetric_difference, check_pass_thru())
self.assertRaises(TypeError, self.s.symmetric_difference, [[]])
@@ -155,16 +154,38 @@ class TestJointOps(unittest.TestCase):
dup = pickle.loads(p)
self.assertEqual(self.s, dup, "%s != %s" % (self.s, dup))
+ def test_deepcopy(self):
+ class Tracer:
+ def __init__(self, value):
+ self.value = value
+ def __hash__(self):
+ return self.value
+ def __deepcopy__(self, memo=None):
+ return Tracer(self.value + 1)
+ t = Tracer(10)
+ s = self.thetype([t])
+ dup = copy.deepcopy(s)
+ self.assertNotEqual(id(s), id(dup))
+ for elem in dup:
+ newt = elem
+ self.assertNotEqual(id(t), id(newt))
+ self.assertEqual(t.value + 1, newt.value)
+
class TestSet(TestJointOps):
thetype = set
def test_init(self):
- s = set()
+ s = self.thetype()
s.__init__(self.word)
self.assertEqual(s, set(self.word))
s.__init__(self.otherword)
self.assertEqual(s, set(self.otherword))
+ def test_constructor_identity(self):
+ s = self.thetype(range(3))
+ t = self.thetype(s)
+ self.assertNotEqual(id(s), id(t))
+
def test_hash(self):
self.assertRaises(TypeError, hash, self.s)
@@ -172,6 +193,11 @@ class TestSet(TestJointOps):
self.s.clear()
self.assertEqual(self.s, set([]))
+ def test_copy(self):
+ dup = self.s.copy()
+ self.assertEqual(self.s, dup)
+ self.assertNotEqual(id(self.s), id(dup))
+
def test_add(self):
self.s.add('Q')
self.assert_('Q' in self.s)
@@ -285,17 +311,27 @@ class TestFrozenSet(TestJointOps):
thetype = frozenset
def test_init(self):
- s = frozenset()
- s.__init__(self.word)
- self.assertEqual(s, frozenset())
+ s = self.thetype(self.word)
+ s.__init__(self.otherword)
+ self.assertEqual(s, set(self.word))
+
+ def test_constructor_identity(self):
+ s = self.thetype(range(3))
+ t = self.thetype(s)
+ self.assertEqual(id(s), id(t))
def test_hash(self):
- self.assertEqual(hash(frozenset('abcdeb')), hash(frozenset('ebecda')))
+ self.assertEqual(hash(self.thetype('abcdeb')),
+ hash(self.thetype('ebecda')))
+
+ def test_copy(self):
+ dup = self.s.copy()
+ self.assertEqual(id(self.s), id(dup))
def test_frozen_as_dictkey(self):
seq = range(10) + list('abcdefg') + ['apple']
- key1 = frozenset(seq)
- key2 = frozenset(reversed(seq))
+ key1 = self.thetype(seq)
+ key2 = self.thetype(reversed(seq))
self.assertEqual(key1, key2)
self.assertNotEqual(id(key1), id(key2))
d = {}
@@ -303,15 +339,38 @@ class TestFrozenSet(TestJointOps):
self.assertEqual(d[key2], 42)
def test_hash_caching(self):
- f = frozenset('abcdcda')
+ f = self.thetype('abcdcda')
self.assertEqual(hash(f), hash(f))
+ def test_hash_effectiveness(self):
+ n = 13
+ rng = range(n)
+ hashvalues = set()
+ for i in xrange(2**n):
+ combination = [j for j in rng if (1<<j)&i]
+ hashvalues.add(hash(self.thetype(combination)))
+ self.assert_(len(hashvalues) >= 2**(n-2))
+
class FrozenSetSubclass(frozenset):
pass
class TestFrozenSetSubclass(TestFrozenSet):
thetype = FrozenSetSubclass
+ def test_constructor_identity(self):
+ s = self.thetype(range(3))
+ t = self.thetype(s)
+ self.assertNotEqual(id(s), id(t))
+
+ def test_copy(self):
+ dup = self.s.copy()
+ self.assertNotEqual(id(self.s), id(dup))
+
+ def test_nested_empty_constructor(self):
+ s = self.thetype()
+ t = self.thetype(s)
+ self.assertEqual(s, t)
+
# Tests taken from test_sets.py =============================================
empty_set = set()
diff --git a/Objects/setobject.c b/Objects/setobject.c
index be73954..fab07fb 100644
--- a/Objects/setobject.c
+++ b/Objects/setobject.c
@@ -64,6 +64,10 @@ frozenset_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
if (!PyArg_UnpackTuple(args, type->tp_name, 0, 1, &iterable))
return NULL;
+ if (iterable != NULL && iterable->ob_type == &PyFrozenSet_Type) {
+ Py_INCREF(iterable);
+ return iterable;
+ }
return make_new_set(type, iterable);
}
@@ -154,6 +158,16 @@ set_copy(PySetObject *so)
return (PyObject *)newso;
}
+static PyObject *
+frozenset_copy(PySetObject *so)
+{
+ if (so->ob_type == &PyFrozenSet_Type) {
+ Py_INCREF(so);
+ return (PyObject *)so;
+ }
+ return set_copy(so);
+}
+
PyDoc_STRVAR(copy_doc, "Return a shallow copy of a set.");
static PyObject *
@@ -686,7 +700,7 @@ frozenset_hash(PyObject *self)
{
PyObject *it, *item;
PySetObject *so = (PySetObject *)self;
- long hash = 0, x;
+ long hash = 0;
if (so->hash != -1)
return so->hash;
@@ -696,14 +710,12 @@ frozenset_hash(PyObject *self)
return -1;
while ((item = PyIter_Next(it)) != NULL) {
- x = PyObject_Hash(item);
- /* Applying x*(x+1) breaks-up linear relationships so that
- h(1) ^ h(2) will be less likely to coincide with hash(3).
- Multiplying by a large prime increases the dispersion
- between consecutive hashes. Adding one bit from the
- original restores the one bit lost during the multiply
- (all the products are even numbers). */
- hash ^= (x * (x+1) * 3644798167) | (x&1);
+ /* Multiplying by a large prime increases the bit dispersion for
+ closely spaced hash values. The is important because some
+ use cases have many combinations of a small number of
+ elements with nearby hashes so that many distinct combinations
+ collapse to only a handful of distinct hash values. */
+ hash ^= PyObject_Hash(item) * 3644798167;
Py_DECREF(item);
}
Py_DECREF(it);
@@ -1096,17 +1108,17 @@ PyTypeObject PySet_Type = {
static PyMethodDef frozenset_methods[] = {
- {"copy", (PyCFunction)set_copy, METH_NOARGS,
+ {"copy", (PyCFunction)frozenset_copy, METH_NOARGS,
copy_doc},
- {"__copy__", (PyCFunction)set_copy, METH_NOARGS,
+ {"__copy__", (PyCFunction)frozenset_copy, METH_NOARGS,
copy_doc},
- {"difference",(PyCFunction)set_difference, METH_O,
+ {"difference", (PyCFunction)set_difference, METH_O,
difference_doc},
{"intersection",(PyCFunction)set_intersection, METH_O,
intersection_doc},
- {"issubset",(PyCFunction)set_issubset, METH_O,
+ {"issubset", (PyCFunction)set_issubset, METH_O,
issubset_doc},
- {"issuperset",(PyCFunction)set_issuperset, METH_O,
+ {"issuperset", (PyCFunction)set_issuperset, METH_O,
issuperset_doc},
{"__reduce__", (PyCFunction)set_reduce, METH_NOARGS,
reduce_doc},