diff options
author | Raymond Hettinger <python@rcn.com> | 2003-11-23 02:49:05 (GMT) |
---|---|---|
committer | Raymond Hettinger <python@rcn.com> | 2003-11-23 02:49:05 (GMT) |
commit | 49ba4c39c49ff6b1db666e6c90365fc1361c4a64 (patch) | |
tree | 442b73b24c7d6590eb24f4b4d3b1a95465a23c23 | |
parent | baf0f8f24da7a541a403cd9848cebc451beb069d (diff) | |
download | cpython-49ba4c39c49ff6b1db666e6c90365fc1361c4a64.zip cpython-49ba4c39c49ff6b1db666e6c90365fc1361c4a64.tar.gz cpython-49ba4c39c49ff6b1db666e6c90365fc1361c4a64.tar.bz2 |
* Simplify hash function and add test to show effectiveness of the hash
function.
* Add a better test for deepcopying.
* Add tests to show the __init__() function works like it does for list
and tuple. Add related test.
* Have shallow copies of frozensets return self. Add related test.
* Have frozenset(f) return f if f is already a frozenset. Add related test.
* Beefed-up some existing tests.
-rw-r--r-- | Lib/test/test_set.py | 85 | ||||
-rw-r--r-- | Objects/setobject.c | 40 |
2 files changed, 98 insertions, 27 deletions
diff --git a/Lib/test/test_set.py b/Lib/test/test_set.py index 85f87f7..939e490 100644 --- a/Lib/test/test_set.py +++ b/Lib/test/test_set.py @@ -38,15 +38,11 @@ class TestJointOps(unittest.TestCase): s = self.thetype([frozenset(self.letters)]) self.assert_(self.thetype(self.letters) in s) - def test_copy(self): - dup = self.s.copy() - self.assertEqual(self.s, dup) - self.assertNotEqual(id(self.s), id(dup)) - def test_union(self): u = self.s.union(self.otherword) for c in self.letters: self.assertEqual(c in u, c in self.d or c in self.otherword) + self.assertEqual(self.s, self.thetype(self.word)) self.assertEqual(type(u), self.thetype) self.assertRaises(PassThru, self.s.union, check_pass_thru()) self.assertRaises(TypeError, self.s.union, [[]]) @@ -66,6 +62,7 @@ class TestJointOps(unittest.TestCase): i = self.s.intersection(self.otherword) for c in self.letters: self.assertEqual(c in i, c in self.d and c in self.otherword) + self.assertEqual(self.s, self.thetype(self.word)) self.assertEqual(type(i), self.thetype) self.assertRaises(PassThru, self.s.intersection, check_pass_thru()) @@ -84,6 +81,7 @@ class TestJointOps(unittest.TestCase): i = self.s.difference(self.otherword) for c in self.letters: self.assertEqual(c in i, c in self.d and c not in self.otherword) + self.assertEqual(self.s, self.thetype(self.word)) self.assertEqual(type(i), self.thetype) self.assertRaises(PassThru, self.s.difference, check_pass_thru()) self.assertRaises(TypeError, self.s.difference, [[]]) @@ -103,6 +101,7 @@ class TestJointOps(unittest.TestCase): i = self.s.symmetric_difference(self.otherword) for c in self.letters: self.assertEqual(c in i, (c in self.d) ^ (c in self.otherword)) + self.assertEqual(self.s, self.thetype(self.word)) self.assertEqual(type(i), self.thetype) self.assertRaises(PassThru, self.s.symmetric_difference, check_pass_thru()) self.assertRaises(TypeError, self.s.symmetric_difference, [[]]) @@ -155,16 +154,38 @@ class TestJointOps(unittest.TestCase): dup = pickle.loads(p) self.assertEqual(self.s, dup, "%s != %s" % (self.s, dup)) + def test_deepcopy(self): + class Tracer: + def __init__(self, value): + self.value = value + def __hash__(self): + return self.value + def __deepcopy__(self, memo=None): + return Tracer(self.value + 1) + t = Tracer(10) + s = self.thetype([t]) + dup = copy.deepcopy(s) + self.assertNotEqual(id(s), id(dup)) + for elem in dup: + newt = elem + self.assertNotEqual(id(t), id(newt)) + self.assertEqual(t.value + 1, newt.value) + class TestSet(TestJointOps): thetype = set def test_init(self): - s = set() + s = self.thetype() s.__init__(self.word) self.assertEqual(s, set(self.word)) s.__init__(self.otherword) self.assertEqual(s, set(self.otherword)) + def test_constructor_identity(self): + s = self.thetype(range(3)) + t = self.thetype(s) + self.assertNotEqual(id(s), id(t)) + def test_hash(self): self.assertRaises(TypeError, hash, self.s) @@ -172,6 +193,11 @@ class TestSet(TestJointOps): self.s.clear() self.assertEqual(self.s, set([])) + def test_copy(self): + dup = self.s.copy() + self.assertEqual(self.s, dup) + self.assertNotEqual(id(self.s), id(dup)) + def test_add(self): self.s.add('Q') self.assert_('Q' in self.s) @@ -285,17 +311,27 @@ class TestFrozenSet(TestJointOps): thetype = frozenset def test_init(self): - s = frozenset() - s.__init__(self.word) - self.assertEqual(s, frozenset()) + s = self.thetype(self.word) + s.__init__(self.otherword) + self.assertEqual(s, set(self.word)) + + def test_constructor_identity(self): + s = self.thetype(range(3)) + t = self.thetype(s) + self.assertEqual(id(s), id(t)) def test_hash(self): - self.assertEqual(hash(frozenset('abcdeb')), hash(frozenset('ebecda'))) + self.assertEqual(hash(self.thetype('abcdeb')), + hash(self.thetype('ebecda'))) + + def test_copy(self): + dup = self.s.copy() + self.assertEqual(id(self.s), id(dup)) def test_frozen_as_dictkey(self): seq = range(10) + list('abcdefg') + ['apple'] - key1 = frozenset(seq) - key2 = frozenset(reversed(seq)) + key1 = self.thetype(seq) + key2 = self.thetype(reversed(seq)) self.assertEqual(key1, key2) self.assertNotEqual(id(key1), id(key2)) d = {} @@ -303,15 +339,38 @@ class TestFrozenSet(TestJointOps): self.assertEqual(d[key2], 42) def test_hash_caching(self): - f = frozenset('abcdcda') + f = self.thetype('abcdcda') self.assertEqual(hash(f), hash(f)) + def test_hash_effectiveness(self): + n = 13 + rng = range(n) + hashvalues = set() + for i in xrange(2**n): + combination = [j for j in rng if (1<<j)&i] + hashvalues.add(hash(self.thetype(combination))) + self.assert_(len(hashvalues) >= 2**(n-2)) + class FrozenSetSubclass(frozenset): pass class TestFrozenSetSubclass(TestFrozenSet): thetype = FrozenSetSubclass + def test_constructor_identity(self): + s = self.thetype(range(3)) + t = self.thetype(s) + self.assertNotEqual(id(s), id(t)) + + def test_copy(self): + dup = self.s.copy() + self.assertNotEqual(id(self.s), id(dup)) + + def test_nested_empty_constructor(self): + s = self.thetype() + t = self.thetype(s) + self.assertEqual(s, t) + # Tests taken from test_sets.py ============================================= empty_set = set() diff --git a/Objects/setobject.c b/Objects/setobject.c index be73954..fab07fb 100644 --- a/Objects/setobject.c +++ b/Objects/setobject.c @@ -64,6 +64,10 @@ frozenset_new(PyTypeObject *type, PyObject *args, PyObject *kwds) if (!PyArg_UnpackTuple(args, type->tp_name, 0, 1, &iterable)) return NULL; + if (iterable != NULL && iterable->ob_type == &PyFrozenSet_Type) { + Py_INCREF(iterable); + return iterable; + } return make_new_set(type, iterable); } @@ -154,6 +158,16 @@ set_copy(PySetObject *so) return (PyObject *)newso; } +static PyObject * +frozenset_copy(PySetObject *so) +{ + if (so->ob_type == &PyFrozenSet_Type) { + Py_INCREF(so); + return (PyObject *)so; + } + return set_copy(so); +} + PyDoc_STRVAR(copy_doc, "Return a shallow copy of a set."); static PyObject * @@ -686,7 +700,7 @@ frozenset_hash(PyObject *self) { PyObject *it, *item; PySetObject *so = (PySetObject *)self; - long hash = 0, x; + long hash = 0; if (so->hash != -1) return so->hash; @@ -696,14 +710,12 @@ frozenset_hash(PyObject *self) return -1; while ((item = PyIter_Next(it)) != NULL) { - x = PyObject_Hash(item); - /* Applying x*(x+1) breaks-up linear relationships so that - h(1) ^ h(2) will be less likely to coincide with hash(3). - Multiplying by a large prime increases the dispersion - between consecutive hashes. Adding one bit from the - original restores the one bit lost during the multiply - (all the products are even numbers). */ - hash ^= (x * (x+1) * 3644798167) | (x&1); + /* Multiplying by a large prime increases the bit dispersion for + closely spaced hash values. The is important because some + use cases have many combinations of a small number of + elements with nearby hashes so that many distinct combinations + collapse to only a handful of distinct hash values. */ + hash ^= PyObject_Hash(item) * 3644798167; Py_DECREF(item); } Py_DECREF(it); @@ -1096,17 +1108,17 @@ PyTypeObject PySet_Type = { static PyMethodDef frozenset_methods[] = { - {"copy", (PyCFunction)set_copy, METH_NOARGS, + {"copy", (PyCFunction)frozenset_copy, METH_NOARGS, copy_doc}, - {"__copy__", (PyCFunction)set_copy, METH_NOARGS, + {"__copy__", (PyCFunction)frozenset_copy, METH_NOARGS, copy_doc}, - {"difference",(PyCFunction)set_difference, METH_O, + {"difference", (PyCFunction)set_difference, METH_O, difference_doc}, {"intersection",(PyCFunction)set_intersection, METH_O, intersection_doc}, - {"issubset",(PyCFunction)set_issubset, METH_O, + {"issubset", (PyCFunction)set_issubset, METH_O, issubset_doc}, - {"issuperset",(PyCFunction)set_issuperset, METH_O, + {"issuperset", (PyCFunction)set_issuperset, METH_O, issuperset_doc}, {"__reduce__", (PyCFunction)set_reduce, METH_NOARGS, reduce_doc}, |