summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorRaymond Hettinger <python@rcn.com>2005-08-07 13:02:53 (GMT)
committerRaymond Hettinger <python@rcn.com>2005-08-07 13:02:53 (GMT)
commitbc841a1464b53ddbaa989e4cae97024fbe111abf (patch)
tree426c7eaff8772527ca64cb47f64bd71de938fd3b
parente9fe7e0ef33f2a0c8da7079f3e6878c88975a8a1 (diff)
downloadcpython-bc841a1464b53ddbaa989e4cae97024fbe111abf.zip
cpython-bc841a1464b53ddbaa989e4cae97024fbe111abf.tar.gz
cpython-bc841a1464b53ddbaa989e4cae97024fbe111abf.tar.bz2
* Bring in INIT_NONZERO_SET_SLOTS macro from dictionary code.
* Bring in free list from dictionary code. * Improve several comments. * Differencing can leave many dummy entries. If more than 1/6 are dummies, then resize them away. * Factor-out common code with new macro, PyAnySet_CheckExact.
-rw-r--r--Include/setobject.h6
-rw-r--r--Objects/setobject.c69
2 files changed, 56 insertions, 19 deletions
diff --git a/Include/setobject.h b/Include/setobject.h
index f720a82..6a829f0 100644
--- a/Include/setobject.h
+++ b/Include/setobject.h
@@ -59,12 +59,16 @@ struct _setobject {
PyAPI_DATA(PyTypeObject) PySet_Type;
PyAPI_DATA(PyTypeObject) PyFrozenSet_Type;
-/* Invariants for frozensets only:
+/* Invariants for frozensets:
* data is immutable.
* hash is the hash of the frozenset or -1 if not computed yet.
+ * Invariants for sets:
+ * hash is -1
*/
#define PyFrozenSet_CheckExact(ob) ((ob)->ob_type == &PyFrozenSet_Type)
+#define PyAnySet_CheckExact(ob) \
+ ((ob)->ob_type == &PySet_Type || (ob)->ob_type == &PyFrozenSet_Type)
#define PyAnySet_Check(ob) \
((ob)->ob_type == &PySet_Type || (ob)->ob_type == &PyFrozenSet_Type || \
PyType_IsSubtype((ob)->ob_type, &PySet_Type) || \
diff --git a/Objects/setobject.c b/Objects/setobject.c
index 771d512..54b1c28 100644
--- a/Objects/setobject.c
+++ b/Objects/setobject.c
@@ -15,13 +15,22 @@
/* Object used as dummy key to fill deleted entries */
static PyObject *dummy = NULL; /* Initialized by first call to make_new_set() */
+#define INIT_NONZERO_SET_SLOTS(so) do { \
+ (so)->table = (so)->smalltable; \
+ (so)->mask = PySet_MINSIZE - 1; \
+ (so)->hash = -1; \
+ } while(0)
+
#define EMPTY_TO_MINSIZE(so) do { \
memset((so)->smalltable, 0, sizeof((so)->smalltable)); \
(so)->used = (so)->fill = 0; \
- (so)->table = (so)->smalltable; \
- (so)->mask = PySet_MINSIZE - 1; \
+ INIT_NONZERO_SET_SLOTS(so); \
} while(0)
+/* Reuse scheme to save calls to malloc, free, and memset */
+#define MAXFREESETS 80
+static PySetObject *free_sets[MAXFREESETS];
+static int num_free_sets = 0;
/*
The basic lookup function used by all operations.
@@ -30,13 +39,15 @@ Open addressing is preferred over chaining since the link overhead for
chaining would be substantial (100% with typical malloc overhead).
The initial probe index is computed as hash mod the table size. Subsequent
-probe indices are computed as explained earlier.
+probe indices are computed as explained in Objects/dictobject.c.
All arithmetic on hash should ignore overflow.
-This function must never return NULL; failures are indicated by returning
-a setentry* for which the value field is NULL. Exceptions are never
-reported by this function, and outstanding exceptions are maintained.
+The lookup function always succeeds and nevers return NULL. This simplifies
+and speeds client functions which do won't have to test for and handle
+errors. To meet that requirement, any errors generated by a user defined
+__cmp__() function are simply cleared and ignored.
+Previously outstanding exceptions are maintained.
*/
static setentry *
@@ -187,7 +198,7 @@ set_lookkey_string(PySetObject *so, PyObject *key, register long hash)
freeslot = entry;
}
} else {
- /* Simplified loop that can assume are no dummy entries */
+ /* Simplified loop when there are no dummy entries. */
if (entry->hash == hash && _PyString_Eq(entry->key, key))
return entry;
for (perturb = hash; ; perturb >>= PERTURB_SHIFT) {
@@ -347,7 +358,7 @@ set_add_internal(register PySetObject *so, PyObject *key)
set_insert_key(so, key, hash);
if (!(so->used > n_used && so->fill*3 >= (so->mask+1)*2))
return 0;
- return set_table_resize(so, so->used*(so->used>50000 ? 2 : 4));
+ return set_table_resize(so, so->used>50000 ? so->used*2 : so->used*4);
}
#define DISCARD_NOTFOUND 0
@@ -439,7 +450,6 @@ set_clear_internal(PySetObject *so)
if (table_is_malloced)
PyMem_DEL(table);
- so->hash = -1;
return 0;
}
@@ -710,16 +720,24 @@ make_new_set(PyTypeObject *type, PyObject *iterable)
}
/* create PySetObject structure */
- so = (PySetObject *)type->tp_alloc(type, 0);
- if (so == NULL)
- return NULL;
+ if (num_free_sets &&
+ (type == &PySet_Type || type == &PyFrozenSet_Type)) {
+ so = free_sets[--num_free_sets];
+ assert (so != NULL && PyAnySet_CheckExact(so));
+ so->ob_type = type;
+ _Py_NewReference((PyObject *)so);
+ EMPTY_TO_MINSIZE(so);
+ PyObject_GC_Track(so);
+ } else {
+ so = (PySetObject *)type->tp_alloc(type, 0);
+ if (so == NULL)
+ return NULL;
+ /* tp_alloc has already zeroed the structure */
+ assert(so->table == NULL && so->fill == 0 && so->used == 0);
+ INIT_NONZERO_SET_SLOTS(so);
+ }
- /* tp_alloc has already zeroed the structure */
- assert(so->table == NULL && so->fill == 0 && so->used == 0);
- so->table = so->smalltable;
- so->mask = PySet_MINSIZE - 1;
so->lookup = set_lookkey_string;
- so->hash = -1;
so->weakreflist = NULL;
if (iterable != NULL) {
@@ -767,6 +785,13 @@ frozenset_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
void
PySet_Fini(void)
{
+ PySetObject *so;
+
+ while (num_free_sets) {
+ num_free_sets--;
+ so = free_sets[num_free_sets];
+ PyObject_GC_Del(so);
+ }
Py_XDECREF(dummy);
Py_XDECREF(emptyfrozenset);
}
@@ -797,7 +822,10 @@ set_dealloc(PySetObject *so)
if (so->table != so->smalltable)
PyMem_DEL(so->table);
- so->ob_type->tp_free(so);
+ if (num_free_sets < MAXFREESETS && PyAnySet_CheckExact(so))
+ free_sets[num_free_sets++] = so;
+ else
+ so->ob_type->tp_free(so);
Py_TRASHCAN_SAFE_END(so)
}
@@ -1079,6 +1107,11 @@ set_difference_update(PySetObject *so, PyObject *other)
Py_DECREF(it);
if (PyErr_Occurred())
return NULL;
+ /* If more than 1/6 are dummies, then resize them away. */
+ if ((so->fill - so->used) * 6 < so->mask)
+ Py_RETURN_NONE;
+ if (set_table_resize(so, so->used>50000 ? so->used*2 : so->used*4) == -1)
+ return NULL;
Py_RETURN_NONE;
}