diff options
author | Raymond Hettinger <python@rcn.com> | 2005-08-07 13:02:53 (GMT) |
---|---|---|
committer | Raymond Hettinger <python@rcn.com> | 2005-08-07 13:02:53 (GMT) |
commit | bc841a1464b53ddbaa989e4cae97024fbe111abf (patch) | |
tree | 426c7eaff8772527ca64cb47f64bd71de938fd3b | |
parent | e9fe7e0ef33f2a0c8da7079f3e6878c88975a8a1 (diff) | |
download | cpython-bc841a1464b53ddbaa989e4cae97024fbe111abf.zip cpython-bc841a1464b53ddbaa989e4cae97024fbe111abf.tar.gz cpython-bc841a1464b53ddbaa989e4cae97024fbe111abf.tar.bz2 |
* Bring in INIT_NONZERO_SET_SLOTS macro from dictionary code.
* Bring in free list from dictionary code.
* Improve several comments.
* Differencing can leave many dummy entries. If more than
1/6 are dummies, then resize them away.
* Factor-out common code with new macro, PyAnySet_CheckExact.
-rw-r--r-- | Include/setobject.h | 6 | ||||
-rw-r--r-- | Objects/setobject.c | 69 |
2 files changed, 56 insertions, 19 deletions
diff --git a/Include/setobject.h b/Include/setobject.h index f720a82..6a829f0 100644 --- a/Include/setobject.h +++ b/Include/setobject.h @@ -59,12 +59,16 @@ struct _setobject { PyAPI_DATA(PyTypeObject) PySet_Type; PyAPI_DATA(PyTypeObject) PyFrozenSet_Type; -/* Invariants for frozensets only: +/* Invariants for frozensets: * data is immutable. * hash is the hash of the frozenset or -1 if not computed yet. + * Invariants for sets: + * hash is -1 */ #define PyFrozenSet_CheckExact(ob) ((ob)->ob_type == &PyFrozenSet_Type) +#define PyAnySet_CheckExact(ob) \ + ((ob)->ob_type == &PySet_Type || (ob)->ob_type == &PyFrozenSet_Type) #define PyAnySet_Check(ob) \ ((ob)->ob_type == &PySet_Type || (ob)->ob_type == &PyFrozenSet_Type || \ PyType_IsSubtype((ob)->ob_type, &PySet_Type) || \ diff --git a/Objects/setobject.c b/Objects/setobject.c index 771d512..54b1c28 100644 --- a/Objects/setobject.c +++ b/Objects/setobject.c @@ -15,13 +15,22 @@ /* Object used as dummy key to fill deleted entries */ static PyObject *dummy = NULL; /* Initialized by first call to make_new_set() */ +#define INIT_NONZERO_SET_SLOTS(so) do { \ + (so)->table = (so)->smalltable; \ + (so)->mask = PySet_MINSIZE - 1; \ + (so)->hash = -1; \ + } while(0) + #define EMPTY_TO_MINSIZE(so) do { \ memset((so)->smalltable, 0, sizeof((so)->smalltable)); \ (so)->used = (so)->fill = 0; \ - (so)->table = (so)->smalltable; \ - (so)->mask = PySet_MINSIZE - 1; \ + INIT_NONZERO_SET_SLOTS(so); \ } while(0) +/* Reuse scheme to save calls to malloc, free, and memset */ +#define MAXFREESETS 80 +static PySetObject *free_sets[MAXFREESETS]; +static int num_free_sets = 0; /* The basic lookup function used by all operations. @@ -30,13 +39,15 @@ Open addressing is preferred over chaining since the link overhead for chaining would be substantial (100% with typical malloc overhead). The initial probe index is computed as hash mod the table size. Subsequent -probe indices are computed as explained earlier. +probe indices are computed as explained in Objects/dictobject.c. All arithmetic on hash should ignore overflow. -This function must never return NULL; failures are indicated by returning -a setentry* for which the value field is NULL. Exceptions are never -reported by this function, and outstanding exceptions are maintained. +The lookup function always succeeds and nevers return NULL. This simplifies +and speeds client functions which do won't have to test for and handle +errors. To meet that requirement, any errors generated by a user defined +__cmp__() function are simply cleared and ignored. +Previously outstanding exceptions are maintained. */ static setentry * @@ -187,7 +198,7 @@ set_lookkey_string(PySetObject *so, PyObject *key, register long hash) freeslot = entry; } } else { - /* Simplified loop that can assume are no dummy entries */ + /* Simplified loop when there are no dummy entries. */ if (entry->hash == hash && _PyString_Eq(entry->key, key)) return entry; for (perturb = hash; ; perturb >>= PERTURB_SHIFT) { @@ -347,7 +358,7 @@ set_add_internal(register PySetObject *so, PyObject *key) set_insert_key(so, key, hash); if (!(so->used > n_used && so->fill*3 >= (so->mask+1)*2)) return 0; - return set_table_resize(so, so->used*(so->used>50000 ? 2 : 4)); + return set_table_resize(so, so->used>50000 ? so->used*2 : so->used*4); } #define DISCARD_NOTFOUND 0 @@ -439,7 +450,6 @@ set_clear_internal(PySetObject *so) if (table_is_malloced) PyMem_DEL(table); - so->hash = -1; return 0; } @@ -710,16 +720,24 @@ make_new_set(PyTypeObject *type, PyObject *iterable) } /* create PySetObject structure */ - so = (PySetObject *)type->tp_alloc(type, 0); - if (so == NULL) - return NULL; + if (num_free_sets && + (type == &PySet_Type || type == &PyFrozenSet_Type)) { + so = free_sets[--num_free_sets]; + assert (so != NULL && PyAnySet_CheckExact(so)); + so->ob_type = type; + _Py_NewReference((PyObject *)so); + EMPTY_TO_MINSIZE(so); + PyObject_GC_Track(so); + } else { + so = (PySetObject *)type->tp_alloc(type, 0); + if (so == NULL) + return NULL; + /* tp_alloc has already zeroed the structure */ + assert(so->table == NULL && so->fill == 0 && so->used == 0); + INIT_NONZERO_SET_SLOTS(so); + } - /* tp_alloc has already zeroed the structure */ - assert(so->table == NULL && so->fill == 0 && so->used == 0); - so->table = so->smalltable; - so->mask = PySet_MINSIZE - 1; so->lookup = set_lookkey_string; - so->hash = -1; so->weakreflist = NULL; if (iterable != NULL) { @@ -767,6 +785,13 @@ frozenset_new(PyTypeObject *type, PyObject *args, PyObject *kwds) void PySet_Fini(void) { + PySetObject *so; + + while (num_free_sets) { + num_free_sets--; + so = free_sets[num_free_sets]; + PyObject_GC_Del(so); + } Py_XDECREF(dummy); Py_XDECREF(emptyfrozenset); } @@ -797,7 +822,10 @@ set_dealloc(PySetObject *so) if (so->table != so->smalltable) PyMem_DEL(so->table); - so->ob_type->tp_free(so); + if (num_free_sets < MAXFREESETS && PyAnySet_CheckExact(so)) + free_sets[num_free_sets++] = so; + else + so->ob_type->tp_free(so); Py_TRASHCAN_SAFE_END(so) } @@ -1079,6 +1107,11 @@ set_difference_update(PySetObject *so, PyObject *other) Py_DECREF(it); if (PyErr_Occurred()) return NULL; + /* If more than 1/6 are dummies, then resize them away. */ + if ((so->fill - so->used) * 6 < so->mask) + Py_RETURN_NONE; + if (set_table_resize(so, so->used>50000 ? so->used*2 : so->used*4) == -1) + return NULL; Py_RETURN_NONE; } |