summaryrefslogtreecommitdiffstats
path: root/Objects/setobject.c
diff options
context:
space:
mode:
authorRaymond Hettinger <python@rcn.com>2015-07-04 00:21:17 (GMT)
committerRaymond Hettinger <python@rcn.com>2015-07-04 00:21:17 (GMT)
commit15f08696096a80d026ed21164f892a661fc72e98 (patch)
tree5c8b25b7c0924225da104a3ca6aa7b96df4d4bd1 /Objects/setobject.c
parentdf418b67aba4a31ba9fc8a80372d58b82b768b72 (diff)
downloadcpython-15f08696096a80d026ed21164f892a661fc72e98.zip
cpython-15f08696096a80d026ed21164f892a661fc72e98.tar.gz
cpython-15f08696096a80d026ed21164f892a661fc72e98.tar.bz2
Move insertion resize logic into set_insert_key().
Simplifies the code a little bit and does the resize check only when a new key is added (giving a small speed up in the case where the key already exists). Fixes possible bug in set_merge() where the set_insert_key() call relies on a big resize at the start to make enough room for the keys but is vulnerable to a comparision callback that could cause the table to shrink in the middle of the merge. Also, changed the resize threshold from two-thirds of the mask+1 to just two-thirds. The plus one offset gave no real benefit (afterall, the two-thirds mark is just a heuristic and isn't a precise cut-off).
Diffstat (limited to 'Objects/setobject.c')
-rw-r--r--Objects/setobject.c69
1 files changed, 24 insertions, 45 deletions
diff --git a/Objects/setobject.c b/Objects/setobject.c
index 09d1129..56084c1 100644
--- a/Objects/setobject.c
+++ b/Objects/setobject.c
@@ -125,6 +125,8 @@ set_lookkey(PySetObject *so, PyObject *key, Py_hash_t hash)
}
}
+static int set_table_resize(PySetObject *, Py_ssize_t);
+
static int
set_insert_key(PySetObject *so, PyObject *key, Py_hash_t hash)
{
@@ -139,7 +141,7 @@ set_insert_key(PySetObject *so, PyObject *key, Py_hash_t hash)
entry = &table[i];
if (entry->key == NULL)
- goto found_null_first;
+ goto found_unused;
freeslot = NULL;
perturb = hash;
@@ -173,7 +175,7 @@ set_insert_key(PySetObject *so, PyObject *key, Py_hash_t hash)
for (j = 0 ; j < LINEAR_PROBES ; j++) {
entry++;
if (entry->hash == 0 && entry->key == NULL)
- goto found_null;
+ goto found_unused_or_dummy;
if (entry->hash == hash) {
PyObject *startkey = entry->key;
assert(startkey != dummy);
@@ -204,30 +206,27 @@ set_insert_key(PySetObject *so, PyObject *key, Py_hash_t hash)
entry = &table[i];
if (entry->key == NULL)
- goto found_null;
+ goto found_unused_or_dummy;
}
- found_null_first:
+ found_unused_or_dummy:
+ if (freeslot == NULL)
+ goto found_unused;
Py_INCREF(key);
- so->fill++;
so->used++;
- entry->key = key;
- entry->hash = hash;
+ freeslot->key = key;
+ freeslot->hash = hash;
return 0;
- found_null:
+ found_unused:
Py_INCREF(key);
- if (freeslot == NULL) {
- /* UNUSED */
- so->fill++;
- } else {
- /* DUMMY */
- entry = freeslot;
- }
+ so->fill++;
so->used++;
entry->key = key;
entry->hash = hash;
- return 0;
+ if ((size_t)so->fill*3 < mask*2)
+ return 0;
+ return set_table_resize(so, so->used>50000 ? so->used*2 : so->used*4);
found_active:
return 0;
@@ -366,28 +365,15 @@ set_table_resize(PySetObject *so, Py_ssize_t minused)
return 0;
}
-/* CAUTION: set_add_key/entry() must guarantee it won't resize the table */
-
static int
set_add_entry(PySetObject *so, setentry *entry)
{
- Py_ssize_t n_used;
- PyObject *key = entry->key;
- Py_hash_t hash = entry->hash;
-
- assert(so->fill <= so->mask); /* at least one empty slot */
- n_used = so->used;
- if (set_insert_key(so, key, hash))
- return -1;
- if (!(so->used > n_used && so->fill*3 >= (so->mask+1)*2))
- return 0;
- return set_table_resize(so, so->used>50000 ? so->used*2 : so->used*4);
+ return set_insert_key(so, entry->key, entry->hash);
}
static int
set_add_key(PySetObject *so, PyObject *key)
{
- setentry entry;
Py_hash_t hash;
if (!PyUnicode_CheckExact(key) ||
@@ -396,9 +382,7 @@ set_add_key(PySetObject *so, PyObject *key)
if (hash == -1)
return -1;
}
- entry.key = key;
- entry.hash = hash;
- return set_add_entry(so, &entry);
+ return set_insert_key(so, key, hash);
}
#define DISCARD_NOTFOUND 0
@@ -631,7 +615,7 @@ set_merge(PySetObject *so, PyObject *otherset)
* incrementally resizing as we insert new keys. Expect
* that there will be no (or few) overlapping keys.
*/
- if ((so->fill + other->used)*3 >= (so->mask+1)*2) {
+ if ((so->fill + other->used)*3 >= so->mask*2) {
if (set_table_resize(so, (so->used + other->used)*2) != 0)
return -1;
}
@@ -979,16 +963,12 @@ set_update_internal(PySetObject *so, PyObject *other)
*/
if (dictsize == -1)
return -1;
- if ((so->fill + dictsize)*3 >= (so->mask+1)*2) {
+ if ((so->fill + dictsize)*3 >= so->mask*2) {
if (set_table_resize(so, (so->used + dictsize)*2) != 0)
return -1;
}
while (_PyDict_Next(other, &pos, &key, &value, &hash)) {
- setentry an_entry;
-
- an_entry.hash = hash;
- an_entry.key = key;
- if (set_add_entry(so, &an_entry))
+ if (set_insert_key(so, key, hash))
return -1;
}
return 0;
@@ -1583,17 +1563,16 @@ set_difference(PySetObject *so, PyObject *other)
if (PyDict_CheckExact(other)) {
while (set_next(so, &pos, &entry)) {
- setentry entrycopy;
+ PyObject *key = entry->key;
+ Py_hash_t hash = entry->hash;
int rv;
- entrycopy.hash = entry->hash;
- entrycopy.key = entry->key;
- rv = _PyDict_Contains(other, entry->key, entry->hash);
+ rv = _PyDict_Contains(other, key, hash);
if (rv < 0) {
Py_DECREF(result);
return NULL;
}
if (!rv) {
- if (set_add_entry((PySetObject *)result, &entrycopy)) {
+ if (set_insert_key((PySetObject *)result, key, hash)) {
Py_DECREF(result);
return NULL;
}