diff options
author | Raymond Hettinger <python@rcn.com> | 2015-07-04 00:21:17 (GMT) |
---|---|---|
committer | Raymond Hettinger <python@rcn.com> | 2015-07-04 00:21:17 (GMT) |
commit | 15f08696096a80d026ed21164f892a661fc72e98 (patch) | |
tree | 5c8b25b7c0924225da104a3ca6aa7b96df4d4bd1 /Objects/setobject.c | |
parent | df418b67aba4a31ba9fc8a80372d58b82b768b72 (diff) | |
download | cpython-15f08696096a80d026ed21164f892a661fc72e98.zip cpython-15f08696096a80d026ed21164f892a661fc72e98.tar.gz cpython-15f08696096a80d026ed21164f892a661fc72e98.tar.bz2 |
Move insertion resize logic into set_insert_key().
Simplifies the code a little bit and does the resize check
only when a new key is added (giving a small speed up in
the case where the key already exists).
Fixes possible bug in set_merge() where the set_insert_key()
call relies on a big resize at the start to make enough room
for the keys but is vulnerable to a comparision callback that
could cause the table to shrink in the middle of the merge.
Also, changed the resize threshold from two-thirds of the
mask+1 to just two-thirds. The plus one offset gave no
real benefit (afterall, the two-thirds mark is just a
heuristic and isn't a precise cut-off).
Diffstat (limited to 'Objects/setobject.c')
-rw-r--r-- | Objects/setobject.c | 69 |
1 files changed, 24 insertions, 45 deletions
diff --git a/Objects/setobject.c b/Objects/setobject.c index 09d1129..56084c1 100644 --- a/Objects/setobject.c +++ b/Objects/setobject.c @@ -125,6 +125,8 @@ set_lookkey(PySetObject *so, PyObject *key, Py_hash_t hash) } } +static int set_table_resize(PySetObject *, Py_ssize_t); + static int set_insert_key(PySetObject *so, PyObject *key, Py_hash_t hash) { @@ -139,7 +141,7 @@ set_insert_key(PySetObject *so, PyObject *key, Py_hash_t hash) entry = &table[i]; if (entry->key == NULL) - goto found_null_first; + goto found_unused; freeslot = NULL; perturb = hash; @@ -173,7 +175,7 @@ set_insert_key(PySetObject *so, PyObject *key, Py_hash_t hash) for (j = 0 ; j < LINEAR_PROBES ; j++) { entry++; if (entry->hash == 0 && entry->key == NULL) - goto found_null; + goto found_unused_or_dummy; if (entry->hash == hash) { PyObject *startkey = entry->key; assert(startkey != dummy); @@ -204,30 +206,27 @@ set_insert_key(PySetObject *so, PyObject *key, Py_hash_t hash) entry = &table[i]; if (entry->key == NULL) - goto found_null; + goto found_unused_or_dummy; } - found_null_first: + found_unused_or_dummy: + if (freeslot == NULL) + goto found_unused; Py_INCREF(key); - so->fill++; so->used++; - entry->key = key; - entry->hash = hash; + freeslot->key = key; + freeslot->hash = hash; return 0; - found_null: + found_unused: Py_INCREF(key); - if (freeslot == NULL) { - /* UNUSED */ - so->fill++; - } else { - /* DUMMY */ - entry = freeslot; - } + so->fill++; so->used++; entry->key = key; entry->hash = hash; - return 0; + if ((size_t)so->fill*3 < mask*2) + return 0; + return set_table_resize(so, so->used>50000 ? so->used*2 : so->used*4); found_active: return 0; @@ -366,28 +365,15 @@ set_table_resize(PySetObject *so, Py_ssize_t minused) return 0; } -/* CAUTION: set_add_key/entry() must guarantee it won't resize the table */ - static int set_add_entry(PySetObject *so, setentry *entry) { - Py_ssize_t n_used; - PyObject *key = entry->key; - Py_hash_t hash = entry->hash; - - assert(so->fill <= so->mask); /* at least one empty slot */ - n_used = so->used; - if (set_insert_key(so, key, hash)) - return -1; - if (!(so->used > n_used && so->fill*3 >= (so->mask+1)*2)) - return 0; - return set_table_resize(so, so->used>50000 ? so->used*2 : so->used*4); + return set_insert_key(so, entry->key, entry->hash); } static int set_add_key(PySetObject *so, PyObject *key) { - setentry entry; Py_hash_t hash; if (!PyUnicode_CheckExact(key) || @@ -396,9 +382,7 @@ set_add_key(PySetObject *so, PyObject *key) if (hash == -1) return -1; } - entry.key = key; - entry.hash = hash; - return set_add_entry(so, &entry); + return set_insert_key(so, key, hash); } #define DISCARD_NOTFOUND 0 @@ -631,7 +615,7 @@ set_merge(PySetObject *so, PyObject *otherset) * incrementally resizing as we insert new keys. Expect * that there will be no (or few) overlapping keys. */ - if ((so->fill + other->used)*3 >= (so->mask+1)*2) { + if ((so->fill + other->used)*3 >= so->mask*2) { if (set_table_resize(so, (so->used + other->used)*2) != 0) return -1; } @@ -979,16 +963,12 @@ set_update_internal(PySetObject *so, PyObject *other) */ if (dictsize == -1) return -1; - if ((so->fill + dictsize)*3 >= (so->mask+1)*2) { + if ((so->fill + dictsize)*3 >= so->mask*2) { if (set_table_resize(so, (so->used + dictsize)*2) != 0) return -1; } while (_PyDict_Next(other, &pos, &key, &value, &hash)) { - setentry an_entry; - - an_entry.hash = hash; - an_entry.key = key; - if (set_add_entry(so, &an_entry)) + if (set_insert_key(so, key, hash)) return -1; } return 0; @@ -1583,17 +1563,16 @@ set_difference(PySetObject *so, PyObject *other) if (PyDict_CheckExact(other)) { while (set_next(so, &pos, &entry)) { - setentry entrycopy; + PyObject *key = entry->key; + Py_hash_t hash = entry->hash; int rv; - entrycopy.hash = entry->hash; - entrycopy.key = entry->key; - rv = _PyDict_Contains(other, entry->key, entry->hash); + rv = _PyDict_Contains(other, key, hash); if (rv < 0) { Py_DECREF(result); return NULL; } if (!rv) { - if (set_add_entry((PySetObject *)result, &entrycopy)) { + if (set_insert_key((PySetObject *)result, key, hash)) { Py_DECREF(result); return NULL; } |