diff options
Diffstat (limited to 'Objects')
-rw-r--r-- | Objects/abstract.c | 2 | ||||
-rw-r--r-- | Objects/dictnotes.txt | 20 | ||||
-rw-r--r-- | Objects/dictobject.c | 41 | ||||
-rw-r--r-- | Objects/enumobject.c | 6 | ||||
-rw-r--r-- | Objects/setobject.c | 43 | ||||
-rw-r--r-- | Objects/typeobject.c | 8 |
6 files changed, 104 insertions, 16 deletions
diff --git a/Objects/abstract.c b/Objects/abstract.c index 4e5838f..7016c2e 100644 --- a/Objects/abstract.c +++ b/Objects/abstract.c @@ -980,6 +980,8 @@ PyNumber_Float(PyObject *o) int PySequence_Check(PyObject *s) { + if (PyObject_IsInstance(s, (PyObject *)&PyDict_Type)) + return 0; return s != NULL && s->ob_type->tp_as_sequence && s->ob_type->tp_as_sequence->sq_item != NULL; } diff --git a/Objects/dictnotes.txt b/Objects/dictnotes.txt index 3b63197..d3aa774 100644 --- a/Objects/dictnotes.txt +++ b/Objects/dictnotes.txt @@ -98,6 +98,17 @@ Tunable Dictionary Parameters depending on the size of the dictionary. Setting to *4 eliminates every other resize step. +* Maximum sparseness (minimum dictionary load). What percentage + of entries can be unused before the dictionary shrinks to + free up memory and speed up iteration? (The current CPython + code does not represent this parameter directly.) + +* Shrinkage rate upon exceeding maximum sparseness. The current + CPython code never even checks sparseness when deleting a + key. When a new key is added, it resizes based on the number + of active keys, so that the addition may trigger shrinkage + rather than growth. + Tune-ups should be measured across a broad range of applications and use cases. A change to any parameter will help in some situations and hurt in others. The key is to find settings that help the most common @@ -115,6 +126,15 @@ __iter__(), iterkeys(), iteritems(), itervalues(), and update(). Also, every dictionary iterates at least twice, once for the memset() when it is created and once by dealloc(). +Dictionary operations involving only a single key can be O(1) unless +resizing is possible. By checking for a resize only when the +dictionary can grow (and may *require* resizing), other operations +remain O(1), and the odds of resize thrashing or memory fragmentation +are reduced. In particular, an algorithm that empties a dictionary +by repeatedly invoking .pop will see no resizing, which might +not be necessary at all because the dictionary is eventually +discarded entirely. + Results of Cache Locality Experiments ------------------------------------- diff --git a/Objects/dictobject.c b/Objects/dictobject.c index eeb8bd5..ffa7a86 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -833,6 +833,34 @@ PyDict_Next(PyObject *op, Py_ssize_t *ppos, PyObject **pkey, PyObject **pvalue) return 1; } +/* Internal version of PyDict_Next that returns a hash value in addition to the key and value.*/ +int +_PyDict_Next(PyObject *op, Py_ssize_t *ppos, PyObject **pkey, PyObject **pvalue, long *phash) +{ + register Py_ssize_t i; + register Py_ssize_t mask; + register dictentry *ep; + + if (!PyDict_Check(op)) + return 0; + i = *ppos; + if (i < 0) + return 0; + ep = ((dictobject *)op)->ma_table; + mask = ((dictobject *)op)->ma_mask; + while (i <= mask && ep[i].me_value == NULL) + i++; + *ppos = i+1; + if (i > mask) + return 0; + *phash = (long)(ep[i].me_hash); + if (pkey) + *pkey = ep[i].me_key; + if (pvalue) + *pvalue = ep[i].me_value; + return 1; +} + /* Methods */ static void @@ -1336,7 +1364,7 @@ PyDict_Merge(PyObject *a, PyObject *b, int override) return -1; } mp = (dictobject*)a; - if (PyDict_Check(b)) { + if (PyDict_CheckExact(b)) { other = (dictobject*)b; if (other == mp || other->ma_used == 0) /* a.update(a) or a.update({}); nothing to do */ @@ -1909,6 +1937,17 @@ PyDict_Contains(PyObject *op, PyObject *key) return ep == NULL ? -1 : (ep->me_value != NULL); } +/* Internal version of PyDict_Contains used when the hash value is already known */ +int +_PyDict_Contains(PyObject *op, PyObject *key, long hash) +{ + dictobject *mp = (dictobject *)op; + dictentry *ep; + + ep = (mp->ma_lookup)(mp, key, hash); + return ep == NULL ? -1 : (ep->me_value != NULL); +} + /* Hack to implement "key in dict" */ static PySequenceMethods dict_as_sequence = { 0, /* sq_length */ diff --git a/Objects/enumobject.c b/Objects/enumobject.c index a8f43e0..a456c9d 100644 --- a/Objects/enumobject.c +++ b/Objects/enumobject.c @@ -62,6 +62,12 @@ enum_next(enumobject *en) PyObject *result = en->en_result; PyObject *it = en->en_sit; + if (en->en_index == LONG_MAX) { + PyErr_SetString(PyExc_OverflowError, + "enumerate() is limited to LONG_MAX items"); + return NULL; + } + next_item = (*it->ob_type->tp_iternext)(it); if (next_item == NULL) return NULL; diff --git a/Objects/setobject.c b/Objects/setobject.c index 0f6c902..2210edf 100644 --- a/Objects/setobject.c +++ b/Objects/setobject.c @@ -932,14 +932,31 @@ set_update_internal(PySetObject *so, PyObject *other) { PyObject *key, *it; - if (PyAnySet_Check(other)) + if (PyAnySet_CheckExact(other)) return set_merge(so, other); if (PyDict_CheckExact(other)) { PyObject *value; Py_ssize_t pos = 0; - while (PyDict_Next(other, &pos, &key, &value)) { - if (set_add_key(so, key) == -1) + long hash; + Py_ssize_t dictsize = PyDict_Size(other); + + /* Do one big resize at the start, rather than + * incrementally resizing as we insert new keys. Expect + * that there will be no (or few) overlapping keys. + */ + if (dictsize == -1) + return -1; + if ((so->fill + dictsize)*3 >= (so->mask+1)*2) { + if (set_table_resize(so, (so->used + dictsize)*2) != 0) + return -1; + } + while (_PyDict_Next(other, &pos, &key, &value, &hash)) { + setentry an_entry; + + an_entry.hash = hash; + an_entry.key = key; + if (set_add_entry(so, &an_entry) == -1) return -1; } return 0; @@ -1210,7 +1227,7 @@ set_intersection(PySetObject *so, PyObject *other) if (result == NULL) return NULL; - if (PyAnySet_Check(other)) { + if (PyAnySet_CheckExact(other)) { Py_ssize_t pos = 0; setentry *entry; @@ -1334,7 +1351,7 @@ set_difference_update_internal(PySetObject *so, PyObject *other) if ((PyObject *)so == other) return set_clear_internal(so); - if (PyAnySet_Check(other)) { + if (PyAnySet_CheckExact(other)) { setentry *entry; Py_ssize_t pos = 0; @@ -1383,7 +1400,7 @@ set_difference(PySetObject *so, PyObject *other) setentry *entry; Py_ssize_t pos = 0; - if (!PyAnySet_Check(other) && !PyDict_CheckExact(other)) { + if (!PyAnySet_CheckExact(other) && !PyDict_CheckExact(other)) { result = set_copy(so); if (result == NULL) return NULL; @@ -1402,7 +1419,7 @@ set_difference(PySetObject *so, PyObject *other) setentry entrycopy; entrycopy.hash = entry->hash; entrycopy.key = entry->key; - if (!PyDict_Contains(other, entry->key)) { + if (!_PyDict_Contains(other, entry->key, entry->hash)) { if (set_add_entry((PySetObject *)result, &entrycopy) == -1) { Py_DECREF(result); return NULL; @@ -1473,12 +1490,10 @@ set_symmetric_difference_update(PySetObject *so, PyObject *other) if (PyDict_CheckExact(other)) { PyObject *value; int rv; - while (PyDict_Next(other, &pos, &key, &value)) { + long hash; + while (_PyDict_Next(other, &pos, &key, &value, &hash)) { setentry an_entry; - long hash = PyObject_Hash(key); - if (hash == -1) - return NULL; an_entry.hash = hash; an_entry.key = key; rv = set_discard_entry(so, &an_entry); @@ -1492,7 +1507,7 @@ set_symmetric_difference_update(PySetObject *so, PyObject *other) Py_RETURN_NONE; } - if (PyAnySet_Check(other)) { + if (PyAnySet_CheckExact(other)) { Py_INCREF(other); otherset = (PySetObject *)other; } else { @@ -1575,7 +1590,7 @@ set_issubset(PySetObject *so, PyObject *other) setentry *entry; Py_ssize_t pos = 0; - if (!PyAnySet_Check(other)) { + if (!PyAnySet_CheckExact(other)) { PyObject *tmp, *result; tmp = make_new_set(&PySet_Type, other); if (tmp == NULL) @@ -1604,7 +1619,7 @@ set_issuperset(PySetObject *so, PyObject *other) { PyObject *tmp, *result; - if (!PyAnySet_Check(other)) { + if (!PyAnySet_CheckExact(other)) { tmp = make_new_set(&PySet_Type, other); if (tmp == NULL) return NULL; diff --git a/Objects/typeobject.c b/Objects/typeobject.c index cd211e6..848203e 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -4279,7 +4279,13 @@ SLOT1(slot_nb_inplace_add, "__iadd__", PyObject *, "O") SLOT1(slot_nb_inplace_subtract, "__isub__", PyObject *, "O") SLOT1(slot_nb_inplace_multiply, "__imul__", PyObject *, "O") SLOT1(slot_nb_inplace_remainder, "__imod__", PyObject *, "O") -SLOT1(slot_nb_inplace_power, "__ipow__", PyObject *, "O") +/* Can't use SLOT1 here, because nb_inplace_power is ternary */ +static PyObject * +slot_nb_inplace_power(PyObject *self, PyObject * arg1, PyObject *arg2) +{ + static PyObject *cache_str; + return call_method(self, "__ipow__", &cache_str, "(" "O" ")", arg1); +} SLOT1(slot_nb_inplace_lshift, "__ilshift__", PyObject *, "O") SLOT1(slot_nb_inplace_rshift, "__irshift__", PyObject *, "O") SLOT1(slot_nb_inplace_and, "__iand__", PyObject *, "O") |