summaryrefslogtreecommitdiffstats
path: root/Objects
diff options
context:
space:
mode:
Diffstat (limited to 'Objects')
-rw-r--r--Objects/abstract.c2
-rw-r--r--Objects/dictnotes.txt20
-rw-r--r--Objects/dictobject.c41
-rw-r--r--Objects/enumobject.c6
-rw-r--r--Objects/setobject.c43
-rw-r--r--Objects/typeobject.c8
6 files changed, 104 insertions, 16 deletions
diff --git a/Objects/abstract.c b/Objects/abstract.c
index 4e5838f..7016c2e 100644
--- a/Objects/abstract.c
+++ b/Objects/abstract.c
@@ -980,6 +980,8 @@ PyNumber_Float(PyObject *o)
int
PySequence_Check(PyObject *s)
{
+ if (PyObject_IsInstance(s, (PyObject *)&PyDict_Type))
+ return 0;
return s != NULL && s->ob_type->tp_as_sequence &&
s->ob_type->tp_as_sequence->sq_item != NULL;
}
diff --git a/Objects/dictnotes.txt b/Objects/dictnotes.txt
index 3b63197..d3aa774 100644
--- a/Objects/dictnotes.txt
+++ b/Objects/dictnotes.txt
@@ -98,6 +98,17 @@ Tunable Dictionary Parameters
depending on the size of the dictionary. Setting to *4
eliminates every other resize step.
+* Maximum sparseness (minimum dictionary load). What percentage
+ of entries can be unused before the dictionary shrinks to
+ free up memory and speed up iteration? (The current CPython
+ code does not represent this parameter directly.)
+
+* Shrinkage rate upon exceeding maximum sparseness. The current
+ CPython code never even checks sparseness when deleting a
+ key. When a new key is added, it resizes based on the number
+ of active keys, so that the addition may trigger shrinkage
+ rather than growth.
+
Tune-ups should be measured across a broad range of applications and
use cases. A change to any parameter will help in some situations and
hurt in others. The key is to find settings that help the most common
@@ -115,6 +126,15 @@ __iter__(), iterkeys(), iteritems(), itervalues(), and update().
Also, every dictionary iterates at least twice, once for the memset()
when it is created and once by dealloc().
+Dictionary operations involving only a single key can be O(1) unless
+resizing is possible. By checking for a resize only when the
+dictionary can grow (and may *require* resizing), other operations
+remain O(1), and the odds of resize thrashing or memory fragmentation
+are reduced. In particular, an algorithm that empties a dictionary
+by repeatedly invoking .pop will see no resizing, which might
+not be necessary at all because the dictionary is eventually
+discarded entirely.
+
Results of Cache Locality Experiments
-------------------------------------
diff --git a/Objects/dictobject.c b/Objects/dictobject.c
index eeb8bd5..ffa7a86 100644
--- a/Objects/dictobject.c
+++ b/Objects/dictobject.c
@@ -833,6 +833,34 @@ PyDict_Next(PyObject *op, Py_ssize_t *ppos, PyObject **pkey, PyObject **pvalue)
return 1;
}
+/* Internal version of PyDict_Next that returns a hash value in addition to the key and value.*/
+int
+_PyDict_Next(PyObject *op, Py_ssize_t *ppos, PyObject **pkey, PyObject **pvalue, long *phash)
+{
+ register Py_ssize_t i;
+ register Py_ssize_t mask;
+ register dictentry *ep;
+
+ if (!PyDict_Check(op))
+ return 0;
+ i = *ppos;
+ if (i < 0)
+ return 0;
+ ep = ((dictobject *)op)->ma_table;
+ mask = ((dictobject *)op)->ma_mask;
+ while (i <= mask && ep[i].me_value == NULL)
+ i++;
+ *ppos = i+1;
+ if (i > mask)
+ return 0;
+ *phash = (long)(ep[i].me_hash);
+ if (pkey)
+ *pkey = ep[i].me_key;
+ if (pvalue)
+ *pvalue = ep[i].me_value;
+ return 1;
+}
+
/* Methods */
static void
@@ -1336,7 +1364,7 @@ PyDict_Merge(PyObject *a, PyObject *b, int override)
return -1;
}
mp = (dictobject*)a;
- if (PyDict_Check(b)) {
+ if (PyDict_CheckExact(b)) {
other = (dictobject*)b;
if (other == mp || other->ma_used == 0)
/* a.update(a) or a.update({}); nothing to do */
@@ -1909,6 +1937,17 @@ PyDict_Contains(PyObject *op, PyObject *key)
return ep == NULL ? -1 : (ep->me_value != NULL);
}
+/* Internal version of PyDict_Contains used when the hash value is already known */
+int
+_PyDict_Contains(PyObject *op, PyObject *key, long hash)
+{
+ dictobject *mp = (dictobject *)op;
+ dictentry *ep;
+
+ ep = (mp->ma_lookup)(mp, key, hash);
+ return ep == NULL ? -1 : (ep->me_value != NULL);
+}
+
/* Hack to implement "key in dict" */
static PySequenceMethods dict_as_sequence = {
0, /* sq_length */
diff --git a/Objects/enumobject.c b/Objects/enumobject.c
index a8f43e0..a456c9d 100644
--- a/Objects/enumobject.c
+++ b/Objects/enumobject.c
@@ -62,6 +62,12 @@ enum_next(enumobject *en)
PyObject *result = en->en_result;
PyObject *it = en->en_sit;
+ if (en->en_index == LONG_MAX) {
+ PyErr_SetString(PyExc_OverflowError,
+ "enumerate() is limited to LONG_MAX items");
+ return NULL;
+ }
+
next_item = (*it->ob_type->tp_iternext)(it);
if (next_item == NULL)
return NULL;
diff --git a/Objects/setobject.c b/Objects/setobject.c
index 0f6c902..2210edf 100644
--- a/Objects/setobject.c
+++ b/Objects/setobject.c
@@ -932,14 +932,31 @@ set_update_internal(PySetObject *so, PyObject *other)
{
PyObject *key, *it;
- if (PyAnySet_Check(other))
+ if (PyAnySet_CheckExact(other))
return set_merge(so, other);
if (PyDict_CheckExact(other)) {
PyObject *value;
Py_ssize_t pos = 0;
- while (PyDict_Next(other, &pos, &key, &value)) {
- if (set_add_key(so, key) == -1)
+ long hash;
+ Py_ssize_t dictsize = PyDict_Size(other);
+
+ /* Do one big resize at the start, rather than
+ * incrementally resizing as we insert new keys. Expect
+ * that there will be no (or few) overlapping keys.
+ */
+ if (dictsize == -1)
+ return -1;
+ if ((so->fill + dictsize)*3 >= (so->mask+1)*2) {
+ if (set_table_resize(so, (so->used + dictsize)*2) != 0)
+ return -1;
+ }
+ while (_PyDict_Next(other, &pos, &key, &value, &hash)) {
+ setentry an_entry;
+
+ an_entry.hash = hash;
+ an_entry.key = key;
+ if (set_add_entry(so, &an_entry) == -1)
return -1;
}
return 0;
@@ -1210,7 +1227,7 @@ set_intersection(PySetObject *so, PyObject *other)
if (result == NULL)
return NULL;
- if (PyAnySet_Check(other)) {
+ if (PyAnySet_CheckExact(other)) {
Py_ssize_t pos = 0;
setentry *entry;
@@ -1334,7 +1351,7 @@ set_difference_update_internal(PySetObject *so, PyObject *other)
if ((PyObject *)so == other)
return set_clear_internal(so);
- if (PyAnySet_Check(other)) {
+ if (PyAnySet_CheckExact(other)) {
setentry *entry;
Py_ssize_t pos = 0;
@@ -1383,7 +1400,7 @@ set_difference(PySetObject *so, PyObject *other)
setentry *entry;
Py_ssize_t pos = 0;
- if (!PyAnySet_Check(other) && !PyDict_CheckExact(other)) {
+ if (!PyAnySet_CheckExact(other) && !PyDict_CheckExact(other)) {
result = set_copy(so);
if (result == NULL)
return NULL;
@@ -1402,7 +1419,7 @@ set_difference(PySetObject *so, PyObject *other)
setentry entrycopy;
entrycopy.hash = entry->hash;
entrycopy.key = entry->key;
- if (!PyDict_Contains(other, entry->key)) {
+ if (!_PyDict_Contains(other, entry->key, entry->hash)) {
if (set_add_entry((PySetObject *)result, &entrycopy) == -1) {
Py_DECREF(result);
return NULL;
@@ -1473,12 +1490,10 @@ set_symmetric_difference_update(PySetObject *so, PyObject *other)
if (PyDict_CheckExact(other)) {
PyObject *value;
int rv;
- while (PyDict_Next(other, &pos, &key, &value)) {
+ long hash;
+ while (_PyDict_Next(other, &pos, &key, &value, &hash)) {
setentry an_entry;
- long hash = PyObject_Hash(key);
- if (hash == -1)
- return NULL;
an_entry.hash = hash;
an_entry.key = key;
rv = set_discard_entry(so, &an_entry);
@@ -1492,7 +1507,7 @@ set_symmetric_difference_update(PySetObject *so, PyObject *other)
Py_RETURN_NONE;
}
- if (PyAnySet_Check(other)) {
+ if (PyAnySet_CheckExact(other)) {
Py_INCREF(other);
otherset = (PySetObject *)other;
} else {
@@ -1575,7 +1590,7 @@ set_issubset(PySetObject *so, PyObject *other)
setentry *entry;
Py_ssize_t pos = 0;
- if (!PyAnySet_Check(other)) {
+ if (!PyAnySet_CheckExact(other)) {
PyObject *tmp, *result;
tmp = make_new_set(&PySet_Type, other);
if (tmp == NULL)
@@ -1604,7 +1619,7 @@ set_issuperset(PySetObject *so, PyObject *other)
{
PyObject *tmp, *result;
- if (!PyAnySet_Check(other)) {
+ if (!PyAnySet_CheckExact(other)) {
tmp = make_new_set(&PySet_Type, other);
if (tmp == NULL)
return NULL;
diff --git a/Objects/typeobject.c b/Objects/typeobject.c
index cd211e6..848203e 100644
--- a/Objects/typeobject.c
+++ b/Objects/typeobject.c
@@ -4279,7 +4279,13 @@ SLOT1(slot_nb_inplace_add, "__iadd__", PyObject *, "O")
SLOT1(slot_nb_inplace_subtract, "__isub__", PyObject *, "O")
SLOT1(slot_nb_inplace_multiply, "__imul__", PyObject *, "O")
SLOT1(slot_nb_inplace_remainder, "__imod__", PyObject *, "O")
-SLOT1(slot_nb_inplace_power, "__ipow__", PyObject *, "O")
+/* Can't use SLOT1 here, because nb_inplace_power is ternary */
+static PyObject *
+slot_nb_inplace_power(PyObject *self, PyObject * arg1, PyObject *arg2)
+{
+ static PyObject *cache_str;
+ return call_method(self, "__ipow__", &cache_str, "(" "O" ")", arg1);
+}
SLOT1(slot_nb_inplace_lshift, "__ilshift__", PyObject *, "O")
SLOT1(slot_nb_inplace_rshift, "__irshift__", PyObject *, "O")
SLOT1(slot_nb_inplace_and, "__iand__", PyObject *, "O")