From e965d97ed1adac6a9a5cf69f04770249589756c7 Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Mon, 27 Feb 2012 00:45:12 +0100 Subject: =?UTF-8?q?Issue=20#13521:=20dict.setdefault()=20now=20does=20only?= =?UTF-8?q?=20one=20lookup=20for=20the=20given=20key,=20making=20it=20"ato?= =?UTF-8?q?mic"=20for=20many=20purposes.=20Patch=20by=20Filip=20Gruszczy?= =?UTF-8?q?=C5=84ski.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Lib/test/test_dict.py | 20 +++++++++ Misc/NEWS | 3 ++ Objects/dictobject.c | 112 +++++++++++++++++++++++++++++++------------------- 3 files changed, 93 insertions(+), 42 deletions(-) diff --git a/Lib/test/test_dict.py b/Lib/test/test_dict.py index 1507e42..d2740a3 100644 --- a/Lib/test/test_dict.py +++ b/Lib/test/test_dict.py @@ -299,6 +299,26 @@ class DictTest(unittest.TestCase): x.fail = True self.assertRaises(Exc, d.setdefault, x, []) + def test_setdefault_atomic(self): + # Issue #13521: setdefault() calls __hash__ and __eq__ only once. + class Hashed(object): + def __init__(self): + self.hash_count = 0 + self.eq_count = 0 + def __hash__(self): + self.hash_count += 1 + return 42 + def __eq__(self, other): + self.eq_count += 1 + return id(self) == id(other) + hashed1 = Hashed() + y = {hashed1: 5} + hashed2 = Hashed() + y.setdefault(hashed2, []) + self.assertEqual(hashed1.hash_count, 1) + self.assertEqual(hashed2.hash_count, 1) + self.assertEqual(hashed1.eq_count + hashed2.eq_count, 1) + def test_popitem(self): # dict.popitem() for copymode in -1, +1: diff --git a/Misc/NEWS b/Misc/NEWS index 0375ff3..b69eb4b 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -10,6 +10,9 @@ What's New in Python 3.2.3 release candidate 1? Core and Builtins ----------------- +- Issue #13521: dict.setdefault() now does only one lookup for the given key, + making it "atomic" for many purposes. Patch by Filip GruszczyƄski. + - Issue #13703: oCERT-2011-003: add -R command-line option and PYTHONHASHSEED environment variable, to provide an opt-in way to protect against denial of service attacks due to hash collisions within the dict and set types. Patch diff --git a/Objects/dictobject.c b/Objects/dictobject.c index 768351e..27de10d 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -510,27 +510,16 @@ _PyDict_MaybeUntrack(PyObject *op) _PyObject_GC_UNTRACK(op); } - /* -Internal routine to insert a new item into the table. -Used both by the internal resize routine and by the public insert routine. -Eats a reference to key and one to value. -Returns -1 if an error occurred, or 0 on success. +Internal routine to insert a new item into the table when you have entry object. +Used by insertdict. */ static int -insertdict(register PyDictObject *mp, PyObject *key, Py_hash_t hash, PyObject *value) +insertdict_by_entry(register PyDictObject *mp, PyObject *key, Py_hash_t hash, + PyDictEntry *ep, PyObject *value) { PyObject *old_value; - register PyDictEntry *ep; - typedef PyDictEntry *(*lookupfunc)(PyDictObject *, PyObject *, Py_hash_t); - assert(mp->ma_lookup != NULL); - ep = mp->ma_lookup(mp, key, hash); - if (ep == NULL) { - Py_DECREF(key); - Py_DECREF(value); - return -1; - } MAINTAIN_TRACKING(mp, key, value); if (ep->me_value != NULL) { old_value = ep->me_value; @@ -553,6 +542,28 @@ insertdict(register PyDictObject *mp, PyObject *key, Py_hash_t hash, PyObject *v return 0; } + +/* +Internal routine to insert a new item into the table. +Used both by the internal resize routine and by the public insert routine. +Eats a reference to key and one to value. +Returns -1 if an error occurred, or 0 on success. +*/ +static int +insertdict(register PyDictObject *mp, PyObject *key, Py_hash_t hash, PyObject *value) +{ + register PyDictEntry *ep; + + assert(mp->ma_lookup != NULL); + ep = mp->ma_lookup(mp, key, hash); + if (ep == NULL) { + Py_DECREF(key); + Py_DECREF(value); + return -1; + } + return insertdict_by_entry(mp, key, hash, ep, value); +} + /* Internal routine used by dictresize() to insert an item which is known to be absent from the dict. This routine also assumes that @@ -776,39 +787,26 @@ PyDict_GetItemWithError(PyObject *op, PyObject *key) return ep->me_value; } -/* CAUTION: PyDict_SetItem() must guarantee that it won't resize the - * dictionary if it's merely replacing the value for an existing key. - * This means that it's safe to loop over a dictionary with PyDict_Next() - * and occasionally replace a value -- but you can't insert new keys or - * remove them. - */ -int -PyDict_SetItem(register PyObject *op, PyObject *key, PyObject *value) +static int +dict_set_item_by_hash_or_entry(register PyObject *op, PyObject *key, + Py_hash_t hash, PyDictEntry *ep, PyObject *value) { register PyDictObject *mp; - register Py_hash_t hash; register Py_ssize_t n_used; - if (!PyDict_Check(op)) { - PyErr_BadInternalCall(); - return -1; - } - assert(key); - assert(value); mp = (PyDictObject *)op; - if (!PyUnicode_CheckExact(key) || - (hash = ((PyUnicodeObject *) key)->hash) == -1) - { - hash = PyObject_Hash(key); - if (hash == -1) - return -1; - } assert(mp->ma_fill <= mp->ma_mask); /* at least one empty slot */ n_used = mp->ma_used; Py_INCREF(value); Py_INCREF(key); - if (insertdict(mp, key, hash, value) != 0) - return -1; + if (ep == NULL) { + if (insertdict(mp, key, hash, value) != 0) + return -1; + } + else { + if (insertdict_by_entry(mp, key, hash, ep, value) != 0) + return -1; + } /* If we added a key, we can safely resize. Otherwise just return! * If fill >= 2/3 size, adjust size. Normally, this doubles or * quaduples the size, but it's also possible for the dict to shrink @@ -828,6 +826,36 @@ PyDict_SetItem(register PyObject *op, PyObject *key, PyObject *value) return dictresize(mp, (mp->ma_used > 50000 ? 2 : 4) * mp->ma_used); } +/* CAUTION: PyDict_SetItem() must guarantee that it won't resize the + * dictionary if it's merely replacing the value for an existing key. + * This means that it's safe to loop over a dictionary with PyDict_Next() + * and occasionally replace a value -- but you can't insert new keys or + * remove them. + */ +int +PyDict_SetItem(register PyObject *op, PyObject *key, PyObject *value) +{ + register Py_hash_t hash; + + if (!PyDict_Check(op)) { + PyErr_BadInternalCall(); + return -1; + } + assert(key); + assert(value); + if (PyUnicode_CheckExact(key)) { + hash = ((PyUnicodeObject *) key)->hash; + if (hash == -1) + hash = PyObject_Hash(key); + } + else { + hash = PyObject_Hash(key); + if (hash == -1) + return -1; + } + return dict_set_item_by_hash_or_entry(op, key, hash, NULL, value); +} + int PyDict_DelItem(PyObject *op, PyObject *key) { @@ -1797,9 +1825,9 @@ dict_setdefault(register PyDictObject *mp, PyObject *args) return NULL; val = ep->me_value; if (val == NULL) { - val = failobj; - if (PyDict_SetItem((PyObject*)mp, key, failobj)) - val = NULL; + if (dict_set_item_by_hash_or_entry((PyObject*)mp, key, hash, ep, + failobj) == 0) + val = failobj; } Py_XINCREF(val); return val; -- cgit v0.12