From 35f6d36951766c8ca7a88642415e76a603751878 Mon Sep 17 00:00:00 2001 From: Armin Rigo Date: Thu, 1 Jun 2006 13:19:12 +0000 Subject: [ 1497053 ] Let dicts propagate the exceptions in user __eq__(). [ 1456209 ] dictresize() vulnerability ( <- backport candidate ). --- Lib/test/crashers/dictresize_attack.py | 32 ------- Lib/test/output/test_operations | 19 +++- Lib/test/test_operations.py | 56 ++++++++---- Misc/NEWS | 5 ++ Objects/dictobject.c | 157 ++++++++++++++++++++++----------- Python/ceval.c | 15 +++- 6 files changed, 183 insertions(+), 101 deletions(-) delete mode 100644 Lib/test/crashers/dictresize_attack.py diff --git a/Lib/test/crashers/dictresize_attack.py b/Lib/test/crashers/dictresize_attack.py deleted file mode 100644 index 1895791..0000000 --- a/Lib/test/crashers/dictresize_attack.py +++ /dev/null @@ -1,32 +0,0 @@ -# http://www.python.org/sf/1456209 - -# A dictresize() attack. If oldtable == mp->ma_smalltable then pure -# Python code can mangle with mp->ma_smalltable while it is being walked -# over. - -class X(object): - - def __hash__(self): - return 5 - - def __eq__(self, other): - if resizing: - d.clear() - return False - - -d = {} - -resizing = False - -d[X()] = 1 -d[X()] = 2 -d[X()] = 3 -d[X()] = 4 -d[X()] = 5 - -# now trigger a resize -resizing = True -d[9] = 6 - -# ^^^ I get Segmentation fault or Illegal instruction here. diff --git a/Lib/test/output/test_operations b/Lib/test/output/test_operations index 32eff3f..8a1bc2a 100644 --- a/Lib/test/output/test_operations +++ b/Lib/test/output/test_operations @@ -1,6 +1,21 @@ test_operations 3. Operations XXX Mostly not yet implemented -3.1 Dictionary lookups succeed even if __cmp__() raises an exception +3.1 Dictionary lookups fail if __cmp__() raises an exception raising error -No exception passed through. +d[x2] = 2: caught the RuntimeError outside +raising error +z = d[x2]: caught the RuntimeError outside +raising error +x2 in d: caught the RuntimeError outside +raising error +d.has_key(x2): caught the RuntimeError outside +raising error +d.get(x2): caught the RuntimeError outside +raising error +d.setdefault(x2, 42): caught the RuntimeError outside +raising error +d.pop(x2): caught the RuntimeError outside +raising error +d.update({x2: 2}): caught the RuntimeError outside +resize bugs not triggered. diff --git a/Lib/test/test_operations.py b/Lib/test/test_operations.py index b599c9d..fafc062 100644 --- a/Lib/test/test_operations.py +++ b/Lib/test/test_operations.py @@ -5,27 +5,16 @@ print '3. Operations' print 'XXX Mostly not yet implemented' -print '3.1 Dictionary lookups succeed even if __cmp__() raises an exception' - -# SourceForge bug #112558: -# http://sourceforge.net/bugs/?func=detailbug&bug_id=112558&group_id=5470 +print '3.1 Dictionary lookups fail if __cmp__() raises an exception' class BadDictKey: - already_printed_raising_error = 0 def __hash__(self): return hash(self.__class__) def __cmp__(self, other): if isinstance(other, self.__class__): - if not BadDictKey.already_printed_raising_error: - # How many times __cmp__ gets called depends on the hash - # code and the internals of the dict implementation; we - # know it will be called at least once, but that's it. - # already_printed_raising_error makes sure the expected- - # output file prints the msg at most once. - BadDictKey.already_printed_raising_error = 1 - print "raising error" + print "raising error" raise RuntimeError, "gotcha" return other @@ -33,8 +22,21 @@ d = {} x1 = BadDictKey() x2 = BadDictKey() d[x1] = 1 -d[x2] = 2 -print "No exception passed through." +for stmt in ['d[x2] = 2', + 'z = d[x2]', + 'x2 in d', + 'd.has_key(x2)', + 'd.get(x2)', + 'd.setdefault(x2, 42)', + 'd.pop(x2)', + 'd.update({x2: 2})']: + try: + exec stmt + except RuntimeError: + print "%s: caught the RuntimeError outside" % (stmt,) + else: + print "%s: No exception passed through!" # old CPython behavior + # Dict resizing bug, found by Jack Jansen in 2.2 CVS development. # This version got an assert failure in debug build, infinite loop in @@ -50,3 +52,27 @@ for i in range(5): del d[i] for i in range(5, 9): # i==8 was the problem d[i] = i + + +# Another dict resizing bug (SF bug #1456209). +# This caused Segmentation faults or Illegal instructions. + +class X(object): + def __hash__(self): + return 5 + def __eq__(self, other): + if resizing: + d.clear() + return False +d = {} +resizing = False +d[X()] = 1 +d[X()] = 2 +d[X()] = 3 +d[X()] = 4 +d[X()] = 5 +# now trigger a resize +resizing = True +d[9] = 6 + +print 'resize bugs not triggered.' diff --git a/Misc/NEWS b/Misc/NEWS index c2b6932..4bdacde 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -59,6 +59,11 @@ Core and builtins - Patch #1479181: split open() and file() from being aliases for each other. +- Patch #1497053: Exceptions occurring in __eq__() methods were always + silently ignored by dictionaries when comparing keys. They are now + passed through (except when using the C API function PyDict_GetItem(), + whose semantics did not change). + Extension Modules ----------------- diff --git a/Objects/dictobject.c b/Objects/dictobject.c index d4cd925..d6b9597 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -241,10 +241,7 @@ lookdict(dictobject *mp, PyObject *key, register long hash) register Py_ssize_t mask = mp->ma_mask; dictentry *ep0 = mp->ma_table; register dictentry *ep; - register int restore_error; - register int checked_error; register int cmp; - PyObject *err_type, *err_value, *err_tb; PyObject *startkey; i = hash & mask; @@ -252,24 +249,17 @@ lookdict(dictobject *mp, PyObject *key, register long hash) if (ep->me_key == NULL || ep->me_key == key) return ep; - restore_error = checked_error = 0; if (ep->me_key == dummy) freeslot = ep; else { if (ep->me_hash == hash) { - /* error can't have been checked yet */ - checked_error = 1; - if (PyErr_Occurred()) { - restore_error = 1; - PyErr_Fetch(&err_type, &err_value, &err_tb); - } startkey = ep->me_key; cmp = PyObject_RichCompareBool(startkey, key, Py_EQ); if (cmp < 0) - PyErr_Clear(); + return NULL; if (ep0 == mp->ma_table && ep->me_key == startkey) { if (cmp > 0) - goto Done; + return ep; } else { /* The compare did major nasty stuff to the @@ -277,8 +267,7 @@ lookdict(dictobject *mp, PyObject *key, register long hash) * XXX A clever adversary could prevent this * XXX from terminating. */ - ep = lookdict(mp, key, hash); - goto Done; + return lookdict(mp, key, hash); } } freeslot = NULL; @@ -289,29 +278,18 @@ lookdict(dictobject *mp, PyObject *key, register long hash) for (perturb = hash; ; perturb >>= PERTURB_SHIFT) { i = (i << 2) + i + perturb + 1; ep = &ep0[i & mask]; - if (ep->me_key == NULL) { - if (freeslot != NULL) - ep = freeslot; - break; - } + if (ep->me_key == NULL) + return freeslot == NULL ? ep : freeslot; if (ep->me_key == key) - break; + return ep; if (ep->me_hash == hash && ep->me_key != dummy) { - if (!checked_error) { - checked_error = 1; - if (PyErr_Occurred()) { - restore_error = 1; - PyErr_Fetch(&err_type, &err_value, - &err_tb); - } - } startkey = ep->me_key; cmp = PyObject_RichCompareBool(startkey, key, Py_EQ); if (cmp < 0) - PyErr_Clear(); + return NULL; if (ep0 == mp->ma_table && ep->me_key == startkey) { if (cmp > 0) - break; + return ep; } else { /* The compare did major nasty stuff to the @@ -319,18 +297,12 @@ lookdict(dictobject *mp, PyObject *key, register long hash) * XXX A clever adversary could prevent this * XXX from terminating. */ - ep = lookdict(mp, key, hash); - break; + return lookdict(mp, key, hash); } } else if (ep->me_key == dummy && freeslot == NULL) freeslot = ep; } - -Done: - if (restore_error) - PyErr_Restore(err_type, err_value, err_tb); - return ep; } /* @@ -400,7 +372,7 @@ Internal routine to insert a new item into the table. Used both by the internal resize routine and by the public insert routine. Eats a reference to key and one to value. */ -static void +static int insertdict(register dictobject *mp, PyObject *key, long hash, PyObject *value) { PyObject *old_value; @@ -409,6 +381,11 @@ insertdict(register dictobject *mp, PyObject *key, long hash, PyObject *value) assert(mp->ma_lookup != NULL); ep = mp->ma_lookup(mp, key, hash); + if (ep == NULL) { + Py_DECREF(key); + Py_DECREF(value); + return -1; + } if (ep->me_value != NULL) { old_value = ep->me_value; ep->me_value = value; @@ -427,6 +404,36 @@ insertdict(register dictobject *mp, PyObject *key, long hash, PyObject *value) ep->me_value = value; mp->ma_used++; } + return 0; +} + +/* +Internal routine used by dictresize() to insert an item which is +known to be absent from the dict. This routine also assumes that +the dict contains no deleted entries. Besides the performance benefit, +using insertdict() in dictresize() is dangerous (SF bug #1456209). +*/ +static void +insertdict_clean(register dictobject *mp, PyObject *key, long hash, + PyObject *value) +{ + register Py_ssize_t i; + register size_t perturb; + register unsigned int mask = mp->ma_mask; + dictentry *ep0 = mp->ma_table; + register dictentry *ep; + + i = hash & mask; + ep = &ep0[i]; + for (perturb = hash; ep->me_key != NULL; perturb >>= PERTURB_SHIFT) { + i = (i << 2) + i + perturb + 1; + ep = &ep0[i & mask]; + } + mp->ma_fill++; + ep->me_key = key; + ep->me_hash = (Py_ssize_t)hash; + ep->me_value = value; + mp->ma_used++; } /* @@ -501,7 +508,8 @@ dictresize(dictobject *mp, Py_ssize_t minused) for (ep = oldtable; i > 0; ep++) { if (ep->me_value != NULL) { /* active entry */ --i; - insertdict(mp, ep->me_key, ep->me_hash, ep->me_value); + insertdict_clean(mp, ep->me_key, (long)ep->me_hash, + ep->me_value); } else if (ep->me_key != NULL) { /* dummy entry */ --i; @@ -521,6 +529,8 @@ PyDict_GetItem(PyObject *op, PyObject *key) { long hash; dictobject *mp = (dictobject *)op; + dictentry *ep; + PyThreadState *tstate; if (!PyDict_Check(op)) { return NULL; } @@ -533,7 +543,29 @@ PyDict_GetItem(PyObject *op, PyObject *key) return NULL; } } - return (mp->ma_lookup)(mp, key, hash)->me_value; + + /* We can arrive here with a NULL tstate during initialization: + try running "python -Wi" for an example related to string + interning. Let's just hope that no exception occurs then... */ + tstate = PyThreadState_GET(); + if (tstate != NULL && tstate->curexc_type != NULL) { + /* preserve the existing exception */ + PyObject *err_type, *err_value, *err_tb; + PyErr_Fetch(&err_type, &err_value, &err_tb); + ep = (mp->ma_lookup)(mp, key, hash); + /* ignore errors */ + PyErr_Restore(err_type, err_value, err_tb); + if (ep == NULL) + return NULL; + } + else { + ep = (mp->ma_lookup)(mp, key, hash); + if (ep == NULL) { + PyErr_Clear(); + return NULL; + } + } + return ep->me_value; } /* CAUTION: PyDict_SetItem() must guarantee that it won't resize the @@ -568,7 +600,8 @@ PyDict_SetItem(register PyObject *op, PyObject *key, PyObject *value) n_used = mp->ma_used; Py_INCREF(value); Py_INCREF(key); - insertdict(mp, key, hash, value); + if (insertdict(mp, key, hash, value) != 0) + return -1; /* If we added a key, we can safely resize. Otherwise just return! * If fill >= 2/3 size, adjust size. Normally, this doubles or * quaduples the size, but it's also possible for the dict to shrink @@ -608,6 +641,8 @@ PyDict_DelItem(PyObject *op, PyObject *key) } mp = (dictobject *)op; ep = (mp->ma_lookup)(mp, key, hash); + if (ep == NULL) + return -1; if (ep->me_value == NULL) { PyErr_SetObject(PyExc_KeyError, key); return -1; @@ -893,6 +928,7 @@ dict_subscript(dictobject *mp, register PyObject *key) { PyObject *v; long hash; + dictentry *ep; assert(mp->ma_table != NULL); if (!PyString_CheckExact(key) || (hash = ((PyStringObject *) key)->ob_shash) == -1) { @@ -900,7 +936,10 @@ dict_subscript(dictobject *mp, register PyObject *key) if (hash == -1) return NULL; } - v = (mp->ma_lookup)(mp, key, hash) -> me_value; + ep = (mp->ma_lookup)(mp, key, hash); + if (ep == NULL) + return NULL; + v = ep->me_value; if (v == NULL) { if (!PyDict_CheckExact(mp)) { /* Look up __missing__ method if we're a subclass. */ @@ -1258,9 +1297,10 @@ PyDict_Merge(PyObject *a, PyObject *b, int override) PyDict_GetItem(a, entry->me_key) == NULL)) { Py_INCREF(entry->me_key); Py_INCREF(entry->me_value); - insertdict(mp, entry->me_key, - (long)entry->me_hash, - entry->me_value); + if (insertdict(mp, entry->me_key, + (long)entry->me_hash, + entry->me_value) != 0) + return -1; } } } @@ -1568,13 +1608,17 @@ dict_has_key(register dictobject *mp, PyObject *key) { long hash; register long ok; + dictentry *ep; if (!PyString_CheckExact(key) || (hash = ((PyStringObject *) key)->ob_shash) == -1) { hash = PyObject_Hash(key); if (hash == -1) return NULL; } - ok = (mp->ma_lookup)(mp, key, hash)->me_value != NULL; + ep = (mp->ma_lookup)(mp, key, hash); + if (ep == NULL) + return NULL; + ok = ep->me_value != NULL; return PyBool_FromLong(ok); } @@ -1585,6 +1629,7 @@ dict_get(register dictobject *mp, PyObject *args) PyObject *failobj = Py_None; PyObject *val = NULL; long hash; + dictentry *ep; if (!PyArg_UnpackTuple(args, "get", 1, 2, &key, &failobj)) return NULL; @@ -1595,8 +1640,10 @@ dict_get(register dictobject *mp, PyObject *args) if (hash == -1) return NULL; } - val = (mp->ma_lookup)(mp, key, hash)->me_value; - + ep = (mp->ma_lookup)(mp, key, hash); + if (ep == NULL) + return NULL; + val = ep->me_value; if (val == NULL) val = failobj; Py_INCREF(val); @@ -1611,6 +1658,7 @@ dict_setdefault(register dictobject *mp, PyObject *args) PyObject *failobj = Py_None; PyObject *val = NULL; long hash; + dictentry *ep; if (!PyArg_UnpackTuple(args, "setdefault", 1, 2, &key, &failobj)) return NULL; @@ -1621,7 +1669,10 @@ dict_setdefault(register dictobject *mp, PyObject *args) if (hash == -1) return NULL; } - val = (mp->ma_lookup)(mp, key, hash)->me_value; + ep = (mp->ma_lookup)(mp, key, hash); + if (ep == NULL) + return NULL; + val = ep->me_value; if (val == NULL) { val = failobj; if (PyDict_SetItem((PyObject*)mp, key, failobj)) @@ -1665,6 +1716,8 @@ dict_pop(dictobject *mp, PyObject *args) return NULL; } ep = (mp->ma_lookup)(mp, key, hash); + if (ep == NULL) + return NULL; if (ep->me_value == NULL) { if (deflt) { Py_INCREF(deflt); @@ -1884,6 +1937,7 @@ PyDict_Contains(PyObject *op, PyObject *key) { long hash; dictobject *mp = (dictobject *)op; + dictentry *ep; if (!PyString_CheckExact(key) || (hash = ((PyStringObject *) key)->ob_shash) == -1) { @@ -1891,7 +1945,10 @@ PyDict_Contains(PyObject *op, PyObject *key) if (hash == -1) return -1; } - return (mp->ma_lookup)(mp, key, hash)->me_value != NULL; + ep = (mp->ma_lookup)(mp, key, hash); + if (ep == NULL) + return -1; + return ep->me_value != NULL; } /* Hack to implement "key in dict" */ diff --git a/Python/ceval.c b/Python/ceval.c index 803815e..4d20431 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -1855,15 +1855,26 @@ PyEval_EvalFrameEx(PyFrameObject *f, int throwflag) long hash = ((PyStringObject *)w)->ob_shash; if (hash != -1) { PyDictObject *d; + PyDictEntry *e; d = (PyDictObject *)(f->f_globals); - x = d->ma_lookup(d, w, hash)->me_value; + e = d->ma_lookup(d, w, hash); + if (e == NULL) { + x = NULL; + break; + } + x = e->me_value; if (x != NULL) { Py_INCREF(x); PUSH(x); continue; } d = (PyDictObject *)(f->f_builtins); - x = d->ma_lookup(d, w, hash)->me_value; + e = d->ma_lookup(d, w, hash); + if (e == NULL) { + x = NULL; + break; + } + x = e->me_value; if (x != NULL) { Py_INCREF(x); PUSH(x); -- cgit v0.12