From 198b291df7ae66d507849d92ddb6588a630fb71c Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 6 Mar 2012 01:03:13 +0100 Subject: Close #14205: dict lookup raises a RuntimeError if the dict is modified during a lookup. "if you want to make a sandbox on top of CPython, you have to fix segfaults" so let's fix segfaults! --- Lib/test/crashers/nasty_eq_vs_dict.py | 47 ------ Lib/test/test_dict.py | 22 ++- Lib/test/test_mutants.py | 291 ---------------------------------- Misc/NEWS | 5 +- Objects/dictobject.c | 18 +-- 5 files changed, 31 insertions(+), 352 deletions(-) delete mode 100644 Lib/test/crashers/nasty_eq_vs_dict.py delete mode 100644 Lib/test/test_mutants.py diff --git a/Lib/test/crashers/nasty_eq_vs_dict.py b/Lib/test/crashers/nasty_eq_vs_dict.py deleted file mode 100644 index 85f7caf..0000000 --- a/Lib/test/crashers/nasty_eq_vs_dict.py +++ /dev/null @@ -1,47 +0,0 @@ -# from http://mail.python.org/pipermail/python-dev/2001-June/015239.html - -# if you keep changing a dictionary while looking up a key, you can -# provoke an infinite recursion in C - -# At the time neither Tim nor Michael could be bothered to think of a -# way to fix it. - -class Yuck: - def __init__(self): - self.i = 0 - - def make_dangerous(self): - self.i = 1 - - def __hash__(self): - # direct to slot 4 in table of size 8; slot 12 when size 16 - return 4 + 8 - - def __eq__(self, other): - if self.i == 0: - # leave dict alone - pass - elif self.i == 1: - # fiddle to 16 slots - self.__fill_dict(6) - self.i = 2 - else: - # fiddle to 8 slots - self.__fill_dict(4) - self.i = 1 - - return 1 - - def __fill_dict(self, n): - self.i = 0 - dict.clear() - for i in range(n): - dict[i] = i - dict[self] = "OK!" - -y = Yuck() -dict = {y: "OK!"} - -z = Yuck() -y.make_dangerous() -print(dict[z]) diff --git a/Lib/test/test_dict.py b/Lib/test/test_dict.py index d2740a3..15db51d 100644 --- a/Lib/test/test_dict.py +++ b/Lib/test/test_dict.py @@ -379,7 +379,7 @@ class DictTest(unittest.TestCase): x.fail = True self.assertRaises(Exc, d.pop, x) - def test_mutatingiteration(self): + def test_mutating_iteration(self): # changing dict size during iteration d = {} d[1] = 1 @@ -387,6 +387,26 @@ class DictTest(unittest.TestCase): for i in d: d[i+1] = 1 + def test_mutating_lookup(self): + # changing dict during a lookup + class NastyKey: + mutate_dict = None + + def __hash__(self): + # hash collision! + return 1 + + def __eq__(self, other): + if self.mutate_dict: + self.mutate_dict[self] = 1 + return self == other + + d = {} + d[NastyKey()] = 0 + NastyKey.mutate_dict = d + with self.assertRaises(RuntimeError): + d[NastyKey()] = None + def test_repr(self): d = {} self.assertEqual(repr(d), '{}') diff --git a/Lib/test/test_mutants.py b/Lib/test/test_mutants.py deleted file mode 100644 index b43fa47..0000000 --- a/Lib/test/test_mutants.py +++ /dev/null @@ -1,291 +0,0 @@ -from test.support import verbose, TESTFN -import random -import os - -# From SF bug #422121: Insecurities in dict comparison. - -# Safety of code doing comparisons has been an historical Python weak spot. -# The problem is that comparison of structures written in C *naturally* -# wants to hold on to things like the size of the container, or "the -# biggest" containee so far, across a traversal of the container; but -# code to do containee comparisons can call back into Python and mutate -# the container in arbitrary ways while the C loop is in midstream. If the -# C code isn't extremely paranoid about digging things out of memory on -# each trip, and artificially boosting refcounts for the duration, anything -# from infinite loops to OS crashes can result (yes, I use Windows ). -# -# The other problem is that code designed to provoke a weakness is usually -# white-box code, and so catches only the particular vulnerabilities the -# author knew to protect against. For example, Python's list.sort() code -# went thru many iterations as one "new" vulnerability after another was -# discovered. -# -# So the dict comparison test here uses a black-box approach instead, -# generating dicts of various sizes at random, and performing random -# mutations on them at random times. This proved very effective, -# triggering at least six distinct failure modes the first 20 times I -# ran it. Indeed, at the start, the driver never got beyond 6 iterations -# before the test died. - -# The dicts are global to make it easy to mutate tham from within functions. -dict1 = {} -dict2 = {} - -# The current set of keys in dict1 and dict2. These are materialized as -# lists to make it easy to pick a dict key at random. -dict1keys = [] -dict2keys = [] - -# Global flag telling maybe_mutate() whether to *consider* mutating. -mutate = 0 - -# If global mutate is true, consider mutating a dict. May or may not -# mutate a dict even if mutate is true. If it does decide to mutate a -# dict, it picks one of {dict1, dict2} at random, and deletes a random -# entry from it; or, more rarely, adds a random element. - -def maybe_mutate(): - global mutate - if not mutate: - return - if random.random() < 0.5: - return - - if random.random() < 0.5: - target, keys = dict1, dict1keys - else: - target, keys = dict2, dict2keys - - if random.random() < 0.2: - # Insert a new key. - mutate = 0 # disable mutation until key inserted - while 1: - newkey = Horrid(random.randrange(100)) - if newkey not in target: - break - target[newkey] = Horrid(random.randrange(100)) - keys.append(newkey) - mutate = 1 - - elif keys: - # Delete a key at random. - mutate = 0 # disable mutation until key deleted - i = random.randrange(len(keys)) - key = keys[i] - del target[key] - del keys[i] - mutate = 1 - -# A horrid class that triggers random mutations of dict1 and dict2 when -# instances are compared. - -class Horrid: - def __init__(self, i): - # Comparison outcomes are determined by the value of i. - self.i = i - - # An artificial hashcode is selected at random so that we don't - # have any systematic relationship between comparison outcomes - # (based on self.i and other.i) and relative position within the - # hash vector (based on hashcode). - # XXX This is no longer effective. - ##self.hashcode = random.randrange(1000000000) - - def __hash__(self): - return 42 - return self.hashcode - - def __eq__(self, other): - maybe_mutate() # The point of the test. - return self.i == other.i - - def __ne__(self, other): - raise RuntimeError("I didn't expect some kind of Spanish inquisition!") - - __lt__ = __le__ = __gt__ = __ge__ = __ne__ - - def __repr__(self): - return "Horrid(%d)" % self.i - -# Fill dict d with numentries (Horrid(i), Horrid(j)) key-value pairs, -# where i and j are selected at random from the candidates list. -# Return d.keys() after filling. - -def fill_dict(d, candidates, numentries): - d.clear() - for i in range(numentries): - d[Horrid(random.choice(candidates))] = \ - Horrid(random.choice(candidates)) - return list(d.keys()) - -# Test one pair of randomly generated dicts, each with n entries. -# Note that dict comparison is trivial if they don't have the same number -# of entires (then the "shorter" dict is instantly considered to be the -# smaller one, without even looking at the entries). - -def test_one(n): - global mutate, dict1, dict2, dict1keys, dict2keys - - # Fill the dicts without mutating them. - mutate = 0 - dict1keys = fill_dict(dict1, range(n), n) - dict2keys = fill_dict(dict2, range(n), n) - - # Enable mutation, then compare the dicts so long as they have the - # same size. - mutate = 1 - if verbose: - print("trying w/ lengths", len(dict1), len(dict2), end=' ') - while dict1 and len(dict1) == len(dict2): - if verbose: - print(".", end=' ') - c = dict1 == dict2 - if verbose: - print() - -# Run test_one n times. At the start (before the bugs were fixed), 20 -# consecutive runs of this test each blew up on or before the sixth time -# test_one was run. So n doesn't have to be large to get an interesting -# test. -# OTOH, calling with large n is also interesting, to ensure that the fixed -# code doesn't hold on to refcounts *too* long (in which case memory would -# leak). - -def test(n): - for i in range(n): - test_one(random.randrange(1, 100)) - -# See last comment block for clues about good values for n. -test(100) - -########################################################################## -# Another segfault bug, distilled by Michael Hudson from a c.l.py post. - -class Child: - def __init__(self, parent): - self.__dict__['parent'] = parent - def __getattr__(self, attr): - self.parent.a = 1 - self.parent.b = 1 - self.parent.c = 1 - self.parent.d = 1 - self.parent.e = 1 - self.parent.f = 1 - self.parent.g = 1 - self.parent.h = 1 - self.parent.i = 1 - return getattr(self.parent, attr) - -class Parent: - def __init__(self): - self.a = Child(self) - -# Hard to say what this will print! May vary from time to time. But -# we're specifically trying to test the tp_print slot here, and this is -# the clearest way to do it. We print the result to a temp file so that -# the expected-output file doesn't need to change. - -f = open(TESTFN, "w") -print(Parent().__dict__, file=f) -f.close() -os.unlink(TESTFN) - -########################################################################## -# And another core-dumper from Michael Hudson. - -dict = {} - -# Force dict to malloc its table. -for i in range(1, 10): - dict[i] = i - -f = open(TESTFN, "w") - -class Machiavelli: - def __repr__(self): - dict.clear() - - # Michael sez: "doesn't crash without this. don't know why." - # Tim sez: "luck of the draw; crashes with or without for me." - print(file=f) - - return repr("machiavelli") - - def __hash__(self): - return 0 - -dict[Machiavelli()] = Machiavelli() - -print(str(dict), file=f) -f.close() -os.unlink(TESTFN) -del f, dict - - -########################################################################## -# And another core-dumper from Michael Hudson. - -dict = {} - -# let's force dict to malloc its table -for i in range(1, 10): - dict[i] = i - -class Machiavelli2: - def __eq__(self, other): - dict.clear() - return 1 - - def __hash__(self): - return 0 - -dict[Machiavelli2()] = Machiavelli2() - -try: - dict[Machiavelli2()] -except KeyError: - pass - -del dict - -########################################################################## -# And another core-dumper from Michael Hudson. - -dict = {} - -# let's force dict to malloc its table -for i in range(1, 10): - dict[i] = i - -class Machiavelli3: - def __init__(self, id): - self.id = id - - def __eq__(self, other): - if self.id == other.id: - dict.clear() - return 1 - else: - return 0 - - def __repr__(self): - return "%s(%s)"%(self.__class__.__name__, self.id) - - def __hash__(self): - return 0 - -dict[Machiavelli3(1)] = Machiavelli3(0) -dict[Machiavelli3(2)] = Machiavelli3(0) - -f = open(TESTFN, "w") -try: - try: - print(dict[Machiavelli3(2)], file=f) - except KeyError: - pass -finally: - f.close() - os.unlink(TESTFN) - -del dict -del dict1, dict2, dict1keys, dict2keys diff --git a/Misc/NEWS b/Misc/NEWS index 132857f..2cf22cb 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -10,10 +10,13 @@ What's New in Python 3.3.0 Alpha 2? Core and Builtins ----------------- +- Issue #14205: dict lookup raises a RuntimeError if the dict is modified + during a lookup. + Library ------- -- Issue #14168: Check for presence of Element._attrs in minidom before +- Issue #14168: Check for presence of Element._attrs in minidom before accessing it. - Issue #12328: Fix multiprocessing's use of overlapped I/O on Windows. diff --git a/Objects/dictobject.c b/Objects/dictobject.c index 83957ac..23ca442 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -347,12 +347,9 @@ lookdict(PyDictObject *mp, PyObject *key, register Py_hash_t hash) return ep; } else { - /* The compare did major nasty stuff to the - * dict: start over. - * XXX A clever adversary could prevent this - * XXX from terminating. - */ - return lookdict(mp, key, hash); + PyErr_SetString(PyExc_RuntimeError, + "dictionary changed size during lookup"); + return NULL; } } freeslot = NULL; @@ -379,12 +376,9 @@ lookdict(PyDictObject *mp, PyObject *key, register Py_hash_t hash) return ep; } else { - /* The compare did major nasty stuff to the - * dict: start over. - * XXX A clever adversary could prevent this - * XXX from terminating. - */ - return lookdict(mp, key, hash); + PyErr_SetString(PyExc_RuntimeError, + "dictionary changed size during lookup"); + return NULL; } } else if (ep->me_key == dummy && freeslot == NULL) -- cgit v0.12