From 67796521dd22b3008788a75108b45f33d06f85dd Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Thu, 12 Jan 2017 18:34:33 +0200 Subject: Issue #28969: Fixed race condition in C implementation of functools.lru_cache. KeyError could be raised when cached function with full cache was simultaneously called from differen threads with the same uncached arguments. --- Include/dictobject.h | 1 + Lib/test/test_functools.py | 15 ++++++++++++ Misc/NEWS | 4 ++++ Modules/_functoolsmodule.c | 58 ++++++++++++++++++++++++++++------------------ Objects/dictobject.c | 31 ++++++++++++++++++------- 5 files changed, 79 insertions(+), 30 deletions(-) diff --git a/Include/dictobject.h b/Include/dictobject.h index e0e1c26..17e12c0 100644 --- a/Include/dictobject.h +++ b/Include/dictobject.h @@ -102,6 +102,7 @@ PyAPI_FUNC(int) _PyDict_HasOnlyStringKeys(PyObject *mp); Py_ssize_t _PyDict_KeysSize(PyDictKeysObject *keys); Py_ssize_t _PyDict_SizeOf(PyDictObject *); PyObject *_PyDict_Pop(PyDictObject *, PyObject *, PyObject *); +PyObject *_PyDict_Pop_KnownHash(PyDictObject *, PyObject *, Py_hash_t, PyObject *); PyObject *_PyDict_FromKeys(PyObject *, PyObject *, PyObject *); #define _PyDict_HasSplitTable(d) ((d)->ma_values != NULL) diff --git a/Lib/test/test_functools.py b/Lib/test/test_functools.py index b431e05..9ccd0ca 100644 --- a/Lib/test/test_functools.py +++ b/Lib/test/test_functools.py @@ -7,6 +7,7 @@ import pickle from random import choice import sys from test import support +import time import unittest from weakref import proxy try: @@ -1364,6 +1365,20 @@ class TestLRU: pause.reset() self.assertEqual(f.cache_info(), (0, (i+1)*n, m*n, i+1)) + @unittest.skipUnless(threading, 'This test requires threading.') + def test_lru_cache_threaded3(self): + @self.module.lru_cache(maxsize=2) + def f(x): + time.sleep(.01) + return 3 * x + def test(i, x): + with self.subTest(thread=i): + self.assertEqual(f(x), 3 * x, i) + threads = [threading.Thread(target=test, args=(i, v)) + for i, v in enumerate([1, 2, 2, 3, 2])] + with support.start_threads(threads): + pass + def test_need_for_rlock(self): # This will deadlock on an LRU cache that uses a regular lock diff --git a/Misc/NEWS b/Misc/NEWS index df9d0dd..e1ae98f 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -13,6 +13,10 @@ Core and Builtins Library ------- +- Issue #28969: Fixed race condition in C implementation of functools.lru_cache. + KeyError could be raised when cached function with full cache was + simultaneously called from differen threads with the same uncached arguments. + - Issue #29142: In urllib.request, suffixes in no_proxy environment variable with leading dots could match related hostnames again (e.g. .b.c matches a.b.c). Patch by Milan Oberkirch. diff --git a/Modules/_functoolsmodule.c b/Modules/_functoolsmodule.c index 9c9ab5f..be0f5f9 100644 --- a/Modules/_functoolsmodule.c +++ b/Modules/_functoolsmodule.c @@ -864,42 +864,56 @@ bounded_lru_cache_wrapper(lru_cache_object *self, PyObject *args, PyObject *kwds } if (self->full && self->root.next != &self->root) { /* Use the oldest item to store the new key and result. */ - PyObject *oldkey, *oldresult; + PyObject *oldkey, *oldresult, *popresult; /* Extricate the oldest item. */ link = self->root.next; lru_cache_extricate_link(link); /* Remove it from the cache. The cache dict holds one reference to the link, and the linked list holds yet one reference to it. */ - if (_PyDict_DelItem_KnownHash(self->cache, link->key, - link->hash) < 0) { + popresult = _PyDict_Pop_KnownHash((PyDictObject *)self->cache, + link->key, link->hash, + Py_None); + if (popresult == Py_None) { + /* Getting here means that this same key was added to the + cache while the lock was released. Since the link + update is already done, we need only return the + computed result and update the count of misses. */ + Py_DECREF(popresult); + Py_DECREF(link); + Py_DECREF(key); + } + else if (popresult == NULL) { lru_cache_append_link(self, link); Py_DECREF(key); Py_DECREF(result); return NULL; } - /* Keep a reference to the old key and old result to - prevent their ref counts from going to zero during the - update. That will prevent potentially arbitrary object - clean-up code (i.e. __del__) from running while we're - still adjusting the links. */ - oldkey = link->key; - oldresult = link->result; - - link->hash = hash; - link->key = key; - link->result = result; - if (_PyDict_SetItem_KnownHash(self->cache, key, (PyObject *)link, - hash) < 0) { - Py_DECREF(link); + else { + Py_DECREF(popresult); + /* Keep a reference to the old key and old result to + prevent their ref counts from going to zero during the + update. That will prevent potentially arbitrary object + clean-up code (i.e. __del__) from running while we're + still adjusting the links. */ + oldkey = link->key; + oldresult = link->result; + + link->hash = hash; + link->key = key; + link->result = result; + if (_PyDict_SetItem_KnownHash(self->cache, key, (PyObject *)link, + hash) < 0) { + Py_DECREF(link); + Py_DECREF(oldkey); + Py_DECREF(oldresult); + return NULL; + } + lru_cache_append_link(self, link); + Py_INCREF(result); /* for return */ Py_DECREF(oldkey); Py_DECREF(oldresult); - return NULL; } - lru_cache_append_link(self, link); - Py_INCREF(result); /* for return */ - Py_DECREF(oldkey); - Py_DECREF(oldresult); } else { /* Put result in a new link at the front of the queue. */ link = (lru_list_elem *)PyObject_GC_New(lru_list_elem, diff --git a/Objects/dictobject.c b/Objects/dictobject.c index 747d218..11c086f 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -1475,9 +1475,8 @@ _PyDict_Next(PyObject *op, Py_ssize_t *ppos, PyObject **pkey, /* Internal version of dict.pop(). */ PyObject * -_PyDict_Pop(PyDictObject *mp, PyObject *key, PyObject *deflt) +_PyDict_Pop_KnownHash(PyDictObject *mp, PyObject *key, Py_hash_t hash, PyObject *deflt) { - Py_hash_t hash; PyObject *old_value, *old_key; PyDictKeyEntry *ep; PyObject **value_addr; @@ -1490,12 +1489,6 @@ _PyDict_Pop(PyDictObject *mp, PyObject *key, PyObject *deflt) _PyErr_SetKeyError(key); return NULL; } - if (!PyUnicode_CheckExact(key) || - (hash = ((PyASCIIObject *) key)->hash) == -1) { - hash = PyObject_Hash(key); - if (hash == -1) - return NULL; - } ep = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr); if (ep == NULL) return NULL; @@ -1520,6 +1513,28 @@ _PyDict_Pop(PyDictObject *mp, PyObject *key, PyObject *deflt) return old_value; } +PyObject * +_PyDict_Pop(PyDictObject *mp, PyObject *key, PyObject *deflt) +{ + Py_hash_t hash; + + if (mp->ma_used == 0) { + if (deflt) { + Py_INCREF(deflt); + return deflt; + } + _PyErr_SetKeyError(key); + return NULL; + } + if (!PyUnicode_CheckExact(key) || + (hash = ((PyASCIIObject *) key)->hash) == -1) { + hash = PyObject_Hash(key); + if (hash == -1) + return NULL; + } + return _PyDict_Pop_KnownHash(mp, key, hash, deflt); +} + /* Internal version of dict.from_keys(). It is subclass-friendly. */ PyObject * _PyDict_FromKeys(PyObject *cls, PyObject *iterable, PyObject *value) -- cgit v0.12