author | Raymond Hettinger <rhettinger@users.noreply.github.com> | 2019-01-26 08:02:00 (GMT)
committer | GitHub <noreply@github.com> | 2019-01-26 08:02:00 (GMT)
commit | d8080c01195cc9a19af752bfa04d98824dd9fb15
tree | 75ab01426d6a329ad2987f58b5261443a0ee7c0e /Modules/_functoolsmodule.c
parent | adad9e68013aac166c84ffe4e23f3a5464f41840
bpo-35780: Fix errors in lru_cache() C code (GH-11623)
Diffstat (limited to 'Modules/_functoolsmodule.c')
-rw-r--r-- | Modules/_functoolsmodule.c | 272
1 file changed, 188 insertions, 84 deletions
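The heart of the change is making bounded_lru_cache_wrapper() safe against reentrant calls. The sketch below is illustrative only (the `ReentrantKey` class and `cached` function are hypothetical, not part of the patch): a key whose `__eq__` re-enters the cached function runs arbitrary Python in the middle of the wrapper's own dict lookups, which is exactly the situation the reworked code paths below have to survive without corrupting the cache dict or the linked list.

```python
from functools import lru_cache

@lru_cache(maxsize=2)
def cached(key):              # hypothetical cached function
    return key.n * 2

class ReentrantKey:
    """Hypothetical key type whose __eq__ re-enters the cache.

    Inside the C wrapper, the dict lookup for a cache hit may invoke
    this __eq__, so arbitrary Python (including further cache updates)
    can run while the wrapper is part-way through its own bookkeeping.
    """
    def __init__(self, n):
        self.n = n

    def __hash__(self):
        return hash(self.n)

    def __eq__(self, other):
        if self.n < 5:                        # bound the recursion
            cached(ReentrantKey(self.n + 1))  # re-enter the cache
        return isinstance(other, ReentrantKey) and self.n == other.n

cached(ReentrantKey(1))         # miss: nothing to compare against yet
print(cached(ReentrantKey(1)))  # hit path: __eq__ fires and re-enters
print(cached.cache_info())
```

The reentrancy note added by the patch spells out which of the four dict accesses can trigger such an `__eq__` call and why decrefs are deferred to the end of each code path.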
diff --git a/Modules/_functoolsmodule.c b/Modules/_functoolsmodule.c
index 0fb4847..1412102 100644
--- a/Modules/_functoolsmodule.c
+++ b/Modules/_functoolsmodule.c
@@ -711,16 +711,15 @@ typedef PyObject *(*lru_cache_ternaryfunc)(struct lru_cache_object *, PyObject *
 
 typedef struct lru_cache_object {
     lru_list_elem root;  /* includes PyObject_HEAD */
-    Py_ssize_t maxsize;
-    PyObject *maxsize_O;
-    PyObject *func;
     lru_cache_ternaryfunc wrapper;
+    int typed;
     PyObject *cache;
+    Py_ssize_t hits;
+    PyObject *func;
+    Py_ssize_t maxsize;
+    Py_ssize_t misses;
     PyObject *cache_info_type;
-    Py_ssize_t misses, hits;
-    int typed;
     PyObject *dict;
-    int full;
 } lru_cache_object;
 
 static PyTypeObject lru_cache_type;
@@ -733,6 +732,15 @@ lru_cache_make_key(PyObject *args, PyObject *kwds, int typed)
 
     /* short path, key will match args anyway, which is a tuple */
     if (!typed && !kwds) {
+        if (PyTuple_GET_SIZE(args) == 1) {
+            key = PyTuple_GET_ITEM(args, 0);
+            if (PyUnicode_CheckExact(key) || PyLong_CheckExact(key)) {
+                /* For common scalar keys, save space by
+                   dropping the enclosing args tuple */
+                Py_INCREF(key);
+                return key;
+            }
+        }
         Py_INCREF(args);
         return args;
     }
@@ -835,10 +843,12 @@ infinite_lru_cache_wrapper(lru_cache_object *self, PyObject *args, PyObject *kwd
 }
 
 static void
-lru_cache_extricate_link(lru_list_elem *link)
+lru_cache_extract_link(lru_list_elem *link)
 {
-    link->prev->next = link->next;
-    link->next->prev = link->prev;
+    lru_list_elem *link_prev = link->prev;
+    lru_list_elem *link_next = link->next;
+    link_prev->next = link->next;
+    link_next->prev = link->prev;
 }
 
 static void
@@ -851,11 +861,52 @@ lru_cache_append_link(lru_cache_object *self, lru_list_elem *link)
     link->next = root;
 }
 
+static void
+lru_cache_prepend_link(lru_cache_object *self, lru_list_elem *link)
+{
+    lru_list_elem *root = &self->root;
+    lru_list_elem *first = root->next;
+    first->prev = root->next = link;
+    link->prev = root;
+    link->next = first;
+}
+
+/* General note on reentrancy:
+
+   There are four dictionary calls in the bounded_lru_cache_wrapper():
+   1) The initial check for a cache match.  2) The post user-function
+   check for a cache match.  3) The deletion of the oldest entry.
+   4) The addition of the newest entry.
+
+   In all four calls, we have a known hash which lets us avoid a call
+   to __hash__().  That leaves only __eq__ as a possible source of a
+   reentrant call.
+
+   The __eq__ method call is always made for a cache hit (dict access #1).
+   Accordingly, we have to make sure not to modify the cache state prior
+   to this call.
+
+   The __eq__ method call is never made for the deletion (dict access #3)
+   because it is an identity match.
+
+   For the other two accesses (#2 and #4), calls to __eq__ only occur
+   when some other entry happens to have an exactly matching hash (all
+   64 bits).  Though rare, this can happen, so we have to make sure to
+   either call it at the top of its code path before any cache
+   state modifications (dict access #2) or be prepared to restore
+   invariants at the end of the code path (dict access #4).
+
+   Another possible source of reentrancy is a decref which can trigger
+   arbitrary code execution.  To make the code easier to reason about,
+   the decrefs are deferred to the end of each possible code path
+   so that we know the cache is in a consistent state.
+ */
+
 static PyObject *
 bounded_lru_cache_wrapper(lru_cache_object *self, PyObject *args, PyObject *kwds)
 {
     lru_list_elem *link;
-    PyObject *key, *result;
+    PyObject *key, *result, *testresult;
     Py_hash_t hash;
 
     key = lru_cache_make_key(args, kwds, self->typed);
@@ -867,11 +918,11 @@ bounded_lru_cache_wrapper(lru_cache_object *self, PyObject *args, PyObject *kwds
         return NULL;
     }
     link = (lru_list_elem *)_PyDict_GetItem_KnownHash(self->cache, key, hash);
-    if (link) {
-        lru_cache_extricate_link(link);
+    if (link != NULL) {
+        lru_cache_extract_link(link);
         lru_cache_append_link(self, link);
-        self->hits++;
         result = link->result;
+        self->hits++;
         Py_INCREF(result);
         Py_DECREF(key);
         return result;
@@ -880,65 +931,38 @@ bounded_lru_cache_wrapper(lru_cache_object *self, PyObject *args, PyObject *kwds
         Py_DECREF(key);
         return NULL;
     }
+    self->misses++;
     result = PyObject_Call(self->func, args, kwds);
     if (!result) {
         Py_DECREF(key);
         return NULL;
     }
-    if (self->full && self->root.next != &self->root) {
-        /* Use the oldest item to store the new key and result. */
-        PyObject *oldkey, *oldresult, *popresult;
-        /* Extricate the oldest item. */
-        link = self->root.next;
-        lru_cache_extricate_link(link);
-        /* Remove it from the cache.
-           The cache dict holds one reference to the link,
-           and the linked list holds yet one reference to it. */
-        popresult = _PyDict_Pop_KnownHash(self->cache,
-                                          link->key, link->hash,
-                                          Py_None);
-        if (popresult == Py_None) {
-            /* Getting here means that this same key was added to the
-               cache while the lock was released.  Since the link
-               update is already done, we need only return the
-               computed result and update the count of misses. */
-            Py_DECREF(popresult);
-            Py_DECREF(link);
-            Py_DECREF(key);
-        }
-        else if (popresult == NULL) {
-            lru_cache_append_link(self, link);
-            Py_DECREF(key);
-            Py_DECREF(result);
-            return NULL;
-        }
-        else {
-            Py_DECREF(popresult);
-            /* Keep a reference to the old key and old result to
-               prevent their ref counts from going to zero during the
-               update. That will prevent potentially arbitrary object
-               clean-up code (i.e. __del__) from running while we're
-               still adjusting the links. */
-            oldkey = link->key;
-            oldresult = link->result;
-
-            link->hash = hash;
-            link->key = key;
-            link->result = result;
-            if (_PyDict_SetItem_KnownHash(self->cache, key, (PyObject *)link,
-                                          hash) < 0) {
-                Py_DECREF(link);
-                Py_DECREF(oldkey);
-                Py_DECREF(oldresult);
-                return NULL;
-            }
-            lru_cache_append_link(self, link);
-            Py_INCREF(result); /* for return */
-            Py_DECREF(oldkey);
-            Py_DECREF(oldresult);
-        }
-    } else {
-        /* Put result in a new link at the front of the queue. */
+    testresult = _PyDict_GetItem_KnownHash(self->cache, key, hash);
+    if (testresult != NULL) {
+        /* Getting here means that this same key was added to the cache
+           during the PyObject_Call().  Since the link update is already
+           done, we need only return the computed result. */
+        Py_DECREF(key);
+        return result;
+    }
+    if (PyErr_Occurred()) {
+        /* This is an unusual case since this same lookup
+           did not previously trigger an error.
+           Treat it the same as an error in the user function
+           and return with the error set. */
+        Py_DECREF(key);
+        Py_DECREF(result);
+        return NULL;
+    }
+    /* This is the normal case.  The new key wasn't found before the
+       user function call and it is still not there.  So we
+       proceed normally and update the cache with the new result.
+    */
+
+    assert(self->maxsize > 0);
+    if (PyDict_GET_SIZE(self->cache) < self->maxsize ||
+        self->root.next == &self->root)
+    {
+        /* Cache is not full, so put the result in a new link */
         link = (lru_list_elem *)PyObject_New(lru_list_elem,
                                              &lru_list_elem_type);
         if (link == NULL) {
@@ -950,6 +974,11 @@ bounded_lru_cache_wrapper(lru_cache_object *self, PyObject *args, PyObject *kwds
         link->hash = hash;
         link->key = key;
         link->result = result;
+        /* What is really needed here is a SetItem variant with a "no clobber"
+           option.  If the __eq__ call triggers a reentrant call that adds
+           this same key, then this setitem call will update the cache dict
+           with this new link, leaving the old link as an orphan (i.e. not
+           having a cache dict entry that refers to it). */
         if (_PyDict_SetItem_KnownHash(self->cache, key, (PyObject *)link,
                                       hash) < 0) {
             Py_DECREF(link);
@@ -957,9 +986,83 @@ bounded_lru_cache_wrapper(lru_cache_object *self, PyObject *args, PyObject *kwds
         }
         lru_cache_append_link(self, link);
         Py_INCREF(result); /* for return */
-        self->full = (PyDict_GET_SIZE(self->cache) >= self->maxsize);
+        return result;
     }
-    self->misses++;
+    /* Since the cache is full, we need to evict an old key and add
+       a new key.  Rather than free the old link and allocate a new
+       one, we reuse the link for the new key and result and move it
+       to the front of the cache to mark it as recently used.
+
+       We try to ensure that all code paths (including errors) leave
+       all of the links in place.  Either the link is successfully
+       updated and moved or it is restored to its old position.
+       However, if an unrecoverable error is found, it doesn't
+       make sense to reinsert the link, so we leave it out
+       and the cache will no longer register as full.
+    */
+    PyObject *oldkey, *oldresult, *popresult;
+
+    /* Extract the oldest item. */
+    assert(self->root.next != &self->root);
+    link = self->root.next;
+    lru_cache_extract_link(link);
+    /* Remove it from the cache.
+       The cache dict holds one reference to the link,
+       and the linked list holds yet one reference to it. */
+    popresult = _PyDict_Pop_KnownHash(self->cache, link->key,
+                                      link->hash, Py_None);
+    if (popresult == Py_None) {
+        /* Getting here means that the user function call or another
+           thread has already removed the old key from the dictionary.
+           This link is now an orphan.  Since we don't want to leave the
+           cache in an inconsistent state, we don't restore the link. */
+        Py_DECREF(popresult);
+        Py_DECREF(link);
+        Py_DECREF(key);
+        return result;
+    }
+    if (popresult == NULL) {
+        /* An error arose while trying to remove the oldest key (the one
+           being evicted) from the cache.  We restore the link to its
+           original position as the oldest link.  Then we allow the
+           error to propagate upward, treating it the same as an error
+           arising in the user function. */
+        lru_cache_prepend_link(self, link);
+        Py_DECREF(key);
+        Py_DECREF(result);
+        return NULL;
+    }
+    /* Keep a reference to the old key and old result to prevent their
+       ref counts from going to zero during the update.  That will
+       prevent potentially arbitrary object clean-up code (i.e. __del__)
+       from running while we're still adjusting the links. */
+    oldkey = link->key;
+    oldresult = link->result;
+
+    link->hash = hash;
+    link->key = key;
+    link->result = result;
+    /* Note:  The link is being added to the cache dict without the
+       prev and next fields set to valid values.  We have to wait
+       for successful insertion in the cache dict before adding the
+       link to the linked list.
+       Otherwise, the potentially reentrant __eq__ call could cause
+       the then-orphaned link to be visited. */
+    if (_PyDict_SetItem_KnownHash(self->cache, key, (PyObject *)link,
+                                  hash) < 0) {
+        /* Somehow the cache dict update failed.  We can no longer
+           restore the old link.  Let the error propagate upward and
+           leave the cache short one link. */
+        Py_DECREF(popresult);
+        Py_DECREF(link);
+        Py_DECREF(oldkey);
+        Py_DECREF(oldresult);
+        return NULL;
+    }
+    lru_cache_append_link(self, link);
+    Py_INCREF(result); /* for return */
+    Py_DECREF(popresult);
+    Py_DECREF(oldkey);
+    Py_DECREF(oldresult);
     return result;
 }
 
@@ -995,6 +1098,9 @@ lru_cache_new(PyTypeObject *type, PyObject *args, PyObject *kw)
         maxsize = PyNumber_AsSsize_t(maxsize_O, PyExc_OverflowError);
         if (maxsize == -1 && PyErr_Occurred())
             return NULL;
+        if (maxsize < 0) {
+            maxsize = 0;
+        }
         if (maxsize == 0)
             wrapper = uncached_lru_cache_wrapper;
         else
@@ -1013,20 +1119,17 @@ lru_cache_new(PyTypeObject *type, PyObject *args, PyObject *kw)
         return NULL;
     }
 
-    obj->cache = cachedict;
     obj->root.prev = &obj->root;
     obj->root.next = &obj->root;
-    obj->maxsize = maxsize;
-    Py_INCREF(maxsize_O);
-    obj->maxsize_O = maxsize_O;
+    obj->wrapper = wrapper;
+    obj->typed = typed;
+    obj->cache = cachedict;
     Py_INCREF(func);
    obj->func = func;
-    obj->wrapper = wrapper;
     obj->misses = obj->hits = 0;
-    obj->typed = typed;
+    obj->maxsize = maxsize;
     Py_INCREF(cache_info_type);
     obj->cache_info_type = cache_info_type;
-
     return (PyObject *)obj;
 }
 
@@ -1060,11 +1163,10 @@ lru_cache_dealloc(lru_cache_object *obj)
 
     PyObject_GC_UnTrack(obj);
     list = lru_cache_unlink_list(obj);
-    Py_XDECREF(obj->maxsize_O);
-    Py_XDECREF(obj->func);
     Py_XDECREF(obj->cache);
-    Py_XDECREF(obj->dict);
+    Py_XDECREF(obj->func);
     Py_XDECREF(obj->cache_info_type);
+    Py_XDECREF(obj->dict);
     lru_cache_clear_list(list);
     Py_TYPE(obj)->tp_free(obj);
 }
@@ -1088,8 +1190,13 @@ lru_cache_descr_get(PyObject *self, PyObject *obj, PyObject *type)
 static PyObject *
 lru_cache_cache_info(lru_cache_object *self, PyObject *unused)
 {
-    return PyObject_CallFunction(self->cache_info_type, "nnOn",
-                                 self->hits, self->misses, self->maxsize_O,
+    if (self->maxsize == -1) {
+        return PyObject_CallFunction(self->cache_info_type, "nnOn",
+                                     self->hits, self->misses, Py_None,
+                                     PyDict_GET_SIZE(self->cache));
+    }
+    return PyObject_CallFunction(self->cache_info_type, "nnnn",
+                                 self->hits, self->misses, self->maxsize,
                                  PyDict_GET_SIZE(self->cache));
 }
 
@@ -1098,7 +1205,6 @@ lru_cache_cache_clear(lru_cache_object *self, PyObject *unused)
 {
     lru_list_elem *list = lru_cache_unlink_list(self);
     self->hits = self->misses = 0;
-    self->full = 0;
     PyDict_Clear(self->cache);
     lru_cache_clear_list(list);
     Py_RETURN_NONE;
 }
@@ -1134,7 +1240,6 @@ lru_cache_tp_traverse(lru_cache_object *self, visitproc visit, void *arg)
         Py_VISIT(link->result);
         link = next;
     }
-    Py_VISIT(self->maxsize_O);
     Py_VISIT(self->func);
     Py_VISIT(self->cache);
     Py_VISIT(self->cache_info_type);
@@ -1146,7 +1251,6 @@ static int
 lru_cache_tp_clear(lru_cache_object *self)
 {
     lru_list_elem *list = lru_cache_unlink_list(self);
-    Py_CLEAR(self->maxsize_O);
     Py_CLEAR(self->func);
     Py_CLEAR(self->cache);
     Py_CLEAR(self->cache_info_type);
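Two of the smaller changes above are visible from Python: lru_cache_new() now clamps a negative maxsize to 0, and lru_cache_cache_info() builds CacheInfo from the stored Py_ssize_t maxsize, reporting None only for the unbounded case. A rough illustration of the expected behavior under the new code (not output captured from a build of this exact commit):

```python
from functools import lru_cache

@lru_cache(maxsize=-10)   # negative maxsize is clamped to 0: no caching
def square(x):
    return x * x

square(3)
square(3)
print(square.cache_info())
# expected: CacheInfo(hits=0, misses=2, maxsize=0, currsize=0)

@lru_cache(maxsize=None)  # unbounded cache: maxsize reported as None
def cube(x):
    return x ** 3

cube(2)
cube(2)
print(cube.cache_info())
# expected: CacheInfo(hits=1, misses=1, maxsize=None, currsize=1)
```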