|
|
|
@@ -711,16 +711,15 @@ typedef PyObject *(*lru_cache_ternaryfunc)(struct lru_cache_object *, PyObject * |
|
|
|
|
|
|
|
typedef struct lru_cache_object { |
|
|
|
lru_list_elem root; /* includes PyObject_HEAD */ |
|
|
|
Py_ssize_t maxsize; |
|
|
|
PyObject *maxsize_O; |
|
|
|
PyObject *func; |
|
|
|
lru_cache_ternaryfunc wrapper; |
|
|
|
int typed; |
|
|
|
PyObject *cache; |
|
|
|
Py_ssize_t hits; |
|
|
|
PyObject *func; |
|
|
|
Py_ssize_t maxsize; |
|
|
|
Py_ssize_t misses; |
|
|
|
PyObject *cache_info_type; |
|
|
|
Py_ssize_t misses, hits; |
|
|
|
int typed; |
|
|
|
PyObject *dict; |
|
|
|
int full; |
|
|
|
} lru_cache_object; |
|
|
|
|
|
|
|
static PyTypeObject lru_cache_type; |
|
|
|
@@ -733,6 +732,15 @@ lru_cache_make_key(PyObject *args, PyObject *kwds, int typed) |
|
|
|
|
|
|
|
/* short path, key will match args anyway, which is a tuple */ |
|
|
|
if (!typed && !kwds) { |
|
|
|
if (PyTuple_GET_SIZE(args) == 1) { |
|
|
|
key = PyTuple_GET_ITEM(args, 0); |
|
|
|
if (PyUnicode_CheckExact(key) || PyLong_CheckExact(key)) { |
|
|
|
/* For common scalar keys, save space by |
|
|
|
dropping the enclosing args tuple */ |
|
|
|
Py_INCREF(key); |
|
|
|
return key; |
|
|
|
} |
|
|
|
} |
|
|
|
Py_INCREF(args); |
|
|
|
return args; |
|
|
|
} |
|
|
|
@@ -835,10 +843,12 @@ infinite_lru_cache_wrapper(lru_cache_object *self, PyObject *args, PyObject *kwd |
|
|
|
} |
|
|
|
|
|
|
|
static void |
|
|
|
lru_cache_extricate_link(lru_list_elem *link) |
|
|
|
lru_cache_extract_link(lru_list_elem *link) |
|
|
|
{ |
|
|
|
link->prev->next = link->next; |
|
|
|
link->next->prev = link->prev; |
|
|
|
lru_list_elem *link_prev = link->prev; |
|
|
|
lru_list_elem *link_next = link->next; |
|
|
|
link_prev->next = link->next; |
|
|
|
link_next->prev = link->prev; |
|
|
|
} |
|
|
|
|
|
|
|
static void |
|
|
|
@@ -851,11 +861,52 @@ lru_cache_append_link(lru_cache_object *self, lru_list_elem *link) |
|
|
|
link->next = root; |
|
|
|
} |
|
|
|
|
|
|
|
static void |
|
|
|
lru_cache_prepend_link(lru_cache_object *self, lru_list_elem *link) |
|
|
|
{ |
|
|
|
lru_list_elem *root = &self->root; |
|
|
|
lru_list_elem *first = root->next; |
|
|
|
first->prev = root->next = link; |
|
|
|
link->prev = root; |
|
|
|
link->next = first; |
|
|
|
} |
|
|
|
|
|
|
|
/* General note on reentrancy: |
|
|
|
|
|
|
|
There are four dictionary calls in the bounded_lru_cache_wrapper(): |
|
|
|
1) The initial check for a cache match. 2) The post user-function |
|
|
|
check for a cache match. 3) The deletion of the oldest entry. |
|
|
|
4) The addition of the newest entry. |
|
|
|
|
|
|
|
In all four calls, we have a known hash which lets us avoid a call |
|
|
|
to __hash__(). That leaves only __eq__ as a possible source of a |
|
|
|
reentrant call. |
|
|
|
|
|
|
|
The __eq__ method call is always made for a cache hit (dict access #1). |
|
|
|
Accordingly, we have to make sure not to modify the cache state prior to |
|
|
|
this call. |
|
|
|
|
|
|
|
The __eq__ method call is never made for the deletion (dict access #3) |
|
|
|
because it is an identity match. |
|
|
|
|
|
|
|
For the other two accesses (#2 and #4), calls to __eq__ only occur |
|
|
|
when some other entry happens to have an exactly matching hash (all |
|
|
|
64-bits). Though rare, this can happen, so we have to make sure to |
|
|
|
either call it at the top of its code path before any cache |
|
|
|
state modifications (dict access #2) or be prepared to restore |
|
|
|
invariants at the end of the code path (dict access #4). |
|
|
|
|
|
|
|
Another possible source of reentrancy is a decref which can trigger |
|
|
|
arbitrary code execution. To make the code easier to reason about, |
|
|
|
the decrefs are deferred to the end of the each possible code path |
|
|
|
so that we know the cache is a consistent state. |
|
|
|
*/ |
|
|
|
|
|
|
|
static PyObject * |
|
|
|
bounded_lru_cache_wrapper(lru_cache_object *self, PyObject *args, PyObject *kwds) |
|
|
|
{ |
|
|
|
lru_list_elem *link; |
|
|
|
PyObject *key, *result; |
|
|
|
PyObject *key, *result, *testresult; |
|
|
|
Py_hash_t hash; |
|
|
|
|
|
|
|
key = lru_cache_make_key(args, kwds, self->typed); |
|
|
|
@@ -867,11 +918,11 @@ bounded_lru_cache_wrapper(lru_cache_object *self, PyObject *args, PyObject *kwds |
|
|
|
return NULL; |
|
|
|
} |
|
|
|
link = (lru_list_elem *)_PyDict_GetItem_KnownHash(self->cache, key, hash); |
|
|
|
if (link) { |
|
|
|
lru_cache_extricate_link(link); |
|
|
|
if (link != NULL) { |
|
|
|
lru_cache_extract_link(link); |
|
|
|
lru_cache_append_link(self, link); |
|
|
|
self->hits++; |
|
|
|
result = link->result; |
|
|
|
self->hits++; |
|
|
|
Py_INCREF(result); |
|
|
|
Py_DECREF(key); |
|
|
|
return result; |
|
|
|
@@ -880,65 +931,38 @@ bounded_lru_cache_wrapper(lru_cache_object *self, PyObject *args, PyObject *kwds |
|
|
|
Py_DECREF(key); |
|
|
|
return NULL; |
|
|
|
} |
|
|
|
self->misses++; |
|
|
|
result = PyObject_Call(self->func, args, kwds); |
|
|
|
if (!result) { |
|
|
|
Py_DECREF(key); |
|
|
|
return NULL; |
|
|
|
} |
|
|
|
if (self->full && self->root.next != &self->root) { |
|
|
|
/* Use the oldest item to store the new key and result. */ |
|
|
|
PyObject *oldkey, *oldresult, *popresult; |
|
|
|
/* Extricate the oldest item. */ |
|
|
|
link = self->root.next; |
|
|
|
lru_cache_extricate_link(link); |
|
|
|
/* Remove it from the cache. |
|
|
|
The cache dict holds one reference to the link, |
|
|
|
and the linked list holds yet one reference to it. */ |
|
|
|
popresult = _PyDict_Pop_KnownHash(self->cache, |
|
|
|
link->key, link->hash, |
|
|
|
Py_None); |
|
|
|
if (popresult == Py_None) { |
|
|
|
/* Getting here means that this same key was added to the |
|
|
|
cache while the lock was released. Since the link |
|
|
|
update is already done, we need only return the |
|
|
|
computed result and update the count of misses. */ |
|
|
|
Py_DECREF(popresult); |
|
|
|
Py_DECREF(link); |
|
|
|
Py_DECREF(key); |
|
|
|
} |
|
|
|
else if (popresult == NULL) { |
|
|
|
lru_cache_append_link(self, link); |
|
|
|
Py_DECREF(key); |
|
|
|
Py_DECREF(result); |
|
|
|
return NULL; |
|
|
|
} |
|
|
|
else { |
|
|
|
Py_DECREF(popresult); |
|
|
|
/* Keep a reference to the old key and old result to |
|
|
|
prevent their ref counts from going to zero during the |
|
|
|
update. That will prevent potentially arbitrary object |
|
|
|
clean-up code (i.e. __del__) from running while we're |
|
|
|
still adjusting the links. */ |
|
|
|
oldkey = link->key; |
|
|
|
oldresult = link->result; |
|
|
|
|
|
|
|
link->hash = hash; |
|
|
|
link->key = key; |
|
|
|
link->result = result; |
|
|
|
if (_PyDict_SetItem_KnownHash(self->cache, key, (PyObject *)link, |
|
|
|
hash) < 0) { |
|
|
|
Py_DECREF(link); |
|
|
|
Py_DECREF(oldkey); |
|
|
|
Py_DECREF(oldresult); |
|
|
|
return NULL; |
|
|
|
} |
|
|
|
lru_cache_append_link(self, link); |
|
|
|
Py_INCREF(result); /* for return */ |
|
|
|
Py_DECREF(oldkey); |
|
|
|
Py_DECREF(oldresult); |
|
|
|
} |
|
|
|
} else { |
|
|
|
/* Put result in a new link at the front of the queue. */ |
|
|
|
testresult = _PyDict_GetItem_KnownHash(self->cache, key, hash); |
|
|
|
if (testresult != NULL) { |
|
|
|
/* Getting here means that this same key was added to the cache |
|
|
|
during the PyObject_Call(). Since the link update is already |
|
|
|
done, we need only return the computed result. */ |
|
|
|
Py_DECREF(key); |
|
|
|
return result; |
|
|
|
} |
|
|
|
if (PyErr_Occurred()) { |
|
|
|
/* This is an unusual case since this same lookup |
|
|
|
did not previously trigger an error during lookup. |
|
|
|
Treat it the same as an error in user function |
|
|
|
and return with the error set. */ |
|
|
|
Py_DECREF(key); |
|
|
|
Py_DECREF(result); |
|
|
|
return NULL; |
|
|
|
} |
|
|
|
/* This is the normal case. The new key wasn't found before |
|
|
|
user function call and it is still not there. So we |
|
|
|
proceed normally and update the cache with the new result. */ |
|
|
|
|
|
|
|
assert(self->maxsize > 0); |
|
|
|
if (PyDict_GET_SIZE(self->cache) < self->maxsize || |
|
|
|
self->root.next == &self->root) |
|
|
|
{ |
|
|
|
/* Cache is not full, so put the result in a new link */ |
|
|
|
link = (lru_list_elem *)PyObject_New(lru_list_elem, |
|
|
|
&lru_list_elem_type); |
|
|
|
if (link == NULL) { |
|
|
|
@@ -950,6 +974,11 @@ bounded_lru_cache_wrapper(lru_cache_object *self, PyObject *args, PyObject *kwds |
|
|
|
link->hash = hash; |
|
|
|
link->key = key; |
|
|
|
link->result = result; |
|
|
|
/* What is really needed here is a SetItem variant with a "no clobber" |
|
|
|
option. If the __eq__ call triggers a reentrant call that adds |
|
|
|
this same key, then this setitem call will update the cache dict |
|
|
|
with this new link, leaving the old link as an orphan (i.e. not |
|
|
|
having a cache dict entry that refers to it). */ |
|
|
|
if (_PyDict_SetItem_KnownHash(self->cache, key, (PyObject *)link, |
|
|
|
hash) < 0) { |
|
|
|
Py_DECREF(link); |
|
|
|
@@ -957,9 +986,83 @@ bounded_lru_cache_wrapper(lru_cache_object *self, PyObject *args, PyObject *kwds |
|
|
|
} |
|
|
|
lru_cache_append_link(self, link); |
|
|
|
Py_INCREF(result); /* for return */ |
|
|
|
self->full = (PyDict_GET_SIZE(self->cache) >= self->maxsize); |
|
|
|
return result; |
|
|
|
} |
|
|
|
self->misses++; |
|
|
|
/* Since the cache is full, we need to evict an old key and add |
|
|
|
a new key. Rather than free the old link and allocate a new |
|
|
|
one, we reuse the link for the new key and result and move it |
|
|
|
to front of the cache to mark it as recently used. |
|
|
|
|
|
|
|
We try to assure all code paths (including errors) leave all |
|
|
|
of the links in place. Either the link is successfully |
|
|
|
updated and moved or it is restored to its old position. |
|
|
|
However if an unrecoverable error is found, it doesn't |
|
|
|
make sense to reinsert the link, so we leave it out |
|
|
|
and the cache will no longer register as full. |
|
|
|
*/ |
|
|
|
PyObject *oldkey, *oldresult, *popresult; |
|
|
|
|
|
|
|
/* Extract the oldest item. */ |
|
|
|
assert(self->root.next != &self->root); |
|
|
|
link = self->root.next; |
|
|
|
lru_cache_extract_link(link); |
|
|
|
/* Remove it from the cache. |
|
|
|
The cache dict holds one reference to the link, |
|
|
|
and the linked list holds yet one reference to it. */ |
|
|
|
popresult = _PyDict_Pop_KnownHash(self->cache, link->key, |
|
|
|
link->hash, Py_None); |
|
|
|
if (popresult == Py_None) { |
|
|
|
/* Getting here means that the user function call or another |
|
|
|
thread has already removed the old key from the dictionary. |
|
|
|
This link is now an orphan.  Since we don't want to leave the |
|
|
|
cache in an inconsistent state, we don't restore the link. */ |
|
|
|
Py_DECREF(popresult); |
|
|
|
Py_DECREF(link); |
|
|
|
Py_DECREF(key); |
|
|
|
return result; |
|
|
|
} |
|
|
|
if (popresult == NULL) { |
|
|
|
/* An error arose while trying to remove the oldest key (the one |
|
|
|
being evicted) from the cache. We restore the link to its |
|
|
|
original position as the oldest link. Then we allow the |
|
|
|
error propagate upward; treating it the same as an error |
|
|
|
arising in the user function. */ |
|
|
|
lru_cache_prepend_link(self, link); |
|
|
|
Py_DECREF(key); |
|
|
|
Py_DECREF(result); |
|
|
|
return NULL; |
|
|
|
} |
|
|
|
/* Keep a reference to the old key and old result to prevent their |
|
|
|
ref counts from going to zero during the update. That will |
|
|
|
prevent potentially arbitrary object clean-up code (i.e. __del__) |
|
|
|
from running while we're still adjusting the links. */ |
|
|
|
oldkey = link->key; |
|
|
|
oldresult = link->result; |
|
|
|
|
|
|
|
link->hash = hash; |
|
|
|
link->key = key; |
|
|
|
link->result = result; |
|
|
|
/* Note: The link is being added to the cache dict without the |
|
|
|
prev and next fields set to valid values. We have to wait |
|
|
|
for successful insertion in the cache dict before adding the |
|
|
|
link to the linked list. Otherwise, the potentially reentrant |
|
|
|
__eq__ call could cause the then-orphaned link to be visited. */ |
|
|
|
if (_PyDict_SetItem_KnownHash(self->cache, key, (PyObject *)link, |
|
|
|
hash) < 0) { |
|
|
|
/* Somehow the cache dict update failed. We no longer can |
|
|
|
restore the old link. Let the error propagate upward and |
|
|
|
leave the cache short one link. */ |
|
|
|
Py_DECREF(popresult); |
|
|
|
Py_DECREF(link); |
|
|
|
Py_DECREF(oldkey); |
|
|
|
Py_DECREF(oldresult); |
|
|
|
return NULL; |
|
|
|
} |
|
|
|
lru_cache_append_link(self, link); |
|
|
|
Py_INCREF(result); /* for return */ |
|
|
|
Py_DECREF(popresult); |
|
|
|
Py_DECREF(oldkey); |
|
|
|
Py_DECREF(oldresult); |
|
|
|
return result; |
|
|
|
} |
|
|
|
|
|
|
|
@@ -995,6 +1098,9 @@ lru_cache_new(PyTypeObject *type, PyObject *args, PyObject *kw) |
|
|
|
maxsize = PyNumber_AsSsize_t(maxsize_O, PyExc_OverflowError); |
|
|
|
if (maxsize == -1 && PyErr_Occurred()) |
|
|
|
return NULL; |
|
|
|
if (maxsize < 0) { |
|
|
|
maxsize = 0; |
|
|
|
} |
|
|
|
if (maxsize == 0) |
|
|
|
wrapper = uncached_lru_cache_wrapper; |
|
|
|
else |
|
|
|
@@ -1013,20 +1119,17 @@ lru_cache_new(PyTypeObject *type, PyObject *args, PyObject *kw) |
|
|
|
return NULL; |
|
|
|
} |
|
|
|
|
|
|
|
obj->cache = cachedict; |
|
|
|
obj->root.prev = &obj->root; |
|
|
|
obj->root.next = &obj->root; |
|
|
|
obj->maxsize = maxsize; |
|
|
|
Py_INCREF(maxsize_O); |
|
|
|
obj->maxsize_O = maxsize_O; |
|
|
|
obj->wrapper = wrapper; |
|
|
|
obj->typed = typed; |
|
|
|
obj->cache = cachedict; |
|
|
|
Py_INCREF(func); |
|
|
|
obj->func = func; |
|
|
|
obj->wrapper = wrapper; |
|
|
|
obj->misses = obj->hits = 0; |
|
|
|
obj->typed = typed; |
|
|
|
obj->maxsize = maxsize; |
|
|
|
Py_INCREF(cache_info_type); |
|
|
|
obj->cache_info_type = cache_info_type; |
|
|
|
|
|
|
|
return (PyObject *)obj; |
|
|
|
} |
|
|
|
|
|
|
|
@@ -1060,11 +1163,10 @@ lru_cache_dealloc(lru_cache_object *obj) |
|
|
|
PyObject_GC_UnTrack(obj); |
|
|
|
|
|
|
|
list = lru_cache_unlink_list(obj); |
|
|
|
Py_XDECREF(obj->maxsize_O); |
|
|
|
Py_XDECREF(obj->func); |
|
|
|
Py_XDECREF(obj->cache); |
|
|
|
Py_XDECREF(obj->dict); |
|
|
|
Py_XDECREF(obj->func); |
|
|
|
Py_XDECREF(obj->cache_info_type); |
|
|
|
Py_XDECREF(obj->dict); |
|
|
|
lru_cache_clear_list(list); |
|
|
|
Py_TYPE(obj)->tp_free(obj); |
|
|
|
} |
|
|
|
@@ -1088,8 +1190,13 @@ lru_cache_descr_get(PyObject *self, PyObject *obj, PyObject *type) |
|
|
|
static PyObject * |
|
|
|
lru_cache_cache_info(lru_cache_object *self, PyObject *unused) |
|
|
|
{ |
|
|
|
return PyObject_CallFunction(self->cache_info_type, "nnOn", |
|
|
|
self->hits, self->misses, self->maxsize_O, |
|
|
|
if (self->maxsize == -1) { |
|
|
|
return PyObject_CallFunction(self->cache_info_type, "nnOn", |
|
|
|
self->hits, self->misses, Py_None, |
|
|
|
PyDict_GET_SIZE(self->cache)); |
|
|
|
} |
|
|
|
return PyObject_CallFunction(self->cache_info_type, "nnnn", |
|
|
|
self->hits, self->misses, self->maxsize, |
|
|
|
PyDict_GET_SIZE(self->cache)); |
|
|
|
} |
|
|
|
|
|
|
|
@@ -1098,7 +1205,6 @@ lru_cache_cache_clear(lru_cache_object *self, PyObject *unused) |
|
|
|
{ |
|
|
|
lru_list_elem *list = lru_cache_unlink_list(self); |
|
|
|
self->hits = self->misses = 0; |
|
|
|
self->full = 0; |
|
|
|
PyDict_Clear(self->cache); |
|
|
|
lru_cache_clear_list(list); |
|
|
|
Py_RETURN_NONE; |
|
|
|
@@ -1134,7 +1240,6 @@ lru_cache_tp_traverse(lru_cache_object *self, visitproc visit, void *arg) |
|
|
|
Py_VISIT(link->result); |
|
|
|
link = next; |
|
|
|
} |
|
|
|
Py_VISIT(self->maxsize_O); |
|
|
|
Py_VISIT(self->func); |
|
|
|
Py_VISIT(self->cache); |
|
|
|
Py_VISIT(self->cache_info_type); |
|
|
|
@@ -1146,7 +1251,6 @@ static int |
|
|
|
lru_cache_tp_clear(lru_cache_object *self) |
|
|
|
{ |
|
|
|
lru_list_elem *list = lru_cache_unlink_list(self); |
|
|
|
Py_CLEAR(self->maxsize_O); |
|
|
|
Py_CLEAR(self->func); |
|
|
|
Py_CLEAR(self->cache); |
|
|
|
Py_CLEAR(self->cache_info_type); |
|
|
|
|