From 234515afe594e5f9ca1b3f460735c68b04c031e2 Mon Sep 17 00:00:00 2001 From: Nick Coghlan Date: Tue, 30 Nov 2010 06:19:46 +0000 Subject: Issue 10586: change the new functools.lru_cache implementation to expose the maximum and current cache sizes through the public statistics API. This API is now a single function that returns a named tuple. --- Doc/library/functools.rst | 14 ++++++++++---- Doc/whatsnew/3.2.rst | 2 ++ Lib/functools.py | 25 ++++++++++++++++++------- Lib/test/test_functools.py | 44 ++++++++++++++++++++++++++++++++++++-------- Misc/NEWS | 3 +++ 5 files changed, 69 insertions(+), 19 deletions(-) diff --git a/Doc/library/functools.rst b/Doc/library/functools.rst index 82238fd..a8fc856 100644 --- a/Doc/library/functools.rst +++ b/Doc/library/functools.rst @@ -52,10 +52,16 @@ The :mod:`functools` module defines the following functions: results, the positional and keyword arguments to the function must be hashable. - The wrapped function is instrumented with two attributes, :attr:`cache_hits` - and :attr:`cache_misses` which count the number of successful or unsuccessful - cache lookups. These statistics are helpful for tuning the *maxsize* - parameter and for measuring the cache's effectiveness. + The wrapped function is instrumented with a :attr:`cache_info` attribute that + can be called to retrieve a named tuple with the following fields: + + - :attr:`maxsize`: maximum cache size (as set by the *maxsize* parameter) + - :attr:`size`: current number of entries in the cache + - :attr:`hits`: number of successful cache lookups + - :attr:`misses`: number of unsuccessful cache lookups. + + These statistics are helpful for tuning the *maxsize* parameter and for measuring + the effectiveness of the cache. The wrapped function also has a :attr:`cache_clear` attribute which can be called (with no arguments) to clear the cache. diff --git a/Doc/whatsnew/3.2.rst b/Doc/whatsnew/3.2.rst index 1edbf50..91ab849 100644 --- a/Doc/whatsnew/3.2.rst +++ b/Doc/whatsnew/3.2.rst @@ -332,6 +332,8 @@ New, Improved, and Deprecated Modules c.execute('SELECT phonenumber FROM phonelist WHERE name=?', (name,)) return c.fetchone()[0] + XXX: update for Issue 10586 changes to cache statistics API + To help with choosing an effective cache size, the wrapped function is instrumented with two attributes *cache_hits* and *cache_misses*: diff --git a/Lib/functools.py b/Lib/functools.py index c1fa170..c223a62 100644 --- a/Lib/functools.py +++ b/Lib/functools.py @@ -12,7 +12,7 @@ __all__ = ['update_wrapper', 'wraps', 'WRAPPER_ASSIGNMENTS', 'WRAPPER_UPDATES', 'total_ordering', 'cmp_to_key', 'lru_cache', 'reduce', 'partial'] from _functools import partial, reduce -from collections import OrderedDict +from collections import OrderedDict, namedtuple try: from _thread import allocate_lock as Lock except: @@ -114,12 +114,15 @@ def cmp_to_key(mycmp): raise TypeError('hash not implemented') return K +_CacheInfo = namedtuple("CacheInfo", "maxsize, size, hits, misses") + def lru_cache(maxsize=100): """Least-recently-used cache decorator. Arguments to the cached function must be hashable. - Performance statistics stored in f.cache_hits and f.cache_misses. + Significant statistics (maxsize, size, hits, misses) are + available through the f.cache_info() named tuple. Clear the cache and statistics using f.cache_clear(). The underlying function is stored in f.__wrapped__. @@ -127,7 +130,7 @@ def lru_cache(maxsize=100): """ # Users should only access the lru_cache through its public API: - # cache_hits, cache_misses, cache_clear(), and __wrapped__ + # cache_info, cache_clear, and f.__wrapped__ # The internals of the lru_cache are encapsulated for thread safety and # to allow the implementation to change (including a possible C version). @@ -137,11 +140,13 @@ def lru_cache(maxsize=100): cache = OrderedDict() # ordered least recent to most recent cache_popitem = cache.popitem cache_renew = cache.move_to_end + hits = misses = 0 kwd_mark = object() # separate positional and keyword args lock = Lock() @wraps(user_function) def wrapper(*args, **kwds): + nonlocal hits, misses key = args if kwds: key += (kwd_mark,) + tuple(sorted(kwds.items())) @@ -149,23 +154,29 @@ def lru_cache(maxsize=100): with lock: result = cache[key] cache_renew(key) # record recent use of this key - wrapper.cache_hits += 1 + hits += 1 except KeyError: result = user_function(*args, **kwds) with lock: cache[key] = result # record recent use of this key - wrapper.cache_misses += 1 + misses += 1 if len(cache) > maxsize: cache_popitem(0) # purge least recently used cache entry return result + def cache_info(): + """Report significant cache statistics""" + with lock: + return _CacheInfo(maxsize, len(cache), hits, misses) + def cache_clear(): """Clear the cache and cache statistics""" + nonlocal hits, misses with lock: cache.clear() - wrapper.cache_hits = wrapper.cache_misses = 0 + hits = misses = 0 - wrapper.cache_hits = wrapper.cache_misses = 0 + wrapper.cache_info = cache_info wrapper.cache_clear = cache_clear return wrapper diff --git a/Lib/test/test_functools.py b/Lib/test/test_functools.py index 65ef9b4..c877f88 100644 --- a/Lib/test/test_functools.py +++ b/Lib/test/test_functools.py @@ -501,6 +501,11 @@ class TestLRU(unittest.TestCase): def orig(x, y): return 3*x+y f = functools.lru_cache(maxsize=20)(orig) + maxsize, currsize, hits, misses = f.cache_info() + self.assertEqual(maxsize, 20) + self.assertEqual(currsize, 0) + self.assertEqual(hits, 0) + self.assertEqual(misses, 0) domain = range(5) for i in range(1000): @@ -508,21 +513,29 @@ class TestLRU(unittest.TestCase): actual = f(x, y) expected = orig(x, y) self.assertEqual(actual, expected) - self.assertTrue(f.cache_hits > f.cache_misses) - self.assertEqual(f.cache_hits + f.cache_misses, 1000) + maxsize, currsize, hits, misses = f.cache_info() + self.assertTrue(hits > misses) + self.assertEqual(hits + misses, 1000) + self.assertEqual(currsize, 20) f.cache_clear() # test clearing - self.assertEqual(f.cache_hits, 0) - self.assertEqual(f.cache_misses, 0) + maxsize, currsize, hits, misses = f.cache_info() + self.assertEqual(hits, 0) + self.assertEqual(misses, 0) + self.assertEqual(currsize, 0) f(x, y) - self.assertEqual(f.cache_hits, 0) - self.assertEqual(f.cache_misses, 1) + maxsize, currsize, hits, misses = f.cache_info() + self.assertEqual(hits, 0) + self.assertEqual(misses, 1) + self.assertEqual(currsize, 1) # Test bypassing the cache self.assertIs(f.__wrapped__, orig) f.__wrapped__(x, y) - self.assertEqual(f.cache_hits, 0) - self.assertEqual(f.cache_misses, 1) + maxsize, currsize, hits, misses = f.cache_info() + self.assertEqual(hits, 0) + self.assertEqual(misses, 1) + self.assertEqual(currsize, 1) # test size zero (which means "never-cache") @functools.lru_cache(0) @@ -530,10 +543,15 @@ class TestLRU(unittest.TestCase): nonlocal f_cnt f_cnt += 1 return 20 + self.assertEqual(f.cache_info().maxsize, 0) f_cnt = 0 for i in range(5): self.assertEqual(f(), 20) self.assertEqual(f_cnt, 5) + maxsize, currsize, hits, misses = f.cache_info() + self.assertEqual(hits, 0) + self.assertEqual(misses, 5) + self.assertEqual(currsize, 0) # test size one @functools.lru_cache(1) @@ -541,10 +559,15 @@ class TestLRU(unittest.TestCase): nonlocal f_cnt f_cnt += 1 return 20 + self.assertEqual(f.cache_info().maxsize, 1) f_cnt = 0 for i in range(5): self.assertEqual(f(), 20) self.assertEqual(f_cnt, 1) + maxsize, currsize, hits, misses = f.cache_info() + self.assertEqual(hits, 4) + self.assertEqual(misses, 1) + self.assertEqual(currsize, 1) # test size two @functools.lru_cache(2) @@ -552,11 +575,16 @@ class TestLRU(unittest.TestCase): nonlocal f_cnt f_cnt += 1 return x*10 + self.assertEqual(f.cache_info().maxsize, 2) f_cnt = 0 for x in 7, 9, 7, 9, 7, 9, 8, 8, 8, 9, 9, 9, 8, 8, 8, 7: # * * * * self.assertEqual(f(x), x*10) self.assertEqual(f_cnt, 4) + maxsize, currsize, hits, misses = f.cache_info() + self.assertEqual(hits, 12) + self.assertEqual(misses, 4) + self.assertEqual(currsize, 2) def test_main(verbose=None): test_classes = ( diff --git a/Misc/NEWS b/Misc/NEWS index 320aaaa..f39b2e2 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -43,6 +43,9 @@ Core and Builtins Library ------- +- Issue #10586: The statistics API for the new functools.lru_cache has + been changed to a single cache_info() method returning a named tuple. + - Issue #10323: itertools.islice() now consumes the minimum number of inputs before stopping. Formerly, the final state of the underlying iterator was undefined. -- cgit v0.12