From 9e46ef819c38ec76273d7ffb35bcd14a558d35d4 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Sat, 31 Jul 2010 10:11:39 +0000 Subject: Add functools.lfu_cache() and functools.lru_cache(). --- Doc/library/functools.rst | 51 +++++++++++++++++++++++++ Lib/functools.py | 94 +++++++++++++++++++++++++++++++++++++++++++++- Lib/test/test_functools.py | 48 ++++++++++++++++++++++- Misc/NEWS | 2 + 4 files changed, 193 insertions(+), 2 deletions(-) diff --git a/Doc/library/functools.rst b/Doc/library/functools.rst index a9819f2..b2d69de 100644 --- a/Doc/library/functools.rst +++ b/Doc/library/functools.rst @@ -37,6 +37,57 @@ The :mod:`functools` module defines the following functions: .. versionadded:: 3.2 +.. decorator:: lfu_cache(maxsize) + + Decorator to wrap a function with a memoizing callable that saves up to the + *maxsize* most frequent calls. It can save time when an expensive or I/O + bound function is periodically called with the same arguments. + + The *maxsize* parameter defaults to 100. Since a dictionary is used to cache + results, the positional and keyword arguments to the function must be + hashable. + + The wrapped function is instrumented with two attributes, :attr:`hits` + and :attr:`misses` which count the number of successful or unsuccessful + cache lookups. These statistics are helpful for tuning the *maxsize* + parameter and for measuring the cache's effectiveness. + + The wrapped function also has a :attr:`clear` attribute which can be + called (with no arguments) to clear the cache. + + A `LFU (least frequently used) cache + `_ + is indicated when the pattern of calls does not change over time, when + more the most common calls already seen are the best predictors of the + most common upcoming calls. + + .. versionadded:: 3.2 + +.. decorator:: lru_cache(maxsize) + + Decorator to wrap a function with a memoizing callable that saves up to the + *maxsize* most recent calls. It can save time when an expensive or I/O bound + function is periodically called with the same arguments. + + The *maxsize* parameter defaults to 100. Since a dictionary is used to cache + results, the positional and keyword arguments to the function must be + hashable. + + The wrapped function is instrumented with two attributes, :attr:`hits` + and :attr:`misses` which count the number of successful or unsuccessful + cache lookups. These statistics are helpful for tuning the *maxsize* + parameter and for measuring the cache's effectiveness. + + The wrapped function also has a :attr:`clear` attribute which can be + called (with no arguments) to clear the cache. + + A `LRU (least recently used) cache + `_ + is indicated when the pattern of calls changes over time, such as + when more recent calls are the best predictors of upcoming calls. + + .. versionadded:: 3.2 + .. decorator:: total_ordering Given a class defining one or more rich comparison ordering methods, this diff --git a/Lib/functools.py b/Lib/functools.py index 1a1f22e..863706d 100644 --- a/Lib/functools.py +++ b/Lib/functools.py @@ -4,10 +4,17 @@ # to allow utilities written in Python to be added # to the functools module. # Written by Nick Coghlan -# Copyright (C) 2006 Python Software Foundation. +# and Raymond Hettinger +# Copyright (C) 2006-2010 Python Software Foundation. # See C source code for _functools credits/copyright +__all__ = ['update_wrapper', 'wraps', 'WRAPPER_ASSIGNMENTS', 'WRAPPER_UPDATES', + 'total_ordering', 'cmp_to_key', 'lfu_cache', 'lru_cache'] + from _functools import partial, reduce +from collections import OrderedDict, Counter +from heapq import nsmallest +from operator import itemgetter # update_wrapper() and wraps() are tools to help write # wrapper functions that can handle naive introspection @@ -97,3 +104,88 @@ def cmp_to_key(mycmp): def __hash__(self): raise TypeError('hash not implemented') return K + +def lfu_cache(maxsize=100): + '''Least-frequently-used cache decorator. + + Arguments to the cached function must be hashable. + Cache performance statistics stored in f.hits and f.misses. + Clear the cache using f.clear(). + http://en.wikipedia.org/wiki/Cache_algorithms#Least-Frequently_Used + + ''' + def decorating_function(user_function): + cache = {} # mapping of args to results + use_count = Counter() # times each key has been accessed + kwd_mark = object() # separate positional and keyword args + + @wraps(user_function) + def wrapper(*args, **kwds): + key = args + if kwds: + key += (kwd_mark,) + tuple(sorted(kwds.items())) + use_count[key] += 1 # count a use of this key + try: + result = cache[key] + wrapper.hits += 1 + except KeyError: + result = user_function(*args, **kwds) + cache[key] = result + wrapper.misses += 1 + if len(cache) > maxsize: + # purge the 10% least frequently used entries + for key, _ in nsmallest(maxsize // 10, + use_count.items(), + key=itemgetter(1)): + del cache[key], use_count[key] + return result + + def clear(): + 'Clear the cache and cache statistics' + cache.clear() + use_count.clear() + wrapper.hits = wrapper.misses = 0 + + wrapper.hits = wrapper.misses = 0 + wrapper.clear = clear + return wrapper + return decorating_function + +def lru_cache(maxsize=100): + '''Least-recently-used cache decorator. + + Arguments to the cached function must be hashable. + Cache performance statistics stored in f.hits and f.misses. + Clear the cache using f.clear(). + http://en.wikipedia.org/wiki/Cache_algorithms#Least_Recently_Used + + ''' + def decorating_function(user_function): + cache = OrderedDict() # ordered least recent to most recent + kwd_mark = object() # separate positional and keyword args + + @wraps(user_function) + def wrapper(*args, **kwds): + key = args + if kwds: + key += (kwd_mark,) + tuple(sorted(kwds.items())) + try: + result = cache.pop(key) + wrapper.hits += 1 + except KeyError: + result = user_function(*args, **kwds) + wrapper.misses += 1 + if len(cache) >= maxsize: + cache.popitem(0) # purge least recently used cache entry + cache[key] = result # record recent use of this key + return result + + def clear(): + 'Clear the cache and cache statistics' + cache.clear() + wrapper.hits = wrapper.misses = 0 + + wrapper.hits = wrapper.misses = 0 + wrapper.clear = clear + return wrapper + return decorating_function diff --git a/Lib/test/test_functools.py b/Lib/test/test_functools.py index f6ccc87..a02d37c 100644 --- a/Lib/test/test_functools.py +++ b/Lib/test/test_functools.py @@ -4,6 +4,7 @@ import unittest from test import support from weakref import proxy import pickle +from random import choice @staticmethod def PythonPartial(func, *args, **keywords): @@ -454,6 +455,50 @@ class TestTotalOrdering(unittest.TestCase): class A: pass +class TestLRU(unittest.TestCase): + + def test_lru(self): + def orig(x, y): + return 3*x+y + f = functools.lru_cache(maxsize=20)(orig) + + domain = range(5) + for i in range(1000): + x, y = choice(domain), choice(domain) + actual = f(x, y) + expected = orig(x, y) + self.assertEquals(actual, expected) + self.assert_(f.hits > f.misses) + self.assertEquals(f.hits + f.misses, 1000) + + f.clear() # test clearing + self.assertEqual(f.hits, 0) + self.assertEqual(f.misses, 0) + f(x, y) + self.assertEqual(f.hits, 0) + self.assertEqual(f.misses, 1) + + def test_lfu(self): + def orig(x, y): + return 3*x+y + f = functools.lfu_cache(maxsize=20)(orig) + + domain = range(5) + for i in range(1000): + x, y = choice(domain), choice(domain) + actual = f(x, y) + expected = orig(x, y) + self.assertEquals(actual, expected) + self.assert_(f.hits > f.misses) + self.assertEquals(f.hits + f.misses, 1000) + + f.clear() # test clearing + self.assertEqual(f.hits, 0) + self.assertEqual(f.misses, 0) + f(x, y) + self.assertEqual(f.hits, 0) + self.assertEqual(f.misses, 1) + def test_main(verbose=None): test_classes = ( TestPartial, @@ -461,7 +506,8 @@ def test_main(verbose=None): TestPythonPartial, TestUpdateWrapper, TestWraps, - TestReduce + TestReduce, + TestLRU, ) support.run_unittest(*test_classes) diff --git a/Misc/NEWS b/Misc/NEWS index d04beaa..d8f64a4 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -504,6 +504,8 @@ Library - Issue #4179: In pdb, allow "list ." as a command to return to the currently debugged line. +- Add lfu_cache() and lru_cache() decorators to the functools module. + - Issue #4108: In urllib.robotparser, if there are multiple 'User-agent: *' entries, consider the first one. -- cgit v0.12