diff options
Diffstat (limited to 'Lib')
-rw-r--r-- | Lib/re/__init__.py | 67 |
1 files changed, 46 insertions, 21 deletions
diff --git a/Lib/re/__init__.py b/Lib/re/__init__.py
index d58c211..8d6a4ef 100644
--- a/Lib/re/__init__.py
+++ b/Lib/re/__init__.py
@@ -229,6 +229,7 @@ def compile(pattern, flags=0):
 def purge():
     "Clear the regular expression caches"
     _cache.clear()
+    _cache2.clear()
     _compile_repl.cache_clear()
 
 def template(pattern, flags=0):
@@ -266,40 +267,64 @@ Match = type(_compiler.compile('', 0).match(''))
 # --------------------------------------------------------------------
 # internals
 
-_cache = {}  # ordered!
-
+# Use the fact that dict keeps the insertion order.
+# _cache2 uses the simple FIFO policy which has better latency.
+# _cache uses the LRU policy which has better hit rate.
+_cache = {}  # LRU
+_cache2 = {}  # FIFO
 _MAXCACHE = 512
+_MAXCACHE2 = 256
+assert _MAXCACHE2 < _MAXCACHE
+
 def _compile(pattern, flags):
     # internal: compile pattern
     if isinstance(flags, RegexFlag):
         flags = flags.value
     try:
-        return _cache[type(pattern), pattern, flags]
+        return _cache2[type(pattern), pattern, flags]
     except KeyError:
         pass
-    if isinstance(pattern, Pattern):
-        if flags:
-            raise ValueError(
-                "cannot process flags argument with a compiled pattern")
-        return pattern
-    if not _compiler.isstring(pattern):
-        raise TypeError("first argument must be string or compiled pattern")
-    if flags & T:
-        import warnings
-        warnings.warn("The re.TEMPLATE/re.T flag is deprecated "
-                  "as it is an undocumented flag "
-                  "without an obvious purpose. "
-                  "Don't use it.",
-                  DeprecationWarning)
-    p = _compiler.compile(pattern, flags)
-    if not (flags & DEBUG):
+
+    key = (type(pattern), pattern, flags)
+    # Item in _cache should be moved to the end if found.
+    p = _cache.pop(key, None)
+    if p is None:
+        if isinstance(pattern, Pattern):
+            if flags:
+                raise ValueError(
+                    "cannot process flags argument with a compiled pattern")
+            return pattern
+        if not _compiler.isstring(pattern):
+            raise TypeError("first argument must be string or compiled pattern")
+        if flags & T:
+            import warnings
+            warnings.warn("The re.TEMPLATE/re.T flag is deprecated "
+                    "as it is an undocumented flag "
+                    "without an obvious purpose. "
+                    "Don't use it.",
+                    DeprecationWarning)
+        p = _compiler.compile(pattern, flags)
+        if flags & DEBUG:
+            return p
         if len(_cache) >= _MAXCACHE:
-            # Drop the oldest item
+            # Drop the least recently used item.
+            # next(iter(_cache)) is known to have linear amortized time,
+            # but it is used here to avoid a dependency from using OrderedDict.
+            # For the small _MAXCACHE value it doesn't make much of a difference.
             try:
                 del _cache[next(iter(_cache))]
             except (StopIteration, RuntimeError, KeyError):
                 pass
-        _cache[type(pattern), pattern, flags] = p
+        # Append to the end.
+        _cache[key] = p
+
+    if len(_cache2) >= _MAXCACHE2:
+        # Drop the oldest item.
+        try:
+            del _cache2[next(iter(_cache2))]
+        except (StopIteration, RuntimeError, KeyError):
+            pass
+    _cache2[key] = p
     return p
 
 @functools.lru_cache(_MAXCACHE)