diff options
Diffstat (limited to 'Lib/re.py')
-rw-r--r-- | Lib/re.py | 39 |
1 files changed, 36 insertions, 3 deletions
@@ -254,7 +254,40 @@ _cache_repl = {} _pattern_type = type(sre_compile.compile("", 0)) -_MAXCACHE = 100 +_MAXCACHE = 500 + +def _shrink_cache(cache_dict, max_length, divisor=5): + """Make room in the given cache. + + Args: + cache_dict: The cache dictionary to modify. + max_length: Maximum # of entries in cache_dict before it is shrunk. + divisor: Cache will shrink to max_length - 1/divisor*max_length items. + """ + # Toss out a fraction of the entries at random to make room for new ones. + # A random algorithm was chosen as opposed to simply cache_dict.popitem() + # as popitem could penalize the same regular expression repeatedly based + # on its internal hash value. Being random should spread the cache miss + # love around. + cache_keys = tuple(cache_dict.keys()) + overage = len(cache_keys) - max_length + if overage < 0: + # Cache is already within limits. Normally this should not happen + # but it could due to multithreading. + return + number_to_toss = max_length // divisor + overage + # The import is done here to avoid a circular depencency. + import random + if not hasattr(random, 'sample'): + # Do nothing while resolving the circular dependency: + # re->random->warnings->tokenize->string->re + return + for doomed_key in random.sample(cache_keys, number_to_toss): + try: + del cache_dict[doomed_key] + except KeyError: + # Ignore problems if the cache changed from another thread. + pass def _compile(*key): # internal: compile pattern @@ -272,7 +305,7 @@ def _compile(*key): raise TypeError("first argument must be string or compiled pattern") p = sre_compile.compile(pattern, flags) if len(_cache) >= _MAXCACHE: - _cache.clear() + _shrink_cache(_cache, _MAXCACHE) _cache[cachekey] = p return p @@ -284,7 +317,7 @@ def _compile_repl(*key): repl, pattern = key p = sre_parse.parse_template(repl, pattern) if len(_cache_repl) >= _MAXCACHE: - _cache_repl.clear() + _shrink_cache(_cache_repl, _MAXCACHE) _cache_repl[key] = p return p |