summaryrefslogtreecommitdiffstats
path: root/Lib/re.py
diff options
context:
space:
mode:
Diffstat (limited to 'Lib/re.py')
-rw-r--r--Lib/re.py39
1 file changed, 36 insertions, 3 deletions
diff --git a/Lib/re.py b/Lib/re.py
index 9bd913a..2f1a76e 100644
--- a/Lib/re.py
+++ b/Lib/re.py
@@ -254,7 +254,40 @@ _cache_repl = {}
_pattern_type = type(sre_compile.compile("", 0))
-_MAXCACHE = 100
+_MAXCACHE = 500
+
+def _shrink_cache(cache_dict, max_length, divisor=5):
+ """Make room in the given cache.
+
+ Args:
+ cache_dict: The cache dictionary to modify.
+ max_length: Maximum # of entries in cache_dict before it is shrunk.
+ divisor: Cache will shrink to roughly max_length - max_length/divisor items.
+ """
+ # Toss out a fraction of the entries at random to make room for new ones.
+ # A random algorithm was chosen as opposed to simply cache_dict.popitem()
+ # as popitem could penalize the same regular expression repeatedly based
+ # on its internal hash value. Being random should spread the cache miss
+ # love around.
+ cache_keys = tuple(cache_dict.keys())
+ overage = len(cache_keys) - max_length
+ if overage < 0:
+ # Cache is already within limits. Normally this should not happen
+ # but it could due to multithreading.
+ return
+ number_to_toss = max_length // divisor + overage
+ # The import is done here to avoid a circular dependency.
+ import random
+ if not hasattr(random, 'sample'):
+ # Do nothing while resolving the circular dependency:
+ # re->random->warnings->tokenize->string->re
+ return
+ for doomed_key in random.sample(cache_keys, number_to_toss):
+ try:
+ del cache_dict[doomed_key]
+ except KeyError:
+ # Ignore problems if the cache changed from another thread.
+ pass
def _compile(*key):
# internal: compile pattern
@@ -272,7 +305,7 @@ def _compile(*key):
raise TypeError("first argument must be string or compiled pattern")
p = sre_compile.compile(pattern, flags)
if len(_cache) >= _MAXCACHE:
- _cache.clear()
+ _shrink_cache(_cache, _MAXCACHE)
_cache[cachekey] = p
return p
@@ -284,7 +317,7 @@ def _compile_repl(*key):
repl, pattern = key
p = sre_parse.parse_template(repl, pattern)
if len(_cache_repl) >= _MAXCACHE:
- _cache_repl.clear()
+ _shrink_cache(_cache_repl, _MAXCACHE)
_cache_repl[key] = p
return p