summaryrefslogtreecommitdiffstats
path: root/Lib/re.py
diff options
context:
space:
mode:
Diffstat (limited to 'Lib/re.py')
-rw-r--r--Lib/re.py39
1 file changed, 36 insertions, 3 deletions
diff --git a/Lib/re.py b/Lib/re.py
index 9bd913a..2f1a76e 100644
--- a/Lib/re.py
+++ b/Lib/re.py
@@ -254,7 +254,40 @@ _cache_repl = {}
_pattern_type = type(sre_compile.compile("", 0))
-_MAXCACHE = 100
+_MAXCACHE = 500
+
+def _shrink_cache(cache_dict, max_length, divisor=5):
+ """Make room in the given cache.
+
+ Args:
+ cache_dict: The cache dictionary to modify.
+ max_length: Maximum # of entries in cache_dict before it is shrunk.
+ divisor: Cache will shrink to roughly max_length - max_length/divisor items.
+ """
+ # Toss out a fraction of the entries at random to make room for new ones.
+ # A random algorithm was chosen as opposed to simply cache_dict.popitem()
+ # as popitem could penalize the same regular expression repeatedly based
+ # on its internal hash value. Being random should spread the cache miss
+ # love around.
+ cache_keys = tuple(cache_dict.keys())
+ overage = len(cache_keys) - max_length
+ if overage < 0:
+ # Cache is already within limits. Normally this should not happen
+ # but it could due to multithreading.
+ return
+ number_to_toss = max_length // divisor + overage
+ # The import is done here to avoid a circular dependency.
+ import random
+ if not hasattr(random, 'sample'):
+ # Do nothing while resolving the circular dependency:
+ # re->random->warnings->tokenize->string->re
+ return
+ for doomed_key in random.sample(cache_keys, number_to_toss):
+ try:
+ del cache_dict[doomed_key]
+ except KeyError:
+ # Ignore problems if the cache changed from another thread.
+ pass
def _compile(*key):
# internal: compile pattern
@@ -272,7 +305,7 @@ def _compile(*key):
raise TypeError("first argument must be string or compiled pattern")
p = sre_compile.compile(pattern, flags)
if len(_cache) >= _MAXCACHE:
- _cache.clear()
+ _shrink_cache(_cache, _MAXCACHE)
_cache[cachekey] = p
return p
@@ -284,7 +317,7 @@ def _compile_repl(*key):
repl, pattern = key
p = sre_parse.parse_template(repl, pattern)
if len(_cache_repl) >= _MAXCACHE:
- _cache_repl.clear()
+ _shrink_cache(_cache_repl, _MAXCACHE)
_cache_repl[key] = p
return p