diff options
author | Serhiy Storchaka <storchaka@gmail.com> | 2022-10-07 19:21:42 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-10-07 19:21:42 (GMT) |
commit | c11b667a1d8c13095b295d1a8c4ab6e4ccf0f895 (patch) | |
tree | b9a672c05190e39e73b04dcf2c19cb4af261dbd6 | |
parent | eed80458e8e776d15fa862da71dcce58c47e2ca7 (diff) | |
download | cpython-c11b667a1d8c13095b295d1a8c4ab6e4ccf0f895.zip cpython-c11b667a1d8c13095b295d1a8c4ab6e4ccf0f895.tar.gz cpython-c11b667a1d8c13095b295d1a8c4ab6e4ccf0f895.tar.bz2 |
gh-96346: Use double caching for re._compile() (#96347)
-rw-r--r-- | Lib/re/__init__.py | 67 | ||||
-rw-r--r-- | Misc/NEWS.d/next/Library/2022-08-27-23-16-09.gh-issue-96346.jJX14I.rst | 1 |
2 files changed, 47 insertions, 21 deletions
diff --git a/Lib/re/__init__.py b/Lib/re/__init__.py index d58c211..8d6a4ef 100644 --- a/Lib/re/__init__.py +++ b/Lib/re/__init__.py @@ -229,6 +229,7 @@ def compile(pattern, flags=0): def purge(): "Clear the regular expression caches" _cache.clear() + _cache2.clear() _compile_repl.cache_clear() def template(pattern, flags=0): @@ -266,40 +267,64 @@ Match = type(_compiler.compile('', 0).match('')) # -------------------------------------------------------------------- # internals -_cache = {} # ordered! - +# Use the fact that dict keeps the insertion order. +# _cache2 uses the simple FIFO policy which has better latency. +# _cache uses the LRU policy which has better hit rate. +_cache = {} # LRU +_cache2 = {} # FIFO _MAXCACHE = 512 +_MAXCACHE2 = 256 +assert _MAXCACHE2 < _MAXCACHE + def _compile(pattern, flags): # internal: compile pattern if isinstance(flags, RegexFlag): flags = flags.value try: - return _cache[type(pattern), pattern, flags] + return _cache2[type(pattern), pattern, flags] except KeyError: pass - if isinstance(pattern, Pattern): - if flags: - raise ValueError( - "cannot process flags argument with a compiled pattern") - return pattern - if not _compiler.isstring(pattern): - raise TypeError("first argument must be string or compiled pattern") - if flags & T: - import warnings - warnings.warn("The re.TEMPLATE/re.T flag is deprecated " - "as it is an undocumented flag " - "without an obvious purpose. " - "Don't use it.", - DeprecationWarning) - p = _compiler.compile(pattern, flags) - if not (flags & DEBUG): + + key = (type(pattern), pattern, flags) + # Item in _cache should be moved to the end if found. + p = _cache.pop(key, None) + if p is None: + if isinstance(pattern, Pattern): + if flags: + raise ValueError( + "cannot process flags argument with a compiled pattern") + return pattern + if not _compiler.isstring(pattern): + raise TypeError("first argument must be string or compiled pattern") + if flags & T: + import warnings + warnings.warn("The re.TEMPLATE/re.T flag is deprecated " + "as it is an undocumented flag " + "without an obvious purpose. " + "Don't use it.", + DeprecationWarning) + p = _compiler.compile(pattern, flags) + if flags & DEBUG: + return p if len(_cache) >= _MAXCACHE: - # Drop the oldest item + # Drop the least recently used item. + # next(iter(_cache)) is known to have linear amortized time, + # but it is used here to avoid a dependency from using OrderedDict. + # For the small _MAXCACHE value it doesn't make much of a difference. try: del _cache[next(iter(_cache))] except (StopIteration, RuntimeError, KeyError): pass - _cache[type(pattern), pattern, flags] = p + # Append to the end. + _cache[key] = p + + if len(_cache2) >= _MAXCACHE2: + # Drop the oldest item. + try: + del _cache2[next(iter(_cache2))] + except (StopIteration, RuntimeError, KeyError): + pass + _cache2[key] = p return p @functools.lru_cache(_MAXCACHE) diff --git a/Misc/NEWS.d/next/Library/2022-08-27-23-16-09.gh-issue-96346.jJX14I.rst b/Misc/NEWS.d/next/Library/2022-08-27-23-16-09.gh-issue-96346.jJX14I.rst new file mode 100644 index 0000000..9883348 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2022-08-27-23-16-09.gh-issue-96346.jJX14I.rst @@ -0,0 +1 @@ +Use double caching for compiled RE patterns. |