summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSerhiy Storchaka <storchaka@gmail.com>2022-10-07 19:21:42 (GMT)
committerGitHub <noreply@github.com>2022-10-07 19:21:42 (GMT)
commitc11b667a1d8c13095b295d1a8c4ab6e4ccf0f895 (patch)
treeb9a672c05190e39e73b04dcf2c19cb4af261dbd6
parenteed80458e8e776d15fa862da71dcce58c47e2ca7 (diff)
downloadcpython-c11b667a1d8c13095b295d1a8c4ab6e4ccf0f895.zip
cpython-c11b667a1d8c13095b295d1a8c4ab6e4ccf0f895.tar.gz
cpython-c11b667a1d8c13095b295d1a8c4ab6e4ccf0f895.tar.bz2
gh-96346: Use double caching for re._compile() (#96347)
-rw-r--r--Lib/re/__init__.py67
-rw-r--r--Misc/NEWS.d/next/Library/2022-08-27-23-16-09.gh-issue-96346.jJX14I.rst1
2 files changed, 47 insertions, 21 deletions
diff --git a/Lib/re/__init__.py b/Lib/re/__init__.py
index d58c211..8d6a4ef 100644
--- a/Lib/re/__init__.py
+++ b/Lib/re/__init__.py
@@ -229,6 +229,7 @@ def compile(pattern, flags=0):
def purge():
"Clear the regular expression caches"
_cache.clear()
+ _cache2.clear()
_compile_repl.cache_clear()
def template(pattern, flags=0):
@@ -266,40 +267,64 @@ Match = type(_compiler.compile('', 0).match(''))
# --------------------------------------------------------------------
# internals
-_cache = {} # ordered!
-
+# Use the fact that dict keeps the insertion order.
+# _cache2 uses the simple FIFO policy which has better latency.
+# _cache uses the LRU policy which has better hit rate.
+_cache = {} # LRU
+_cache2 = {} # FIFO
_MAXCACHE = 512
+_MAXCACHE2 = 256
+assert _MAXCACHE2 < _MAXCACHE
+
def _compile(pattern, flags):
# internal: compile pattern
if isinstance(flags, RegexFlag):
flags = flags.value
try:
- return _cache[type(pattern), pattern, flags]
+ return _cache2[type(pattern), pattern, flags]
except KeyError:
pass
- if isinstance(pattern, Pattern):
- if flags:
- raise ValueError(
- "cannot process flags argument with a compiled pattern")
- return pattern
- if not _compiler.isstring(pattern):
- raise TypeError("first argument must be string or compiled pattern")
- if flags & T:
- import warnings
- warnings.warn("The re.TEMPLATE/re.T flag is deprecated "
- "as it is an undocumented flag "
- "without an obvious purpose. "
- "Don't use it.",
- DeprecationWarning)
- p = _compiler.compile(pattern, flags)
- if not (flags & DEBUG):
+
+ key = (type(pattern), pattern, flags)
+ # Item in _cache should be moved to the end if found.
+ p = _cache.pop(key, None)
+ if p is None:
+ if isinstance(pattern, Pattern):
+ if flags:
+ raise ValueError(
+ "cannot process flags argument with a compiled pattern")
+ return pattern
+ if not _compiler.isstring(pattern):
+ raise TypeError("first argument must be string or compiled pattern")
+ if flags & T:
+ import warnings
+ warnings.warn("The re.TEMPLATE/re.T flag is deprecated "
+ "as it is an undocumented flag "
+ "without an obvious purpose. "
+ "Don't use it.",
+ DeprecationWarning)
+ p = _compiler.compile(pattern, flags)
+ if flags & DEBUG:
+ return p
if len(_cache) >= _MAXCACHE:
- # Drop the oldest item
+ # Drop the least recently used item.
+ # next(iter(_cache)) is known to have linear amortized time,
+ # but it is used here to avoid a dependency from using OrderedDict.
+ # For the small _MAXCACHE value it doesn't make much of a difference.
try:
del _cache[next(iter(_cache))]
except (StopIteration, RuntimeError, KeyError):
pass
- _cache[type(pattern), pattern, flags] = p
+ # Append to the end.
+ _cache[key] = p
+
+ if len(_cache2) >= _MAXCACHE2:
+ # Drop the oldest item.
+ try:
+ del _cache2[next(iter(_cache2))]
+ except (StopIteration, RuntimeError, KeyError):
+ pass
+ _cache2[key] = p
return p
@functools.lru_cache(_MAXCACHE)
diff --git a/Misc/NEWS.d/next/Library/2022-08-27-23-16-09.gh-issue-96346.jJX14I.rst b/Misc/NEWS.d/next/Library/2022-08-27-23-16-09.gh-issue-96346.jJX14I.rst
new file mode 100644
index 0000000..9883348
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2022-08-27-23-16-09.gh-issue-96346.jJX14I.rst
@@ -0,0 +1 @@
+Use double caching for compiled RE patterns.