From 0f606a636aabe2559525697d0df54ee347f947c0 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sat, 16 Mar 2013 22:52:09 +0200 Subject: Issue #16564: Fixed a performance regression relative to Python 3.1 in the caching of compiled regular expressions. --- Lib/re.py | 34 +++++++++++++++++++++++++--------- Misc/NEWS | 3 +++ 2 files changed, 28 insertions(+), 9 deletions(-) diff --git a/Lib/re.py b/Lib/re.py index 85c5a57..b910606 100644 --- a/Lib/re.py +++ b/Lib/re.py @@ -215,8 +215,8 @@ def compile(pattern, flags=0): def purge(): "Clear the regular expression caches" - _compile_typed.cache_clear() - _compile_repl.cache_clear() + _cache.clear() + _cache_repl.clear() def template(pattern, flags=0): "Compile a template pattern, returning a pattern object" @@ -257,14 +257,19 @@ def escape(pattern): # -------------------------------------------------------------------- # internals +_cache = {} +_cache_repl = {} + _pattern_type = type(sre_compile.compile("", 0)) -def _compile(pattern, flags): - return _compile_typed(type(pattern), pattern, flags) +_MAXCACHE = 512 -@functools.lru_cache(maxsize=500) -def _compile_typed(text_bytes_type, pattern, flags): +def _compile(pattern, flags): # internal: compile pattern + try: + return _cache[type(pattern), pattern, flags] + except KeyError: + pass if isinstance(pattern, _pattern_type): if flags: raise ValueError( @@ -272,12 +277,23 @@ def _compile_typed(text_bytes_type, pattern, flags): return pattern if not sre_compile.isstring(pattern): raise TypeError("first argument must be string or compiled pattern") - return sre_compile.compile(pattern, flags) + p = sre_compile.compile(pattern, flags) + if len(_cache) >= _MAXCACHE: + _cache.clear() + _cache[type(pattern), pattern, flags] = p + return p -@functools.lru_cache(maxsize=500) def _compile_repl(repl, pattern): # internal: compile replacement pattern - return sre_parse.parse_template(repl, pattern) + try: + return _cache_repl[repl, pattern] + except KeyError: + pass + p = sre_parse.parse_template(repl, pattern) + if len(_cache_repl) >= _MAXCACHE: + _cache_repl.clear() + _cache_repl[repl, pattern] = p + return p def _expand(pattern, match, template): # internal: match.expand implementation hook diff --git a/Misc/NEWS b/Misc/NEWS index 6bde852..ddef489 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -233,6 +233,9 @@ Core and Builtins Library ------- +- Issue #16564: Fixed a performance regression relative to Python 3.1 in the + caching of compiled regular expressions. + - Issue #17431: Fix missing import of BytesFeedParser in email.parser. - Issue #1285086: Get rid of the refcounting hack and speed up -- cgit v0.12