From c1c47c166b1012d34f2c6e111ee9ccb5c4d12de7 Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Thu, 5 Oct 2017 17:19:26 +0900 Subject: bpo-31671: re: Convert RegexFlag to int before compile (GH-3862) sre_compile does bit tests (e.g. `flags & SRE_FLAG_IGNORECASE`) in a loop. `IntFlag.__and__` and `IntFlag.__new__` made it slower. So this commit converts it to a normal int before passing flags to `sre_compile()`. --- Doc/whatsnew/3.7.rst | 5 +++++ Lib/re.py | 4 ++++ Misc/NEWS.d/next/Library/2017-10-04-21-28-44.bpo-31671.E-zfc9.rst | 2 ++ 3 files changed, 11 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2017-10-04-21-28-44.bpo-31671.E-zfc9.rst diff --git a/Doc/whatsnew/3.7.rst b/Doc/whatsnew/3.7.rst index 845ed64..19b766f 100644 --- a/Doc/whatsnew/3.7.rst +++ b/Doc/whatsnew/3.7.rst @@ -326,6 +326,11 @@ Optimizations expressions <re>`. Searching some patterns can now be up to 20 times faster. (Contributed by Serhiy Storchaka in :issue:`30285`.) +* :func:`re.compile` now converts the ``flags`` parameter to an int object if + it is a ``RegexFlag``. It is now as fast as Python 3.5, and about 10% faster + than Python 3.6, depending on the pattern. + (Contributed by INADA Naoki in :issue:`31671`.) + * :meth:`selectors.EpollSelector.modify`, :meth:`selectors.PollSelector.modify` and :meth:`selectors.DevpollSelector.modify` may be around 10% faster under heavy loads. 
(Contributed by Giampaolo Rodola' in :issue:`30014`) diff --git a/Lib/re.py b/Lib/re.py index d772979..abbf8d6 100644 --- a/Lib/re.py +++ b/Lib/re.py @@ -275,6 +275,8 @@ _cache = OrderedDict() _MAXCACHE = 512 def _compile(pattern, flags): # internal: compile pattern + if isinstance(flags, RegexFlag): + flags = flags.value try: return _cache[type(pattern), pattern, flags] except KeyError: @@ -331,6 +333,8 @@ copyreg.pickle(Pattern, _pickle, _compile) class Scanner: def __init__(self, lexicon, flags=0): from sre_constants import BRANCH, SUBPATTERN + if isinstance(flags, RegexFlag): + flags = flags.value self.lexicon = lexicon # combine phrases into a compound pattern p = [] diff --git a/Misc/NEWS.d/next/Library/2017-10-04-21-28-44.bpo-31671.E-zfc9.rst b/Misc/NEWS.d/next/Library/2017-10-04-21-28-44.bpo-31671.E-zfc9.rst new file mode 100644 index 0000000..b84dedd --- /dev/null +++ b/Misc/NEWS.d/next/Library/2017-10-04-21-28-44.bpo-31671.E-zfc9.rst @@ -0,0 +1,2 @@ +Now ``re.compile()`` converts a passed RegexFlag to a normal int object before +compiling. The bm_regex_compile benchmark shows a 14% performance improvement. -- cgit v0.12