From 77fcccb5321137456549b7f55b819f2c8a4c78a4 Mon Sep 17 00:00:00 2001
From: "Miss Islington (bot)" <31488909+miss-islington@users.noreply.github.com>
Date: Wed, 31 Jul 2019 13:22:09 -0700
Subject: bpo-37723: Fix performance regression on regular expression parsing. (GH-15030)

Improve performance of sre_parse._uniq function.
(cherry picked from commit 9f55551f3df238e58315e724e50cb0d574d75b94)

Co-authored-by: yannvgn
---
 Lib/sre_parse.py | 8 +-------
 Misc/ACKS | 1 +
 Misc/NEWS.d/next/Library/2019-07-31-16-49-01.bpo-37723.zq6tw8.rst | 2 ++
 3 files changed, 4 insertions(+), 7 deletions(-)
 create mode 100644 Misc/NEWS.d/next/Library/2019-07-31-16-49-01.bpo-37723.zq6tw8.rst

diff --git a/Lib/sre_parse.py b/Lib/sre_parse.py
index 84c9125..8311916 100644
--- a/Lib/sre_parse.py
+++ b/Lib/sre_parse.py
@@ -430,13 +430,7 @@ def _escape(source, escape, state):
     raise source.error("bad escape %s" % escape, len(escape))
 
 def _uniq(items):
-    if len(set(items)) == len(items):
-        return items
-    newitems = []
-    for item in items:
-        if item not in newitems:
-            newitems.append(item)
-    return newitems
+    return list(dict.fromkeys(items))
 
 def _parse_sub(source, state, verbose, nested):
     # parse an alternation: a|b|c
diff --git a/Misc/ACKS b/Misc/ACKS
index 829aa79..ad2e0a1 100644
--- a/Misc/ACKS
+++ b/Misc/ACKS
@@ -1702,6 +1702,7 @@ Michael Urman
 Hector Urtubia
 Lukas Vacek
 Ville Vainio
+Yann Vaginay
 Andi Vajda
 Case Van Horsen
 John Mark Vandenberg
diff --git a/Misc/NEWS.d/next/Library/2019-07-31-16-49-01.bpo-37723.zq6tw8.rst b/Misc/NEWS.d/next/Library/2019-07-31-16-49-01.bpo-37723.zq6tw8.rst
new file mode 100644
index 0000000..65507bd
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2019-07-31-16-49-01.bpo-37723.zq6tw8.rst
@@ -0,0 +1,2 @@
+Fix performance regression on regular expression parsing with huge
+character sets. Patch by Yann Vaginay.
\ No newline at end of file
-- 
cgit v0.12