gh-106628: email parsing speedup (gh-106629)

author: CF Bolz-Tereick <cfbolz@gmx.de> 2023-07-13 06:12:56 (GMT)
committer: GitHub <noreply@github.com> 2023-07-13 06:12:56 (GMT)
commit: 7e6ce48872fa3de98c986057764f35e1b2f4b936 (patch)
tree: 340b37ca60b5d12c2a25be0786a3c894c3c03c9d /Lib/email
parent: af51bd7cda9c0cba149b882c1e501765595e5fc3 (diff)
download: cpython-7e6ce48872fa3de98c986057764f35e1b2f4b936.zip cpython-7e6ce48872fa3de98c986057764f35e1b2f4b936.tar.gz cpython-7e6ce48872fa3de98c986057764f35e1b2f4b936.tar.bz2
1 file changed, 9 insertions, 6 deletions
diff --git a/Lib/email/feedparser.py b/Lib/email/feedparser.py
index 885097c..53d71f5 100644
--- a/Lib/email/feedparser.py
+++ b/Lib/email/feedparser.py
@@ -37,6 +37,8 @@ NLCRE_crack = re.compile(r'(\r\n|\r|\n)')
 headerRE = re.compile(r'^(From |[\041-\071\073-\176]*:|[\t ])')
 EMPTYSTRING = ''
 NL = '\n'
+boundaryendRE = re.compile(
+    r'(?P<end>--)?(?P<ws>[ \t]*)(?P<linesep>\r\n|\r|\n)?$')
 
 NeedMoreData = object()
 
@@ -327,9 +329,10 @@ class FeedParser:
             # this onto the input stream until we've scanned past the
             # preamble.
             separator = '--' + boundary
-            boundaryre = re.compile(
-                '(?P<sep>' + re.escape(separator) +
-                r')(?P<end>--)?(?P<ws>[ \t]*)(?P<linesep>\r\n|\r|\n)?$')
+            def boundarymatch(line):
+                if not line.startswith(separator):
+                    return None
+                return boundaryendRE.match(line, len(separator))
             capturing_preamble = True
             preamble = []
             linesep = False
@@ -341,7 +344,7 @@ class FeedParser:
                     continue
                 if line == '':
                     break
-                mo = boundaryre.match(line)
+                mo = boundarymatch(line)
                 if mo:
                     # If we're looking at the end boundary, we're done with
                     # this multipart.  If there was a newline at the end of
@@ -373,13 +376,13 @@ class FeedParser:
                         if line is NeedMoreData:
                             yield NeedMoreData
                             continue
-                        mo = boundaryre.match(line)
+                        mo = boundarymatch(line)
                         if not mo:
                             self._input.unreadline(line)
                             break
                     # Recurse to parse this subpart; the input stream points
                     # at the subpart's first line.
-                    self._input.push_eof_matcher(boundaryre.match)
+                    self._input.push_eof_matcher(boundarymatch)
                     for retval in self._parsegen():
                         if retval is NeedMoreData:
                             yield NeedMoreData
author	CF Bolz-Tereick <cfbolz@gmx.de>	2023-07-13 06:12:56 (GMT)
committer	GitHub <noreply@github.com>	2023-07-13 06:12:56 (GMT)
commit	7e6ce48872fa3de98c986057764f35e1b2f4b936 (patch)
tree	340b37ca60b5d12c2a25be0786a3c894c3c03c9d /Lib/email
parent	af51bd7cda9c0cba149b882c1e501765595e5fc3 (diff)
download	cpython-7e6ce48872fa3de98c986057764f35e1b2f4b936.zip cpython-7e6ce48872fa3de98c986057764f35e1b2f4b936.tar.gz cpython-7e6ce48872fa3de98c986057764f35e1b2f4b936.tar.bz2