1 files changed, 52 insertions, 33 deletions
diff --git a/Lib/email/feedparser.py b/Lib/email/feedparser.py
index ea41e95..c95b27f 100644
--- a/Lib/email/feedparser.py
+++ b/Lib/email/feedparser.py
@@ -33,7 +33,7 @@ NLCRE_eol = re.compile('(\r\n|\r|\n)\Z')
 NLCRE_crack = re.compile('(\r\n|\r|\n)')
 # RFC 2822 $3.6.8 Optional fields.  ftext is %d33-57 / %d59-126, Any character
 # except controls, SP, and ":".
-headerRE = re.compile(r'^(From |[\041-\071\073-\176]{1,}:|[\t ])')
+headerRE = re.compile(r'^(From |[\041-\071\073-\176]*:|[\t ])')
 EMPTYSTRING = ''
 NL = '\n'
 
@@ -50,8 +50,8 @@ class BufferedSubFile(object):
     simple abstraction -- it parses until EOF closes the current message.
     """
     def __init__(self):
-        # The last partial line pushed into this object.
-        self._partial = ''
+        # Chunks of the last partial line pushed into this object.
+        self._partial = []
         # The list of full, pushed lines, in reverse order
         self._lines = []
         # The stack of false-EOF checking predicates.
@@ -67,8 +67,8 @@ class BufferedSubFile(object):
 
     def close(self):
         # Don't forget any trailing partial line.
-        self._lines.append(self._partial)
-        self._partial = ''
+        self.pushlines(''.join(self._partial).splitlines(True))
+        self._partial = []
         self._closed = True
 
     def readline(self):
@@ -96,26 +96,27 @@ class BufferedSubFile(object):
 
     def push(self, data):
         """Push some new data into this object."""
-        # Handle any previous leftovers
-        data, self._partial = self._partial + data, ''
-        # Crack into lines, but preserve the newlines on the end of each
-        parts = NLCRE_crack.split(data)
-        # The *ahem* interesting behaviour of re.split when supplied grouping
-        # parentheses is that the last element of the resulting list is the
-        # data after the final RE.  In the case of a NL/CR terminated string,
-        # this is the empty string.
-        self._partial = parts.pop()
-        #GAN 29Mar09  bugs 1555570, 1721862  Confusion at 8K boundary ending with \r:
-        # is there a \n to follow later?
-        if not self._partial and parts and parts[-1].endswith('\r'):
-            self._partial = parts.pop(-2)+parts.pop()
-        # parts is a list of strings, alternating between the line contents
-        # and the eol character(s).  Gather up a list of lines after
-        # re-attaching the newlines.
-        lines = []
-        for i in range(len(parts) // 2):
-            lines.append(parts[i*2] + parts[i*2+1])
-        self.pushlines(lines)
+        # Crack into lines, but preserve the linesep characters on the end of each
+        parts = data.splitlines(True)
+
+        if not parts or not parts[0].endswith(('\n', '\r')):
+            # No new complete lines, so just accumulate partials
+            self._partial += parts
+            return
+
+        if self._partial:
+            # If there are previous leftovers, complete them now
+            self._partial.append(parts[0])
+            parts[0:1] = ''.join(self._partial).splitlines(True)
+            del self._partial[:]
+
+        # If the last element of the list does not end in a newline, then treat
+        # it as a partial line.  We only check for '\n' here because a line
+        # ending with '\r' might be a line that was split in the middle of a
+        # '\r\n' sequence (see bugs 1555570 and 1721862).
+        if not parts[-1].endswith('\n'):
+            self._partial = [parts.pop()]
+        self.pushlines(parts)
 
     def pushlines(self, lines):
         # Reverse and insert at the front of the lines.
@@ -135,7 +136,7 @@ class BufferedSubFile(object):
 class FeedParser:
     """A feed-style parser of email."""
 
-    def __init__(self, _factory=message.Message, *, policy=compat32):
+    def __init__(self, _factory=None, *, policy=compat32):
         """_factory is called with no arguments to create a new message obj
 
         The policy keyword specifies a policy object that controls a number of
@@ -143,14 +144,23 @@ class FeedParser:
         backward compatibility.
 
         """
-        self._factory = _factory
         self.policy = policy
-        try:
-            _factory(policy=self.policy)
-            self._factory_kwds = lambda: {'policy': self.policy}
-        except TypeError:
-            # Assume this is an old-style factory
-            self._factory_kwds = lambda: {}
+        self._factory_kwds = lambda: {'policy': self.policy}
+        if _factory is None:
+            # What this should be:
+            #self._factory = policy.default_message_factory
+            # but, because we are post 3.4 feature freeze, fix with temp hack:
+            if self.policy is compat32:
+                self._factory = message.Message
+            else:
+                self._factory = message.EmailMessage
+        else:
+            self._factory = _factory
+            try:
+                _factory(policy=self.policy)
+            except TypeError:
+                # Assume this is an old-style factory
+                self._factory_kwds = lambda: {}
         self._input = BufferedSubFile()
         self._msgstack = []
         self._parse = self._parsegen().__next__
@@ -501,6 +511,15 @@ class FeedParser:
             # There will always be a colon, because if there wasn't the part of
             # the parser that calls us would have started parsing the body.
             i = line.find(':')
+
+            # If the colon is on the start of the line the header is clearly
+            # malformed, but we might be able to salvage the rest of the
+            # message. Track the error but keep going.
+            if i == 0:
+                defect = errors.InvalidHeaderDefect("Missing header name.")
+                self._cur.defects.append(defect)
+                continue
+
             assert i>0, "_parse_headers fed line with no : and no leading WS"
             lastheader = line[:i]
             lastvalue = [line]