1 files changed, 49 insertions, 24 deletions
diff --git a/Lib/email/feedparser.py b/Lib/email/feedparser.py
index aa8a2ff..56f50df 100644
--- a/Lib/email/feedparser.py
+++ b/Lib/email/feedparser.py
@@ -25,6 +25,7 @@ import re
 
 from email import errors
 from email import message
+from email._policybase import compat32
 
 NLCRE = re.compile('\r\n|\r|\n')
 NLCRE_bol = re.compile('(\r\n|\r|\n)')
@@ -120,9 +121,6 @@ class BufferedSubFile(object):
         # Reverse and insert at the front of the lines.
         self._lines[:0] = lines[::-1]
 
-    def is_closed(self):
-        return self._closed
-
     def __iter__(self):
         return self
 
@@ -137,9 +135,22 @@ class BufferedSubFile(object):
 class FeedParser:
     """A feed-style parser of email."""
 
-    def __init__(self, _factory=message.Message):
-        """_factory is called with no arguments to create a new message obj"""
+    def __init__(self, _factory=message.Message, *, policy=compat32):
+        """_factory is called with no arguments to create a new message obj
+
+        The policy keyword specifies a policy object that controls a number of
+        aspects of the parser's operation.  The default policy maintains
+        backward compatibility.
+
+        """
         self._factory = _factory
+        self.policy = policy
+        try:
+            _factory(policy=self.policy)
+            self._factory_kwds = lambda: {'policy': self.policy}
+        except TypeError:
+            # Assume this is an old-style factory
+            self._factory_kwds = lambda: {}
         self._input = BufferedSubFile()
         self._msgstack = []
         self._parse = self._parsegen().__next__
@@ -171,11 +182,12 @@ class FeedParser:
         # Look for final set of defects
         if root.get_content_maintype() == 'multipart' \
                and not root.is_multipart():
-            root.defects.append(errors.MultipartInvariantViolationDefect())
+            defect = errors.MultipartInvariantViolationDefect()
+            self.policy.handle_defect(root, defect)
         return root
 
     def _new_message(self):
-        msg = self._factory()
+        msg = self._factory(**self._factory_kwds())
         if self._cur and self._cur.get_content_type() == 'multipart/digest':
             msg.set_default_type('message/rfc822')
         if self._msgstack:
@@ -207,6 +219,8 @@ class FeedParser:
                 # (i.e. newline), just throw it away. Otherwise the line is
                 # part of the body so push it back.
                 if not NLCRE.match(line):
+                    defect = errors.MissingHeaderBodySeparatorDefect()
+                    self.policy.handle_defect(self._cur, defect)
                     self._input.unreadline(line)
                 break
             headers.append(line)
@@ -284,7 +298,8 @@ class FeedParser:
                 # defined a boundary.  That's a problem which we'll handle by
                 # reading everything until the EOF and marking the message as
                 # defective.
-                self._cur.defects.append(errors.NoBoundaryInMultipartDefect())
+                defect = errors.NoBoundaryInMultipartDefect()
+                self.policy.handle_defect(self._cur, defect)
                 lines = []
                 for line in self._input:
                     if line is NeedMoreData:
@@ -293,6 +308,11 @@ class FeedParser:
                     lines.append(line)
                 self._cur.set_payload(EMPTYSTRING.join(lines))
                 return
+            # Make sure a valid content type was specified per RFC 2045:6.4.
+            if (self._cur.get('content-transfer-encoding', '8bit').lower()
+                    not in ('7bit', '8bit', 'binary')):
+                defect = errors.InvalidMultipartContentTransferEncodingDefect()
+                self.policy.handle_defect(self._cur, defect)
             # Create a line match predicate which matches the inter-part
             # boundary as well as the end-of-multipart boundary.  Don't push
             # this onto the input stream until we've scanned past the
@@ -304,6 +324,7 @@ class FeedParser:
             capturing_preamble = True
             preamble = []
             linesep = False
+            close_boundary_seen = False
             while True:
                 line = self._input.readline()
                 if line is NeedMoreData:
@@ -318,6 +339,7 @@ class FeedParser:
                     # the closing boundary, then we need to initialize the
                     # epilogue with the empty string (see below).
                     if mo.group('end'):
+                        close_boundary_seen = True
                         linesep = mo.group('linesep')
                         break
                     # We saw an inter-part boundary.  Were we in the preamble?
@@ -386,9 +408,9 @@ class FeedParser:
             # We've seen either the EOF or the end boundary.  If we're still
             # capturing the preamble, we never saw the start boundary.  Note
             # that as a defect and store the captured text as the payload.
-            # Everything from here to the EOF is epilogue.
             if capturing_preamble:
-                self._cur.defects.append(errors.StartBoundaryNotFoundDefect())
+                defect = errors.StartBoundaryNotFoundDefect()
+                self.policy.handle_defect(self._cur, defect)
                 self._cur.set_payload(EMPTYSTRING.join(preamble))
                 epilogue = []
                 for line in self._input:
@@ -397,8 +419,15 @@ class FeedParser:
                         continue
                 self._cur.epilogue = EMPTYSTRING.join(epilogue)
                 return
-            # If the end boundary ended in a newline, we'll need to make sure
-            # the epilogue isn't None
+            # If we're not processing the preamble, then we might have seen
+            # EOF without seeing that end boundary...that is also a defect.
+            if not close_boundary_seen:
+                defect = errors.CloseBoundaryNotFoundDefect()
+                self.policy.handle_defect(self._cur, defect)
+                return
+            # Everything from here to the EOF is epilogue.  If the end boundary
+            # ended in a newline, we'll need to make sure the epilogue isn't
+            # None
             if linesep:
                 epilogue = ['']
             else:
@@ -440,14 +469,12 @@ class FeedParser:
                     # is illegal, so let's note the defect, store the illegal
                     # line, and ignore it for purposes of headers.
                     defect = errors.FirstHeaderLineIsContinuationDefect(line)
-                    self._cur.defects.append(defect)
+                    self.policy.handle_defect(self._cur, defect)
                     continue
                 lastvalue.append(line)
                 continue
             if lastheader:
-                # XXX reconsider the joining of folded lines
-                lhdr = EMPTYSTRING.join(lastvalue)[:-1].rstrip('\r\n')
-                self._cur[lastheader] = lhdr
+                self._cur.set_raw(*self.policy.header_source_parse(lastvalue))
                 lastheader, lastvalue = '', []
             # Check for envelope header, i.e. unix-from
             if line.startswith('From '):
@@ -471,19 +498,17 @@ class FeedParser:
                     self._cur.defects.append(defect)
                     continue
             # Split the line on the colon separating field name from value.
+            # There will always be a colon, because if there wasn't the part of
+            # the parser that calls us would have started parsing the body.
             i = line.find(':')
-            if i < 0:
-                defect = errors.MalformedHeaderDefect(line)
-                self._cur.defects.append(defect)
-                continue
+            assert i>0, "_parse_headers fed line with no : and no leading WS"
             lastheader = line[:i]
-            lastvalue = [line[i+1:].lstrip()]
+            lastvalue = [line]
         # Done with all the lines, so handle the last header.
         if lastheader:
-            # XXX reconsider the joining of folded lines
-            self._cur[lastheader] = EMPTYSTRING.join(lastvalue).rstrip('\r\n')
+            self._cur.set_raw(*self.policy.header_source_parse(lastvalue))
+
 
-
 class BytesFeedParser(FeedParser):
     """Like FeedParser, but feed accepts bytes."""