diff options
Diffstat (limited to 'Lib/email/feedparser.py')
| -rw-r--r-- | Lib/email/feedparser.py | 51 |
1 files changed, 34 insertions, 17 deletions
diff --git a/Lib/email/feedparser.py b/Lib/email/feedparser.py index aa8a2ff..0706cae 100644 --- a/Lib/email/feedparser.py +++ b/Lib/email/feedparser.py @@ -25,6 +25,7 @@ import re from email import errors from email import message +from email._policybase import compat32 NLCRE = re.compile('\r\n|\r|\n') NLCRE_bol = re.compile('(\r\n|\r|\n)') @@ -120,9 +121,6 @@ class BufferedSubFile(object): # Reverse and insert at the front of the lines. self._lines[:0] = lines[::-1] - def is_closed(self): - return self._closed - def __iter__(self): return self @@ -137,9 +135,22 @@ class BufferedSubFile(object): class FeedParser: """A feed-style parser of email.""" - def __init__(self, _factory=message.Message): - """_factory is called with no arguments to create a new message obj""" + def __init__(self, _factory=message.Message, *, policy=compat32): + """_factory is called with no arguments to create a new message obj + + The policy keyword specifies a policy object that controls a number of + aspects of the parser's operation. The default policy maintains + backward compatibility. + + """ self._factory = _factory + self.policy = policy + try: + _factory(policy=self.policy) + self._factory_kwds = lambda: {'policy': self.policy} + except TypeError: + # Assume this is an old-style factory + self._factory_kwds = lambda: {} self._input = BufferedSubFile() self._msgstack = [] self._parse = self._parsegen().__next__ @@ -171,11 +182,12 @@ class FeedParser: # Look for final set of defects if root.get_content_maintype() == 'multipart' \ and not root.is_multipart(): - root.defects.append(errors.MultipartInvariantViolationDefect()) + defect = errors.MultipartInvariantViolationDefect() + self.policy.handle_defect(root, defect) return root def _new_message(self): - msg = self._factory() + msg = self._factory(**self._factory_kwds()) if self._cur and self._cur.get_content_type() == 'multipart/digest': msg.set_default_type('message/rfc822') if self._msgstack: @@ -284,7 +296,8 @@ class FeedParser: # defined a boundary. That's a problem which we'll handle by # reading everything until the EOF and marking the message as # defective. - self._cur.defects.append(errors.NoBoundaryInMultipartDefect()) + defect = errors.NoBoundaryInMultipartDefect() + self.policy.handle_defect(self._cur, defect) lines = [] for line in self._input: if line is NeedMoreData: @@ -293,6 +306,11 @@ class FeedParser: lines.append(line) self._cur.set_payload(EMPTYSTRING.join(lines)) return + # Make sure a valid content type was specified per RFC 2045:6.4. + if (self._cur.get('content-transfer-encoding', '8bit').lower() + not in ('7bit', '8bit', 'binary')): + defect = errors.InvalidMultipartContentTransferEncodingDefect() + self.policy.handle_defect(self._cur, defect) # Create a line match predicate which matches the inter-part # boundary as well as the end-of-multipart boundary. Don't push # this onto the input stream until we've scanned past the @@ -388,7 +406,8 @@ class FeedParser: # that as a defect and store the captured text as the payload. # Everything from here to the EOF is epilogue. if capturing_preamble: - self._cur.defects.append(errors.StartBoundaryNotFoundDefect()) + defect = errors.StartBoundaryNotFoundDefect() + self.policy.handle_defect(self._cur, defect) self._cur.set_payload(EMPTYSTRING.join(preamble)) epilogue = [] for line in self._input: @@ -440,14 +459,12 @@ class FeedParser: # is illegal, so let's note the defect, store the illegal # line, and ignore it for purposes of headers. defect = errors.FirstHeaderLineIsContinuationDefect(line) - self._cur.defects.append(defect) + self.policy.handle_defect(self._cur, defect) continue lastvalue.append(line) continue if lastheader: - # XXX reconsider the joining of folded lines - lhdr = EMPTYSTRING.join(lastvalue)[:-1].rstrip('\r\n') - self._cur[lastheader] = lhdr + self._cur.set_raw(*self.policy.header_source_parse(lastvalue)) lastheader, lastvalue = '', [] # Check for envelope header, i.e. unix-from if line.startswith('From '): @@ -474,16 +491,16 @@ class FeedParser: i = line.find(':') if i < 0: defect = errors.MalformedHeaderDefect(line) + # XXX: fixme (defect not going through policy) self._cur.defects.append(defect) continue lastheader = line[:i] - lastvalue = [line[i+1:].lstrip()] + lastvalue = [line] # Done with all the lines, so handle the last header. if lastheader: - # XXX reconsider the joining of folded lines - self._cur[lastheader] = EMPTYSTRING.join(lastvalue).rstrip('\r\n') + self._cur.set_raw(*self.policy.header_source_parse(lastvalue)) + - class BytesFeedParser(FeedParser): """Like FeedParser, but feed accepts bytes.""" |
