summaryrefslogtreecommitdiffstats
path: root/Lib/email/feedparser.py
diff options
context:
space:
mode:
Diffstat (limited to 'Lib/email/feedparser.py')
-rw-r--r--Lib/email/feedparser.py58
1 files changed, 31 insertions, 27 deletions
diff --git a/Lib/email/feedparser.py b/Lib/email/feedparser.py
index c95b27f..0b312e5 100644
--- a/Lib/email/feedparser.py
+++ b/Lib/email/feedparser.py
@@ -26,6 +26,8 @@ import re
from email import errors
from email import message
from email._policybase import compat32
+from collections import deque
+from io import StringIO
NLCRE = re.compile('\r\n|\r|\n')
NLCRE_bol = re.compile('(\r\n|\r|\n)')
@@ -50,10 +52,11 @@ class BufferedSubFile(object):
simple abstraction -- it parses until EOF closes the current message.
"""
def __init__(self):
- # Chunks of the last partial line pushed into this object.
- self._partial = []
- # The list of full, pushed lines, in reverse order
- self._lines = []
+ # Text stream of the last partial line pushed into this object.
+ # See issue 22233 for why this is a text stream and not a list.
+ self._partial = StringIO(newline='')
+ # A deque of full, pushed lines
+ self._lines = deque()
# The stack of false-EOF checking predicates.
self._eofstack = []
# A flag indicating whether the file has been closed or not.
@@ -67,8 +70,10 @@ class BufferedSubFile(object):
def close(self):
# Don't forget any trailing partial line.
- self.pushlines(''.join(self._partial).splitlines(True))
- self._partial = []
+ self._partial.seek(0)
+ self.pushlines(self._partial.readlines())
+ self._partial.seek(0)
+ self._partial.truncate()
self._closed = True
def readline(self):
@@ -78,49 +83,45 @@ class BufferedSubFile(object):
return NeedMoreData
# Pop the line off the stack and see if it matches the current
# false-EOF predicate.
- line = self._lines.pop()
+ line = self._lines.popleft()
# RFC 2046, section 5.1.2 requires us to recognize outer level
# boundaries at any level of inner nesting. Do this, but be sure it's
# in the order of most to least nested.
- for ateof in self._eofstack[::-1]:
+ for ateof in reversed(self._eofstack):
if ateof(line):
# We're at the false EOF. But push the last line back first.
- self._lines.append(line)
+ self._lines.appendleft(line)
return ''
return line
def unreadline(self, line):
# Let the consumer push a line back into the buffer.
assert line is not NeedMoreData
- self._lines.append(line)
+ self._lines.appendleft(line)
def push(self, data):
"""Push some new data into this object."""
- # Crack into lines, but preserve the linesep characters on the end of each
- parts = data.splitlines(True)
-
- if not parts or not parts[0].endswith(('\n', '\r')):
- # No new complete lines, so just accumulate partials
- self._partial += parts
+ self._partial.write(data)
+ if '\n' not in data and '\r' not in data:
+ # No new complete lines, wait for more.
return
- if self._partial:
- # If there are previous leftovers, complete them now
- self._partial.append(parts[0])
- parts[0:1] = ''.join(self._partial).splitlines(True)
- del self._partial[:]
+ # Crack into lines, preserving the linesep characters.
+ self._partial.seek(0)
+ parts = self._partial.readlines()
+ self._partial.seek(0)
+ self._partial.truncate()
# If the last element of the list does not end in a newline, then treat
# it as a partial line. We only check for '\n' here because a line
# ending with '\r' might be a line that was split in the middle of a
# '\r\n' sequence (see bugs 1555570 and 1721862).
if not parts[-1].endswith('\n'):
- self._partial = [parts.pop()]
+ self._partial.write(parts.pop())
self.pushlines(parts)
def pushlines(self, lines):
- # Reverse and insert at the front of the lines.
- self._lines[:0] = lines[::-1]
+ self._lines.extend(lines)
def __iter__(self):
return self
@@ -145,7 +146,7 @@ class FeedParser:
"""
self.policy = policy
- self._factory_kwds = lambda: {'policy': self.policy}
+ self._old_style_factory = False
if _factory is None:
# What this should be:
#self._factory = policy.default_message_factory
@@ -160,7 +161,7 @@ class FeedParser:
_factory(policy=self.policy)
except TypeError:
# Assume this is an old-style factory
- self._factory_kwds = lambda: {}
+ self._old_style_factory = True
self._input = BufferedSubFile()
self._msgstack = []
self._parse = self._parsegen().__next__
@@ -197,7 +198,10 @@ class FeedParser:
return root
def _new_message(self):
- msg = self._factory(**self._factory_kwds())
+ if self._old_style_factory:
+ msg = self._factory()
+ else:
+ msg = self._factory(policy=self.policy)
if self._cur and self._cur.get_content_type() == 'multipart/digest':
msg.set_default_type('message/rfc822')
if self._msgstack: