diff options
author | R David Murray <rdmurray@bitdance.com> | 2016-09-07 21:44:34 (GMT) |
---|---|---|
committer | R David Murray <rdmurray@bitdance.com> | 2016-09-07 21:44:34 (GMT) |
commit | dc1650ca062a99d41a029a6645dc72fd7d820c94 (patch) | |
tree | 7719487f2ea0d6a95d2e024e365dbedacf697534 /Lib/email | |
parent | 6b46ec7733ad7391b9e008d2b273c556f140f88e (diff) | |
download | cpython-dc1650ca062a99d41a029a6645dc72fd7d820c94.zip cpython-dc1650ca062a99d41a029a6645dc72fd7d820c94.tar.gz cpython-dc1650ca062a99d41a029a6645dc72fd7d820c94.tar.bz2 |
#22233: Only split headers on \r and/or \n, per email RFCs.
Original patch by Martin Panter, new policy fixes by me.
Diffstat (limited to 'Lib/email')
-rw-r--r-- | Lib/email/feedparser.py | 33 | ||||
-rw-r--r-- | Lib/email/policy.py | 9 |
2 files changed, 25 insertions, 17 deletions
```diff
diff --git a/Lib/email/feedparser.py b/Lib/email/feedparser.py
index c542018..0b312e5 100644
--- a/Lib/email/feedparser.py
+++ b/Lib/email/feedparser.py
@@ -27,6 +27,7 @@ from email import errors
 from email import message
 from email._policybase import compat32
 from collections import deque
+from io import StringIO
 
 NLCRE = re.compile('\r\n|\r|\n')
 NLCRE_bol = re.compile('(\r\n|\r|\n)')
@@ -51,8 +52,9 @@ class BufferedSubFile(object):
     simple abstraction -- it parses until EOF closes the current message.
     """
     def __init__(self):
-        # Chunks of the last partial line pushed into this object.
-        self._partial = []
+        # Text stream of the last partial line pushed into this object.
+        # See issue 22233 for why this is a text stream and not a list.
+        self._partial = StringIO(newline='')
         # A deque of full, pushed lines
         self._lines = deque()
         # The stack of false-EOF checking predicates.
@@ -68,8 +70,10 @@ class BufferedSubFile(object):
 
     def close(self):
         # Don't forget any trailing partial line.
-        self.pushlines(''.join(self._partial).splitlines(True))
-        self._partial = []
+        self._partial.seek(0)
+        self.pushlines(self._partial.readlines())
+        self._partial.seek(0)
+        self._partial.truncate()
         self._closed = True
 
     def readline(self):
@@ -97,26 +101,23 @@ class BufferedSubFile(object):
 
     def push(self, data):
         """Push some new data into this object."""
-        # Crack into lines, but preserve the linesep characters on the end of each
-        parts = data.splitlines(True)
-
-        if not parts or not parts[0].endswith(('\n', '\r')):
-            # No new complete lines, so just accumulate partials
-            self._partial += parts
+        self._partial.write(data)
+        if '\n' not in data and '\r' not in data:
+            # No new complete lines, wait for more.
             return
 
-        if self._partial:
-            # If there are previous leftovers, complete them now
-            self._partial.append(parts[0])
-            parts[0:1] = ''.join(self._partial).splitlines(True)
-            del self._partial[:]
+        # Crack into lines, preserving the linesep characters.
+        self._partial.seek(0)
+        parts = self._partial.readlines()
+        self._partial.seek(0)
+        self._partial.truncate()
 
         # If the last element of the list does not end in a newline, then treat
         # it as a partial line. We only check for '\n' here because a line
         # ending with '\r' might be a line that was split in the middle of a
         # '\r\n' sequence (see bugs 1555570 and 1721862).
         if not parts[-1].endswith('\n'):
-            self._partial = [parts.pop()]
+            self._partial.write(parts.pop())
         self.pushlines(parts)
 
     def pushlines(self, lines):
diff --git a/Lib/email/policy.py b/Lib/email/policy.py
index 6ac64a5..35d0e69 100644
--- a/Lib/email/policy.py
+++ b/Lib/email/policy.py
@@ -2,6 +2,7 @@
 code that adds all the email6 features.
 """
 
+import re
 from email._policybase import Policy, Compat32, compat32, _extend_docstrings
 from email.utils import _has_surrogates
 from email.headerregistry import HeaderRegistry as HeaderRegistry
@@ -18,6 +19,8 @@ __all__ = [
     'HTTP',
     ]
 
+linesep_splitter = re.compile(r'\n|\r')
+
 @_extend_docstrings
 class EmailPolicy(Policy):
 
@@ -135,6 +138,8 @@ class EmailPolicy(Policy):
         if hasattr(value, 'name') and value.name.lower() == name.lower():
             return (name, value)
         if isinstance(value, str) and len(value.splitlines())>1:
+            # XXX this error message isn't quite right when we use splitlines
+            # (see issue 22233), but I'm not sure what should happen here.
             raise ValueError("Header values may not contain linefeed "
                              "or carriage return characters")
         return (name, self.header_factory(name, value))
@@ -150,7 +155,9 @@ class EmailPolicy(Policy):
         """
         if hasattr(value, 'name'):
             return value
-        return self.header_factory(name, ''.join(value.splitlines()))
+        # We can't use splitlines here because it splits on more than \r and \n.
+        value = ''.join(linesep_splitter.split(value))
+        return self.header_factory(name, value)
 
     def fold(self, name, value):
         """+
```