summary | refs | log | tree | commit | diff | stats
path: root/Lib/email
diff options
context:
space:
mode:
author: R David Murray <rdmurray@bitdance.com>, 2016-09-07 21:44:34 (GMT)
committer: R David Murray <rdmurray@bitdance.com>, 2016-09-07 21:44:34 (GMT)
commit: dc1650ca062a99d41a029a6645dc72fd7d820c94 (patch)
tree: 7719487f2ea0d6a95d2e024e365dbedacf697534 /Lib/email
parent: 6b46ec7733ad7391b9e008d2b273c556f140f88e (diff)
download: cpython-dc1650ca062a99d41a029a6645dc72fd7d820c94.zip
cpython-dc1650ca062a99d41a029a6645dc72fd7d820c94.tar.gz
cpython-dc1650ca062a99d41a029a6645dc72fd7d820c94.tar.bz2
#22233: Only split headers on \r and/or \n, per email RFCs.
Original patch by Martin Panter, new policy fixes by me.
Diffstat (limited to 'Lib/email')
-rw-r--r-- Lib/email/feedparser.py | 33
-rw-r--r-- Lib/email/policy.py | 9
2 files changed, 25 insertions, 17 deletions
diff --git a/Lib/email/feedparser.py b/Lib/email/feedparser.py
index c542018..0b312e5 100644
--- a/Lib/email/feedparser.py
+++ b/Lib/email/feedparser.py
@@ -27,6 +27,7 @@ from email import errors
from email import message
from email._policybase import compat32
from collections import deque
+from io import StringIO
NLCRE = re.compile('\r\n|\r|\n')
NLCRE_bol = re.compile('(\r\n|\r|\n)')
@@ -51,8 +52,9 @@ class BufferedSubFile(object):
simple abstraction -- it parses until EOF closes the current message.
"""
def __init__(self):
- # Chunks of the last partial line pushed into this object.
- self._partial = []
+ # Text stream of the last partial line pushed into this object.
+ # See issue 22233 for why this is a text stream and not a list.
+ self._partial = StringIO(newline='')
# A deque of full, pushed lines
self._lines = deque()
# The stack of false-EOF checking predicates.
@@ -68,8 +70,10 @@ class BufferedSubFile(object):
def close(self):
# Don't forget any trailing partial line.
- self.pushlines(''.join(self._partial).splitlines(True))
- self._partial = []
+ self._partial.seek(0)
+ self.pushlines(self._partial.readlines())
+ self._partial.seek(0)
+ self._partial.truncate()
self._closed = True
def readline(self):
@@ -97,26 +101,23 @@ class BufferedSubFile(object):
def push(self, data):
"""Push some new data into this object."""
- # Crack into lines, but preserve the linesep characters on the end of each
- parts = data.splitlines(True)
-
- if not parts or not parts[0].endswith(('\n', '\r')):
- # No new complete lines, so just accumulate partials
- self._partial += parts
+ self._partial.write(data)
+ if '\n' not in data and '\r' not in data:
+ # No new complete lines, wait for more.
return
- if self._partial:
- # If there are previous leftovers, complete them now
- self._partial.append(parts[0])
- parts[0:1] = ''.join(self._partial).splitlines(True)
- del self._partial[:]
+ # Crack into lines, preserving the linesep characters.
+ self._partial.seek(0)
+ parts = self._partial.readlines()
+ self._partial.seek(0)
+ self._partial.truncate()
# If the last element of the list does not end in a newline, then treat
# it as a partial line. We only check for '\n' here because a line
# ending with '\r' might be a line that was split in the middle of a
# '\r\n' sequence (see bugs 1555570 and 1721862).
if not parts[-1].endswith('\n'):
- self._partial = [parts.pop()]
+ self._partial.write(parts.pop())
self.pushlines(parts)
def pushlines(self, lines):
diff --git a/Lib/email/policy.py b/Lib/email/policy.py
index 6ac64a5..35d0e69 100644
--- a/Lib/email/policy.py
+++ b/Lib/email/policy.py
@@ -2,6 +2,7 @@
code that adds all the email6 features.
"""
+import re
from email._policybase import Policy, Compat32, compat32, _extend_docstrings
from email.utils import _has_surrogates
from email.headerregistry import HeaderRegistry as HeaderRegistry
@@ -18,6 +19,8 @@ __all__ = [
'HTTP',
]
+linesep_splitter = re.compile(r'\n|\r')
+
@_extend_docstrings
class EmailPolicy(Policy):
@@ -135,6 +138,8 @@ class EmailPolicy(Policy):
if hasattr(value, 'name') and value.name.lower() == name.lower():
return (name, value)
if isinstance(value, str) and len(value.splitlines())>1:
+ # XXX this error message isn't quite right when we use splitlines
+ # (see issue 22233), but I'm not sure what should happen here.
raise ValueError("Header values may not contain linefeed "
"or carriage return characters")
return (name, self.header_factory(name, value))
@@ -150,7 +155,9 @@ class EmailPolicy(Policy):
"""
if hasattr(value, 'name'):
return value
- return self.header_factory(name, ''.join(value.splitlines()))
+ # We can't use splitlines here because it splits on more than \r and \n.
+ value = ''.join(linesep_splitter.split(value))
+ return self.header_factory(name, value)
def fold(self, name, value):
"""+