Merge: #22233: Only split headers on \r and/or \n, per email RFCs.

author: R David Murray <rdmurray@bitdance.com> 2016-09-07 21:46:55 (GMT)
committer: R David Murray <rdmurray@bitdance.com> 2016-09-07 21:46:55 (GMT)
commit: 1badd2816361354f5e69d234b4ff3315f5f590cc (patch)
tree: becb75e793dfd3ea3e0b49865a34023768b4ba0d /Lib/email
parent: d0600ed524acb8b05b78d7399e8de136090703a0 (diff)
parent: dc1650ca062a99d41a029a6645dc72fd7d820c94 (diff)
download: cpython-1badd2816361354f5e69d234b4ff3315f5f590cc.zip
cpython-1badd2816361354f5e69d234b4ff3315f5f590cc.tar.gz
cpython-1badd2816361354f5e69d234b4ff3315f5f590cc.tar.bz2
2 files changed, 25 insertions, 17 deletions
diff --git a/Lib/email/feedparser.py b/Lib/email/feedparser.py
index c542018..0b312e5 100644
--- a/Lib/email/feedparser.py
+++ b/Lib/email/feedparser.py
@@ -27,6 +27,7 @@ from email import errors
 from email import message
 from email._policybase import compat32
 from collections import deque
+from io import StringIO
 
 NLCRE = re.compile('\r\n|\r|\n')
 NLCRE_bol = re.compile('(\r\n|\r|\n)')
@@ -51,8 +52,9 @@ class BufferedSubFile(object):
     simple abstraction -- it parses until EOF closes the current message.
     """
     def __init__(self):
-        # Chunks of the last partial line pushed into this object.
-        self._partial = []
+        # Text stream of the last partial line pushed into this object.
+        # See issue 22233 for why this is a text stream and not a list.
+        self._partial = StringIO(newline='')
         # A deque of full, pushed lines
         self._lines = deque()
         # The stack of false-EOF checking predicates.
@@ -68,8 +70,10 @@ class BufferedSubFile(object):
 
     def close(self):
         # Don't forget any trailing partial line.
-        self.pushlines(''.join(self._partial).splitlines(True))
-        self._partial = []
+        self._partial.seek(0)
+        self.pushlines(self._partial.readlines())
+        self._partial.seek(0)
+        self._partial.truncate()
         self._closed = True
 
     def readline(self):
@@ -97,26 +101,23 @@ class BufferedSubFile(object):
 
     def push(self, data):
         """Push some new data into this object."""
-        # Crack into lines, but preserve the linesep characters on the end of each
-        parts = data.splitlines(True)
-
-        if not parts or not parts[0].endswith(('\n', '\r')):
-            # No new complete lines, so just accumulate partials
-            self._partial += parts
+        self._partial.write(data)
+        if '\n' not in data and '\r' not in data:
+            # No new complete lines, wait for more.
             return
 
-        if self._partial:
-            # If there are previous leftovers, complete them now
-            self._partial.append(parts[0])
-            parts[0:1] = ''.join(self._partial).splitlines(True)
-            del self._partial[:]
+        # Crack into lines, preserving the linesep characters.
+        self._partial.seek(0)
+        parts = self._partial.readlines()
+        self._partial.seek(0)
+        self._partial.truncate()
 
         # If the last element of the list does not end in a newline, then treat
         # it as a partial line.  We only check for '\n' here because a line
         # ending with '\r' might be a line that was split in the middle of a
         # '\r\n' sequence (see bugs 1555570 and 1721862).
         if not parts[-1].endswith('\n'):
-            self._partial = [parts.pop()]
+            self._partial.write(parts.pop())
         self.pushlines(parts)
 
     def pushlines(self, lines):
diff --git a/Lib/email/policy.py b/Lib/email/policy.py
index 6ac64a5..35d0e69 100644
--- a/Lib/email/policy.py
+++ b/Lib/email/policy.py
@@ -2,6 +2,7 @@
 code that adds all the email6 features.
 """
 
+import re
 from email._policybase import Policy, Compat32, compat32, _extend_docstrings
 from email.utils import _has_surrogates
 from email.headerregistry import HeaderRegistry as HeaderRegistry
@@ -18,6 +19,8 @@ __all__ = [
     'HTTP',
     ]
 
+linesep_splitter = re.compile(r'\n|\r')
+
 @_extend_docstrings
 class EmailPolicy(Policy):
 
@@ -135,6 +138,8 @@ class EmailPolicy(Policy):
         if hasattr(value, 'name') and value.name.lower() == name.lower():
             return (name, value)
         if isinstance(value, str) and len(value.splitlines())>1:
+            # XXX this error message isn't quite right when we use splitlines
+            # (see issue 22233), but I'm not sure what should happen here.
             raise ValueError("Header values may not contain linefeed "
                              "or carriage return characters")
         return (name, self.header_factory(name, value))
@@ -150,7 +155,9 @@ class EmailPolicy(Policy):
         """
         if hasattr(value, 'name'):
             return value
-        return self.header_factory(name, ''.join(value.splitlines()))
+        # We can't use splitlines here because it splits on more than \r and \n.
+        value = ''.join(linesep_splitter.split(value))
+        return self.header_factory(name, value)
 
     def fold(self, name, value):
         """+
author	R David Murray <rdmurray@bitdance.com>	2016-09-07 21:46:55 (GMT)
committer	R David Murray <rdmurray@bitdance.com>	2016-09-07 21:46:55 (GMT)
commit	1badd2816361354f5e69d234b4ff3315f5f590cc (patch)
tree	becb75e793dfd3ea3e0b49865a34023768b4ba0d /Lib/email
parent	d0600ed524acb8b05b78d7399e8de136090703a0 (diff)
parent	dc1650ca062a99d41a029a6645dc72fd7d820c94 (diff)
download	cpython-1badd2816361354f5e69d234b4ff3315f5f590cc.zip cpython-1badd2816361354f5e69d234b4ff3315f5f590cc.tar.gz cpython-1badd2816361354f5e69d234b4ff3315f5f590cc.tar.bz2