#1555570: correctly handle a \r\n that is split by the read buffer.

Patch and test by Tony Nelson.
author: R. David Murray <rdmurray@bitdance.com> 2010-07-17 01:19:57 (GMT)
committer: R. David Murray <rdmurray@bitdance.com> 2010-07-17 01:19:57 (GMT)
commit: 45bf773f605bdee3b4f8334a97d6130a75b9286a (patch)
tree: 66d5b495715cdd792ec445b32edbc5ebda0ca3b0 /Lib/email
parent: cbe1a4e28fb8dcd9fa1a17a1160ccc1c2ff9ff99 (diff)
download: cpython-45bf773f605bdee3b4f8334a97d6130a75b9286a.zip
cpython-45bf773f605bdee3b4f8334a97d6130a75b9286a.tar.gz
cpython-45bf773f605bdee3b4f8334a97d6130a75b9286a.tar.bz2
2 files changed, 37 insertions, 0 deletions
diff --git a/Lib/email/feedparser.py b/Lib/email/feedparser.py
index a6853c2..8db70b3 100644
--- a/Lib/email/feedparser.py
+++ b/Lib/email/feedparser.py
@@ -104,6 +104,10 @@ class BufferedSubFile(object):
         # data after the final RE.  In the case of a NL/CR terminated string,
         # this is the empty string.
         self._partial = parts.pop()
+        # GAN 29Mar09, bugs 1555570, 1721862: a chunk ending with \r (e.g. at an 8K
+        # boundary) is ambiguous, since a \n may follow later; hold that line back.
+        if not self._partial and parts and parts[-1].endswith('\r'):
+            self._partial = parts.pop(-2)+parts.pop()
         # parts is a list of strings, alternating between the line contents
         # and the eol character(s).  Gather up a list of lines after
         # re-attaching the newlines.
diff --git a/Lib/email/test/test_email.py b/Lib/email/test/test_email.py
index be0565e..09f51df 100644
--- a/Lib/email/test/test_email.py
+++ b/Lib/email/test/test_email.py
@@ -2454,6 +2454,39 @@ Do you like this message?
 -Me
 """)
 
+    def test_pushCR_LF(self):
+        '''Check that BufferedSubFile handles a CRLF split across push() calls.
+           push() used to assume each chunk held complete line endings, so a CR
+           ending one push() and a LF starting the next added an empty line.
+        '''
+        imt = [  # (chunk to push, lines expected from readline() after it)
+            ("a\r \n",  2),
+            ("b",       0),
+            ("c\n",     1),
+            ("",        0),
+            ("d\r\n",   1),
+            ("e\r",     0),  # trailing CR: no line yet, a LF may still follow
+            ("\nf",     1),  # the LF completes the "e" line; "f" has no EOL yet
+            ("\r\n",    1),
+          ]
+        from email.feedparser import BufferedSubFile, NeedMoreData
+        bsf = BufferedSubFile()
+        om = []  # every line returned by readline(), in order
+        nt = 0  # running total of the expected line counts
+        for il, n in imt:
+            bsf.push(il)
+            nt += n
+            n1 = 0  # lines actually read back after this push
+            while True:
+                ol = bsf.readline()
+                if ol == NeedMoreData:
+                    break
+                om.append(ol)
+                n1 += 1
+            self.assertTrue(n == n1)
+        self.assertTrue(len(om) == nt)
+        self.assertTrue(''.join([il for il, n in imt]) == ''.join(om))
+
 
 
 class TestParsers(TestEmailBase):
author	R. David Murray <rdmurray@bitdance.com>	2010-07-17 01:19:57 (GMT)
committer	R. David Murray <rdmurray@bitdance.com>	2010-07-17 01:19:57 (GMT)
commit	45bf773f605bdee3b4f8334a97d6130a75b9286a (patch)
tree	66d5b495715cdd792ec445b32edbc5ebda0ca3b0 /Lib/email
parent	cbe1a4e28fb8dcd9fa1a17a1160ccc1c2ff9ff99 (diff)
download	cpython-45bf773f605bdee3b4f8334a97d6130a75b9286a.zip cpython-45bf773f605bdee3b4f8334a97d6130a75b9286a.tar.gz cpython-45bf773f605bdee3b4f8334a97d6130a75b9286a.tar.bz2