#5610: use \Z not $ so we don't eat extra chars when body part ends with \r\n.

If a body part ended with \r\n, feedparser, which used '$' to anchor its search for the line ending that precedes the boundary, would match on that \r\n instead of on the final newline (because '$' also matches just before a trailing newline), and so think that it needed to strip two characters in order to account for the line end before the boundary. That made it chop one too many characters off the end of the body part. Using \Z, which matches only at the very end of the string, makes the match correct. Patch and test by Tony Nelson.
author: R. David Murray <rdmurray@bitdance.com> 2010-06-03 15:43:20 (GMT)
committer: R. David Murray <rdmurray@bitdance.com> 2010-06-03 15:43:20 (GMT)
commit: 61746d580e956bc2dda7fcf230a581e463a4d186 (patch)
tree: 1e84de1954226de043d62d82ce411bcdb2e9d2ed
parent: 23152ea5bdf4b2e54bbf9a1efa0c7eba292d38cf (diff)
download: cpython-61746d580e956bc2dda7fcf230a581e463a4d186.zip
cpython-61746d580e956bc2dda7fcf230a581e463a4d186.tar.gz
cpython-61746d580e956bc2dda7fcf230a581e463a4d186.tar.bz2
3 files changed, 22 insertions, 1 deletions
diff --git a/Lib/email/feedparser.py b/Lib/email/feedparser.py
index afb02b3..163fada 100644
--- a/Lib/email/feedparser.py
+++ b/Lib/email/feedparser.py
@@ -28,7 +28,7 @@ from email import message
 
 NLCRE = re.compile('\r\n|\r|\n')
 NLCRE_bol = re.compile('(\r\n|\r|\n)')
-NLCRE_eol = re.compile('(\r\n|\r|\n)$')
+NLCRE_eol = re.compile('(\r\n|\r|\n)\Z')
 NLCRE_crack = re.compile('(\r\n|\r|\n)')
 # RFC 2822 $3.6.8 Optional fields.  ftext is %d33-57 / %d59-126, Any character
 # except controls, SP, and ":".
diff --git a/Lib/email/test/test_email.py b/Lib/email/test/test_email.py
index 7d01079..94eec86 100644
--- a/Lib/email/test/test_email.py
+++ b/Lib/email/test/test_email.py
@@ -2610,6 +2610,24 @@ Here's the message body
         eq(headers, ['A', 'B', 'CC'])
         eq(msg.get_payload(), 'body')
 
+    def test_CRLFLF_at_end_of_part(self):
+        # issue 5610: feedparser should not eat two chars from body part ending
+        # with "\r\n\n".
+        m = (
+            "From: foo@bar.com\n"
+            "To: baz\n"
+            "Mime-Version: 1.0\n"
+            "Content-Type: multipart/mixed; boundary=BOUNDARY\n"
+            "\n"
+            "--BOUNDARY\n"
+            "Content-Type: text/plain\n"
+            "\n"
+            "body ending with CRLF newline\r\n"
+            "\n"
+            "--BOUNDARY--\n"
+          )
+        msg = email.message_from_string(m)
+        self.assertTrue(msg.get_payload(0).get_payload().endswith('\r\n'))
 
 
 class TestBase64(unittest.TestCase):
diff --git a/Misc/NEWS b/Misc/NEWS
index 25b265b..81891c1 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -46,6 +46,9 @@ C-API
 Library
 -------
 
+- Issue #5610: feedparser no longer eats extra characters at the end of
+  a body part if the body part ends with a \r\n.
+
 - Issue #8833: tarfile created hard link entries with a size field != 0 by
   mistake.
author	R. David Murray <rdmurray@bitdance.com>	2010-06-03 15:43:20 (GMT)
committer	R. David Murray <rdmurray@bitdance.com>	2010-06-03 15:43:20 (GMT)
commit	61746d580e956bc2dda7fcf230a581e463a4d186 (patch)
tree	1e84de1954226de043d62d82ce411bcdb2e9d2ed
parent	23152ea5bdf4b2e54bbf9a1efa0c7eba292d38cf (diff)
download	cpython-61746d580e956bc2dda7fcf230a581e463a4d186.zip cpython-61746d580e956bc2dda7fcf230a581e463a4d186.tar.gz cpython-61746d580e956bc2dda7fcf230a581e463a4d186.tar.bz2