Anthony Baxter's cleanup patch. Python project SF patch #583190,

quoting: In non-strict mode, messages with a missing end terminator no longer require a blank line at the end; a single newline is now sufficient. Handle trailing whitespace at the end of a boundary (this required switching from string.split() to re.split()). Handle whitespace at the end of a parameter list for Content-Type. Handle whitespace at the end of a plain Content-Type header. Specifically: get_type(): Strip the content type string. _get_params_preserve(): Strip the parameter names and values on both sides. _parsebody(): Lots of changes as described above, with some stylistic changes by Barry (who hopefully didn't screw things up ;).
author: Barry Warsaw <barry@python.org> 2002-07-18 23:09:09 (GMT)
committer: Barry Warsaw <barry@python.org> 2002-07-18 23:09:09 (GMT)
commit: 7aeac9180e3d6df3d5db89ee7ff5941a81dc5a5d (patch)
tree: 0b729e7ad55be4cb686ce85bbdf9e0f8dfc8184d /Lib/email/Parser.py
parent: e21262ca9e286aee27741eb8bb69508a911ec10b (diff)
download: cpython-7aeac9180e3d6df3d5db89ee7ff5941a81dc5a5d.zip
cpython-7aeac9180e3d6df3d5db89ee7ff5941a81dc5a5d.tar.gz
cpython-7aeac9180e3d6df3d5db89ee7ff5941a81dc5a5d.tar.bz2
1 files changed, 25 insertions, 18 deletions
diff --git a/Lib/email/Parser.py b/Lib/email/Parser.py
index 228adbc..3081107 100644
--- a/Lib/email/Parser.py
+++ b/Lib/email/Parser.py
@@ -124,19 +124,25 @@ class Parser:
         if boundary:
             preamble = epilogue = None
             # Split into subparts.  The first boundary we're looking for won't
-            # have the leading newline since we're at the start of the body
-            # text.
+            # always have a leading newline since we're at the start of the
+            # body text, and there's not always a preamble before the first
+            # boundary.
             separator = '--' + boundary
             payload = fp.read()
-            start = payload.find(separator)
-            if start < 0:
+            # We use an RE here because boundaries can have trailing 
+            # whitespace.
+            mo = re.search(
+                r'(?P<sep>' + re.escape(separator) + r')(?P<ws>[ \t]*)',
+                payload)
+            if not mo:
                 raise Errors.BoundaryError(
                     "Couldn't find starting boundary: %s" % boundary)
+            start = mo.start()
             if start > 0:
                 # there's some pre-MIME boundary preamble
                 preamble = payload[0:start]
             # Find out what kind of line endings we're using
-            start += len(separator)
+            start += len(mo.group('sep')) + len(mo.group('ws'))
             cre = re.compile('\r\n|\r|\n')
             mo = cre.search(payload, start)
             if mo:
@@ -151,31 +157,32 @@ class Parser:
                 terminator = mo.start()
                 linesep = mo.group('sep')
                 if mo.end() < len(payload):
-                    # there's some post-MIME boundary epilogue
+                    # There's some post-MIME boundary epilogue
                     epilogue = payload[mo.end():]
             elif self._strict:
                 raise Errors.BoundaryError(
                         "Couldn't find terminating boundary: %s" % boundary)
             else:
-                # handle the case of no trailing boundary. I hate mail clients.
-                # check that it ends in a blank line
-                endre = re.compile('(?P<sep>\r\n|\r|\n){2}$')
-                mo = endre.search(payload)
+                # Handle the case of no trailing boundary.  Check that it ends
+                # in a blank line.  Some cases (spamspamspam) don't even have
+                # that!
+                mo = re.search('(?P<sep>\r\n|\r|\n){2}$', payload)
                 if not mo:
-                    raise Errors.BoundaryError(
-                        "Couldn't find terminating boundary, and no "+
-                        "trailing empty line")
-                else:
-                    linesep = mo.group('sep')
-                    terminator = len(payload)
+                    mo = re.search('(?P<sep>\r\n|\r|\n)$', payload)
+                    if not mo:
+                        raise Errors.BoundaryError(
+                          'No terminating boundary and no trailing empty line')
+                linesep = mo.group('sep')
+                terminator = len(payload)
             # We split the textual payload on the boundary separator, which
             # includes the trailing newline. If the container is a
             # multipart/digest then the subparts are by default message/rfc822 
             # instead of text/plain.  In that case, they'll have a optional 
             # block of MIME headers, then an empty line followed by the 
             # message headers.
-            separator += linesep
-            parts = payload[start:terminator].split(linesep + separator)
+            parts = re.split(
+                linesep + re.escape(separator) + r'[ \t]*' + linesep,
+                payload[start:terminator])
             for part in parts:
                 if isdigest: 
                     if part[0] == linesep:
author	Barry Warsaw <barry@python.org>	2002-07-18 23:09:09 (GMT)
committer	Barry Warsaw <barry@python.org>	2002-07-18 23:09:09 (GMT)
commit	7aeac9180e3d6df3d5db89ee7ff5941a81dc5a5d (patch)
tree	0b729e7ad55be4cb686ce85bbdf9e0f8dfc8184d /Lib/email/Parser.py
parent	e21262ca9e286aee27741eb8bb69508a911ec10b (diff)
download	cpython-7aeac9180e3d6df3d5db89ee7ff5941a81dc5a5d.zip cpython-7aeac9180e3d6df3d5db89ee7ff5941a81dc5a5d.tar.gz cpython-7aeac9180e3d6df3d5db89ee7ff5941a81dc5a5d.tar.bz2