diff options
author | Barry Warsaw <barry@python.org> | 2002-07-18 23:09:09 (GMT) |
---|---|---|
committer | Barry Warsaw <barry@python.org> | 2002-07-18 23:09:09 (GMT) |
commit | 7aeac9180e3d6df3d5db89ee7ff5941a81dc5a5d (patch) | |
tree | 0b729e7ad55be4cb686ce85bbdf9e0f8dfc8184d /Lib/email/Parser.py | |
parent | e21262ca9e286aee27741eb8bb69508a911ec10b (diff) | |
download | cpython-7aeac9180e3d6df3d5db89ee7ff5941a81dc5a5d.zip cpython-7aeac9180e3d6df3d5db89ee7ff5941a81dc5a5d.tar.gz cpython-7aeac9180e3d6df3d5db89ee7ff5941a81dc5a5d.tar.bz2 |
Anthony Baxter's cleanup patch. Python project SF patch # 583190,
quoting:
In non-strict mode, messages with a missing end-terminator don't
require a blank line at the end. A single newline is sufficient now.
Handle trailing whitespace at the end of a boundary. Had to switch
from using string.split() to re.split().
Handle whitespace on the end of a parameter list for Content-Type.
Handle whitespace on the end of a plain Content-Type header.
Specifically,
get_type(): Strip the content type string.
_get_params_preserve(): Strip the parameter names and values on both
sides.
_parsebody(): Lots of changes as described above, with some stylistic
changes by Barry (who hopefully didn't screw things up ;).
Diffstat (limited to 'Lib/email/Parser.py')
-rw-r--r-- | Lib/email/Parser.py | 43 |
1 files changed, 25 insertions, 18 deletions
diff --git a/Lib/email/Parser.py b/Lib/email/Parser.py index 228adbc..3081107 100644 --- a/Lib/email/Parser.py +++ b/Lib/email/Parser.py @@ -124,19 +124,25 @@ class Parser: if boundary: preamble = epilogue = None # Split into subparts. The first boundary we're looking for won't - # have the leading newline since we're at the start of the body - # text. + # always have a leading newline since we're at the start of the + # body text, and there's not always a preamble before the first + # boundary. separator = '--' + boundary payload = fp.read() - start = payload.find(separator) - if start < 0: + # We use an RE here because boundaries can have trailing + # whitespace. + mo = re.search( + r'(?P<sep>' + re.escape(separator) + r')(?P<ws>[ \t]*)', + payload) + if not mo: raise Errors.BoundaryError( "Couldn't find starting boundary: %s" % boundary) + start = mo.start() if start > 0: # there's some pre-MIME boundary preamble preamble = payload[0:start] # Find out what kind of line endings we're using - start += len(separator) + start += len(mo.group('sep')) + len(mo.group('ws')) cre = re.compile('\r\n|\r|\n') mo = cre.search(payload, start) if mo: @@ -151,31 +157,32 @@ class Parser: terminator = mo.start() linesep = mo.group('sep') if mo.end() < len(payload): - # there's some post-MIME boundary epilogue + # There's some post-MIME boundary epilogue epilogue = payload[mo.end():] elif self._strict: raise Errors.BoundaryError( "Couldn't find terminating boundary: %s" % boundary) else: - # handle the case of no trailing boundary. I hate mail clients. - # check that it ends in a blank line - endre = re.compile('(?P<sep>\r\n|\r|\n){2}$') - mo = endre.search(payload) + # Handle the case of no trailing boundary. Check that it ends + # in a blank line. Some cases (spamspamspam) don't even have + # that! 
+ mo = re.search('(?P<sep>\r\n|\r|\n){2}$', payload) if not mo: - raise Errors.BoundaryError( - "Couldn't find terminating boundary, and no "+ - "trailing empty line") - else: - linesep = mo.group('sep') - terminator = len(payload) + mo = re.search('(?P<sep>\r\n|\r|\n)$', payload) + if not mo: + raise Errors.BoundaryError( + 'No terminating boundary and no trailing empty line') + linesep = mo.group('sep') + terminator = len(payload) # We split the textual payload on the boundary separator, which # includes the trailing newline. If the container is a # multipart/digest then the subparts are by default message/rfc822 # instead of text/plain. In that case, they'll have a optional # block of MIME headers, then an empty line followed by the # message headers. - separator += linesep - parts = payload[start:terminator].split(linesep + separator) + parts = re.split( + linesep + re.escape(separator) + r'[ \t]*' + linesep, + payload[start:terminator]) for part in parts: if isdigest: if part[0] == linesep: |