diff options
Diffstat (limited to 'Lib/email/Parser.py')
| -rw-r--r-- | Lib/email/Parser.py | 30 | 
1 files changed, 20 insertions, 10 deletions
diff --git a/Lib/email/Parser.py b/Lib/email/Parser.py
index 7177dfc..cb994ba 100644
--- a/Lib/email/Parser.py
+++ b/Lib/email/Parser.py
@@ -4,6 +4,7 @@
 """A parser of RFC 2822 and MIME email messages.
 """
 
+import re
 from cStringIO import StringIO
 from types import ListType
 
@@ -117,25 +118,34 @@ class Parser:
             if start > 0:
                 # there's some pre-MIME boundary preamble
                 preamble = payload[0:start]
-            start += len(separator) + 1 + isdigest
-            terminator = payload.find('\n' + separator + '--', start)
-            if terminator < 0:
+            # Find out what kind of line endings we're using
+            start += len(separator)
+            cre = re.compile('\r\n|\r|\n')
+            mo = cre.search(payload, start)
+            if mo:
+                start += len(mo.group(0)) * (1 + isdigest)
+            # We create a compiled regexp first because we need to be able to
+            # specify the start position, and the module function doesn't
+            # support this signature. :(
+            cre = re.compile('(?P<sep>\r\n|\r|\n)' +
+                             re.escape(separator) + '--')
+            mo = cre.search(payload, start)
+            if not mo:
                 raise Errors.BoundaryError(
                     "Couldn't find terminating boundary: %s" % boundary)
-            if terminator+len(separator)+3 < len(payload):
+            terminator = mo.start()
+            linesep = mo.group('sep')
+            if mo.end() < len(payload):
                 # there's some post-MIME boundary epilogue
-                epilogue = payload[terminator+len(separator)+3:]
+                epilogue = payload[mo.end():]
             # We split the textual payload on the boundary separator, which
             # includes the trailing newline.  If the container is a
             # multipart/digest then the subparts are by default message/rfc822
             # instead of text/plain.  In that case, they'll have an extra
             # newline before the headers to distinguish the message's headers
             # from the subpart headers.
-            if isdigest:
-                separator += '\n\n'
-            else:
-                separator += '\n'
-            parts = payload[start:terminator].split('\n' + separator)
+            separator += linesep * (1 + isdigest)
+            parts = payload[start:terminator].split(linesep + separator)
             for part in parts:
                 msgobj = self.parsestr(part)
                 container.preamble = preamble
