summaryrefslogtreecommitdiffstats
path: root/Lib/email
diff options
context:
space:
mode:
Diffstat (limited to 'Lib/email')
-rw-r--r--Lib/email/__init__.py99
-rw-r--r--Lib/email/_parseaddr.py22
-rw-r--r--Lib/email/base64mime.py116
-rw-r--r--Lib/email/charset.py238
-rw-r--r--Lib/email/encoders.py34
-rw-r--r--Lib/email/feedparser.py19
-rw-r--r--Lib/email/generator.py263
-rw-r--r--Lib/email/header.py662
-rw-r--r--Lib/email/iterators.py10
-rw-r--r--Lib/email/message.py210
-rw-r--r--Lib/email/mime/audio.py6
-rw-r--r--Lib/email/parser.py51
-rw-r--r--Lib/email/quoprimime.py322
-rw-r--r--Lib/email/test/data/msg_15.txt2
-rw-r--r--Lib/email/test/data/msg_26.txt5
-rw-r--r--Lib/email/test/test_email.py2314
-rw-r--r--Lib/email/test/test_email_codecs.py48
-rw-r--r--Lib/email/test/test_email_codecs_renamed.py77
-rw-r--r--Lib/email/test/test_email_renamed.py3297
-rw-r--r--Lib/email/test/test_email_torture.py4
-rw-r--r--Lib/email/utils.py88
21 files changed, 2903 insertions, 4984 deletions
diff --git a/Lib/email/__init__.py b/Lib/email/__init__.py
index a780ebe..ff16f6a 100644
--- a/Lib/email/__init__.py
+++ b/Lib/email/__init__.py
@@ -1,42 +1,25 @@
-# Copyright (C) 2001-2006 Python Software Foundation
+# Copyright (C) 2001-2007 Python Software Foundation
# Author: Barry Warsaw
# Contact: email-sig@python.org
"""A package for parsing, handling, and generating email messages."""
-__version__ = '4.0.3'
+__version__ = '5.1.0'
__all__ = [
- # Old names
- 'base64MIME',
- 'Charset',
- 'Encoders',
- 'Errors',
- 'Generator',
- 'Header',
- 'Iterators',
- 'Message',
- 'MIMEAudio',
- 'MIMEBase',
- 'MIMEImage',
- 'MIMEMessage',
- 'MIMEMultipart',
- 'MIMENonMultipart',
- 'MIMEText',
- 'Parser',
- 'quopriMIME',
- 'Utils',
- 'message_from_string',
- 'message_from_file',
- # new names
'base64mime',
'charset',
'encoders',
'errors',
+ 'feedparser',
'generator',
'header',
'iterators',
'message',
+ 'message_from_file',
+ 'message_from_binary_file',
+ 'message_from_string',
+ 'message_from_bytes',
'mime',
'parser',
'quoprimime',
@@ -56,6 +39,13 @@ def message_from_string(s, *args, **kws):
from email.parser import Parser
return Parser(*args, **kws).parsestr(s)
+def message_from_bytes(s, *args, **kws):
+ """Parse a bytes string into a Message object model.
+
+ Optional _class and strict are passed to the Parser constructor.
+ """
+ from email.parser import BytesParser
+ return BytesParser(*args, **kws).parsebytes(s)
def message_from_file(fp, *args, **kws):
"""Read a file and parse its contents into a Message object model.
@@ -65,59 +55,10 @@ def message_from_file(fp, *args, **kws):
from email.parser import Parser
return Parser(*args, **kws).parse(fp)
+def message_from_binary_file(fp, *args, **kws):
+ """Read a binary file and parse its contents into a Message object model.
-
-# Lazy loading to provide name mapping from new-style names (PEP 8 compatible
-# email 4.0 module names), to old-style names (email 3.0 module names).
-import sys
-
-class LazyImporter(object):
- def __init__(self, module_name):
- self.__name__ = 'email.' + module_name
-
- def __getattr__(self, name):
- __import__(self.__name__)
- mod = sys.modules[self.__name__]
- self.__dict__.update(mod.__dict__)
- return getattr(mod, name)
-
-
-_LOWERNAMES = [
- # email.<old name> -> email.<new name is lowercased old name>
- 'Charset',
- 'Encoders',
- 'Errors',
- 'FeedParser',
- 'Generator',
- 'Header',
- 'Iterators',
- 'Message',
- 'Parser',
- 'Utils',
- 'base64MIME',
- 'quopriMIME',
- ]
-
-_MIMENAMES = [
- # email.MIME<old name> -> email.mime.<new name is lowercased old name>
- 'Audio',
- 'Base',
- 'Image',
- 'Message',
- 'Multipart',
- 'NonMultipart',
- 'Text',
- ]
-
-for _name in _LOWERNAMES:
- importer = LazyImporter(_name.lower())
- sys.modules['email.' + _name] = importer
- setattr(sys.modules['email'], _name, importer)
-
-
-import email.mime
-for _name in _MIMENAMES:
- importer = LazyImporter('mime.' + _name.lower())
- sys.modules['email.MIME' + _name] = importer
- setattr(sys.modules['email'], 'MIME' + _name, importer)
- setattr(sys.modules['email.mime'], _name, importer)
+ Optional _class and strict are passed to the Parser constructor.
+ """
+ from email.parser import BytesParser
+ return BytesParser(*args, **kws).parse(fp)
diff --git a/Lib/email/_parseaddr.py b/Lib/email/_parseaddr.py
index 690db2c..79573c6 100644
--- a/Lib/email/_parseaddr.py
+++ b/Lib/email/_parseaddr.py
@@ -64,8 +64,10 @@ def parsedate_tz(data):
if len(data) == 4:
s = data[3]
i = s.find('+')
+ if i == -1:
+ i = s.find('-')
if i > 0:
- data[3:] = [s[:i], s[i+1:]]
+ data[3:] = [s[:i], s[i:]]
else:
data.append('') # Dummy tz
if len(data) < 5:
@@ -176,7 +178,7 @@ class AddrlistClass:
front of you.
Note: this class interface is deprecated and may be removed in the future.
- Use rfc822.AddressList instead.
+ Use email.utils.AddressList instead.
"""
def __init__(self, field):
@@ -199,14 +201,18 @@ class AddrlistClass:
self.commentlist = []
def gotonext(self):
- """Parse up to the start of the next address."""
+ """Skip white space and extract comments."""
+ wslist = []
while self.pos < len(self.field):
if self.field[self.pos] in self.LWS + '\n\r':
+ if self.field[self.pos] not in '\n\r':
+ wslist.append(self.field[self.pos])
self.pos += 1
elif self.field[self.pos] == '(':
self.commentlist.append(self.getcomment())
else:
break
+ return EMPTYSTRING.join(wslist)
def getaddrlist(self):
"""Parse all addresses.
@@ -319,16 +325,24 @@ class AddrlistClass:
self.gotonext()
while self.pos < len(self.field):
+ preserve_ws = True
if self.field[self.pos] == '.':
+ if aslist and not aslist[-1].strip():
+ aslist.pop()
aslist.append('.')
self.pos += 1
+ preserve_ws = False
elif self.field[self.pos] == '"':
aslist.append('"%s"' % quote(self.getquote()))
elif self.field[self.pos] in self.atomends:
+ if aslist and not aslist[-1].strip():
+ aslist.pop()
break
else:
aslist.append(self.getatom())
- self.gotonext()
+ ws = self.gotonext()
+ if preserve_ws and ws:
+ aslist.append(ws)
if self.pos >= len(self.field) or self.field[self.pos] != '@':
return EMPTYSTRING.join(aslist)
diff --git a/Lib/email/base64mime.py b/Lib/email/base64mime.py
index 4aa8000..f3bbac1 100644
--- a/Lib/email/base64mime.py
+++ b/Lib/email/base64mime.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2002-2006 Python Software Foundation
+# Copyright (C) 2002-2007 Python Software Foundation
# Author: Ben Gertzfield
# Contact: email-sig@python.org
@@ -25,19 +25,17 @@ module.
"""
__all__ = [
- 'base64_len',
'body_decode',
'body_encode',
'decode',
'decodestring',
- 'encode',
- 'encodestring',
'header_encode',
+ 'header_length',
]
+from base64 import b64encode
from binascii import b2a_base64, a2b_base64
-from email.utils import fix_eols
CRLF = '\r\n'
NL = '\n'
@@ -49,11 +47,10 @@ MISC_LEN = 7
# Helpers
-def base64_len(s):
+def header_length(bytearray):
"""Return the length of s when it is encoded with base64."""
- groups_of_3, leftover = divmod(len(s), 3)
+ groups_of_3, leftover = divmod(len(bytearray), 3)
# 4 bytes out for each 3 bytes (or nonzero fraction thereof) in.
- # Thanks, Tim!
n = groups_of_3 * 4
if leftover:
n += 4
@@ -61,121 +58,60 @@ def base64_len(s):
-def header_encode(header, charset='iso-8859-1', keep_eols=False,
- maxlinelen=76, eol=NL):
+def header_encode(header_bytes, charset='iso-8859-1'):
"""Encode a single header line with Base64 encoding in a given charset.
- Defined in RFC 2045, this Base64 encoding is identical to normal Base64
- encoding, except that each line must be intelligently wrapped (respecting
- the Base64 encoding), and subsequent lines must start with a space.
-
charset names the character set to use to encode the header. It defaults
- to iso-8859-1.
-
- End-of-line characters (\\r, \\n, \\r\\n) will be automatically converted
- to the canonical email line separator \\r\\n unless the keep_eols
- parameter is True (the default is False).
-
- Each line of the header will be terminated in the value of eol, which
- defaults to "\\n". Set this to "\\r\\n" if you are using the result of
- this function directly in email.
-
- The resulting string will be in the form:
-
- "=?charset?b?WW/5ciBtYXp66XLrIHf8eiBhIGhhbXBzdGHuciBBIFlv+XIgbWF6euly?=\\n
- =?charset?b?6yB3/HogYSBoYW1wc3Rh7nIgQkMgWW/5ciBtYXp66XLrIHf8eiBhIGhh?="
-
- with each line wrapped at, at most, maxlinelen characters (defaults to 76
- characters).
+ to iso-8859-1. Base64 encoding is defined in RFC 2045.
"""
- # Return empty headers unchanged
- if not header:
- return header
-
- if not keep_eols:
- header = fix_eols(header)
-
- # Base64 encode each line, in encoded chunks no greater than maxlinelen in
- # length, after the RFC chrome is added in.
- base64ed = []
- max_encoded = maxlinelen - len(charset) - MISC_LEN
- max_unencoded = max_encoded * 3 // 4
-
- for i in range(0, len(header), max_unencoded):
- base64ed.append(b2a_base64(header[i:i+max_unencoded]))
-
- # Now add the RFC chrome to each encoded chunk
- lines = []
- for line in base64ed:
- # Ignore the last character of each line if it is a newline
- if line.endswith(NL):
- line = line[:-1]
- # Add the chrome
- lines.append('=?%s?b?%s?=' % (charset, line))
- # Glue the lines together and return it. BAW: should we be able to
- # specify the leading whitespace in the joiner?
- joiner = eol + ' '
- return joiner.join(lines)
+ if not header_bytes:
+ return ""
+ if isinstance(header_bytes, str):
+ header_bytes = header_bytes.encode(charset)
+ encoded = b64encode(header_bytes).decode("ascii")
+ return '=?%s?b?%s?=' % (charset, encoded)
-def encode(s, binary=True, maxlinelen=76, eol=NL):
- """Encode a string with base64.
+def body_encode(s, maxlinelen=76, eol=NL):
+ r"""Encode a string with base64.
Each line will be wrapped at, at most, maxlinelen characters (defaults to
76 characters).
- If binary is False, end-of-line characters will be converted to the
- canonical email end-of-line sequence \\r\\n. Otherwise they will be left
- verbatim (this is the default).
-
- Each line of encoded text will end with eol, which defaults to "\\n". Set
- this to "\\r\\n" if you will be using the result of this function directly
+ Each line of encoded text will end with eol, which defaults to "\n". Set
+ this to "\r\n" if you will be using the result of this function directly
in an email.
"""
if not s:
return s
- if not binary:
- s = fix_eols(s)
-
encvec = []
max_unencoded = maxlinelen * 3 // 4
for i in range(0, len(s), max_unencoded):
# BAW: should encode() inherit b2a_base64()'s dubious behavior in
# adding a newline to the encoded string?
- enc = b2a_base64(s[i:i + max_unencoded])
+ enc = b2a_base64(s[i:i + max_unencoded]).decode("ascii")
if enc.endswith(NL) and eol != NL:
enc = enc[:-1] + eol
encvec.append(enc)
return EMPTYSTRING.join(encvec)
-# For convenience and backwards compatibility w/ standard base64 module
-body_encode = encode
-encodestring = encode
-
-
-def decode(s, convert_eols=None):
- """Decode a raw base64 string.
-
- If convert_eols is set to a string value, all canonical email linefeeds,
- e.g. "\\r\\n", in the decoded text will be converted to the value of
- convert_eols. os.linesep is a good choice for convert_eols if you are
- decoding a text attachment.
+def decode(string):
+ """Decode a raw base64 string, returning a bytes object.
This function does not parse a full MIME header value encoded with
base64 (like =?iso-8895-1?b?bmloISBuaWgh?=) -- please use the high
level email.header class for that functionality.
"""
- if not s:
- return s
-
- dec = a2b_base64(s)
- if convert_eols:
- return dec.replace(CRLF, convert_eols)
- return dec
+ if not string:
+ return bytes()
+ elif isinstance(string, str):
+ return a2b_base64(string.encode('raw-unicode-escape'))
+ else:
+ return a2b_base64(string)
# For convenience and backwards compatibility w/ standard base64 module
diff --git a/Lib/email/charset.py b/Lib/email/charset.py
index dddaa76..f22be2c 100644
--- a/Lib/email/charset.py
+++ b/Lib/email/charset.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2001-2006 Python Software Foundation
+# Copyright (C) 2001-2007 Python Software Foundation
# Author: Ben Gertzfield, Barry Warsaw
# Contact: email-sig@python.org
@@ -9,7 +9,8 @@ __all__ = [
'add_codec',
]
-import codecs
+from functools import partial
+
import email.base64mime
import email.quoprimime
@@ -24,9 +25,11 @@ BASE64 = 2 # Base64
SHORTEST = 3 # the shorter of QP and base64, but only for headers
# In "=?charset?q?hello_world?=", the =?, ?q?, and ?= add up to 7
-MISC_LEN = 7
+RFC2047_CHROME_LEN = 7
DEFAULT_CHARSET = 'us-ascii'
+UNKNOWN8BIT = 'unknown-8bit'
+EMPTYSTRING = ''
@@ -58,8 +61,6 @@ CHARSETS = {
'iso-2022-jp': (BASE64, None, None),
'koi8-r': (BASE64, BASE64, None),
'utf-8': (SHORTEST, BASE64, 'utf-8'),
- # We're making this one up to represent raw unencoded 8-bit
- '8bit': (None, BASE64, 'utf-8'),
}
# Aliases for other commonly-used names for character sets. Map
@@ -153,6 +154,16 @@ def add_codec(charset, codecname):
+# Convenience function for encoding strings, taking into account
+# that they might be unknown-8bit (i.e. have surrogate-escaped bytes)
+def _encode(string, codec):
+ if codec == UNKNOWN8BIT:
+ return string.encode('ascii', 'surrogateescape')
+ else:
+ return string.encode(codec)
+
+
+
class Charset:
"""Map character sets to their email properties.
@@ -203,19 +214,14 @@ class Charset:
# is already a unicode, we leave it at that, but ensure that the
# charset is ASCII, as the standard (RFC XXX) requires.
try:
- if isinstance(input_charset, unicode):
+ if isinstance(input_charset, str):
input_charset.encode('ascii')
else:
- input_charset = unicode(input_charset, 'ascii')
+ input_charset = str(input_charset, 'ascii')
except UnicodeError:
raise errors.CharsetError(input_charset)
- input_charset = input_charset.lower().encode('ascii')
- # Set the input charset after filtering through the aliases and/or codecs
- if not (input_charset in ALIASES or input_charset in CHARSETS):
- try:
- input_charset = codecs.lookup(input_charset).name
- except LookupError:
- pass
+ input_charset = input_charset.lower()
+ # Set the input charset after filtering through the aliases
self.input_charset = ALIASES.get(input_charset, input_charset)
# We can try to guess which encoding and conversion to use by the
# charset_map dictionary. Try that first, but let the user override
@@ -257,7 +263,7 @@ class Charset:
Returns "quoted-printable" if self.body_encoding is QP.
Returns "base64" if self.body_encoding is BASE64.
- Returns "7bit" otherwise.
+ Returns conversion function otherwise.
"""
assert self.body_encoding != SHORTEST
if self.body_encoding == QP:
@@ -267,60 +273,6 @@ class Charset:
else:
return encode_7or8bit
- def convert(self, s):
- """Convert a string from the input_codec to the output_codec."""
- if self.input_codec != self.output_codec:
- return unicode(s, self.input_codec).encode(self.output_codec)
- else:
- return s
-
- def to_splittable(self, s):
- """Convert a possibly multibyte string to a safely splittable format.
-
- Uses the input_codec to try and convert the string to Unicode, so it
- can be safely split on character boundaries (even for multibyte
- characters).
-
- Returns the string as-is if it isn't known how to convert it to
- Unicode with the input_charset.
-
- Characters that could not be converted to Unicode will be replaced
- with the Unicode replacement character U+FFFD.
- """
- if isinstance(s, unicode) or self.input_codec is None:
- return s
- try:
- return unicode(s, self.input_codec, 'replace')
- except LookupError:
- # Input codec not installed on system, so return the original
- # string unchanged.
- return s
-
- def from_splittable(self, ustr, to_output=True):
- """Convert a splittable string back into an encoded string.
-
- Uses the proper codec to try and convert the string from Unicode back
- into an encoded format. Return the string as-is if it is not Unicode,
- or if it could not be converted from Unicode.
-
- Characters that could not be converted from Unicode will be replaced
- with an appropriate character (usually '?').
-
- If to_output is True (the default), uses output_codec to convert to an
- encoded format. If to_output is False, uses input_codec.
- """
- if to_output:
- codec = self.output_codec
- else:
- codec = self.input_codec
- if not isinstance(ustr, unicode) or codec is None:
- return ustr
- try:
- return ustr.encode(codec, 'replace')
- except LookupError:
- # Output codec not installed
- return ustr
-
def get_output_charset(self):
"""Return the output character set.
@@ -329,69 +281,119 @@ class Charset:
"""
return self.output_charset or self.input_charset
- def encoded_header_len(self, s):
- """Return the length of the encoded header string."""
- cset = self.get_output_charset()
- # The len(s) of a 7bit encoding is len(s)
- if self.header_encoding == BASE64:
- return email.base64mime.base64_len(s) + len(cset) + MISC_LEN
- elif self.header_encoding == QP:
- return email.quoprimime.header_quopri_len(s) + len(cset) + MISC_LEN
- elif self.header_encoding == SHORTEST:
- lenb64 = email.base64mime.base64_len(s)
- lenqp = email.quoprimime.header_quopri_len(s)
- return min(lenb64, lenqp) + len(cset) + MISC_LEN
- else:
- return len(s)
-
- def header_encode(self, s, convert=False):
- """Header-encode a string, optionally converting it to output_charset.
-
- If convert is True, the string will be converted from the input
- charset to the output charset automatically. This is not useful for
- multibyte character sets, which have line length issues (multibyte
- characters must be split on a character, not a byte boundary); use the
- high-level Header class to deal with these issues. convert defaults
- to False.
+ def header_encode(self, string):
+ """Header-encode a string by converting it first to bytes.
The type of encoding (base64 or quoted-printable) will be based on
- self.header_encoding.
+ this charset's `header_encoding`.
+
+ :param string: A unicode string for the header. It must be possible
+ to encode this string to bytes using the character set's
+ output codec.
+ :return: The encoded string, with RFC 2047 chrome.
"""
- cset = self.get_output_charset()
- if convert:
- s = self.convert(s)
+ codec = self.output_codec or 'us-ascii'
+ header_bytes = _encode(string, codec)
# 7bit/8bit encodings return the string unchanged (modulo conversions)
+ encoder_module = self._get_encoder(header_bytes)
+ if encoder_module is None:
+ return string
+ return encoder_module.header_encode(header_bytes, codec)
+
+ def header_encode_lines(self, string, maxlengths):
+ """Header-encode a string by converting it first to bytes.
+
+ This is similar to `header_encode()` except that the string is fit
+ into maximum line lengths as given by the argument.
+
+ :param string: A unicode string for the header. It must be possible
+ to encode this string to bytes using the character set's
+ output codec.
+ :param maxlengths: Maximum line length iterator. Each element
+ returned from this iterator will provide the next maximum line
+ length. This parameter is used as an argument to built-in next()
+ and should never be exhausted. The maximum line lengths should
+ not count the RFC 2047 chrome. These line lengths are only a
+ hint; the splitter does the best it can.
+ :return: Lines of encoded strings, each with RFC 2047 chrome.
+ """
+ # See which encoding we should use.
+ codec = self.output_codec or 'us-ascii'
+ header_bytes = _encode(string, codec)
+ encoder_module = self._get_encoder(header_bytes)
+ encoder = partial(encoder_module.header_encode, charset=codec)
+ # Calculate the number of characters that the RFC 2047 chrome will
+ # contribute to each line.
+ charset = self.get_output_charset()
+ extra = len(charset) + RFC2047_CHROME_LEN
+ # Now comes the hard part. We must encode bytes but we can't split on
+ # bytes because some character sets are variable length and each
+ # encoded word must stand on its own. So the problem is you have to
+ # encode to bytes to figure out this word's length, but you must split
+ # on characters. This causes two problems: first, we don't know how
+ # many octets a specific substring of unicode characters will get
+ # encoded to, and second, we don't know how many ASCII characters
+ # those octets will get encoded to. Unless we try it. Which seems
+ # inefficient. In the interest of being correct rather than fast (and
+ # in the hope that there will be few encoded headers in any such
+ # message), brute force it. :(
+ lines = []
+ current_line = []
+ maxlen = next(maxlengths) - extra
+ for character in string:
+ current_line.append(character)
+ this_line = EMPTYSTRING.join(current_line)
+ length = encoder_module.header_length(_encode(this_line, charset))
+ if length > maxlen:
+ # This last character doesn't fit so pop it off.
+ current_line.pop()
+ # Does nothing fit on the first line?
+ if not lines and not current_line:
+ lines.append(None)
+ else:
+ separator = (' ' if lines else '')
+ joined_line = EMPTYSTRING.join(current_line)
+ header_bytes = _encode(joined_line, codec)
+ lines.append(encoder(header_bytes))
+ current_line = [character]
+ maxlen = next(maxlengths) - extra
+ joined_line = EMPTYSTRING.join(current_line)
+ header_bytes = _encode(joined_line, codec)
+ lines.append(encoder(header_bytes))
+ return lines
+
+ def _get_encoder(self, header_bytes):
if self.header_encoding == BASE64:
- return email.base64mime.header_encode(s, cset)
+ return email.base64mime
elif self.header_encoding == QP:
- return email.quoprimime.header_encode(s, cset, maxlinelen=None)
+ return email.quoprimime
elif self.header_encoding == SHORTEST:
- lenb64 = email.base64mime.base64_len(s)
- lenqp = email.quoprimime.header_quopri_len(s)
- if lenb64 < lenqp:
- return email.base64mime.header_encode(s, cset)
+ len64 = email.base64mime.header_length(header_bytes)
+ lenqp = email.quoprimime.header_length(header_bytes)
+ if len64 < lenqp:
+ return email.base64mime
else:
- return email.quoprimime.header_encode(s, cset, maxlinelen=None)
+ return email.quoprimime
else:
- return s
-
- def body_encode(self, s, convert=True):
- """Body-encode a string and convert it to output_charset.
+ return None
- If convert is True (the default), the string will be converted from
- the input charset to output charset automatically. Unlike
- header_encode(), there are no issues with byte boundaries and
- multibyte charsets in email bodies, so this is usually pretty safe.
+ def body_encode(self, string):
+ """Body-encode a string by converting it first to bytes.
The type of encoding (base64 or quoted-printable) will be based on
- self.body_encoding.
+ self.body_encoding. If body_encoding is None, we assume the
+ output charset is a 7bit encoding, so re-encoding the decoded
+ string using the ascii codec produces the correct string version
+ of the content.
"""
- if convert:
- s = self.convert(s)
# 7bit/8bit encodings return the string unchanged (module conversions)
if self.body_encoding is BASE64:
- return email.base64mime.body_encode(s)
+ if isinstance(string, str):
+ string = string.encode(self.output_charset)
+ return email.base64mime.body_encode(string)
elif self.body_encoding is QP:
- return email.quoprimime.body_encode(s)
+ return email.quoprimime.body_encode(string)
else:
- return s
+ if isinstance(string, str):
+ string = string.encode(self.output_charset).decode('ascii')
+ return string
diff --git a/Lib/email/encoders.py b/Lib/email/encoders.py
index af45e62..e5c099f 100644
--- a/Lib/email/encoders.py
+++ b/Lib/email/encoders.py
@@ -11,8 +11,8 @@ __all__ = [
'encode_quopri',
]
-import base64
+from base64 import encodebytes as _bencode
from quopri import encodestring as _encodestring
@@ -23,26 +23,13 @@ def _qencode(s):
return enc.replace(' ', '=20')
-def _bencode(s):
- # We can't quite use base64.encodestring() since it tacks on a "courtesy
- # newline". Blech!
- if not s:
- return s
- hasnewline = (s[-1] == '\n')
- value = base64.encodestring(s)
- if not hasnewline and value[-1] == '\n':
- return value[:-1]
- return value
-
-
-
def encode_base64(msg):
"""Encode the message's payload in Base64.
Also, add an appropriate Content-Transfer-Encoding header.
"""
orig = msg.get_payload()
- encdata = _bencode(orig)
+ encdata = str(_bencode(orig), 'ascii')
msg.set_payload(encdata)
msg['Content-Transfer-Encoding'] = 'base64'
@@ -67,12 +54,21 @@ def encode_7or8bit(msg):
# There's no payload. For backwards compatibility we use 7bit
msg['Content-Transfer-Encoding'] = '7bit'
return
- # We play a trick to make this go fast. If encoding to ASCII succeeds, we
- # know the data must be 7bit, otherwise treat it as 8bit.
+ # We play a trick to make this go fast. If encoding/decoding to ASCII
+ # succeeds, we know the data must be 7bit, otherwise treat it as 8bit.
try:
- orig.encode('ascii')
+ if isinstance(orig, str):
+ orig.encode('ascii')
+ else:
+ orig.decode('ascii')
except UnicodeError:
- msg['Content-Transfer-Encoding'] = '8bit'
+ # iso-2022-* is non-ASCII but still 7-bit
+ charset = msg.get_charset()
+ output_cset = charset and charset.output_charset
+ if output_cset and output_cset.lower().startswith('iso-2022-'):
+ msg['Content-Transfer-Encoding'] = '7bit'
+ else:
+ msg['Content-Transfer-Encoding'] = '8bit'
else:
msg['Content-Transfer-Encoding'] = '7bit'
diff --git a/Lib/email/feedparser.py b/Lib/email/feedparser.py
index 15db26d..16ed288 100644
--- a/Lib/email/feedparser.py
+++ b/Lib/email/feedparser.py
@@ -19,7 +19,7 @@ the current message. Defects are just instances that live on the message
object's .defects attribute.
"""
-__all__ = ['FeedParser']
+__all__ = ['FeedParser', 'BytesFeedParser']
import re
@@ -126,7 +126,7 @@ class BufferedSubFile(object):
def __iter__(self):
return self
- def next(self):
+ def __next__(self):
line = self.readline()
if line == '':
raise StopIteration
@@ -142,7 +142,7 @@ class FeedParser:
self._factory = _factory
self._input = BufferedSubFile()
self._msgstack = []
- self._parse = self._parsegen().next
+ self._parse = self._parsegen().__next__
self._cur = None
self._last = None
self._headersonly = False
@@ -368,12 +368,12 @@ class FeedParser:
end = len(mo.group(0))
self._last.epilogue = epilogue[:-end]
else:
- payload = self._last.get_payload()
- if isinstance(payload, basestring):
+ payload = self._last._payload
+ if isinstance(payload, str):
mo = NLCRE_eol.search(payload)
if mo:
payload = payload[:-len(mo.group(0))]
- self._last.set_payload(payload)
+ self._last._payload = payload
self._input.pop_eof_matcher()
self._pop_message()
# Set the multipart up for newline cleansing, which will
@@ -482,3 +482,10 @@ class FeedParser:
if lastheader:
# XXX reconsider the joining of folded lines
self._cur[lastheader] = EMPTYSTRING.join(lastvalue).rstrip('\r\n')
+
+
+class BytesFeedParser(FeedParser):
+ """Like FeedParser, but feed accepts bytes."""
+
+ def feed(self, data):
+ super().feed(data.decode('ascii', 'surrogateescape'))
diff --git a/Lib/email/generator.py b/Lib/email/generator.py
index 5626ab9..c6bfb70 100644
--- a/Lib/email/generator.py
+++ b/Lib/email/generator.py
@@ -1,9 +1,10 @@
# Copyright (C) 2001-2010 Python Software Foundation
+# Author: Barry Warsaw
# Contact: email-sig@python.org
"""Classes to generate plain text from a message object tree."""
-__all__ = ['Generator', 'DecodedGenerator']
+__all__ = ['Generator', 'DecodedGenerator', 'BytesGenerator']
import re
import sys
@@ -11,22 +12,15 @@ import time
import random
import warnings
-from cStringIO import StringIO
+from io import StringIO, BytesIO
from email.header import Header
+from email.message import _has_surrogates
UNDERSCORE = '_'
-NL = '\n'
+NL = '\n' # XXX: no longer used by the code below.
fcre = re.compile(r'^From ', re.MULTILINE)
-def _is8bitstring(s):
- if isinstance(s, str):
- try:
- unicode(s, 'us-ascii')
- except UnicodeError:
- return True
- return False
-
class Generator:
@@ -64,8 +58,8 @@ class Generator:
# Just delegate to the file object
self._fp.write(s)
- def flatten(self, msg, unixfrom=False):
- """Print the message object tree rooted at msg to the output file
+ def flatten(self, msg, unixfrom=False, linesep='\n'):
+ r"""Print the message object tree rooted at msg to the output file
specified when the Generator instance was created.
unixfrom is a flag that forces the printing of a Unix From_ delimiter
@@ -74,12 +68,26 @@ class Generator:
is False to inhibit the printing of any From_ delimiter.
Note that for subobjects, no From_ line is printed.
+
+ linesep specifies the characters used to indicate a new line in
+ the output. The default value is the most useful for typical
+ Python applications, but it can be set to \r\n to produce RFC-compliant
+ line separators when needed.
+
"""
+ # We use the _XXX constants for operating on data that comes directly
+ # from the msg, and _encoded_XXX constants for operating on data that
+ # has already been converted (to bytes in the BytesGenerator) and
+ # inserted into a temporary buffer.
+ self._NL = linesep
+ self._encoded_NL = self._encode(linesep)
+ self._EMPTY = ''
+ self._encoded_EMTPY = self._encode('')
if unixfrom:
ufrom = msg.get_unixfrom()
if not ufrom:
ufrom = 'From nobody ' + time.ctime(time.time())
- print >> self._fp, ufrom
+ self.write(ufrom + self._NL)
self._write(msg)
def clone(self, fp):
@@ -90,6 +98,27 @@ class Generator:
# Protected interface - undocumented ;/
#
+ # Note that we use 'self.write' when what we are writing is coming from
+ # the source, and self._fp.write when what we are writing is coming from a
+ # buffer (because the Bytes subclass has already had a chance to transform
+ # the data in its write method in that case). This is an entirely
+ # pragmatic split determined by experiment; we could be more general by
+ # always using write and having the Bytes subclass write method detect when
+ # it has already transformed the input; but, since this whole thing is a
+ # hack anyway this seems good enough.
+
+ # Similarly, we have _XXX and _encoded_XXX attributes that are used on
+ # source and buffer data, respectively.
+ _encoded_EMPTY = ''
+
+ def _new_buffer(self):
+ # BytesGenerator overrides this to return BytesIO.
+ return StringIO()
+
+ def _encode(self, s):
+ # BytesGenerator overrides this to encode strings to bytes.
+ return s
+
def _write(self, msg):
# We can't write the headers yet because of the following scenario:
# say a multipart message includes the boundary string somewhere in
@@ -98,13 +127,13 @@ class Generator:
# parameter.
#
# The way we do this, so as to make the _handle_*() methods simpler,
- # is to cache any subpart writes into a StringIO. The we write the
- # headers and the StringIO contents. That way, subpart handlers can
+ # is to cache any subpart writes into a buffer. Then we write the
+ # headers and the buffer contents. That way, subpart handlers can
# Do The Right Thing, and can still modify the Content-Type: header if
# necessary.
oldfp = self._fp
try:
- self._fp = sfp = StringIO()
+ self._fp = sfp = self._new_buffer()
self._dispatch(msg)
finally:
self._fp = oldfp
@@ -139,31 +168,17 @@ class Generator:
def _write_headers(self, msg):
for h, v in msg.items():
- print >> self._fp, '%s:' % h,
- if self._maxheaderlen == 0:
- # Explicit no-wrapping
- print >> self._fp, v
- elif isinstance(v, Header):
- # Header instances know what to do
- print >> self._fp, v.encode()
- elif _is8bitstring(v):
- # If we have raw 8bit data in a byte string, we have no idea
- # what the encoding is. There is no safe way to split this
- # string. If it's ascii-subset, then we could do a normal
- # ascii split, but if it's multibyte then we could break the
- # string. There's no way to know so the least harm seems to
- # be to not split the string and risk it being too long.
- print >> self._fp, v
+ self.write('%s: ' % h)
+ if isinstance(v, Header):
+ self.write(v.encode(
+ maxlinelen=self._maxheaderlen, linesep=self._NL)+self._NL)
else:
- # Header's got lots of smarts, so use it. Note that this is
- # fundamentally broken though because we lose idempotency when
- # the header string is continued with tabs. It will now be
- # continued with spaces. This was reversedly broken before we
- # fixed bug 1974. Either way, we lose.
- print >> self._fp, Header(
- v, maxlinelen=self._maxheaderlen, header_name=h).encode()
+ # Header's got lots of smarts, so use it.
+ header = Header(v, maxlinelen=self._maxheaderlen,
+ header_name=h)
+ self.write(header.encode(linesep=self._NL)+self._NL)
# A blank line always separates headers from body
- print >> self._fp
+ self.write(self._NL)
#
# Handlers for writing types and subtypes
@@ -173,11 +188,17 @@ class Generator:
payload = msg.get_payload()
if payload is None:
return
- if not isinstance(payload, basestring):
+ if not isinstance(payload, str):
raise TypeError('string payload expected: %s' % type(payload))
+ if _has_surrogates(msg._payload):
+ charset = msg.get_param('charset')
+ if charset is not None:
+ del msg['content-transfer-encoding']
+ msg.set_payload(payload, charset)
+ payload = msg.get_payload()
if self._mangle_from_:
payload = fcre.sub('>From ', payload)
- self._fp.write(payload)
+ self.write(payload)
# Default body handler
_writeBody = _handle_text
@@ -190,25 +211,25 @@ class Generator:
subparts = msg.get_payload()
if subparts is None:
subparts = []
- elif isinstance(subparts, basestring):
+ elif isinstance(subparts, str):
# e.g. a non-strict parse of a message with no starting boundary.
- self._fp.write(subparts)
+ self.write(subparts)
return
elif not isinstance(subparts, list):
# Scalar payload
subparts = [subparts]
for part in subparts:
- s = StringIO()
+ s = self._new_buffer()
g = self.clone(s)
- g.flatten(part, unixfrom=False)
+ g.flatten(part, unixfrom=False, linesep=self._NL)
msgtexts.append(s.getvalue())
# BAW: What about boundaries that are wrapped in double-quotes?
boundary = msg.get_boundary()
if not boundary:
# Create a boundary that doesn't appear in any of the
# message texts.
- alltext = NL.join(msgtexts)
- boundary = _make_boundary(alltext)
+ alltext = self._encoded_NL.join(msgtexts)
+ boundary = self._make_boundary(alltext)
msg.set_boundary(boundary)
# If there's a preamble, write it out, with a trailing CRLF
if msg.preamble is not None:
@@ -216,9 +237,9 @@ class Generator:
preamble = fcre.sub('>From ', msg.preamble)
else:
preamble = msg.preamble
- print >> self._fp, preamble
+ self.write(preamble + self._NL)
# dash-boundary transport-padding CRLF
- print >> self._fp, '--' + boundary
+ self.write('--' + boundary + self._NL)
# body-part
if msgtexts:
self._fp.write(msgtexts.pop(0))
@@ -227,18 +248,18 @@ class Generator:
# --> CRLF body-part
for body_part in msgtexts:
# delimiter transport-padding CRLF
- print >> self._fp, '\n--' + boundary
+ self.write(self._NL + '--' + boundary + self._NL)
# body-part
self._fp.write(body_part)
# close-delimiter transport-padding
- self._fp.write('\n--' + boundary + '--')
+ self.write(self._NL + '--' + boundary + '--')
if msg.epilogue is not None:
- print >> self._fp
+ self.write(self._NL)
if self._mangle_from_:
epilogue = fcre.sub('>From ', msg.epilogue)
else:
epilogue = msg.epilogue
- self._fp.write(epilogue)
+ self.write(epilogue)
def _handle_multipart_signed(self, msg):
# The contents of signed parts has to stay unmodified in order to keep
@@ -257,23 +278,23 @@ class Generator:
# block and the boundary. Sigh.
blocks = []
for part in msg.get_payload():
- s = StringIO()
+ s = self._new_buffer()
g = self.clone(s)
- g.flatten(part, unixfrom=False)
+ g.flatten(part, unixfrom=False, linesep=self._NL)
text = s.getvalue()
- lines = text.split('\n')
+ lines = text.split(self._encoded_NL)
# Strip off the unnecessary trailing empty line
- if lines and lines[-1] == '':
- blocks.append(NL.join(lines[:-1]))
+ if lines and lines[-1] == self._encoded_EMPTY:
+ blocks.append(self._encoded_NL.join(lines[:-1]))
else:
blocks.append(text)
# Now join all the blocks with an empty line. This has the lovely
# effect of separating each block with an empty line, but not adding
# an extra one after the last one.
- self._fp.write(NL.join(blocks))
+ self._fp.write(self._encoded_NL.join(blocks))
def _handle_message(self, msg):
- s = StringIO()
+ s = self._new_buffer()
g = self.clone(s)
# The payload of a message/rfc822 part should be a multipart sequence
# of length 1. The zeroth element of the list should be the Message
@@ -286,10 +307,100 @@ class Generator:
# in that case we just emit the string body.
payload = msg.get_payload()
if isinstance(payload, list):
- g.flatten(msg.get_payload(0), unixfrom=False)
+ g.flatten(msg.get_payload(0), unixfrom=False, linesep=self._NL)
payload = s.getvalue()
self._fp.write(payload)
+ # This used to be a module level function; we use a classmethod for this
+ # and _compile_re so we can continue to provide the module level function
+ # for backward compatibility by doing
+ # _make_boundary = Generator._make_boundary
+ # at the end of the module. It *is* internal, so we could drop that...
+ @classmethod
+ def _make_boundary(cls, text=None):
+ # Craft a random boundary. If text is given, ensure that the chosen
+ # boundary doesn't appear in the text.
+ token = random.randrange(sys.maxsize)
+ boundary = ('=' * 15) + (_fmt % token) + '=='
+ if text is None:
+ return boundary
+ b = boundary
+ counter = 0
+ while True:
+ cre = cls._compile_re('^--' + re.escape(b) + '(--)?$', re.MULTILINE)
+ if not cre.search(text):
+ break
+ b = boundary + '.' + str(counter)
+ counter += 1
+ return b
+
+ @classmethod
+ def _compile_re(cls, s, flags):
+ return re.compile(s, flags)
+
+
+class BytesGenerator(Generator):
+ """Generates a bytes version of a Message object tree.
+
+ Functionally identical to the base Generator except that the output is
+ bytes and not string. When surrogates were used in the input to encode
+ bytes, these are decoded back to bytes for output.
+
+ The outfp object must accept bytes in its write method.
+ """
+
+ # Bytes version of this constant for use in manipulating data from
+ # the BytesIO buffer.
+ _encoded_EMPTY = b''
+
+ def write(self, s):
+ self._fp.write(s.encode('ascii', 'surrogateescape'))
+
+ def _new_buffer(self):
+ return BytesIO()
+
+ def _encode(self, s):
+ return s.encode('ascii')
+
+ def _write_headers(self, msg):
+ # This is almost the same as the string version, except for handling
+ # strings with 8bit bytes.
+ for h, v in msg._headers:
+ self.write('%s: ' % h)
+ if isinstance(v, Header):
+ self.write(v.encode(maxlinelen=self._maxheaderlen)+self._NL)
+ elif _has_surrogates(v):
+ # If we have raw 8bit data in a byte string, we have no idea
+ # what the encoding is. There is no safe way to split this
+ # string. If it's ascii-subset, then we could do a normal
+ # ascii split, but if it's multibyte then we could break the
+ # string. There's no way to know so the least harm seems to
+ # be to not split the string and risk it being too long.
+ self.write(v+NL)
+ else:
+ # Header's got lots of smarts and this string is safe...
+ header = Header(v, maxlinelen=self._maxheaderlen,
+ header_name=h)
+ self.write(header.encode(linesep=self._NL)+self._NL)
+ # A blank line always separates headers from body
+ self.write(self._NL)
+
+ def _handle_text(self, msg):
+ # If the string has surrogates the original source was bytes, so
+ # just write it back out.
+ if msg._payload is None:
+ return
+ if _has_surrogates(msg._payload):
+ if self._mangle_from_:
+ msg._payload = fcre.sub(">From ", msg._payload)
+ self.write(msg._payload)
+ else:
+ super(BytesGenerator,self)._handle_text(msg)
+
+ @classmethod
+ def _compile_re(cls, s, flags):
+ return re.compile(s.encode('ascii'), flags)
+
_FMT = '[Non-text (%(type)s) part of message omitted, filename %(filename)s]'
@@ -332,12 +443,12 @@ class DecodedGenerator(Generator):
for part in msg.walk():
maintype = part.get_content_maintype()
if maintype == 'text':
- print >> self, part.get_payload(decode=True)
+ print(part.get_payload(decode=False), file=self)
elif maintype == 'multipart':
# Just skip this
pass
else:
- print >> self, self._fmt % {
+ print(self._fmt % {
'type' : part.get_content_type(),
'maintype' : part.get_content_maintype(),
'subtype' : part.get_content_subtype(),
@@ -346,27 +457,13 @@ class DecodedGenerator(Generator):
'[no description]'),
'encoding' : part.get('Content-Transfer-Encoding',
'[no encoding]'),
- }
+ }, file=self)
-# Helper
-_width = len(repr(sys.maxint-1))
+# Helper used by Generator._make_boundary
+_width = len(repr(sys.maxsize-1))
_fmt = '%%0%dd' % _width
-def _make_boundary(text=None):
- # Craft a random boundary. If text is given, ensure that the chosen
- # boundary doesn't appear in the text.
- token = random.randrange(sys.maxint)
- boundary = ('=' * 15) + (_fmt % token) + '=='
- if text is None:
- return boundary
- b = boundary
- counter = 0
- while True:
- cre = re.compile('^--' + re.escape(b) + '(--)?$', re.MULTILINE)
- if not cre.search(text):
- break
- b = boundary + '.' + str(counter)
- counter += 1
- return b
+# Backward compatibility
+_make_boundary = Generator._make_boundary
diff --git a/Lib/email/header.py b/Lib/email/header.py
index 2cf870f..e33324a 100644
--- a/Lib/email/header.py
+++ b/Lib/email/header.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2002-2006 Python Software Foundation
+# Copyright (C) 2002-2007 Python Software Foundation
# Author: Ben Gertzfield, Barry Warsaw
# Contact: email-sig@python.org
@@ -17,15 +17,16 @@ import email.quoprimime
import email.base64mime
from email.errors import HeaderParseError
-from email.charset import Charset
+from email import charset as _charset
+Charset = _charset.Charset
NL = '\n'
SPACE = ' '
-USPACE = u' '
+BSPACE = b' '
SPACE8 = ' ' * 8
-UEMPTYSTRING = u''
-
-MAXLINELEN = 76
+EMPTYSTRING = ''
+MAXLINELEN = 78
+FWS = ' \t'
USASCII = Charset('us-ascii')
UTF8 = Charset('utf-8')
@@ -61,60 +62,82 @@ _max_append = email.quoprimime._max_append
def decode_header(header):
"""Decode a message header value without converting charset.
- Returns a list of (decoded_string, charset) pairs containing each of the
- decoded parts of the header. Charset is None for non-encoded parts of the
- header, otherwise a lower-case string containing the name of the character
- set specified in the encoded string.
+ Returns a list of (string, charset) pairs containing each of the decoded
+ parts of the header. Charset is None for non-encoded parts of the header,
+ otherwise a lower-case string containing the name of the character set
+ specified in the encoded string.
+
+ header may be a string that may or may not contain RFC2047 encoded words,
+ or it may be a Header object.
An email.errors.HeaderParseError may be raised when certain decoding error
occurs (e.g. a base64 decoding exception).
"""
- # If no encoding, just return the header
- header = str(header)
+ # If it is a Header object, we can just return the encoded chunks.
+ if hasattr(header, '_chunks'):
+ return [(_charset._encode(string, str(charset)), str(charset))
+ for string, charset in header._chunks]
+ # If no encoding, just return the header with no charset.
if not ecre.search(header):
return [(header, None)]
- decoded = []
- dec = ''
+ # First step is to parse all the encoded parts into triplets of the form
+ # (encoded_string, encoding, charset). For unencoded strings, the last
+ # two parts will be None.
+ words = []
for line in header.splitlines():
- # This line might not have an encoding in it
- if not ecre.search(line):
- decoded.append((line, None))
- continue
parts = ecre.split(line)
while parts:
- unenc = parts.pop(0).strip()
- if unenc:
- # Should we continue a long line?
- if decoded and decoded[-1][1] is None:
- decoded[-1] = (decoded[-1][0] + SPACE + unenc, None)
- else:
- decoded.append((unenc, None))
+ unencoded = parts.pop(0).strip()
+ if unencoded:
+ words.append((unencoded, None, None))
if parts:
- charset, encoding = [s.lower() for s in parts[0:2]]
- encoded = parts[2]
- dec = None
- if encoding == 'q':
- dec = email.quoprimime.header_decode(encoded)
- elif encoding == 'b':
- paderr = len(encoded) % 4 # Postel's law: add missing padding
- if paderr:
- encoded += '==='[:4 - paderr]
- try:
- dec = email.base64mime.decode(encoded)
- except binascii.Error:
- # Turn this into a higher level exception. BAW: Right
- # now we throw the lower level exception away but
- # when/if we get exception chaining, we'll preserve it.
- raise HeaderParseError
- if dec is None:
- dec = encoded
-
- if decoded and decoded[-1][1] == charset:
- decoded[-1] = (decoded[-1][0] + dec, decoded[-1][1])
- else:
- decoded.append((dec, charset))
- del parts[0:3]
- return decoded
+ charset = parts.pop(0).lower()
+ encoding = parts.pop(0).lower()
+ encoded = parts.pop(0)
+ words.append((encoded, encoding, charset))
+ # The next step is to decode each encoded word by applying the reverse
+ # base64 or quopri transformation. decoded_words is now a list of the
+ # form (decoded_word, charset).
+ decoded_words = []
+ for encoded_string, encoding, charset in words:
+ if encoding is None:
+ # This is an unencoded word.
+ decoded_words.append((encoded_string, charset))
+ elif encoding == 'q':
+ word = email.quoprimime.header_decode(encoded_string)
+ decoded_words.append((word, charset))
+ elif encoding == 'b':
+ paderr = len(encoded_string) % 4 # Postel's law: add missing padding
+ if paderr:
+ encoded_string += '==='[:4 - paderr]
+ try:
+ word = email.base64mime.decode(encoded_string)
+ except binascii.Error:
+ raise HeaderParseError('Base64 decoding error')
+ else:
+ decoded_words.append((word, charset))
+ else:
+ raise AssertionError('Unexpected encoding: ' + encoding)
+ # Now convert all words to bytes and collapse consecutive runs of
+ # similarly encoded words.
+ collapsed = []
+ last_word = last_charset = None
+ for word, charset in decoded_words:
+ if isinstance(word, str):
+ word = bytes(word, 'raw-unicode-escape')
+ if last_word is None:
+ last_word = word
+ last_charset = charset
+ elif charset != last_charset:
+ collapsed.append((last_word, last_charset))
+ last_word = word
+ last_charset = charset
+ elif last_charset is None:
+ last_word += BSPACE + word
+ else:
+ last_word += word
+ collapsed.append((last_word, last_charset))
+ return collapsed
@@ -159,10 +182,11 @@ class Header:
charset is used both as s's initial charset and as the default for
subsequent .append() calls.
- The maximum line length can be specified explicit via maxlinelen. For
+ The maximum line length can be specified explicitly via maxlinelen. For
splitting the first line to a shorter value (to account for the field
header which isn't included in s, e.g. `Subject') pass in the name of
- the field in header_name. The default maxlinelen is 76.
+ the field in header_name. The default maxlinelen is 78 as recommended
+ by RFC 2822.
continuation_ws must be RFC 2822 compliant folding whitespace (usually
either a space or a hard tab) which will be prepended to continuation
@@ -172,60 +196,54 @@ class Header:
"""
if charset is None:
charset = USASCII
- if not isinstance(charset, Charset):
+ elif not isinstance(charset, Charset):
charset = Charset(charset)
self._charset = charset
self._continuation_ws = continuation_ws
- cws_expanded_len = len(continuation_ws.replace('\t', SPACE8))
- # BAW: I believe `chunks' and `maxlinelen' should be non-public.
self._chunks = []
if s is not None:
self.append(s, charset, errors)
if maxlinelen is None:
maxlinelen = MAXLINELEN
+ self._maxlinelen = maxlinelen
if header_name is None:
- # We don't know anything about the field header so the first line
- # is the same length as subsequent lines.
- self._firstlinelen = maxlinelen
+ self._headerlen = 0
else:
- # The first line should be shorter to take into account the field
- # header. Also subtract off 2 extra for the colon and space.
- self._firstlinelen = maxlinelen - len(header_name) - 2
- # Second and subsequent lines should subtract off the length in
- # columns of the continuation whitespace prefix.
- self._maxlinelen = maxlinelen - cws_expanded_len
+ # Take the separating colon and space into account.
+ self._headerlen = len(header_name) + 2
def __str__(self):
- """A synonym for self.encode()."""
- return self.encode()
-
- def __unicode__(self):
- """Helper for the built-in unicode function."""
+ """Return the string value of the header."""
+ self._normalize()
uchunks = []
lastcs = None
- for s, charset in self._chunks:
+ for string, charset in self._chunks:
# We must preserve spaces between encoded and non-encoded word
# boundaries, which means for us we need to add a space when we go
# from a charset to None/us-ascii, or from None/us-ascii to a
# charset. Only do this for the second and subsequent chunks.
nextcs = charset
+ if nextcs == _charset.UNKNOWN8BIT:
+ original_bytes = string.encode('ascii', 'surrogateescape')
+ string = original_bytes.decode('ascii', 'replace')
if uchunks:
if lastcs not in (None, 'us-ascii'):
if nextcs in (None, 'us-ascii'):
- uchunks.append(USPACE)
+ uchunks.append(SPACE)
nextcs = None
elif nextcs not in (None, 'us-ascii'):
- uchunks.append(USPACE)
+ uchunks.append(SPACE)
lastcs = nextcs
- uchunks.append(unicode(s, str(charset)))
- return UEMPTYSTRING.join(uchunks)
+ uchunks.append(string)
+ return EMPTYSTRING.join(uchunks)
# Rich comparison operators for equality only. BAW: does it make sense to
# have or explicitly disable <, <=, >, >= operators?
def __eq__(self, other):
# other may be a Header or a string. Both are fine so coerce
- # ourselves to a string, swap the args and do another comparison.
- return other == self.encode()
+ # ourselves to a unicode (of the unencoded header value), swap the
+ # args and do another comparison.
+ return other == str(self)
def __ne__(self, other):
return not self == other
@@ -239,140 +257,42 @@ class Header:
constructor is used.
s may be a byte string or a Unicode string. If it is a byte string
- (i.e. isinstance(s, str) is true), then charset is the encoding of
+ (i.e. isinstance(s, str) is false), then charset is the encoding of
that byte string, and a UnicodeError will be raised if the string
cannot be decoded with that charset. If s is a Unicode string, then
charset is a hint specifying the character set of the characters in
- the string. In this case, when producing an RFC 2822 compliant header
- using RFC 2047 rules, the Unicode string will be encoded using the
- following charsets in order: us-ascii, the charset hint, utf-8. The
- first character set not to provoke a UnicodeError is used.
+ the string. In either case, when producing an RFC 2822 compliant
+ header using RFC 2047 rules, the string will be encoded using the
+ output codec of the charset. If the string cannot be encoded to the
+ output codec, a UnicodeError will be raised.
- Optional `errors' is passed as the third argument to any unicode() or
- ustr.encode() call.
+ Optional `errors' is passed as the errors argument to the decode
+ call if s is a byte string.
"""
if charset is None:
charset = self._charset
elif not isinstance(charset, Charset):
charset = Charset(charset)
- # If the charset is our faux 8bit charset, leave the string unchanged
- if charset != '8bit':
- # We need to test that the string can be converted to unicode and
- # back to a byte string, given the input and output codecs of the
- # charset.
- if isinstance(s, str):
- # Possibly raise UnicodeError if the byte string can't be
- # converted to a unicode with the input codec of the charset.
- incodec = charset.input_codec or 'us-ascii'
- ustr = unicode(s, incodec, errors)
- # Now make sure that the unicode could be converted back to a
- # byte string with the output codec, which may be different
- # than the iput coded. Still, use the original byte string.
- outcodec = charset.output_codec or 'us-ascii'
- ustr.encode(outcodec, errors)
- elif isinstance(s, unicode):
- # Now we have to be sure the unicode string can be converted
- # to a byte string with a reasonable output codec. We want to
- # use the byte string in the chunk.
- for charset in USASCII, charset, UTF8:
- try:
- outcodec = charset.output_codec or 'us-ascii'
- s = s.encode(outcodec, errors)
- break
- except UnicodeError:
- pass
- else:
- assert False, 'utf-8 conversion failed'
- self._chunks.append((s, charset))
-
- def _split(self, s, charset, maxlinelen, splitchars):
- # Split up a header safely for use with encode_chunks.
- splittable = charset.to_splittable(s)
- encoded = charset.from_splittable(splittable, True)
- elen = charset.encoded_header_len(encoded)
- # If the line's encoded length first, just return it
- if elen <= maxlinelen:
- return [(encoded, charset)]
- # If we have undetermined raw 8bit characters sitting in a byte
- # string, we really don't know what the right thing to do is. We
- # can't really split it because it might be multibyte data which we
- # could break if we split it between pairs. The least harm seems to
- # be to not split the header at all, but that means they could go out
- # longer than maxlinelen.
- if charset == '8bit':
- return [(s, charset)]
- # BAW: I'm not sure what the right test here is. What we're trying to
- # do is be faithful to RFC 2822's recommendation that ($2.2.3):
- #
- # "Note: Though structured field bodies are defined in such a way that
- # folding can take place between many of the lexical tokens (and even
- # within some of the lexical tokens), folding SHOULD be limited to
- # placing the CRLF at higher-level syntactic breaks."
- #
- # For now, I can only imagine doing this when the charset is us-ascii,
- # although it's possible that other charsets may also benefit from the
- # higher-level syntactic breaks.
- elif charset == 'us-ascii':
- return self._split_ascii(s, charset, maxlinelen, splitchars)
- # BAW: should we use encoded?
- elif elen == len(s):
- # We can split on _maxlinelen boundaries because we know that the
- # encoding won't change the size of the string
- splitpnt = maxlinelen
- first = charset.from_splittable(splittable[:splitpnt], False)
- last = charset.from_splittable(splittable[splitpnt:], False)
- else:
- # Binary search for split point
- first, last = _binsplit(splittable, charset, maxlinelen)
- # first is of the proper length so just wrap it in the appropriate
- # chrome. last must be recursively split.
- fsplittable = charset.to_splittable(first)
- fencoded = charset.from_splittable(fsplittable, True)
- chunk = [(fencoded, charset)]
- return chunk + self._split(last, charset, self._maxlinelen, splitchars)
-
- def _split_ascii(self, s, charset, firstlen, splitchars):
- chunks = _split_ascii(s, firstlen, self._maxlinelen,
- self._continuation_ws, splitchars)
- return zip(chunks, [charset]*len(chunks))
-
- def _encode_chunks(self, newchunks, maxlinelen):
- # MIME-encode a header with many different charsets and/or encodings.
- #
- # Given a list of pairs (string, charset), return a MIME-encoded
- # string suitable for use in a header field. Each pair may have
- # different charsets and/or encodings, and the resulting header will
- # accurately reflect each setting.
- #
- # Each encoding can be email.utils.QP (quoted-printable, for
- # ASCII-like character sets like iso-8859-1), email.utils.BASE64
- # (Base64, for non-ASCII like character sets like KOI8-R and
- # iso-2022-jp), or None (no encoding).
- #
- # Each pair will be represented on a separate line; the resulting
- # string will be in the format:
- #
- # =?charset1?q?Mar=EDa_Gonz=E1lez_Alonso?=\n
- # =?charset2?b?SvxyZ2VuIEL2aW5n?="
- chunks = []
- for header, charset in newchunks:
- if not header:
- continue
- if charset is None or charset.header_encoding is None:
- s = header
- else:
- s = charset.header_encode(header)
- # Don't add more folding whitespace than necessary
- if chunks and chunks[-1].endswith(' '):
- extra = ''
+ if not isinstance(s, str):
+ input_charset = charset.input_codec or 'us-ascii'
+ if input_charset == _charset.UNKNOWN8BIT:
+ s = s.decode('us-ascii', 'surrogateescape')
else:
- extra = ' '
- _max_append(chunks, s, maxlinelen, extra)
- joiner = NL + self._continuation_ws
- return joiner.join(chunks)
+ s = s.decode(input_charset, errors)
+ # Ensure that the string we're storing can be encoded to the output
+ # character set, otherwise an early error is raised.
+ output_charset = charset.output_codec or 'us-ascii'
+ if output_charset != _charset.UNKNOWN8BIT:
+ try:
+ s.encode(output_charset, errors)
+ except UnicodeEncodeError:
+ if output_charset!='us-ascii':
+ raise
+ charset = UTF8
+ self._chunks.append((s, charset))
- def encode(self, splitchars=';, '):
- """Encode a message header into an RFC-compliant format.
+ def encode(self, splitchars=';, \t', maxlinelen=None, linesep='\n'):
+ r"""Encode a message header into an RFC-compliant format.
There are many issues involved in converting a given string for use in
an email header. Only certain character sets are readable in most
@@ -382,133 +302,245 @@ class Header:
75-character length limit on any given encoded header field, so
line-wrapping must be performed, even with double-byte character sets.
- This method will do its best to convert the string to the correct
- character set used in email, and encode and line wrap it safely with
- the appropriate scheme for that character set.
-
- If the given charset is not known or an error occurs during
- conversion, this function will return the header untouched.
-
- Optional splitchars is a string containing characters to split long
- ASCII lines on, in rough support of RFC 2822's `highest level
- syntactic breaks'. This doesn't affect RFC 2047 encoded lines.
+ Optional maxlinelen specifies the maximum length of each generated
+ line, exclusive of the linesep string. Individual lines may be longer
+ than maxlinelen if a folding point cannot be found. The first line
+ will be shorter by the length of the header name plus ": " if a header
+ name was specified at Header construction time. The default value for
+ maxlinelen is determined at header construction time.
+
+ Optional splitchars is a string containing characters which should be
+ given extra weight by the splitting algorithm during normal header
+ wrapping. This is in very rough support of RFC 2822's `higher level
+ syntactic breaks': split points preceded by a splitchar are preferred
+ during line splitting, with the characters preferred in the order in
+ which they appear in the string. Space and tab may be included in the
+ string to indicate whether preference should be given to one over the
+ other as a split point when other split chars do not appear in the line
+ being split. Splitchars does not affect RFC 2047 encoded lines.
+
+ Optional linesep is a string to be used to separate the lines of
+ the value. The default value is the most useful for typical
+ Python applications, but it can be set to \r\n to produce RFC-compliant
+ line separators when needed.
"""
- newchunks = []
- maxlinelen = self._firstlinelen
- lastlen = 0
- for s, charset in self._chunks:
- # The first bit of the next chunk should be just long enough to
- # fill the next line. Don't forget the space separating the
- # encoded words.
- targetlen = maxlinelen - lastlen - 1
- if targetlen < charset.encoded_header_len(''):
- # Stick it on the next line
- targetlen = maxlinelen
- newchunks += self._split(s, charset, targetlen, splitchars)
- lastchunk, lastcharset = newchunks[-1]
- lastlen = lastcharset.encoded_header_len(lastchunk)
- value = self._encode_chunks(newchunks, maxlinelen)
+ self._normalize()
+ if maxlinelen is None:
+ maxlinelen = self._maxlinelen
+ # A maxlinelen of 0 means don't wrap. For all practical purposes,
+ # choosing a huge number here accomplishes that and makes the
+ # _ValueFormatter algorithm much simpler.
+ if maxlinelen == 0:
+ maxlinelen = 1000000
+ formatter = _ValueFormatter(self._headerlen, maxlinelen,
+ self._continuation_ws, splitchars)
+ for string, charset in self._chunks:
+ lines = string.splitlines()
+ if lines:
+ formatter.feed('', lines[0], charset)
+ else:
+ formatter.feed('', '', charset)
+ for line in lines[1:]:
+ formatter.newline()
+ if charset.header_encoding is not None:
+ formatter.feed(self._continuation_ws, ' ' + line.lstrip(),
+ charset)
+ else:
+ sline = line.lstrip()
+ fws = line[:len(line)-len(sline)]
+ formatter.feed(fws, sline, charset)
+ if len(lines) > 1:
+ formatter.newline()
+ formatter.add_transition()
+ value = formatter._str(linesep)
if _embeded_header.search(value):
raise HeaderParseError("header value appears to contain "
"an embedded header: {!r}".format(value))
return value
+ def _normalize(self):
+ # Step 1: Normalize the chunks so that all runs of identical charsets
+ # get collapsed into a single unicode string.
+ chunks = []
+ last_charset = None
+ last_chunk = []
+ for string, charset in self._chunks:
+ if charset == last_charset:
+ last_chunk.append(string)
+ else:
+ if last_charset is not None:
+ chunks.append((SPACE.join(last_chunk), last_charset))
+ last_chunk = [string]
+ last_charset = charset
+ if last_chunk:
+ chunks.append((SPACE.join(last_chunk), last_charset))
+ self._chunks = chunks
+
-def _split_ascii(s, firstlen, restlen, continuation_ws, splitchars):
- lines = []
- maxlen = firstlen
- for line in s.splitlines():
- # Ignore any leading whitespace (i.e. continuation whitespace) already
- # on the line, since we'll be adding our own.
- line = line.lstrip()
- if len(line) < maxlen:
- lines.append(line)
- maxlen = restlen
- continue
- # Attempt to split the line at the highest-level syntactic break
- # possible. Note that we don't have a lot of smarts about field
+class _ValueFormatter:
+ def __init__(self, headerlen, maxlen, continuation_ws, splitchars):
+ self._maxlen = maxlen
+ self._continuation_ws = continuation_ws
+ self._continuation_ws_len = len(continuation_ws)
+ self._splitchars = splitchars
+ self._lines = []
+ self._current_line = _Accumulator(headerlen)
+
+ def _str(self, linesep):
+ self.newline()
+ return linesep.join(self._lines)
+
+ def __str__(self):
+ return self._str(NL)
+
+ def newline(self):
+ end_of_line = self._current_line.pop()
+ if end_of_line != (' ', ''):
+ self._current_line.push(*end_of_line)
+ if len(self._current_line) > 0:
+ if self._current_line.is_onlyws():
+ self._lines[-1] += str(self._current_line)
+ else:
+ self._lines.append(str(self._current_line))
+ self._current_line.reset()
+
+ def add_transition(self):
+ self._current_line.push(' ', '')
+
+ def feed(self, fws, string, charset):
+ # If the charset has no header encoding (i.e. it is an ASCII encoding)
+ # then we must split the header at the "highest level syntactic break"
+ # possible. Note that we don't have a lot of smarts about field
# syntax; we just try to break on semi-colons, then commas, then
- # whitespace.
- for ch in splitchars:
- if ch in line:
- break
- else:
- # There's nothing useful to split the line on, not even spaces, so
- # just append this line unchanged
- lines.append(line)
- maxlen = restlen
- continue
- # Now split the line on the character plus trailing whitespace
- cre = re.compile(r'%s\s*' % ch)
- if ch in ';,':
- eol = ch
+ # whitespace. Eventually, this should be pluggable.
+ if charset.header_encoding is None:
+ self._ascii_split(fws, string, self._splitchars)
+ return
+ # Otherwise, we're doing either a Base64 or a quoted-printable
+ # encoding which means we don't need to split the line on syntactic
+ # breaks. We can basically just find enough characters to fit on the
+ # current line, minus the RFC 2047 chrome. What makes this trickier
+ # though is that we have to split at octet boundaries, not character
+ # boundaries but it's only safe to split at character boundaries so at
+ # best we can only get close.
+ encoded_lines = charset.header_encode_lines(string, self._maxlengths())
+ # The first element extends the current line, but if it's None then
+ # nothing more fits on the current line so start a new line.
+ try:
+ first_line = encoded_lines.pop(0)
+ except IndexError:
+ # There are no encoded lines, so we're done.
+ return
+ if first_line is not None:
+ self._append_chunk(fws, first_line)
+ try:
+ last_line = encoded_lines.pop()
+ except IndexError:
+ # There was only one line.
+ return
+ self.newline()
+ self._current_line.push(self._continuation_ws, last_line)
+ # Everything else are full lines in themselves.
+ for line in encoded_lines:
+ self._lines.append(self._continuation_ws + line)
+
+ def _maxlengths(self):
+ # The first line's length.
+ yield self._maxlen - len(self._current_line)
+ while True:
+ yield self._maxlen - self._continuation_ws_len
+
+ def _ascii_split(self, fws, string, splitchars):
+ # The RFC 2822 header folding algorithm is simple in principle but
+ # complex in practice. Lines may be folded any place where "folding
+ # white space" appears by inserting a linesep character in front of the
+ # FWS. The complication is that not all spaces or tabs qualify as FWS,
+ # and we are also supposed to prefer to break at "higher level
+ # syntactic breaks". We can't do either of these without intimate
+ # knowledge of the structure of structured headers, which we don't have
+ # here. So the best we can do here is prefer to break at the specified
+ # splitchars, and hope that we don't choose any spaces or tabs that
+ # aren't legal FWS. (This is at least better than the old algorithm,
+ # where we would sometimes *introduce* FWS after a splitchar, or the
+ # algorithm before that, where we would turn all white space runs into
+ # single spaces or tabs.)
+ parts = re.split("(["+FWS+"]+)", fws+string)
+ if parts[0]:
+ parts[:0] = ['']
else:
- eol = ''
- joiner = eol + ' '
- joinlen = len(joiner)
- wslen = len(continuation_ws.replace('\t', SPACE8))
- this = []
- linelen = 0
- for part in cre.split(line):
- curlen = linelen + max(0, len(this)-1) * joinlen
- partlen = len(part)
- onfirstline = not lines
- # We don't want to split after the field name, if we're on the
- # first line and the field name is present in the header string.
- if ch == ' ' and onfirstline and \
- len(this) == 1 and fcre.match(this[0]):
- this.append(part)
- linelen += partlen
- elif curlen + partlen > maxlen:
- if this:
- lines.append(joiner.join(this) + eol)
- # If this part is longer than maxlen and we aren't already
- # splitting on whitespace, try to recursively split this line
- # on whitespace.
- if partlen > maxlen and ch != ' ':
- subl = _split_ascii(part, maxlen, restlen,
- continuation_ws, ' ')
- lines.extend(subl[:-1])
- this = [subl[-1]]
+ parts.pop(0)
+ for fws, part in zip(*[iter(parts)]*2):
+ self._append_chunk(fws, part)
+
+ def _append_chunk(self, fws, string):
+ self._current_line.push(fws, string)
+ if len(self._current_line) > self._maxlen:
+ # Find the best split point, working backward from the end.
+ # There might be none, on a long first line.
+ for ch in self._splitchars:
+ for i in range(self._current_line.part_count()-1, 0, -1):
+ if ch.isspace():
+ fws = self._current_line[i][0]
+ if fws and fws[0]==ch:
+ break
+ prevpart = self._current_line[i-1][1]
+ if prevpart and prevpart[-1]==ch:
+ break
else:
- this = [part]
- linelen = wslen + len(this[-1])
- maxlen = restlen
+ continue
+ break
else:
- this.append(part)
- linelen += partlen
- # Put any left over parts on a line by themselves
- if this:
- lines.append(joiner.join(this))
- return lines
+ fws, part = self._current_line.pop()
+ if self._current_line._initial_size > 0:
+ # There will be a header, so leave it on a line by itself.
+ self.newline()
+ if not fws:
+ # We don't use continuation_ws here because the whitespace
+ # after a header should always be a space.
+ fws = ' '
+ self._current_line.push(fws, part)
+ return
+ remainder = self._current_line.pop_from(i)
+ self._lines.append(str(self._current_line))
+ self._current_line.reset(remainder)
+
+
+class _Accumulator(list):
+
+ def __init__(self, initial_size=0):
+ self._initial_size = initial_size
+ super().__init__()
+
+ def push(self, fws, string):
+ self.append((fws, string))
+
+ def pop_from(self, i=0):
+ popped = self[i:]
+ self[i:] = []
+ return popped
+
+ def pop(self):
+ if self.part_count()==0:
+ return ('', '')
+ return super().pop()
+
+ def __len__(self):
+ return sum((len(fws)+len(part) for fws, part in self),
+ self._initial_size)
+ def __str__(self):
+ return EMPTYSTRING.join((EMPTYSTRING.join((fws, part))
+ for fws, part in self))
-
-def _binsplit(splittable, charset, maxlinelen):
- i = 0
- j = len(splittable)
- while i < j:
- # Invariants:
- # 1. splittable[:k] fits for all k <= i (note that we *assume*,
- # at the start, that splittable[:0] fits).
- # 2. splittable[:k] does not fit for any k > j (at the start,
- # this means we shouldn't look at any k > len(splittable)).
- # 3. We don't know about splittable[:k] for k in i+1..j.
- # 4. We want to set i to the largest k that fits, with i <= k <= j.
- #
- m = (i+j+1) >> 1 # ceiling((i+j)/2); i < m <= j
- chunk = charset.from_splittable(splittable[:m], True)
- chunklen = charset.encoded_header_len(chunk)
- if chunklen <= maxlinelen:
- # m is acceptable, so is a new lower bound.
- i = m
- else:
- # m is not acceptable, so final i must be < m.
- j = m - 1
- # i == j. Invariant #1 implies that splittable[:i] fits, and
- # invariant #2 implies that splittable[:i+1] does not fit, so i
- # is what we're looking for.
- first = charset.from_splittable(splittable[:i], False)
- last = charset.from_splittable(splittable[i:], False)
- return first, last
+ def reset(self, startval=None):
+ if startval is None:
+ startval = []
+ self[:] = startval
+ self._initial_size = 0
+
+ def is_onlyws(self):
+ return self._initial_size==0 and (not self or str(self).isspace())
+
+ def part_count(self):
+ return super().__len__()
diff --git a/Lib/email/iterators.py b/Lib/email/iterators.py
index e99f228..3adc4a0 100644
--- a/Lib/email/iterators.py
+++ b/Lib/email/iterators.py
@@ -12,7 +12,7 @@ __all__ = [
]
import sys
-from cStringIO import StringIO
+from io import StringIO
@@ -39,7 +39,7 @@ def body_line_iterator(msg, decode=False):
"""
for subpart in msg.walk():
payload = subpart.get_payload(decode=decode)
- if isinstance(payload, basestring):
+ if isinstance(payload, str):
for line in StringIO(payload):
yield line
@@ -63,11 +63,11 @@ def _structure(msg, fp=None, level=0, include_default=False):
if fp is None:
fp = sys.stdout
tab = ' ' * (level * 4)
- print >> fp, tab + msg.get_content_type(),
+ print(tab + msg.get_content_type(), end='', file=fp)
if include_default:
- print >> fp, '[%s]' % msg.get_default_type()
+ print(' [%s]' % msg.get_default_type(), file=fp)
else:
- print >> fp
+ print(file=fp)
if msg.is_multipart():
for subpart in msg.get_payload():
_structure(subpart, fp, level+1, include_default)
diff --git a/Lib/email/message.py b/Lib/email/message.py
index 7c93370..f1ffcdb 100644
--- a/Lib/email/message.py
+++ b/Lib/email/message.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2001-2006 Python Software Foundation
+# Copyright (C) 2001-2007 Python Software Foundation
# Author: Barry Warsaw
# Contact: email-sig@python.org
@@ -8,14 +8,17 @@ __all__ = ['Message']
import re
import uu
+import base64
import binascii
import warnings
-from cStringIO import StringIO
+from io import BytesIO, StringIO
# Intrapackage imports
-import email.charset
from email import utils
from email import errors
+from email import header
+from email import charset as _charset
+Charset = _charset.Charset
SEMISPACE = '; '
@@ -23,14 +26,31 @@ SEMISPACE = '; '
# existence of which force quoting of the parameter value.
tspecials = re.compile(r'[ \(\)<>@,;:\\"/\[\]\?=]')
+# How to figure out if we are processing strings that come from a byte
+# source with undecodable characters.
+_has_surrogates = re.compile(
+ '([^\ud800-\udbff]|\A)[\udc00-\udfff]([^\udc00-\udfff]|\Z)').search
+
# Helper functions
+def _sanitize_header(name, value):
+ # If the header value contains surrogates, return a Header using
+ # the unknown-8bit charset to encode the bytes as encoded words.
+ if not isinstance(value, str):
+ # Assume it is already a header object
+ return value
+ if _has_surrogates(value):
+ return header.Header(value, charset=_charset.UNKNOWN8BIT,
+ header_name=name)
+ else:
+ return value
+
def _splitparam(param):
# Split header parameters. BAW: this may be too simple. It isn't
# strictly RFC 2045 (section 5.1) compliant, but it catches most headers
- # found in the wild. We may eventually need a full fledged parser
- # eventually.
- a, sep, b = param.partition(';')
+ # found in the wild. We may eventually need a full fledged parser.
+ # RDM: we might have a Header here; for now just stringify it.
+ a, sep, b = str(param).partition(';')
if not sep:
return a.strip(), None
return a.strip(), b.strip()
@@ -40,16 +60,26 @@ def _formatparam(param, value=None, quote=True):
This will quote the value if needed or if quote is true. If value is a
three tuple (charset, language, value), it will be encoded according
- to RFC2231 rules.
+ to RFC2231 rules. If it contains non-ascii characters it will likewise
+ be encoded according to RFC2231 rules, using the utf-8 charset and
+ a null language.
"""
if value is not None and len(value) > 0:
# A tuple is used for RFC 2231 encoded parameter values where items
# are (charset, language, value). charset is a string, not a Charset
- # instance.
+ # instance. RFC 2231 encoded values are never quoted, per RFC.
if isinstance(value, tuple):
# Encode as per RFC 2231
param += '*'
value = utils.encode_rfc2231(value[2], value[0], value[1])
+ return '%s=%s' % (param, value)
+ else:
+ try:
+ value.encode('ascii')
+ except UnicodeEncodeError:
+ param += '*'
+ value = utils.encode_rfc2231(value, 'utf-8', '')
+ return '%s=%s' % (param, value)
# BAW: Please check this. I think that if quote is set it should
# force quoting even if not necessary.
if quote or tspecials.search(value):
@@ -60,6 +90,8 @@ def _formatparam(param, value=None, quote=True):
return param
def _parseparam(s):
+ # RDM This might be a Header, so for now stringify it.
+ s = ';' + str(s)
plist = []
while s[:1] == ';':
s = s[1:]
@@ -119,21 +151,20 @@ class Message:
"""Return the entire formatted message as a string.
This includes the headers, body, and envelope header.
"""
- return self.as_string(unixfrom=True)
+ return self.as_string()
- def as_string(self, unixfrom=False):
+ def as_string(self, unixfrom=False, maxheaderlen=0):
"""Return the entire formatted message as a string.
Optional `unixfrom' when True, means include the Unix From_ envelope
header.
This is a convenience method and may not generate the message exactly
- as you intend because by default it mangles lines that begin with
- "From ". For more flexibility, use the flatten() method of a
+ as you intend. For more flexibility, use the flatten() method of a
Generator instance.
"""
from email.generator import Generator
fp = StringIO()
- g = Generator(fp)
+ g = Generator(fp, mangle_from_=False, maxheaderlen=maxheaderlen)
g.flatten(self, unixfrom=unixfrom)
return fp.getvalue()
@@ -185,34 +216,73 @@ class Message:
If the message is a multipart and the decode flag is True, then None
is returned.
"""
- if i is None:
- payload = self._payload
- elif not isinstance(self._payload, list):
- raise TypeError('Expected list, got %s' % type(self._payload))
- else:
- payload = self._payload[i]
- if decode:
- if self.is_multipart():
+ # Here is the logic table for this code, based on the email5.0.0 code:
+ # i decode is_multipart result
+ # ------ ------ ------------ ------------------------------
+ # None True True None
+ # i True True None
+ # None False True _payload (a list)
+ # i False True _payload element i (a Message)
+ # i False False error (not a list)
+ # i True False error (not a list)
+ # None False False _payload
+ # None True False _payload decoded (bytes)
+ # Note that Barry planned to factor out the 'decode' case, but that
+ # isn't so easy now that we handle the 8 bit data, which needs to be
+ # converted in both the decode and non-decode path.
+ if self.is_multipart():
+ if decode:
return None
- cte = self.get('content-transfer-encoding', '').lower()
- if cte == 'quoted-printable':
- return utils._qdecode(payload)
- elif cte == 'base64':
- try:
- return utils._bdecode(payload)
- except binascii.Error:
- # Incorrect padding
- return payload
- elif cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
- sfp = StringIO()
+ if i is None:
+ return self._payload
+ else:
+ return self._payload[i]
+ # For backward compatibility, use isinstance and this error message
+ # instead of the more logical is_multipart test.
+ if i is not None and not isinstance(self._payload, list):
+ raise TypeError('Expected list, got %s' % type(self._payload))
+ payload = self._payload
+ # cte might be a Header, so for now stringify it.
+ cte = str(self.get('content-transfer-encoding', '')).lower()
+ # payload may be bytes here.
+ if isinstance(payload, str):
+ if _has_surrogates(payload):
+ bpayload = payload.encode('ascii', 'surrogateescape')
+ if not decode:
+ try:
+ payload = bpayload.decode(self.get_param('charset', 'ascii'), 'replace')
+ except LookupError:
+ payload = bpayload.decode('ascii', 'replace')
+ elif decode:
try:
- uu.decode(StringIO(payload+'\n'), sfp, quiet=True)
- payload = sfp.getvalue()
- except uu.Error:
- # Some decoding problem
- return payload
- # Everything else, including encodings with 8bit or 7bit are returned
- # unchanged.
+ bpayload = payload.encode('ascii')
+ except UnicodeError:
+ # This won't happen for RFC compliant messages (messages
+ # containing only ASCII codepoints in the unicode input).
+ # If it does happen, turn the string into bytes in a way
+ # guaranteed not to fail.
+ bpayload = payload.encode('raw-unicode-escape')
+ if not decode:
+ return payload
+ if cte == 'quoted-printable':
+ return utils._qdecode(bpayload)
+ elif cte == 'base64':
+ try:
+ return base64.b64decode(bpayload)
+ except binascii.Error:
+ # Incorrect padding
+ return bpayload
+ elif cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
+ in_file = BytesIO(bpayload)
+ out_file = BytesIO()
+ try:
+ uu.decode(in_file, out_file, quiet=True)
+ return out_file.getvalue()
+ except uu.Error:
+ # Some decoding problem
+ return bpayload
+ if isinstance(payload, str):
+ return bpayload
return payload
def set_payload(self, payload, charset=None):
@@ -238,18 +308,13 @@ class Message:
and encoded properly, if needed, when generating the plain text
representation of the message. MIME headers (MIME-Version,
Content-Type, Content-Transfer-Encoding) will be added as needed.
-
"""
if charset is None:
self.del_param('charset')
self._charset = None
return
- if isinstance(charset, basestring):
- charset = email.charset.Charset(charset)
- if not isinstance(charset, email.charset.Charset):
- raise TypeError(charset)
- # BAW: should we accept strings that can serve as arguments to the
- # Charset constructor?
+ if not isinstance(charset, Charset):
+ charset = Charset(charset)
self._charset = charset
if 'MIME-Version' not in self:
self.add_header('MIME-Version', '1.0')
@@ -258,9 +323,7 @@ class Message:
charset=charset.get_output_charset())
else:
self.set_param('charset', charset.get_output_charset())
- if isinstance(self._payload, unicode):
- self._payload = self._payload.encode(charset.output_charset)
- if str(charset) != charset.get_output_charset():
+ if charset != charset.get_output_charset():
self._payload = charset.body_encode(self._payload)
if 'Content-Transfer-Encoding' not in self:
cte = charset.get_body_encoding()
@@ -316,10 +379,9 @@ class Message:
def __contains__(self, name):
return name.lower() in [k.lower() for k, v in self._headers]
- def has_key(self, name):
- """Return true if the message contains the header."""
- missing = object()
- return self.get(name, missing) is not missing
+ def __iter__(self):
+ for field, value in self._headers:
+ yield field
def keys(self):
"""Return a list of all the message's header field names.
@@ -339,7 +401,7 @@ class Message:
Any fields deleted and re-inserted are always appended to the header
list.
"""
- return [v for k, v in self._headers]
+ return [_sanitize_header(k, v) for k, v in self._headers]
def items(self):
"""Get all the message's header fields and values.
@@ -349,7 +411,7 @@ class Message:
Any fields deleted and re-inserted are always appended to the header
list.
"""
- return self._headers[:]
+ return [(k, _sanitize_header(k, v)) for k, v in self._headers]
def get(self, name, failobj=None):
"""Get a header value.
@@ -360,7 +422,7 @@ class Message:
name = name.lower()
for k, v in self._headers:
if k.lower() == name:
- return v
+ return _sanitize_header(k, v)
return failobj
#
@@ -380,7 +442,7 @@ class Message:
name = name.lower()
for k, v in self._headers:
if k.lower() == name:
- values.append(v)
+ values.append(_sanitize_header(k, v))
if not values:
return failobj
return values
@@ -392,13 +454,18 @@ class Message:
additional parameters for the header field, with underscores converted
to dashes. Normally the parameter will be added as key="value" unless
value is None, in which case only the key will be added. If a
- parameter value contains non-ASCII characters it must be specified as a
+ parameter value contains non-ASCII characters it can be specified as a
three-tuple of (charset, language, value), in which case it will be
- encoded according to RFC2231 rules.
+ encoded according to RFC2231 rules. Otherwise it will be encoded using
+ the utf-8 charset and a language of ''.
- Example:
+ Examples:
msg.add_header('content-disposition', 'attachment', filename='bud.gif')
+ msg.add_header('content-disposition', 'attachment',
+ filename=('utf-8', '', 'Fußballer.ppt'))
+ msg.add_header('content-disposition', 'attachment',
+ filename='Fußballer.ppt')
"""
parts = []
for k, v in _params.items():
@@ -497,7 +564,7 @@ class Message:
if value is missing:
return failobj
params = []
- for p in _parseparam(';' + value):
+ for p in _parseparam(value):
try:
name, val = p.split('=', 1)
name = name.strip()
@@ -546,17 +613,15 @@ class Message:
the form (CHARSET, LANGUAGE, VALUE). Note that both CHARSET and
LANGUAGE can be None, in which case you should consider VALUE to be
encoded in the us-ascii charset. You can usually ignore LANGUAGE.
+ The parameter value (either the returned string, or the VALUE item in
+ the 3-tuple) is always unquoted, unless unquote is set to False.
- Your application should be prepared to deal with 3-tuple return
- values, and can convert the parameter to a Unicode string like so:
+ If your application doesn't care whether the parameter was RFC 2231
+ encoded, it can turn the return value into a string as follows:
param = msg.get_param('foo')
- if isinstance(param, tuple):
- param = unicode(param[2], param[0] or 'us-ascii')
+ param = email.utils.collapse_rfc2231_value(param)
- In any case, the parameter value (either the returned string, or the
- VALUE item in the 3-tuple) is always unquoted, unless unquote is set
- to False.
"""
if header not in self:
return failobj
@@ -762,14 +827,13 @@ class Message:
# LookupError will be raised if the charset isn't known to
# Python. UnicodeError will be raised if the encoded text
# contains a character not in the charset.
- charset = unicode(charset[2], pcharset).encode('us-ascii')
+ as_bytes = charset[2].encode('raw-unicode-escape')
+ charset = str(as_bytes, pcharset)
except (LookupError, UnicodeError):
charset = charset[2]
- # charset character must be in us-ascii range
+ # charset characters must be in us-ascii range
try:
- if isinstance(charset, str):
- charset = unicode(charset, 'us-ascii')
- charset = charset.encode('us-ascii')
+ charset.encode('us-ascii')
except UnicodeError:
return failobj
# RFC 2046, $4.1.2 says charsets are not case sensitive
diff --git a/Lib/email/mime/audio.py b/Lib/email/mime/audio.py
index c7290c4..fbc1189 100644
--- a/Lib/email/mime/audio.py
+++ b/Lib/email/mime/audio.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2001-2006 Python Software Foundation
+# Copyright (C) 2001-2007 Python Software Foundation
# Author: Anthony Baxter
# Contact: email-sig@python.org
@@ -8,7 +8,7 @@ __all__ = ['MIMEAudio']
import sndhdr
-from cStringIO import StringIO
+from io import BytesIO
from email import encoders
from email.mime.nonmultipart import MIMENonMultipart
@@ -30,7 +30,7 @@ def _whatsnd(data):
command and use the standard 'magic' file, as shipped with a modern Unix.
"""
hdr = data[:512]
- fakefile = StringIO(hdr)
+ fakefile = BytesIO(hdr)
for testfn in sndhdr.tests:
res = testfn(hdr, fakefile)
if res is not None:
diff --git a/Lib/email/parser.py b/Lib/email/parser.py
index 2fcaf25..1c931ea 100644
--- a/Lib/email/parser.py
+++ b/Lib/email/parser.py
@@ -1,13 +1,13 @@
-# Copyright (C) 2001-2006 Python Software Foundation
+# Copyright (C) 2001-2007 Python Software Foundation
# Author: Barry Warsaw, Thomas Wouters, Anthony Baxter
# Contact: email-sig@python.org
"""A parser of RFC 2822 and MIME email messages."""
-__all__ = ['Parser', 'HeaderParser']
+__all__ = ['Parser', 'HeaderParser', 'BytesParser']
import warnings
-from cStringIO import StringIO
+from io import StringIO, TextIOWrapper
from email.feedparser import FeedParser
from email.message import Message
@@ -89,3 +89,48 @@ class HeaderParser(Parser):
def parsestr(self, text, headersonly=True):
return Parser.parsestr(self, text, True)
+
+
+class BytesParser:
+
+ def __init__(self, *args, **kw):
+ """Parser of binary RFC 2822 and MIME email messages.
+
+ Creates an in-memory object tree representing the email message, which
+ can then be manipulated and turned over to a Generator to return the
+ textual representation of the message.
+
+ The input must be formatted as a block of RFC 2822 headers and header
+ continuation lines, optionally preceded by a `Unix-from' header. The
+ header block is terminated either by the end of the input or by a
+ blank line.
+
+ _class is the class to instantiate for new message objects when they
+ must be created. This class must have a constructor that can take
+ zero arguments. Default is Message.Message.
+ """
+ self.parser = Parser(*args, **kw)
+
+ def parse(self, fp, headersonly=False):
+ """Create a message structure from the data in a binary file.
+
+ Reads all the data from the file and returns the root of the message
+ structure. Optional headersonly is a flag specifying whether to stop
+ parsing after reading the headers or not. The default is False,
+ meaning it parses the entire contents of the file.
+ """
+ fp = TextIOWrapper(fp, encoding='ascii', errors='surrogateescape')
+ with fp:
+ return self.parser.parse(fp, headersonly)
+
+
+ def parsebytes(self, text, headersonly=False):
+ """Create a message structure from a byte string.
+
+ Returns the root of the message structure. Optional headersonly is a
+ flag specifying whether to stop parsing after reading the headers or
+ not. The default is False, meaning it parses the entire contents of
+ the file.
+ """
+ text = text.decode('ASCII', errors='surrogateescape')
+ return self.parser.parsestr(text, headersonly)
diff --git a/Lib/email/quoprimime.py b/Lib/email/quoprimime.py
index 0c18a9e..78638d5 100644
--- a/Lib/email/quoprimime.py
+++ b/Lib/email/quoprimime.py
@@ -29,70 +29,84 @@ wrapping issues, use the email.header module.
__all__ = [
'body_decode',
'body_encode',
- 'body_quopri_check',
- 'body_quopri_len',
+ 'body_length',
'decode',
'decodestring',
- 'encode',
- 'encodestring',
'header_decode',
'header_encode',
- 'header_quopri_check',
- 'header_quopri_len',
+ 'header_length',
'quote',
'unquote',
]
import re
+import io
-from string import hexdigits
-from email.utils import fix_eols
+from string import ascii_letters, digits, hexdigits
CRLF = '\r\n'
NL = '\n'
+EMPTYSTRING = ''
-# See also Charset.py
-MISC_LEN = 7
+# Build a mapping of octets to the expansion of that octet. Since we're only
+# going to have 256 of these things, this isn't terribly inefficient
+# space-wise. Remember that headers and bodies have different sets of safe
+# characters. Initialize both maps with the full expansion, and then override
+# the safe bytes with the more compact form.
+_QUOPRI_HEADER_MAP = dict((c, '=%02X' % c) for c in range(256))
+_QUOPRI_BODY_MAP = _QUOPRI_HEADER_MAP.copy()
+
+# Safe header bytes which need no encoding.
+for c in b'-!*+/' + ascii_letters.encode('ascii') + digits.encode('ascii'):
+ _QUOPRI_HEADER_MAP[c] = chr(c)
+# Headers have one other special encoding; spaces become underscores.
+_QUOPRI_HEADER_MAP[ord(' ')] = '_'
+
+# Safe body bytes which need no encoding.
+for c in (b' !"#$%&\'()*+,-./0123456789:;<>'
+ b'?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`'
+ b'abcdefghijklmnopqrstuvwxyz{|}~\t'):
+ _QUOPRI_BODY_MAP[c] = chr(c)
-hqre = re.compile(r'[^-a-zA-Z0-9!*+/ ]')
-bqre = re.compile(r'[^ !-<>-~\t]')
-
# Helpers
-def header_quopri_check(c):
- """Return True if the character should be escaped with header quopri."""
- return bool(hqre.match(c))
+def header_check(octet):
+ """Return True if the octet should be escaped with header quopri."""
+ return chr(octet) != _QUOPRI_HEADER_MAP[octet]
-def body_quopri_check(c):
- """Return True if the character should be escaped with body quopri."""
- return bool(bqre.match(c))
+def body_check(octet):
+ """Return True if the octet should be escaped with body quopri."""
+ return chr(octet) != _QUOPRI_BODY_MAP[octet]
-def header_quopri_len(s):
- """Return the length of str when it is encoded with header quopri."""
- count = 0
- for c in s:
- if hqre.match(c):
- count += 3
- else:
- count += 1
- return count
+def header_length(bytearray):
+ """Return a header quoted-printable encoding length.
+ Note that this does not include any RFC 2047 chrome added by
+ `header_encode()`.
+
+ :param bytearray: An array of bytes (a.k.a. octets).
+ :return: The length in bytes of the byte array when it is encoded with
+ quoted-printable for headers.
+ """
+ return sum(len(_QUOPRI_HEADER_MAP[octet]) for octet in bytearray)
-def body_quopri_len(str):
- """Return the length of str when it is encoded with body quopri."""
- count = 0
- for c in str:
- if bqre.match(c):
- count += 3
- else:
- count += 1
- return count
+
+def body_length(bytearray):
+ """Return a body quoted-printable encoding length.
+
+ :param bytearray: An array of bytes (a.k.a. octets).
+ :return: The length in bytes of the byte array when it is encoded with
+ quoted-printable for bodies.
+ """
+ return sum(len(_QUOPRI_BODY_MAP[octet]) for octet in bytearray)
def _max_append(L, s, maxlen, extra=''):
+ if not isinstance(s, str):
+ s = chr(s)
if not L:
L.append(s.lstrip())
elif len(L[-1]) + len(s) <= maxlen:
@@ -107,12 +121,11 @@ def unquote(s):
def quote(c):
- return "=%02X" % ord(c)
+ return '=%02X' % ord(c)
+
-
-def header_encode(header, charset="iso-8859-1", keep_eols=False,
- maxlinelen=76, eol=NL):
+def header_encode(header_bytes, charset='iso-8859-1'):
"""Encode a single header line with quoted-printable (like) encoding.
Defined in RFC 2045, this `Q' encoding is similar to quoted-printable, but
@@ -120,149 +133,122 @@ def header_encode(header, charset="iso-8859-1", keep_eols=False,
bit characters (and some 8 bit) to remain more or less readable in non-RFC
2045 aware mail clients.
- charset names the character set to use to encode the header. It defaults
- to iso-8859-1.
-
- The resulting string will be in the form:
-
- "=?charset?q?I_f=E2rt_in_your_g=E8n=E8ral_dire=E7tion?\\n
- =?charset?q?Silly_=C8nglish_Kn=EEghts?="
-
- with each line wrapped safely at, at most, maxlinelen characters (defaults
- to 76 characters). If maxlinelen is None, the entire string is encoded in
- one chunk with no splitting.
-
- End-of-line characters (\\r, \\n, \\r\\n) will be automatically converted
- to the canonical email line separator \\r\\n unless the keep_eols
- parameter is True (the default is False).
-
- Each line of the header will be terminated in the value of eol, which
- defaults to "\\n". Set this to "\\r\\n" if you are using the result of
- this function directly in email.
+ charset names the character set to use in the RFC 2047 header. It
+ defaults to iso-8859-1.
"""
- # Return empty headers unchanged
- if not header:
- return header
-
- if not keep_eols:
- header = fix_eols(header)
-
- # Quopri encode each line, in encoded chunks no greater than maxlinelen in
- # length, after the RFC chrome is added in.
- quoted = []
- if maxlinelen is None:
- # An obnoxiously large number that's good enough
- max_encoded = 100000
- else:
- max_encoded = maxlinelen - len(charset) - MISC_LEN - 1
-
- for c in header:
- # Space may be represented as _ instead of =20 for readability
- if c == ' ':
- _max_append(quoted, '_', max_encoded)
- # These characters can be included verbatim
- elif not hqre.match(c):
- _max_append(quoted, c, max_encoded)
- # Otherwise, replace with hex value like =E2
- else:
- _max_append(quoted, "=%02X" % ord(c), max_encoded)
-
+ # Return empty headers as an empty string.
+ if not header_bytes:
+ return ''
+ # Iterate over every byte, encoding if necessary.
+ encoded = []
+ for octet in header_bytes:
+ encoded.append(_QUOPRI_HEADER_MAP[octet])
# Now add the RFC chrome to each encoded chunk and glue the chunks
- # together. BAW: should we be able to specify the leading whitespace in
- # the joiner?
- joiner = eol + ' '
- return joiner.join(['=?%s?q?%s?=' % (charset, line) for line in quoted])
+ # together.
+ return '=?%s?q?%s?=' % (charset, EMPTYSTRING.join(encoded))
+
+
+class _body_accumulator(io.StringIO):
+
+ def __init__(self, maxlinelen, eol, *args, **kw):
+ super().__init__(*args, **kw)
+ self.eol = eol
+ self.maxlinelen = self.room = maxlinelen
+
+ def write_str(self, s):
+ """Add string s to the accumulated body."""
+ self.write(s)
+ self.room -= len(s)
+
+ def newline(self):
+ """Write eol, then start new line."""
+ self.write_str(self.eol)
+ self.room = self.maxlinelen
+
+ def write_soft_break(self):
+ """Write a soft break, then start a new line."""
+ self.write_str('=')
+ self.newline()
+
+ def write_wrapped(self, s, extra_room=0):
+ """Add a soft line break if needed, then write s."""
+ if self.room < len(s) + extra_room:
+ self.write_soft_break()
+ self.write_str(s)
+
+ def write_char(self, c, is_last_char):
+ if not is_last_char:
+ # Another character follows on this line, so we must leave
+ # extra room, either for it or a soft break, and whitespace
+ # need not be quoted.
+ self.write_wrapped(c, extra_room=1)
+ elif c not in ' \t':
+ # For this and remaining cases, no more characters follow,
+ # so there is no need to reserve extra room (since a hard
+ # break will immediately follow).
+ self.write_wrapped(c)
+ elif self.room >= 3:
+ # It's a whitespace character at end-of-line, and we have room
+ # for the three-character quoted encoding.
+ self.write(quote(c))
+ elif self.room == 2:
+ # There's room for the whitespace character and a soft break.
+ self.write(c)
+ self.write_soft_break()
+ else:
+ # There's room only for a soft break. The quoted whitespace
+ # will be the only content on the subsequent line.
+ self.write_soft_break()
+ self.write(quote(c))
-
-def encode(body, binary=False, maxlinelen=76, eol=NL):
+def body_encode(body, maxlinelen=76, eol=NL):
"""Encode with quoted-printable, wrapping at maxlinelen characters.
- If binary is False (the default), end-of-line characters will be converted
- to the canonical email end-of-line sequence \\r\\n. Otherwise they will
- be left verbatim.
-
Each line of encoded text will end with eol, which defaults to "\\n". Set
this to "\\r\\n" if you will be using the result of this function directly
in an email.
- Each line will be wrapped at, at most, maxlinelen characters (defaults to
- 76 characters). Long lines will have the `soft linefeed' quoted-printable
- character "=" appended to them, so the decoded text will be identical to
- the original text.
+ Each line will be wrapped at, at most, maxlinelen characters before the
+ eol string (maxlinelen defaults to 76 characters, the maximum value
+ permitted by RFC 2045). Long lines will have the 'soft line break'
+ quoted-printable character "=" appended to them, so the decoded text will
+ be identical to the original text.
+
+ The minimum maxlinelen is 4 to have room for a quoted character ("=XX")
+ followed by a soft line break. Smaller values will generate a
+ ValueError.
+
"""
+
+ if maxlinelen < 4:
+ raise ValueError("maxlinelen must be at least 4")
if not body:
return body
- if not binary:
- body = fix_eols(body)
-
- # BAW: We're accumulating the body text by string concatenation. That
- # can't be very efficient, but I don't have time now to rewrite it. It
- # just feels like this algorithm could be more efficient.
- encoded_body = ''
- lineno = -1
- # Preserve line endings here so we can check later to see an eol needs to
- # be added to the output later.
- lines = body.splitlines(1)
- for line in lines:
- # But strip off line-endings for processing this line.
- if line.endswith(CRLF):
- line = line[:-2]
- elif line[-1] in CRLF:
- line = line[:-1]
-
- lineno += 1
- encoded_line = ''
- prev = None
- linelen = len(line)
- # Now we need to examine every character to see if it needs to be
- # quopri encoded. BAW: again, string concatenation is inefficient.
- for j in range(linelen):
- c = line[j]
- prev = c
- if bqre.match(c):
+ # The last line may or may not end in eol, but all other lines do.
+ last_has_eol = (body[-1] in '\r\n')
+
+ # This accumulator will make it easier to build the encoded body.
+ encoded_body = _body_accumulator(maxlinelen, eol)
+
+ lines = body.splitlines()
+ last_line_no = len(lines) - 1
+ for line_no, line in enumerate(lines):
+ last_char_index = len(line) - 1
+ for i, c in enumerate(line):
+ if body_check(ord(c)):
c = quote(c)
- elif j+1 == linelen:
- # Check for whitespace at end of line; special case
- if c not in ' \t':
- encoded_line += c
- prev = c
- continue
- # Check to see to see if the line has reached its maximum length
- if len(encoded_line) + len(c) >= maxlinelen:
- encoded_body += encoded_line + '=' + eol
- encoded_line = ''
- encoded_line += c
- # Now at end of line..
- if prev and prev in ' \t':
- # Special case for whitespace at end of file
- if lineno + 1 == len(lines):
- prev = quote(prev)
- if len(encoded_line) + len(prev) > maxlinelen:
- encoded_body += encoded_line + '=' + eol + prev
- else:
- encoded_body += encoded_line + prev
- # Just normal whitespace at end of line
- else:
- encoded_body += encoded_line + prev + '=' + eol
- encoded_line = ''
- # Now look at the line we just finished and it has a line ending, we
- # need to add eol to the end of the line.
- if lines[lineno].endswith(CRLF) or lines[lineno][-1] in CRLF:
- encoded_body += encoded_line + eol
- else:
- encoded_body += encoded_line
- encoded_line = ''
- return encoded_body
+ encoded_body.write_char(c, i==last_char_index)
+ # Add an eol if input line had eol. All input lines have eol except
+ # possibly the last one.
+ if line_no < last_line_no or last_has_eol:
+ encoded_body.newline()
+ return encoded_body.getvalue()
-# For convenience and backwards compatibility w/ standard base64 module
-body_encode = encode
-encodestring = encode
-
# BAW: I'm not sure if the intent was for the signature of this function to be
# the same as base64MIME.decode() or not...
def decode(encoded, eol=NL):
@@ -307,7 +293,7 @@ def decode(encoded, eol=NL):
if i == n:
decoded += eol
# Special case if original string did not end with eol
- if not encoded.endswith(eol) and decoded.endswith(eol):
+ if encoded[-1] not in '\r\n' and decoded.endswith(eol):
decoded = decoded[:-1]
return decoded
@@ -317,7 +303,7 @@ body_decode = decode
decodestring = decode
-
+
def _unquote_match(match):
"""Turn a match in the form =AB to the ASCII character with value 0xab"""
s = match.group(0)
@@ -333,4 +319,4 @@ def header_decode(s):
the high level email.header class for that functionality.
"""
s = s.replace('_', ' ')
- return re.sub(r'=[a-fA-F0-9]{2}', _unquote_match, s)
+ return re.sub(r'=[a-fA-F0-9]{2}', _unquote_match, s, re.ASCII)
diff --git a/Lib/email/test/data/msg_15.txt b/Lib/email/test/data/msg_15.txt
index 33b8487..0025624 100644
--- a/Lib/email/test/data/msg_15.txt
+++ b/Lib/email/test/data/msg_15.txt
@@ -9,7 +9,7 @@ Mime-version: 1.0
Content-type: multipart/mixed;
boundary="MS_Mac_OE_3071477847_720252_MIME_Part"
-> Denne meddelelse er i MIME-format. Da dit postl¾sningsprogram ikke forstŒr dette format, kan del af eller hele meddelelsen v¾re ul¾selig.
+> Denne meddelelse er i MIME-format. Da dit postl
--MS_Mac_OE_3071477847_720252_MIME_Part
Content-type: multipart/alternative;
diff --git a/Lib/email/test/data/msg_26.txt b/Lib/email/test/data/msg_26.txt
index e13203a..58efaa9 100644
--- a/Lib/email/test/data/msg_26.txt
+++ b/Lib/email/test/data/msg_26.txt
@@ -24,7 +24,8 @@ Simple email with attachment.
--1618492860--2051301190--113853680
-Content-Type: application/riscos; name="clock.bmp,69c"; type=BMP; load=&fff69c4b; exec=&355dd4d1; access=&03
+Content-Type: application/riscos; name="clock.bmp,69c"; type=BMP;
+ load=&fff69c4b; exec=&355dd4d1; access=&03
Content-Disposition: attachment; filename="clock.bmp"
Content-Transfer-Encoding: base64
@@ -42,4 +43,4 @@ AAMwgAgAAAAACDAAAAu7t7cwAAgDgAAAAABzcIAAAAAAAAgDMwAAAAAAN7uwgAAAAAgH
MzMAAAAACH97tzAAAAALu3c3gAAAAAAL+7tzDABAu7f7cAAAAAAACA+3MA7EQAv/sIAA
AAAAAAAIAAAAAAAAAIAAAAAA
---1618492860--2051301190--113853680-- \ No newline at end of file
+--1618492860--2051301190--113853680--
diff --git a/Lib/email/test/test_email.py b/Lib/email/test/test_email.py
index b32da9d..352b9b1 100644
--- a/Lib/email/test/test_email.py
+++ b/Lib/email/test/test_email.py
@@ -3,6 +3,7 @@
# email package unit tests
import os
+import re
import sys
import time
import base64
@@ -10,29 +11,32 @@ import difflib
import unittest
import warnings
import textwrap
-from cStringIO import StringIO
+
+from io import StringIO, BytesIO
+from itertools import chain
import email
-from email.Charset import Charset
-from email.Header import Header, decode_header, make_header
-from email.Parser import Parser, HeaderParser
-from email.Generator import Generator, DecodedGenerator
-from email.Message import Message
-from email.MIMEAudio import MIMEAudio
-from email.MIMEText import MIMEText
-from email.MIMEImage import MIMEImage
-from email.MIMEBase import MIMEBase
-from email.MIMEMessage import MIMEMessage
-from email.MIMEMultipart import MIMEMultipart
-from email import Utils
-from email import Errors
-from email import Encoders
-from email import Iterators
-from email import base64MIME
-from email import quopriMIME
-
-from test.test_support import findfile, run_unittest
+from email.charset import Charset
+from email.header import Header, decode_header, make_header
+from email.parser import Parser, HeaderParser
+from email.generator import Generator, DecodedGenerator, BytesGenerator
+from email.message import Message
+from email.mime.application import MIMEApplication
+from email.mime.audio import MIMEAudio
+from email.mime.text import MIMEText
+from email.mime.image import MIMEImage
+from email.mime.base import MIMEBase
+from email.mime.message import MIMEMessage
+from email.mime.multipart import MIMEMultipart
+from email import utils
+from email import errors
+from email import encoders
+from email import iterators
+from email import base64mime
+from email import quoprimime
+
+from test.support import findfile, run_unittest, unlink
from email.test import __file__ as landmark
@@ -42,9 +46,9 @@ SPACE = ' '
-def openfile(filename, mode='r'):
+def openfile(filename, *args, **kws):
path = os.path.join(os.path.dirname(landmark), 'data', filename)
- return open(path, mode)
+ return open(path, *args, **kws)
@@ -55,18 +59,14 @@ class TestEmailBase(unittest.TestCase):
if first != second:
sfirst = str(first)
ssecond = str(second)
- diff = difflib.ndiff(sfirst.splitlines(), ssecond.splitlines())
- fp = StringIO()
- print >> fp, NL, NL.join(diff)
- raise self.failureException, fp.getvalue()
+ rfirst = [repr(line) for line in sfirst.splitlines()]
+ rsecond = [repr(line) for line in ssecond.splitlines()]
+ diff = difflib.ndiff(rfirst, rsecond)
+ raise self.failureException(NL + NL.join(diff))
def _msgobj(self, filename):
- fp = openfile(findfile(filename))
- try:
- msg = email.message_from_file(fp)
- finally:
- fp.close()
- return msg
+ with openfile(findfile(filename)) as fp:
+ return email.message_from_file(fp)
@@ -177,7 +177,7 @@ class TestMessageAPI(TestEmailBase):
eq(value, 'multipart/mixed; boundary="BOUNDARY"')
# And this one has no Content-Type: header at all.
msg = self._msgobj('msg_03.txt')
- self.assertRaises(Errors.HeaderParseError,
+ self.assertRaises(errors.HeaderParseError,
msg.set_boundary, 'BOUNDARY')
def test_make_boundary(self):
@@ -194,12 +194,12 @@ class TestMessageAPI(TestEmailBase):
def test_message_rfc822_only(self):
# Issue 7970: message/rfc822 not in multipart parsed by
# HeaderParser caused an exception when flattened.
- fp = openfile(findfile('msg_46.txt'))
- msgdata = fp.read()
- parser = email.Parser.HeaderParser()
+ with openfile(findfile('msg_46.txt')) as fp:
+ msgdata = fp.read()
+ parser = HeaderParser()
msg = parser.parsestr(msgdata)
out = StringIO()
- gen = email.Generator.Generator(out, True, 0)
+ gen = Generator(out, True, 0)
gen.flatten(msg, False)
self.assertEqual(out.getvalue(), msgdata)
@@ -210,20 +210,20 @@ class TestMessageAPI(TestEmailBase):
eq(msg.get_payload(decode=True), None)
# Subpart 1 is 7bit encoded
eq(msg.get_payload(0).get_payload(decode=True),
- 'This is a 7bit encoded message.\n')
+ b'This is a 7bit encoded message.\n')
# Subpart 2 is quopri
eq(msg.get_payload(1).get_payload(decode=True),
- '\xa1This is a Quoted Printable encoded message!\n')
+ b'\xa1This is a Quoted Printable encoded message!\n')
# Subpart 3 is base64
eq(msg.get_payload(2).get_payload(decode=True),
- 'This is a Base64 encoded message.')
+ b'This is a Base64 encoded message.')
# Subpart 4 is base64 with a trailing newline, which
# used to be stripped (issue 7143).
eq(msg.get_payload(3).get_payload(decode=True),
- 'This is a Base64 encoded message.\n')
+ b'This is a Base64 encoded message.\n')
# Subpart 5 has no Content-Transfer-Encoding: header.
eq(msg.get_payload(4).get_payload(decode=True),
- 'This has no Content-Transfer-Encoding: header.\n')
+ b'This has no Content-Transfer-Encoding: header.\n')
def test_get_decoded_uu_payload(self):
eq = self.assertEqual
@@ -231,32 +231,20 @@ class TestMessageAPI(TestEmailBase):
msg.set_payload('begin 666 -\n+:&5L;&\\@=V]R;&0 \n \nend\n')
for cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
msg['content-transfer-encoding'] = cte
- eq(msg.get_payload(decode=True), 'hello world')
+ eq(msg.get_payload(decode=True), b'hello world')
# Now try some bogus data
msg.set_payload('foo')
- eq(msg.get_payload(decode=True), 'foo')
+ eq(msg.get_payload(decode=True), b'foo')
- def test_decode_bogus_uu_payload_quietly(self):
+ def test_get_payload_n_raises_on_non_multipart(self):
msg = Message()
- msg.set_payload('begin 664 foo.txt\n%<W1F=0000H \n \nend\n')
- msg['Content-Transfer-Encoding'] = 'x-uuencode'
- old_stderr = sys.stderr
- try:
- sys.stderr = sfp = StringIO()
- # We don't care about the payload
- msg.get_payload(decode=True)
- finally:
- sys.stderr = old_stderr
- self.assertEqual(sfp.getvalue(), '')
+ self.assertRaises(TypeError, msg.get_payload, 1)
def test_decoded_generator(self):
eq = self.assertEqual
msg = self._msgobj('msg_07.txt')
- fp = openfile('msg_17.txt')
- try:
+ with openfile('msg_17.txt') as fp:
text = fp.read()
- finally:
- fp.close()
s = StringIO()
g = DecodedGenerator(s)
g.flatten(msg)
@@ -275,24 +263,12 @@ class TestMessageAPI(TestEmailBase):
self.assertTrue('TO' in msg)
def test_as_string(self):
- eq = self.assertEqual
+ eq = self.ndiffAssertEqual
msg = self._msgobj('msg_01.txt')
- fp = openfile('msg_01.txt')
- try:
- # BAW 30-Mar-2009 Evil be here. So, the generator is broken with
- # respect to long line breaking. It's also not idempotent when a
- # header from a parsed message is continued with tabs rather than
- # spaces. Before we fixed bug 1974 it was reversedly broken,
- # i.e. headers that were continued with spaces got continued with
- # tabs. For Python 2.x there's really no good fix and in Python
- # 3.x all this stuff is re-written to be right(er). Chris Withers
- # convinced me that using space as the default continuation
- # character is less bad for more applications.
- text = fp.read().replace('\t', ' ')
- finally:
- fp.close()
- eq(text, msg.as_string())
- fullrepr = str(msg)
+ with openfile('msg_01.txt') as fp:
+ text = fp.read()
+ eq(text, str(msg))
+ fullrepr = msg.as_string(unixfrom=True)
lines = fullrepr.split('\n')
self.assertTrue(lines[0].startswith('From '))
eq(text, NL.join(lines[1:]))
@@ -371,12 +347,13 @@ class TestMessageAPI(TestEmailBase):
"Content-Type: foo; bar*0=\"baz\\\"foobar\"; bar*1=\"\\\"baz\"")
self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')
- def test_has_key(self):
+ def test_field_containment(self):
+ unless = self.assertTrue
msg = email.message_from_string('Header: exists')
- self.assertTrue(msg.has_key('header'))
- self.assertTrue(msg.has_key('Header'))
- self.assertTrue(msg.has_key('HEADER'))
- self.assertFalse(msg.has_key('headeri'))
+ unless('header' in msg)
+ unless('Header' in msg)
+ unless('HEADER' in msg)
+ self.assertFalse('headerx' in msg)
def test_set_param(self):
eq = self.assertEqual
@@ -418,6 +395,17 @@ class TestMessageAPI(TestEmailBase):
msg.del_param('filename', 'content-disposition')
self.assertEqual(msg['content-disposition'], 'attachment')
+ def test_del_param_on_nonexistent_header(self):
+ msg = Message()
+ msg.del_param('filename', 'content-disposition')
+
+ def test_del_nonexistent_param(self):
+ msg = Message()
+ msg.add_header('Content-Type', 'text/plain', charset='utf-8')
+ existing_header = msg['Content-Type']
+ msg.del_param('foobar', header='Content-Type')
+ self.assertEqual(msg['Content-Type'], 'text/plain; charset="utf-8"')
+
def test_set_type(self):
eq = self.assertEqual
msg = Message()
@@ -545,30 +533,117 @@ class TestMessageAPI(TestEmailBase):
msg['content-type'] = 'audio/x-midi'
msg['content-transfer-encoding'] = 'base64'
msg.set_payload(x)
+ self.assertEqual(msg.get_payload(decode=True),
+ bytes(x, 'raw-unicode-escape'))
+
+ def test_broken_unicode_payload(self):
+ # This test improves coverage but is not a compliance test.
+ # The behavior in this situation is currently undefined by the API.
+ x = 'this is a br\xf6ken thing to do'
+ msg = Message()
+ msg['content-type'] = 'text/plain'
+ msg['content-transfer-encoding'] = '8bit'
+ msg.set_payload(x)
+ self.assertEqual(msg.get_payload(decode=True),
+ bytes(x, 'raw-unicode-escape'))
+
+ def test_questionable_bytes_payload(self):
+ # This test improves coverage but is not a compliance test,
+ # since it involves poking inside the black box.
+ x = 'this is a quéstionable thing to do'.encode('utf-8')
+ msg = Message()
+ msg['content-type'] = 'text/plain; charset="utf-8"'
+ msg['content-transfer-encoding'] = '8bit'
+ msg._payload = x
self.assertEqual(msg.get_payload(decode=True), x)
- def test_get_content_charset(self):
+ # Issue 1078919
+ def test_ascii_add_header(self):
msg = Message()
- msg.set_charset('us-ascii')
- self.assertEqual('us-ascii', msg.get_content_charset())
- msg.set_charset(u'us-ascii')
- self.assertEqual('us-ascii', msg.get_content_charset())
+ msg.add_header('Content-Disposition', 'attachment',
+ filename='bud.gif')
+ self.assertEqual('attachment; filename="bud.gif"',
+ msg['Content-Disposition'])
+
+ def test_noascii_add_header(self):
+ msg = Message()
+ msg.add_header('Content-Disposition', 'attachment',
+ filename="Fußballer.ppt")
+ self.assertEqual(
+ 'attachment; filename*=utf-8\'\'Fu%C3%9Fballer.ppt',
+ msg['Content-Disposition'])
+
+ def test_nonascii_add_header_via_triple(self):
+ msg = Message()
+ msg.add_header('Content-Disposition', 'attachment',
+ filename=('iso-8859-1', '', 'Fußballer.ppt'))
+ self.assertEqual(
+ 'attachment; filename*=iso-8859-1\'\'Fu%DFballer.ppt',
+ msg['Content-Disposition'])
+
+ def test_ascii_add_header_with_tspecial(self):
+ msg = Message()
+ msg.add_header('Content-Disposition', 'attachment',
+ filename="windows [filename].ppt")
+ self.assertEqual(
+ 'attachment; filename="windows [filename].ppt"',
+ msg['Content-Disposition'])
+
+ def test_nonascii_add_header_with_tspecial(self):
+ msg = Message()
+ msg.add_header('Content-Disposition', 'attachment',
+ filename="Fußballer [filename].ppt")
+ self.assertEqual(
+ "attachment; filename*=utf-8''Fu%C3%9Fballer%20%5Bfilename%5D.ppt",
+ msg['Content-Disposition'])
+
+ def test_add_header_with_name_only_param(self):
+ msg = Message()
+ msg.add_header('Content-Disposition', 'inline', foo_bar=None)
+ self.assertEqual("inline; foo-bar", msg['Content-Disposition'])
+
+ def test_add_header_with_no_value(self):
+ msg = Message()
+ msg.add_header('X-Status', None)
+ self.assertEqual('', msg['X-Status'])
# Issue 5871: reject an attempt to embed a header inside a header value
# (header injection attack).
def test_embeded_header_via_Header_rejected(self):
msg = Message()
msg['Dummy'] = Header('dummy\nX-Injected-Header: test')
- self.assertRaises(Errors.HeaderParseError, msg.as_string)
+ self.assertRaises(errors.HeaderParseError, msg.as_string)
def test_embeded_header_via_string_rejected(self):
msg = Message()
msg['Dummy'] = 'dummy\nX-Injected-Header: test'
- self.assertRaises(Errors.HeaderParseError, msg.as_string)
-
+ self.assertRaises(errors.HeaderParseError, msg.as_string)
+
+ def test_unicode_header_defaults_to_utf8_encoding(self):
+ # Issue 14291
+ m = MIMEText('abc\n')
+ m['Subject'] = 'É test'
+ self.assertEqual(str(m),textwrap.dedent("""\
+ Content-Type: text/plain; charset="us-ascii"
+ MIME-Version: 1.0
+ Content-Transfer-Encoding: 7bit
+ Subject: =?utf-8?q?=C3=89_test?=
+
+ abc
+ """))
-# Test the email.Encoders module
+# Test the email.encoders module
class TestEncoders(unittest.TestCase):
+
+ def test_EncodersEncode_base64(self):
+ with openfile('PyBanner048.gif', 'rb') as fp:
+ bindata = fp.read()
+ mimed = email.mime.image.MIMEImage(bindata)
+ base64ed = mimed.get_payload()
+ # the transfer-encoded body lines should all be <=76 characters
+ lines = base64ed.split('\n')
+ self.assertLessEqual(max([ len(x) for x in lines ]), 76)
+
def test_encode_empty_payload(self):
eq = self.assertEqual
msg = Message()
@@ -592,12 +667,15 @@ class TestEncoders(unittest.TestCase):
# whose output character set is 7bit gets a transfer-encoding
# of 7bit.
eq = self.assertEqual
- msg = email.MIMEText.MIMEText('\xca\xb8', _charset='euc-jp')
+ msg = MIMEText('文', _charset='euc-jp')
eq(msg['content-transfer-encoding'], '7bit')
# Test long header wrapping
class TestLongHeaders(TestEmailBase):
+
+ maxDiff = None
+
def test_split_long_continuation(self):
eq = self.ndiffAssertEqual
msg = email.message_from_string("""\
@@ -612,8 +690,8 @@ test
g.flatten(msg)
eq(sfp.getvalue(), """\
Subject: bug demonstration
- 12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
- more text
+\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
+\tmore text
test
""")
@@ -629,7 +707,7 @@ bug demonstration
bug demonstration
\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
\tmore text""")
- h = Header(hstr)
+ h = Header(hstr.replace('\t', ' '))
eq(h.encode(), """\
bug demonstration
12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
@@ -640,9 +718,20 @@ bug demonstration
g = Charset("iso-8859-1")
cz = Charset("iso-8859-2")
utf8 = Charset("utf-8")
- g_head = "Die Mieter treten hier ein werden mit einem Foerderband komfortabel den Korridor entlang, an s\xfcdl\xfcndischen Wandgem\xe4lden vorbei, gegen die rotierenden Klingen bef\xf6rdert. "
- cz_head = "Finan\xe8ni metropole se hroutily pod tlakem jejich d\xf9vtipu.. "
- utf8_head = u"\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066\u3044\u307e\u3059\u3002".encode("utf-8")
+ g_head = (b'Die Mieter treten hier ein werden mit einem Foerderband '
+ b'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
+ b'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
+ b'bef\xf6rdert. ')
+ cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
+ b'd\xf9vtipu.. ')
+ utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
+ '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
+ '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
+ '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
+ '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
+ 'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
+ 'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
+ '\u3044\u307e\u3059\u3002')
h = Header(g_head, g, header_name='Subject')
h.append(cz_head, cz)
h.append(utf8_head, utf8)
@@ -652,31 +741,31 @@ bug demonstration
g = Generator(sfp)
g.flatten(msg)
eq(sfp.getvalue(), """\
-Subject: =?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerd?=
- =?iso-8859-1?q?erband_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndi?=
- =?iso-8859-1?q?schen_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Kling?=
- =?iso-8859-1?q?en_bef=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_met?=
- =?iso-8859-2?q?ropole_se_hroutily_pod_tlakem_jejich_d=F9vtipu=2E=2E_?=
- =?utf-8?b?5q2j56K644Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE?=
- =?utf-8?b?44G+44Gb44KT44CC5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB?=
- =?utf-8?b?44GC44Go44Gv44Gn44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CM?=
- =?utf-8?q?Wenn_ist_das_Nunstuck_git_und_Slotermeyer=3F_Ja!_Beiherhund_das?=
- =?utf-8?b?IE9kZXIgZGllIEZsaXBwZXJ3YWxkdCBnZXJzcHV0LuOAjeOBqOiogOOBow==?=
- =?utf-8?b?44Gm44GE44G+44GZ44CC?=
+Subject: =?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderb?=
+ =?iso-8859-1?q?and_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen?=
+ =?iso-8859-1?q?_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef?=
+ =?iso-8859-1?q?=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hrouti?=
+ =?iso-8859-2?q?ly_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
+ =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC5LiA?=
+ =?utf-8?b?6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn44Gf44KJ?=
+ =?utf-8?b?44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFzIE51bnN0dWNr?=
+ =?utf-8?b?IGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5kIGRhcyBPZGVyIGRp?=
+ =?utf-8?b?ZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIDjgaPjgabjgYTjgb7jgZk=?=
+ =?utf-8?b?44CC?=
""")
- eq(h.encode(), """\
-=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerd?=
- =?iso-8859-1?q?erband_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndi?=
- =?iso-8859-1?q?schen_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Kling?=
- =?iso-8859-1?q?en_bef=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_met?=
- =?iso-8859-2?q?ropole_se_hroutily_pod_tlakem_jejich_d=F9vtipu=2E=2E_?=
- =?utf-8?b?5q2j56K644Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE?=
- =?utf-8?b?44G+44Gb44KT44CC5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB?=
- =?utf-8?b?44GC44Go44Gv44Gn44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CM?=
- =?utf-8?q?Wenn_ist_das_Nunstuck_git_und_Slotermeyer=3F_Ja!_Beiherhund_das?=
- =?utf-8?b?IE9kZXIgZGllIEZsaXBwZXJ3YWxkdCBnZXJzcHV0LuOAjeOBqOiogOOBow==?=
- =?utf-8?b?44Gm44GE44G+44GZ44CC?=""")
+ eq(h.encode(maxlinelen=76), """\
+=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerde?=
+ =?iso-8859-1?q?rband_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndis?=
+ =?iso-8859-1?q?chen_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klinge?=
+ =?iso-8859-1?q?n_bef=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se?=
+ =?iso-8859-2?q?_hroutily_pod_tlakem_jejich_d=F9vtipu=2E=2E_?=
+ =?utf-8?b?5q2j56K644Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb?=
+ =?utf-8?b?44KT44CC5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go?=
+ =?utf-8?b?44Gv44Gn44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBp?=
+ =?utf-8?b?c3QgZGFzIE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWlo?=
+ =?utf-8?b?ZXJodW5kIGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI0=?=
+ =?utf-8?b?44Go6KiA44Gj44Gm44GE44G+44GZ44CC?=""")
def test_long_header_encode(self):
eq = self.ndiffAssertEqual
@@ -687,7 +776,7 @@ Subject: =?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerd?=
wasnipoop; giraffes="very-long-necked-animals";
spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
- def test_long_header_encode_with_tab_continuation(self):
+ def test_long_header_encode_with_tab_continuation_is_just_a_hint(self):
eq = self.ndiffAssertEqual
h = Header('wasnipoop; giraffes="very-long-necked-animals"; '
'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
@@ -695,8 +784,32 @@ wasnipoop; giraffes="very-long-necked-animals";
continuation_ws='\t')
eq(h.encode(), '''\
wasnipoop; giraffes="very-long-necked-animals";
+ spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
+
+ def test_long_header_encode_with_tab_continuation(self):
+ eq = self.ndiffAssertEqual
+ h = Header('wasnipoop; giraffes="very-long-necked-animals";\t'
+ 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
+ header_name='X-Foobar-Spoink-Defrobnit',
+ continuation_ws='\t')
+ eq(h.encode(), '''\
+wasnipoop; giraffes="very-long-necked-animals";
\tspooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
+ def test_header_encode_with_different_output_charset(self):
+ h = Header('文', 'euc-jp')
+ self.assertEqual(h.encode(), "=?iso-2022-jp?b?GyRCSjgbKEI=?=")
+
+ def test_long_header_encode_with_different_output_charset(self):
+ h = Header(b'test-ja \xa4\xd8\xc5\xea\xb9\xc6\xa4\xb5\xa4\xec\xa4'
+ b'\xbf\xa5\xe1\xa1\xbc\xa5\xeb\xa4\xcf\xbb\xca\xb2\xf1\xbc\xd4'
+ b'\xa4\xce\xbe\xb5\xc7\xa7\xa4\xf2\xc2\xd4\xa4\xc3\xa4\xc6\xa4'
+ b'\xa4\xa4\xde\xa4\xb9'.decode('euc-jp'), 'euc-jp')
+ res = """\
+=?iso-2022-jp?b?dGVzdC1qYSAbJEIkWEVqOUYkNSRsJD8lYSE8JWskTztKMnE8VCROPjUbKEI=?=
+ =?iso-2022-jp?b?GyRCRyckckJUJEMkRiQkJF4kORsoQg==?="""
+ self.assertEqual(h.encode(), res)
+
def test_header_splitter(self):
eq = self.ndiffAssertEqual
msg = MIMEText('')
@@ -721,7 +834,7 @@ X-Foobar-Spoink-Defrobnit: wasnipoop; giraffes="very-long-necked-animals";
eq = self.ndiffAssertEqual
msg = Message()
msg['From'] = 'test@dom.ain'
- msg['References'] = SPACE.join(['<%d@dom.ain>' % i for i in range(10)])
+ msg['References'] = SPACE.join('<%d@dom.ain>' % i for i in range(10))
msg.set_payload('Test')
sfp = StringIO()
g = Generator(sfp)
@@ -733,12 +846,167 @@ References: <0@dom.ain> <1@dom.ain> <2@dom.ain> <3@dom.ain> <4@dom.ain>
Test""")
+ def test_last_split_chunk_does_not_fit(self):
+ eq = self.ndiffAssertEqual
+ h = Header('Subject: the first part of this is short, but_the_second'
+ '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line'
+ '_all_by_itself')
+ eq(h.encode(), """\
+Subject: the first part of this is short,
+ but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""")
+
+ def test_splittable_leading_char_followed_by_overlong_unsplitable(self):
+ eq = self.ndiffAssertEqual
+ h = Header(', but_the_second'
+ '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line'
+ '_all_by_itself')
+ eq(h.encode(), """\
+,
+ but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""")
+
+ def test_multiple_splittable_leading_char_followed_by_overlong_unsplitable(self):
+ eq = self.ndiffAssertEqual
+ h = Header(', , but_the_second'
+ '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line'
+ '_all_by_itself')
+ eq(h.encode(), """\
+, ,
+ but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""")
+
+ def test_trailing_splitable_on_overlong_unsplitable(self):
+ eq = self.ndiffAssertEqual
+ h = Header('this_part_does_not_fit_within_maxlinelen_and_thus_should_'
+ 'be_on_a_line_all_by_itself;')
+ eq(h.encode(), "this_part_does_not_fit_within_maxlinelen_and_thus_should_"
+ "be_on_a_line_all_by_itself;")
+
+ def test_trailing_splitable_on_overlong_unsplitable_with_leading_splitable(self):
+ eq = self.ndiffAssertEqual
+ h = Header('; '
+ 'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
+ 'be_on_a_line_all_by_itself; ')
+ eq(h.encode(), """\
+;
+ this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """)
+
+ def test_long_header_with_multiple_sequential_split_chars(self):
+ eq = self.ndiffAssertEqual
+ h = Header('This is a long line that has two whitespaces in a row. '
+ 'This used to cause truncation of the header when folded')
+ eq(h.encode(), """\
+This is a long line that has two whitespaces in a row. This used to cause
+ truncation of the header when folded""")
+
+ def test_splitter_split_on_punctuation_only_if_fws(self):
+ eq = self.ndiffAssertEqual
+ h = Header('thisverylongheaderhas;semicolons;and,commas,but'
+ 'they;arenotlegal;fold,points')
+ eq(h.encode(), "thisverylongheaderhas;semicolons;and,commas,butthey;"
+ "arenotlegal;fold,points")
+
+ def test_leading_splittable_in_the_middle_just_before_overlong_last_part(self):
+ eq = self.ndiffAssertEqual
+ h = Header('this is a test where we need to have more than one line '
+ 'before; our final line that is just too big to fit;; '
+ 'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
+ 'be_on_a_line_all_by_itself;')
+ eq(h.encode(), """\
+this is a test where we need to have more than one line before;
+ our final line that is just too big to fit;;
+ this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself;""")
+
+ def test_overlong_last_part_followed_by_split_point(self):
+ eq = self.ndiffAssertEqual
+ h = Header('this_part_does_not_fit_within_maxlinelen_and_thus_should_'
+ 'be_on_a_line_all_by_itself ')
+ eq(h.encode(), "this_part_does_not_fit_within_maxlinelen_and_thus_"
+ "should_be_on_a_line_all_by_itself ")
+
+ def test_multiline_with_overlong_parts_separated_by_two_split_points(self):
+ eq = self.ndiffAssertEqual
+ h = Header('this_is_a__test_where_we_need_to_have_more_than_one_line_'
+ 'before_our_final_line_; ; '
+ 'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
+ 'be_on_a_line_all_by_itself; ')
+ eq(h.encode(), """\
+this_is_a__test_where_we_need_to_have_more_than_one_line_before_our_final_line_;
+ ;
+ this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """)
+
+ def test_multiline_with_overlong_last_part_followed_by_split_point(self):
+ eq = self.ndiffAssertEqual
+ h = Header('this is a test where we need to have more than one line '
+ 'before our final line; ; '
+ 'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
+ 'be_on_a_line_all_by_itself; ')
+ eq(h.encode(), """\
+this is a test where we need to have more than one line before our final line;
+ ;
+ this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """)
+
+ def test_long_header_with_whitespace_runs(self):
+ eq = self.ndiffAssertEqual
+ msg = Message()
+ msg['From'] = 'test@dom.ain'
+ msg['References'] = SPACE.join(['<foo@dom.ain> '] * 10)
+ msg.set_payload('Test')
+ sfp = StringIO()
+ g = Generator(sfp)
+ g.flatten(msg)
+ eq(sfp.getvalue(), """\
+From: test@dom.ain
+References: <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain>
+ <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain>
+ <foo@dom.ain> <foo@dom.ain>\x20\x20
+
+Test""")
+
+ def test_long_run_with_semi_header_splitter(self):
+ eq = self.ndiffAssertEqual
+ msg = Message()
+ msg['From'] = 'test@dom.ain'
+ msg['References'] = SPACE.join(['<foo@dom.ain>'] * 10) + '; abc'
+ msg.set_payload('Test')
+ sfp = StringIO()
+ g = Generator(sfp)
+ g.flatten(msg)
+ eq(sfp.getvalue(), """\
+From: test@dom.ain
+References: <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain>
+ <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain>
+ <foo@dom.ain>; abc
+
+Test""")
+
+ def test_splitter_split_on_punctuation_only_if_fws(self):
+ eq = self.ndiffAssertEqual
+ msg = Message()
+ msg['From'] = 'test@dom.ain'
+ msg['References'] = ('thisverylongheaderhas;semicolons;and,commas,but'
+ 'they;arenotlegal;fold,points')
+ msg.set_payload('Test')
+ sfp = StringIO()
+ g = Generator(sfp)
+ g.flatten(msg)
+ # XXX the space after the header should not be there.
+ eq(sfp.getvalue(), """\
+From: test@dom.ain
+References:\x20
+ thisverylongheaderhas;semicolons;and,commas,butthey;arenotlegal;fold,points
+
+Test""")
+
def test_no_split_long_header(self):
eq = self.ndiffAssertEqual
hstr = 'References: ' + 'x' * 80
- h = Header(hstr, continuation_ws='\t')
+ h = Header(hstr)
+ # These come on two lines because Headers are really field value
+ # classes and don't really know about their field names.
eq(h.encode(), """\
-References: xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx""")
+References:
+ xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx""")
+ h = Header('x' * 80)
+ eq(h.encode(), 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx')
def test_splitting_multiple_long_lines(self):
eq = self.ndiffAssertEqual
@@ -750,17 +1018,17 @@ from babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org
h = Header(hstr, continuation_ws='\t')
eq(h.encode(), """\
from babylon.socal-raves.org (localhost [127.0.0.1]);
-\tby babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
-\tfor <mailman-admin@babylon.socal-raves.org>;
-\tSat, 2 Feb 2002 17:00:06 -0800 (PST)
+ by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
+ for <mailman-admin@babylon.socal-raves.org>;
+ Sat, 2 Feb 2002 17:00:06 -0800 (PST)
\tfrom babylon.socal-raves.org (localhost [127.0.0.1]);
-\tby babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
-\tfor <mailman-admin@babylon.socal-raves.org>;
-\tSat, 2 Feb 2002 17:00:06 -0800 (PST)
+ by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
+ for <mailman-admin@babylon.socal-raves.org>;
+ Sat, 2 Feb 2002 17:00:06 -0800 (PST)
\tfrom babylon.socal-raves.org (localhost [127.0.0.1]);
-\tby babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
-\tfor <mailman-admin@babylon.socal-raves.org>;
-\tSat, 2 Feb 2002 17:00:06 -0800 (PST)""")
+ by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
+ for <mailman-admin@babylon.socal-raves.org>;
+ Sat, 2 Feb 2002 17:00:06 -0800 (PST)""")
def test_splitting_first_line_only_is_long(self):
eq = self.ndiffAssertEqual
@@ -773,7 +1041,7 @@ from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93] helo=cthulhu.
continuation_ws='\t')
eq(h.encode(), """\
from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93]
-\thelo=cthulhu.gerg.ca)
+ helo=cthulhu.gerg.ca)
\tby kronos.mems-exchange.org with esmtp (Exim 4.05)
\tid 17k4h5-00034i-00
\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400""")
@@ -784,29 +1052,52 @@ from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93]
h = Header('Britische Regierung gibt', 'iso-8859-1',
header_name='Subject')
h.append('gr\xfcnes Licht f\xfcr Offshore-Windkraftprojekte')
+ eq(h.encode(maxlinelen=76), """\
+=?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=
+ =?iso-8859-1?q?hore-Windkraftprojekte?=""")
msg['Subject'] = h
- eq(msg.as_string(), """\
-Subject: =?iso-8859-1?q?Britische_Regierung_gibt?= =?iso-8859-1?q?gr=FCnes?=
- =?iso-8859-1?q?_Licht_f=FCr_Offshore-Windkraftprojekte?=
+ eq(msg.as_string(maxheaderlen=76), """\
+Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=
+ =?iso-8859-1?q?hore-Windkraftprojekte?=
+
+""")
+ eq(msg.as_string(maxheaderlen=0), """\
+Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offshore-Windkraftprojekte?=
""")
def test_long_8bit_header_no_charset(self):
eq = self.ndiffAssertEqual
msg = Message()
- msg['Reply-To'] = 'Britische Regierung gibt gr\xfcnes Licht f\xfcr Offshore-Windkraftprojekte <a-very-long-address@example.com>'
- eq(msg.as_string(), """\
-Reply-To: Britische Regierung gibt gr\xfcnes Licht f\xfcr Offshore-Windkraftprojekte <a-very-long-address@example.com>
+ header_string = ('Britische Regierung gibt gr\xfcnes Licht '
+ 'f\xfcr Offshore-Windkraftprojekte '
+ '<a-very-long-address@example.com>')
+ msg['Reply-To'] = header_string
+ eq(msg.as_string(maxheaderlen=78), """\
+Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?=
+ =?utf-8?q?hore-Windkraftprojekte_=3Ca-very-long-address=40example=2Ecom=3E?=
+
+""")
+ msg = Message()
+ msg['Reply-To'] = Header(header_string,
+ header_name='Reply-To')
+ eq(msg.as_string(maxheaderlen=78), """\
+Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?=
+ =?utf-8?q?hore-Windkraftprojekte_=3Ca-very-long-address=40example=2Ecom=3E?=
""")
def test_long_to_header(self):
eq = self.ndiffAssertEqual
- to = '"Someone Test #A" <someone@eecs.umich.edu>,<someone@eecs.umich.edu>,"Someone Test #B" <someone@umich.edu>, "Someone Test #C" <someone@eecs.umich.edu>, "Someone Test #D" <someone@eecs.umich.edu>'
+ to = ('"Someone Test #A" <someone@eecs.umich.edu>,'
+ '<someone@eecs.umich.edu>, '
+ '"Someone Test #B" <someone@umich.edu>, '
+ '"Someone Test #C" <someone@eecs.umich.edu>, '
+ '"Someone Test #D" <someone@eecs.umich.edu>')
msg = Message()
msg['To'] = to
- eq(msg.as_string(0), '''\
-To: "Someone Test #A" <someone@eecs.umich.edu>, <someone@eecs.umich.edu>,
+ eq(msg.as_string(maxheaderlen=78), '''\
+To: "Someone Test #A" <someone@eecs.umich.edu>,<someone@eecs.umich.edu>,
"Someone Test #B" <someone@umich.edu>,
"Someone Test #C" <someone@eecs.umich.edu>,
"Someone Test #D" <someone@eecs.umich.edu>
@@ -818,7 +1109,7 @@ To: "Someone Test #A" <someone@eecs.umich.edu>, <someone@eecs.umich.edu>,
s = 'This is an example of string which has almost the limit of header length.'
h = Header(s)
h.append('Add another line.')
- eq(h.encode(), """\
+ eq(h.encode(maxlinelen=76), """\
This is an example of string which has almost the limit of header length.
Add another line.""")
@@ -833,24 +1124,30 @@ This is an example of string which has almost the limit of header length.
def test_long_field_name(self):
eq = self.ndiffAssertEqual
fn = 'X-Very-Very-Very-Long-Header-Name'
- gs = "Die Mieter treten hier ein werden mit einem Foerderband komfortabel den Korridor entlang, an s\xfcdl\xfcndischen Wandgem\xe4lden vorbei, gegen die rotierenden Klingen bef\xf6rdert. "
+ gs = ('Die Mieter treten hier ein werden mit einem Foerderband '
+ 'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
+ 'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
+ 'bef\xf6rdert. ')
h = Header(gs, 'iso-8859-1', header_name=fn)
# BAW: this seems broken because the first line is too long
- eq(h.encode(), """\
-=?iso-8859-1?q?Die_Mieter_treten_hier_?=
- =?iso-8859-1?q?ein_werden_mit_einem_Foerderband_komfortabel_den_Korridor_?=
- =?iso-8859-1?q?entlang=2C_an_s=FCdl=FCndischen_Wandgem=E4lden_vorbei=2C_g?=
- =?iso-8859-1?q?egen_die_rotierenden_Klingen_bef=F6rdert=2E_?=""")
+ eq(h.encode(maxlinelen=76), """\
+=?iso-8859-1?q?Die_Mieter_treten_hier_e?=
+ =?iso-8859-1?q?in_werden_mit_einem_Foerderband_komfortabel_den_Korridor_e?=
+ =?iso-8859-1?q?ntlang=2C_an_s=FCdl=FCndischen_Wandgem=E4lden_vorbei=2C_ge?=
+ =?iso-8859-1?q?gen_die_rotierenden_Klingen_bef=F6rdert=2E_?=""")
def test_long_received_header(self):
- h = 'from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP; Wed, 05 Mar 2003 18:10:18 -0700'
+ h = ('from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) '
+ 'by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP; '
+ 'Wed, 05 Mar 2003 18:10:18 -0700')
msg = Message()
msg['Received-1'] = Header(h, continuation_ws='\t')
msg['Received-2'] = h
- self.assertEqual(msg.as_string(), """\
+ # This should be splitting on spaces not semicolons.
+ self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\
Received-1: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by
-\throthgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
-\tWed, 05 Mar 2003 18:10:18 -0700
+ hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
+ Wed, 05 Mar 2003 18:10:18 -0700
Received-2: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by
hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
Wed, 05 Mar 2003 18:10:18 -0700
@@ -858,16 +1155,21 @@ Received-2: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by
""")
def test_string_headerinst_eq(self):
- h = '<15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David Bremner\'s message of "Thu, 6 Mar 2003 13:58:21 +0100")'
+ h = ('<15975.17901.207240.414604@sgigritzmann1.mathematik.'
+ 'tu-muenchen.de> (David Bremner\'s message of '
+ '"Thu, 6 Mar 2003 13:58:21 +0100")')
msg = Message()
- msg['Received'] = Header(h, header_name='Received',
- continuation_ws='\t')
- msg['Received'] = h
- self.ndiffAssertEqual(msg.as_string(), """\
-Received: <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de>
-\t(David Bremner's message of "Thu, 6 Mar 2003 13:58:21 +0100")
-Received: <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de>
- (David Bremner's message of "Thu, 6 Mar 2003 13:58:21 +0100")
+ msg['Received-1'] = Header(h, header_name='Received-1',
+ continuation_ws='\t')
+ msg['Received-2'] = h
+ # XXX The space after the ':' should not be there.
+ self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\
+Received-1:\x20
+ <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David
+ Bremner's message of \"Thu, 6 Mar 2003 13:58:21 +0100\")
+Received-2:\x20
+ <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David
+ Bremner's message of \"Thu, 6 Mar 2003 13:58:21 +0100\")
""")
@@ -875,25 +1177,34 @@ Received: <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de>
eq = self.ndiffAssertEqual
msg = Message()
t = """\
- iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
+iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp"""
msg['Face-1'] = t
msg['Face-2'] = Header(t, header_name='Face-2')
- eq(msg.as_string(), """\
-Face-1: iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
+ msg['Face-3'] = ' ' + t
+ # XXX This splitting is all wrong.  The first value line should be snug
+ # against the field name or there should be no space after the header.
+ eq(msg.as_string(maxheaderlen=78), """\
+Face-1:\x20
+ iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
-Face-2: iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
+Face-2:\x20
+ iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
+ locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
+Face-3:\x20
+ iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
""")
def test_another_long_multiline_header(self):
eq = self.ndiffAssertEqual
- m = '''\
-Received: from siimage.com ([172.25.1.3]) by zima.siliconimage.com with Microsoft SMTPSVC(5.0.2195.4905);
- Wed, 16 Oct 2002 07:41:11 -0700'''
+ m = ('Received: from siimage.com '
+ '([172.25.1.3]) by zima.siliconimage.com with '
+ 'Microsoft SMTPSVC(5.0.2195.4905); '
+ 'Wed, 16 Oct 2002 07:41:11 -0700')
msg = email.message_from_string(m)
- eq(msg.as_string(), '''\
+ eq(msg.as_string(maxheaderlen=78), '''\
Received: from siimage.com ([172.25.1.3]) by zima.siliconimage.com with
Microsoft SMTPSVC(5.0.2195.4905); Wed, 16 Oct 2002 07:41:11 -0700
@@ -901,20 +1212,35 @@ Received: from siimage.com ([172.25.1.3]) by zima.siliconimage.com with
def test_long_lines_with_different_header(self):
eq = self.ndiffAssertEqual
- h = """\
-List-Unsubscribe: <https://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
- <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>"""
+ h = ('List-Unsubscribe: '
+ '<http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,'
+ ' <mailto:spamassassin-talk-request@lists.sourceforge.net'
+ '?subject=unsubscribe>')
msg = Message()
msg['List'] = h
msg['List'] = Header(h, header_name='List')
- eq(msg.as_string(), """\
-List: List-Unsubscribe: <https://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
- <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>
-List: List-Unsubscribe: <https://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
- <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>
+ eq(msg.as_string(maxheaderlen=78), """\
+List: List-Unsubscribe:
+ <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
+ <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>
+List: List-Unsubscribe:
+ <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
+ <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>
""")
+ def test_long_rfc2047_header_with_embedded_fws(self):
+ h = Header(textwrap.dedent("""\
+ We're going to pretend this header is in a non-ascii character set
+ \tto see if line wrapping with encoded words and embedded
+ folding white space works"""),
+ charset='utf-8',
+ header_name='Test')
+ self.assertEqual(h.encode()+'\n', textwrap.dedent("""\
+ =?utf-8?q?We=27re_going_to_pretend_this_header_is_in_a_non-ascii_chara?=
+ =?utf-8?q?cter_set?=
+ =?utf-8?q?_to_see_if_line_wrapping_with_encoded_words_and_embedded?=
+ =?utf-8?q?_folding_white_space_works?=""")+'\n')
# Test mangling of "From " lines in the body of a message
@@ -972,6 +1298,20 @@ Blah blah blah
self.assertEqual(len([1 for x in s.getvalue().split('\n')
if x.startswith('>From ')]), 2)
+ def test_mangled_from_with_bad_bytes(self):
+ source = textwrap.dedent("""\
+ Content-Type: text/plain; charset="utf-8"
+ MIME-Version: 1.0
+ Content-Transfer-Encoding: 8bit
+ From: aaa@bbb.org
+
+ """).encode('utf-8')
+ msg = email.message_from_bytes(source + b'From R\xc3\xb6lli\n')
+ b = BytesIO()
+ g = BytesGenerator(b, mangle_from_=True)
+ g.flatten(msg)
+ self.assertEqual(b.getvalue(), source + b'>From R\xc3\xb6lli\n')
+
# Test the basic MIMEAudio class
class TestMIMEAudio(unittest.TestCase):
@@ -982,11 +1322,8 @@ class TestMIMEAudio(unittest.TestCase):
# package. The trailing empty string on the .join() is significant
# since findfile() will do a dirname().
datadir = os.path.join(os.path.dirname(landmark), 'data', '')
- fp = open(findfile('audiotest.au', datadir), 'rb')
- try:
+ with open(findfile('audiotest.au', datadir), 'rb') as fp:
self._audiodata = fp.read()
- finally:
- fp.close()
self._au = MIMEAudio(self._audiodata)
def test_guess_minor_type(self):
@@ -994,7 +1331,8 @@ class TestMIMEAudio(unittest.TestCase):
def test_encoding(self):
payload = self._au.get_payload()
- self.assertEqual(base64.decodestring(payload), self._audiodata)
+ self.assertEqual(base64.decodebytes(bytes(payload, 'ascii')),
+ self._audiodata)
def test_checkSetMinor(self):
au = MIMEAudio(self._audiodata, 'fish')
@@ -1025,11 +1363,8 @@ class TestMIMEAudio(unittest.TestCase):
# Test the basic MIMEImage class
class TestMIMEImage(unittest.TestCase):
def setUp(self):
- fp = openfile('PyBanner048.gif')
- try:
+ with openfile('PyBanner048.gif', 'rb') as fp:
self._imgdata = fp.read()
- finally:
- fp.close()
self._im = MIMEImage(self._imgdata)
def test_guess_minor_type(self):
@@ -1037,7 +1372,8 @@ class TestMIMEImage(unittest.TestCase):
def test_encoding(self):
payload = self._im.get_payload()
- self.assertEqual(base64.decodestring(payload), self._imgdata)
+ self.assertEqual(base64.decodebytes(bytes(payload, 'ascii')),
+ self._imgdata)
def test_checkSetMinor(self):
im = MIMEImage(self._imgdata, 'fish')
@@ -1065,6 +1401,24 @@ class TestMIMEImage(unittest.TestCase):
+# Test the basic MIMEApplication class
+class TestMIMEApplication(unittest.TestCase):
+ def test_headers(self):
+ eq = self.assertEqual
+ msg = MIMEApplication(b'\xfa\xfb\xfc\xfd\xfe\xff')
+ eq(msg.get_content_type(), 'application/octet-stream')
+ eq(msg['content-transfer-encoding'], 'base64')
+
+ def test_body(self):
+ eq = self.assertEqual
+ bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff'
+ msg = MIMEApplication(bytesdata)
+ # whitespace in the cte encoded block is RFC-irrelevant.
+ eq(msg.get_payload().strip(), '+vv8/f7/')
+ eq(msg.get_payload(decode=True), bytesdata)
+
+
+
# Test the basic MIMEText class
class TestMIMEText(unittest.TestCase):
def setUp(self):
@@ -1090,29 +1444,31 @@ class TestMIMEText(unittest.TestCase):
eq(msg.get_charset().input_charset, 'us-ascii')
eq(msg['content-type'], 'text/plain; charset="us-ascii"')
- def test_7bit_unicode_input(self):
+ def test_7bit_input(self):
eq = self.assertEqual
- msg = MIMEText(u'hello there', _charset='us-ascii')
+ msg = MIMEText('hello there', _charset='us-ascii')
eq(msg.get_charset().input_charset, 'us-ascii')
eq(msg['content-type'], 'text/plain; charset="us-ascii"')
- def test_7bit_unicode_input_no_charset(self):
+ def test_7bit_input_no_charset(self):
eq = self.assertEqual
- msg = MIMEText(u'hello there')
+ msg = MIMEText('hello there')
eq(msg.get_charset(), 'us-ascii')
eq(msg['content-type'], 'text/plain; charset="us-ascii"')
self.assertTrue('hello there' in msg.as_string())
- def test_8bit_unicode_input(self):
- teststr = u'\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
+ def test_utf8_input(self):
+ teststr = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
eq = self.assertEqual
msg = MIMEText(teststr, _charset='utf-8')
eq(msg.get_charset().output_charset, 'utf-8')
eq(msg['content-type'], 'text/plain; charset="utf-8"')
eq(msg.get_payload(decode=True), teststr.encode('utf-8'))
- def test_8bit_unicode_input_no_charset(self):
- teststr = u'\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
+ @unittest.skip("can't fix because of backward compat in email5, "
+ "will fix in email6")
+ def test_utf8_input_no_charset(self):
+ teststr = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
self.assertRaises(UnicodeEncodeError, MIMEText, teststr)
@@ -1120,12 +1476,8 @@ class TestMIMEText(unittest.TestCase):
# Test complicated multipart/* messages
class TestMultipart(TestEmailBase):
def setUp(self):
- fp = openfile('PyBanner048.gif')
- try:
+ with openfile('PyBanner048.gif', 'rb') as fp:
data = fp.read()
- finally:
- fp.close()
-
container = MIMEBase('multipart', 'mixed', boundary='BOUNDARY')
image = MIMEImage(data, name='dingusfish.gif')
image.add_header('content-disposition', 'attachment',
@@ -1151,7 +1503,7 @@ This is the dingus fish.
sign = '-'
else:
sign = '+'
- tzoffset = ' %s%04d' % (sign, tzsecs // 36)
+ tzoffset = ' %s%04d' % (sign, tzsecs / 36)
container['Date'] = time.strftime(
'%a, %d %b %Y %H:%M:%S',
time.localtime(now)) + tzoffset
@@ -1418,7 +1770,7 @@ hello world
# parts.
msg = self._msgobj('msg_38.txt')
sfp = StringIO()
- Iterators._structure(msg, sfp)
+ iterators._structure(msg, sfp)
eq(sfp.getvalue(), """\
multipart/mixed
multipart/mixed
@@ -1436,7 +1788,7 @@ multipart/mixed
# parsed is closest to the spirit of RFC 2046
msg = self._msgobj('msg_39.txt')
sfp = StringIO()
- Iterators._structure(msg, sfp)
+ iterators._structure(msg, sfp)
eq(sfp.getvalue(), """\
multipart/mixed
multipart/mixed
@@ -1513,16 +1865,16 @@ class TestNonConformant(TestEmailBase):
unless(hasattr(inner, 'defects'))
self.assertEqual(len(inner.defects), 1)
unless(isinstance(inner.defects[0],
- Errors.StartBoundaryNotFoundDefect))
+ errors.StartBoundaryNotFoundDefect))
def test_multipart_no_boundary(self):
unless = self.assertTrue
msg = self._msgobj('msg_25.txt')
unless(isinstance(msg.get_payload(), str))
self.assertEqual(len(msg.defects), 2)
- unless(isinstance(msg.defects[0], Errors.NoBoundaryInMultipartDefect))
+ unless(isinstance(msg.defects[0], errors.NoBoundaryInMultipartDefect))
unless(isinstance(msg.defects[1],
- Errors.MultipartInvariantViolationDefect))
+ errors.MultipartInvariantViolationDefect))
def test_invalid_content_type(self):
eq = self.assertEqual
@@ -1578,9 +1930,9 @@ counter to RFC 2822, there's no separating newline here
msg = self._msgobj('msg_41.txt')
unless(hasattr(msg, 'defects'))
self.assertEqual(len(msg.defects), 2)
- unless(isinstance(msg.defects[0], Errors.NoBoundaryInMultipartDefect))
+ unless(isinstance(msg.defects[0], errors.NoBoundaryInMultipartDefect))
unless(isinstance(msg.defects[1],
- Errors.MultipartInvariantViolationDefect))
+ errors.MultipartInvariantViolationDefect))
def test_missing_start_boundary(self):
outer = self._msgobj('msg_42.txt')
@@ -1595,7 +1947,7 @@ counter to RFC 2822, there's no separating newline here
bad = outer.get_payload(1).get_payload(0)
self.assertEqual(len(bad.defects), 1)
self.assertTrue(isinstance(bad.defects[0],
- Errors.StartBoundaryNotFoundDefect))
+ errors.StartBoundaryNotFoundDefect))
def test_first_line_is_continuation_header(self):
eq = self.assertEqual
@@ -1605,46 +1957,49 @@ counter to RFC 2822, there's no separating newline here
eq(msg.get_payload(), 'Line 2\nLine 3')
eq(len(msg.defects), 1)
self.assertTrue(isinstance(msg.defects[0],
- Errors.FirstHeaderLineIsContinuationDefect))
+ errors.FirstHeaderLineIsContinuationDefect))
eq(msg.defects[0].line, ' Line 1\n')
-
# Test RFC 2047 header encoding and decoding
-class TestRFC2047(unittest.TestCase):
+class TestRFC2047(TestEmailBase):
def test_rfc2047_multiline(self):
eq = self.assertEqual
s = """Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz
foo bar =?mac-iceland?q?r=8Aksm=9Arg=8Cs?="""
dh = decode_header(s)
eq(dh, [
- ('Re:', None),
- ('r\x8aksm\x9arg\x8cs', 'mac-iceland'),
- ('baz foo bar', None),
- ('r\x8aksm\x9arg\x8cs', 'mac-iceland')])
- eq(str(make_header(dh)),
- """Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz foo bar
- =?mac-iceland?q?r=8Aksm=9Arg=8Cs?=""")
+ (b'Re:', None),
+ (b'r\x8aksm\x9arg\x8cs', 'mac-iceland'),
+ (b'baz foo bar', None),
+ (b'r\x8aksm\x9arg\x8cs', 'mac-iceland')])
+ header = make_header(dh)
+ eq(str(header),
+ 'Re: r\xe4ksm\xf6rg\xe5s baz foo bar r\xe4ksm\xf6rg\xe5s')
+ self.ndiffAssertEqual(header.encode(maxlinelen=76), """\
+Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz foo bar =?mac-iceland?q?r=8Aksm?=
+ =?mac-iceland?q?=9Arg=8Cs?=""")
def test_whitespace_eater_unicode(self):
eq = self.assertEqual
s = '=?ISO-8859-1?Q?Andr=E9?= Pirard <pirard@dom.ain>'
dh = decode_header(s)
- eq(dh, [('Andr\xe9', 'iso-8859-1'), ('Pirard <pirard@dom.ain>', None)])
- hu = unicode(make_header(dh)).encode('latin-1')
- eq(hu, 'Andr\xe9 Pirard <pirard@dom.ain>')
+ eq(dh, [(b'Andr\xe9', 'iso-8859-1'),
+ (b'Pirard <pirard@dom.ain>', None)])
+ header = str(make_header(dh))
+ eq(header, 'Andr\xe9 Pirard <pirard@dom.ain>')
def test_whitespace_eater_unicode_2(self):
eq = self.assertEqual
s = 'The =?iso-8859-1?b?cXVpY2sgYnJvd24gZm94?= jumped over the =?iso-8859-1?b?bGF6eSBkb2c=?='
dh = decode_header(s)
- eq(dh, [('The', None), ('quick brown fox', 'iso-8859-1'),
- ('jumped over the', None), ('lazy dog', 'iso-8859-1')])
- hu = make_header(dh).__unicode__()
- eq(hu, u'The quick brown fox jumped over the lazy dog')
+ eq(dh, [(b'The', None), (b'quick brown fox', 'iso-8859-1'),
+ (b'jumped over the', None), (b'lazy dog', 'iso-8859-1')])
+ hu = str(make_header(dh))
+ eq(hu, 'The quick brown fox jumped over the lazy dog')
- def test_rfc2047_without_whitespace(self):
+ def test_rfc2047_missing_whitespace(self):
s = 'Sm=?ISO-8859-1?B?9g==?=rg=?ISO-8859-1?B?5Q==?=sbord'
dh = decode_header(s)
self.assertEqual(dh, [(s, None)])
@@ -1652,15 +2007,15 @@ class TestRFC2047(unittest.TestCase):
def test_rfc2047_with_whitespace(self):
s = 'Sm =?ISO-8859-1?B?9g==?= rg =?ISO-8859-1?B?5Q==?= sbord'
dh = decode_header(s)
- self.assertEqual(dh, [('Sm', None), ('\xf6', 'iso-8859-1'),
- ('rg', None), ('\xe5', 'iso-8859-1'),
- ('sbord', None)])
+ self.assertEqual(dh, [(b'Sm', None), (b'\xf6', 'iso-8859-1'),
+ (b'rg', None), (b'\xe5', 'iso-8859-1'),
+ (b'sbord', None)])
def test_rfc2047_B_bad_padding(self):
s = '=?iso-8859-1?B?%s?='
data = [ # only test complete bytes
- ('dm==', 'v'), ('dm=', 'v'), ('dm', 'v'),
- ('dmk=', 'vi'), ('dmk', 'vi')
+ ('dm==', b'v'), ('dm=', b'v'), ('dm', b'v'),
+ ('dmk=', b'vi'), ('dmk', b'vi')
]
for q, a in data:
dh = decode_header(s % q)
@@ -1676,11 +2031,8 @@ class TestRFC2047(unittest.TestCase):
# Test the MIMEMessage class
class TestMIMEMessage(TestEmailBase):
def setUp(self):
- fp = openfile('msg_11.txt')
- try:
+ with openfile('msg_11.txt') as fp:
self._text = fp.read()
- finally:
- fp.close()
def test_type_error(self):
self.assertRaises(TypeError, MIMEMessage, 'a plain string')
@@ -1707,7 +2059,7 @@ class TestMIMEMessage(TestEmailBase):
msg2 = Message()
msg2['Subject'] = 'subpart 2'
r = MIMEMessage(msg1)
- self.assertRaises(Errors.MultipartConversionError, r.attach, msg2)
+ self.assertRaises(errors.MultipartConversionError, r.attach, msg2)
def test_generate(self):
# First craft the message to be encapsulated
@@ -1802,11 +2154,8 @@ Your message cannot be delivered to the following recipients:
def test_epilogue(self):
eq = self.ndiffAssertEqual
- fp = openfile('msg_21.txt')
- try:
+ with openfile('msg_21.txt') as fp:
text = fp.read()
- finally:
- fp.close()
msg = Message()
msg['From'] = 'aperson@dom.ain'
msg['To'] = 'bperson@dom.ain'
@@ -1860,11 +2209,8 @@ Two
def test_default_type(self):
eq = self.assertEqual
- fp = openfile('msg_30.txt')
- try:
+ with openfile('msg_30.txt') as fp:
msg = email.message_from_file(fp)
- finally:
- fp.close()
container1 = msg.get_payload(0)
eq(container1.get_default_type(), 'message/rfc822')
eq(container1.get_content_type(), 'message/rfc822')
@@ -1880,11 +2226,8 @@ Two
def test_default_type_with_explicit_container_type(self):
eq = self.assertEqual
- fp = openfile('msg_28.txt')
- try:
+ with openfile('msg_28.txt') as fp:
msg = email.message_from_file(fp)
- finally:
- fp.close()
container1 = msg.get_payload(0)
eq(container1.get_default_type(), 'message/rfc822')
eq(container1.get_content_type(), 'message/rfc822')
@@ -1992,20 +2335,20 @@ message 2
# should be identical. Note: that we ignore the Unix-From since that may
# contain a changed date.
class TestIdempotent(TestEmailBase):
+
+ linesep = '\n'
+
def _msgobj(self, filename):
- fp = openfile(filename)
- try:
+ with openfile(filename) as fp:
data = fp.read()
- finally:
- fp.close()
msg = email.message_from_string(data)
return msg, data
- def _idempotent(self, msg, text):
+ def _idempotent(self, msg, text, unixfrom=False):
eq = self.ndiffAssertEqual
s = StringIO()
g = Generator(s, maxheaderlen=0)
- g.flatten(msg)
+ g.flatten(msg, unixfrom=unixfrom)
eq(text, s.getvalue())
def test_parse_text_message(self):
@@ -2092,6 +2435,14 @@ class TestIdempotent(TestEmailBase):
msg, text = self._msgobj('msg_36.txt')
self._idempotent(msg, text)
+ def test_message_delivery_status(self):
+ msg, text = self._msgobj('msg_43.txt')
+ self._idempotent(msg, text, unixfrom=True)
+
+ def test_message_signed_idempotent(self):
+ msg, text = self._msgobj('msg_45.txt')
+ self._idempotent(msg, text)
+
def test_content_type(self):
eq = self.assertEqual
unless = self.assertTrue
@@ -2104,16 +2455,16 @@ class TestIdempotent(TestEmailBase):
params[pk] = pv
eq(params['report-type'], 'delivery-status')
eq(params['boundary'], 'D1690A7AC1.996856090/mail.example.com')
- eq(msg.preamble, 'This is a MIME-encapsulated message.\n')
- eq(msg.epilogue, '\n')
+ eq(msg.preamble, 'This is a MIME-encapsulated message.' + self.linesep)
+ eq(msg.epilogue, self.linesep)
eq(len(msg.get_payload()), 3)
# Make sure the subparts are what we expect
msg1 = msg.get_payload(0)
eq(msg1.get_content_type(), 'text/plain')
- eq(msg1.get_payload(), 'Yadda yadda yadda\n')
+ eq(msg1.get_payload(), 'Yadda yadda yadda' + self.linesep)
msg2 = msg.get_payload(1)
eq(msg2.get_content_type(), 'text/plain')
- eq(msg2.get_payload(), 'Yadda yadda yadda\n')
+ eq(msg2.get_payload(), 'Yadda yadda yadda' + self.linesep)
msg3 = msg.get_payload(2)
eq(msg3.get_content_type(), 'message/rfc822')
self.assertTrue(isinstance(msg3, Message))
@@ -2122,7 +2473,7 @@ class TestIdempotent(TestEmailBase):
eq(len(payload), 1)
msg4 = payload[0]
unless(isinstance(msg4, Message))
- eq(msg4.get_payload(), 'Yadda yadda yadda\n')
+ eq(msg4.get_payload(), 'Yadda yadda yadda' + self.linesep)
def test_parser(self):
eq = self.assertEqual
@@ -2139,18 +2490,15 @@ class TestIdempotent(TestEmailBase):
self.assertTrue(isinstance(msg1, Message))
eq(msg1.get_content_type(), 'text/plain')
self.assertTrue(isinstance(msg1.get_payload(), str))
- eq(msg1.get_payload(), '\n')
+ eq(msg1.get_payload(), self.linesep)
# Test various other bits of the package's functionality
class TestMiscellaneous(TestEmailBase):
def test_message_from_string(self):
- fp = openfile('msg_01.txt')
- try:
+ with openfile('msg_01.txt') as fp:
text = fp.read()
- finally:
- fp.close()
msg = email.message_from_string(text)
s = StringIO()
# Don't wrap/continue long headers since we're trying to test
@@ -2160,8 +2508,7 @@ class TestMiscellaneous(TestEmailBase):
self.assertEqual(text, s.getvalue())
def test_message_from_file(self):
- fp = openfile('msg_01.txt')
- try:
+ with openfile('msg_01.txt') as fp:
text = fp.read()
fp.seek(0)
msg = email.message_from_file(fp)
@@ -2171,16 +2518,12 @@ class TestMiscellaneous(TestEmailBase):
g = Generator(s, maxheaderlen=0)
g.flatten(msg)
self.assertEqual(text, s.getvalue())
- finally:
- fp.close()
def test_message_from_string_with_class(self):
unless = self.assertTrue
- fp = openfile('msg_01.txt')
- try:
+ with openfile('msg_01.txt') as fp:
text = fp.read()
- finally:
- fp.close()
+
# Create a subclass
class MyMessage(Message):
pass
@@ -2188,11 +2531,8 @@ class TestMiscellaneous(TestEmailBase):
msg = email.message_from_string(text, MyMessage)
unless(isinstance(msg, MyMessage))
# Try something more complicated
- fp = openfile('msg_02.txt')
- try:
+ with openfile('msg_02.txt') as fp:
text = fp.read()
- finally:
- fp.close()
msg = email.message_from_string(text, MyMessage)
for subpart in msg.walk():
unless(isinstance(subpart, MyMessage))
@@ -2203,92 +2543,88 @@ class TestMiscellaneous(TestEmailBase):
class MyMessage(Message):
pass
- fp = openfile('msg_01.txt')
- try:
+ with openfile('msg_01.txt') as fp:
msg = email.message_from_file(fp, MyMessage)
- finally:
- fp.close()
unless(isinstance(msg, MyMessage))
# Try something more complicated
- fp = openfile('msg_02.txt')
- try:
+ with openfile('msg_02.txt') as fp:
msg = email.message_from_file(fp, MyMessage)
- finally:
- fp.close()
for subpart in msg.walk():
unless(isinstance(subpart, MyMessage))
def test__all__(self):
module = __import__('email')
- all = module.__all__
- all.sort()
- self.assertEqual(all, [
- # Old names
- 'Charset', 'Encoders', 'Errors', 'Generator',
- 'Header', 'Iterators', 'MIMEAudio', 'MIMEBase',
- 'MIMEImage', 'MIMEMessage', 'MIMEMultipart',
- 'MIMENonMultipart', 'MIMEText', 'Message',
- 'Parser', 'Utils', 'base64MIME',
- # new names
- 'base64mime', 'charset', 'encoders', 'errors', 'generator',
- 'header', 'iterators', 'message', 'message_from_file',
- 'message_from_string', 'mime', 'parser',
- 'quopriMIME', 'quoprimime', 'utils',
+ self.assertEqual(sorted(module.__all__), [
+ 'base64mime', 'charset', 'encoders', 'errors', 'feedparser',
+ 'generator', 'header', 'iterators', 'message',
+ 'message_from_binary_file', 'message_from_bytes',
+ 'message_from_file', 'message_from_string', 'mime', 'parser',
+ 'quoprimime', 'utils',
])
def test_formatdate(self):
now = time.time()
- self.assertEqual(Utils.parsedate(Utils.formatdate(now))[:6],
+ self.assertEqual(utils.parsedate(utils.formatdate(now))[:6],
time.gmtime(now)[:6])
def test_formatdate_localtime(self):
now = time.time()
self.assertEqual(
- Utils.parsedate(Utils.formatdate(now, localtime=True))[:6],
+ utils.parsedate(utils.formatdate(now, localtime=True))[:6],
time.localtime(now)[:6])
def test_formatdate_usegmt(self):
now = time.time()
self.assertEqual(
- Utils.formatdate(now, localtime=False),
+ utils.formatdate(now, localtime=False),
time.strftime('%a, %d %b %Y %H:%M:%S -0000', time.gmtime(now)))
self.assertEqual(
- Utils.formatdate(now, localtime=False, usegmt=True),
+ utils.formatdate(now, localtime=False, usegmt=True),
time.strftime('%a, %d %b %Y %H:%M:%S GMT', time.gmtime(now)))
def test_parsedate_none(self):
- self.assertEqual(Utils.parsedate(''), None)
+ self.assertEqual(utils.parsedate(''), None)
def test_parsedate_compact(self):
# The FWS after the comma is optional
- self.assertEqual(Utils.parsedate('Wed,3 Apr 2002 14:58:26 +0800'),
- Utils.parsedate('Wed, 3 Apr 2002 14:58:26 +0800'))
+ self.assertEqual(utils.parsedate('Wed,3 Apr 2002 14:58:26 +0800'),
+ utils.parsedate('Wed, 3 Apr 2002 14:58:26 +0800'))
def test_parsedate_no_dayofweek(self):
eq = self.assertEqual
- eq(Utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'),
+ eq(utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'),
(2003, 2, 25, 13, 47, 26, 0, 1, -1, -28800))
def test_parsedate_compact_no_dayofweek(self):
eq = self.assertEqual
- eq(Utils.parsedate_tz('5 Feb 2003 13:47:26 -0800'),
+ eq(utils.parsedate_tz('5 Feb 2003 13:47:26 -0800'),
(2003, 2, 5, 13, 47, 26, 0, 1, -1, -28800))
+ def test_parsedate_no_space_before_positive_offset(self):
+ self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26+0800'),
+ (2002, 4, 3, 14, 58, 26, 0, 1, -1, 28800))
+
+ def test_parsedate_no_space_before_negative_offset(self):
+ # Issue 1155362: we already handled '+' for this case.
+ self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26-0800'),
+ (2002, 4, 3, 14, 58, 26, 0, 1, -1, -28800))
+
+
def test_parsedate_acceptable_to_time_functions(self):
eq = self.assertEqual
- timetup = Utils.parsedate('5 Feb 2003 13:47:26 -0800')
+ timetup = utils.parsedate('5 Feb 2003 13:47:26 -0800')
t = int(time.mktime(timetup))
eq(time.localtime(t)[:6], timetup[:6])
eq(int(time.strftime('%Y', timetup)), 2003)
- timetup = Utils.parsedate_tz('5 Feb 2003 13:47:26 -0800')
+ timetup = utils.parsedate_tz('5 Feb 2003 13:47:26 -0800')
t = int(time.mktime(timetup[:9]))
eq(time.localtime(t)[:6], timetup[:6])
eq(int(time.strftime('%Y', timetup[:9])), 2003)
def test_mktime_tz(self):
- self.assertEqual(Utils.mktime_tz((1970, 1, 1, 0, 0, 0,
+ self.assertEqual(utils.mktime_tz((1970, 1, 1, 0, 0, 0,
-1, -1, -1, 0)), 0)
- self.assertEqual(Utils.mktime_tz((1970, 1, 1, 0, 0, 0,
+ self.assertEqual(utils.mktime_tz((1970, 1, 1, 0, 0, 0,
-1, -1, -1, 1234)), -1234)
def test_parsedate_y2k(self):
@@ -2299,44 +2635,44 @@ class TestMiscellaneous(TestEmailBase):
obsoletes RFC822) requires four-digit years.
"""
- self.assertEqual(Utils.parsedate_tz('25 Feb 03 13:47:26 -0800'),
- Utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'))
- self.assertEqual(Utils.parsedate_tz('25 Feb 71 13:47:26 -0800'),
- Utils.parsedate_tz('25 Feb 1971 13:47:26 -0800'))
+ self.assertEqual(utils.parsedate_tz('25 Feb 03 13:47:26 -0800'),
+ utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'))
+ self.assertEqual(utils.parsedate_tz('25 Feb 71 13:47:26 -0800'),
+ utils.parsedate_tz('25 Feb 1971 13:47:26 -0800'))
def test_parseaddr_empty(self):
- self.assertEqual(Utils.parseaddr('<>'), ('', ''))
- self.assertEqual(Utils.formataddr(Utils.parseaddr('<>')), '')
+ self.assertEqual(utils.parseaddr('<>'), ('', ''))
+ self.assertEqual(utils.formataddr(utils.parseaddr('<>')), '')
def test_noquote_dump(self):
self.assertEqual(
- Utils.formataddr(('A Silly Person', 'person@dom.ain')),
+ utils.formataddr(('A Silly Person', 'person@dom.ain')),
'A Silly Person <person@dom.ain>')
def test_escape_dump(self):
self.assertEqual(
- Utils.formataddr(('A (Very) Silly Person', 'person@dom.ain')),
+ utils.formataddr(('A (Very) Silly Person', 'person@dom.ain')),
r'"A \(Very\) Silly Person" <person@dom.ain>')
a = r'A \(Special\) Person'
b = 'person@dom.ain'
- self.assertEqual(Utils.parseaddr(Utils.formataddr((a, b))), (a, b))
+ self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))
def test_escape_backslashes(self):
self.assertEqual(
- Utils.formataddr(('Arthur \Backslash\ Foobar', 'person@dom.ain')),
+ utils.formataddr(('Arthur \Backslash\ Foobar', 'person@dom.ain')),
r'"Arthur \\Backslash\\ Foobar" <person@dom.ain>')
a = r'Arthur \Backslash\ Foobar'
b = 'person@dom.ain'
- self.assertEqual(Utils.parseaddr(Utils.formataddr((a, b))), (a, b))
+ self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))
def test_name_with_dot(self):
x = 'John X. Doe <jxd@example.com>'
y = '"John X. Doe" <jxd@example.com>'
a, b = ('John X. Doe', 'jxd@example.com')
- self.assertEqual(Utils.parseaddr(x), (a, b))
- self.assertEqual(Utils.parseaddr(y), (a, b))
+ self.assertEqual(utils.parseaddr(x), (a, b))
+ self.assertEqual(utils.parseaddr(y), (a, b))
# formataddr() quotes the name if there's a dot in it
- self.assertEqual(Utils.formataddr((a, b)), y)
+ self.assertEqual(utils.formataddr((a, b)), y)
def test_parseaddr_preserves_quoted_pairs_in_addresses(self):
# issue 10005. Note that in the third test the second pair of
@@ -2349,32 +2685,42 @@ class TestMiscellaneous(TestEmailBase):
# not appear in an address outside of a quoted string. It is probably
# a sensible Postel interpretation, though.
eq = self.assertEqual
- eq(Utils.parseaddr('""example" example"@example.com'),
+ eq(utils.parseaddr('""example" example"@example.com'),
('', '""example" example"@example.com'))
- eq(Utils.parseaddr('"\\"example\\" example"@example.com'),
+ eq(utils.parseaddr('"\\"example\\" example"@example.com'),
('', '"\\"example\\" example"@example.com'))
- eq(Utils.parseaddr('"\\\\"example\\\\" example"@example.com'),
+ eq(utils.parseaddr('"\\\\"example\\\\" example"@example.com'),
('', '"\\\\"example\\\\" example"@example.com'))
+ def test_parseaddr_preserves_spaces_in_local_part(self):
+ # issue 9286. A normal RFC5322 local part should not contain any
+ # folding white space, but legacy local parts can (they are a sequence
+ # of atoms, not dotatoms). On the other hand we strip whitespace from
+ # before the @ and around dots, on the assumption that the whitespace
+ # around the punctuation is a mistake in what would otherwise be
+ # an RFC5322 local part. Leading whitespace is, as usual, stripped as well.
+ self.assertEqual(('', "merwok wok@xample.com"),
+ utils.parseaddr("merwok wok@xample.com"))
+ self.assertEqual(('', "merwok wok@xample.com"),
+ utils.parseaddr("merwok wok@xample.com"))
+ self.assertEqual(('', "merwok wok@xample.com"),
+ utils.parseaddr(" merwok wok @xample.com"))
+ self.assertEqual(('', 'merwok"wok" wok@xample.com'),
+ utils.parseaddr('merwok"wok" wok@xample.com'))
+ self.assertEqual(('', 'merwok.wok.wok@xample.com'),
+ utils.parseaddr('merwok. wok . wok@xample.com'))
+
def test_multiline_from_comment(self):
x = """\
Foo
\tBar <foo@example.com>"""
- self.assertEqual(Utils.parseaddr(x), ('Foo Bar', 'foo@example.com'))
+ self.assertEqual(utils.parseaddr(x), ('Foo Bar', 'foo@example.com'))
def test_quote_dump(self):
self.assertEqual(
- Utils.formataddr(('A Silly; Person', 'person@dom.ain')),
+ utils.formataddr(('A Silly; Person', 'person@dom.ain')),
r'"A Silly; Person" <person@dom.ain>')
- def test_fix_eols(self):
- eq = self.assertEqual
- eq(Utils.fix_eols('hello'), 'hello')
- eq(Utils.fix_eols('hello\n'), 'hello\r\n')
- eq(Utils.fix_eols('hello\r'), 'hello\r\n')
- eq(Utils.fix_eols('hello\r\n'), 'hello\r\n')
- eq(Utils.fix_eols('hello\n\r'), 'hello\r\n\r\n')
-
def test_charset_richcomparisons(self):
eq = self.assertEqual
ne = self.assertNotEqual
@@ -2397,25 +2743,25 @@ Foo
def test_getaddresses(self):
eq = self.assertEqual
- eq(Utils.getaddresses(['aperson@dom.ain (Al Person)',
+ eq(utils.getaddresses(['aperson@dom.ain (Al Person)',
'Bud Person <bperson@dom.ain>']),
[('Al Person', 'aperson@dom.ain'),
('Bud Person', 'bperson@dom.ain')])
def test_getaddresses_nasty(self):
eq = self.assertEqual
- eq(Utils.getaddresses(['foo: ;']), [('', '')])
- eq(Utils.getaddresses(
+ eq(utils.getaddresses(['foo: ;']), [('', '')])
+ eq(utils.getaddresses(
['[]*-- =~$']),
[('', ''), ('', ''), ('', '*--')])
- eq(Utils.getaddresses(
+ eq(utils.getaddresses(
['foo: ;', '"Jason R. Mastaler" <jason@dom.ain>']),
[('', ''), ('Jason R. Mastaler', 'jason@dom.ain')])
def test_getaddresses_embedded_comment(self):
"""Test proper handling of a nested comment"""
eq = self.assertEqual
- addrs = Utils.getaddresses(['User ((nested comment)) <foo@bar.com>'])
+ addrs = utils.getaddresses(['User ((nested comment)) <foo@bar.com>'])
eq(addrs[0][1], 'foo@bar.com')
def test_utils_quote_unquote(self):
@@ -2439,9 +2785,9 @@ Foo
eq(charsets[0], 'utf-8')
charset = Charset(charsets[0])
eq(charset.get_body_encoding(), 'base64')
- msg.set_payload('hello world', charset=charset)
+ msg.set_payload(b'hello world', charset=charset)
eq(msg.get_payload(), 'aGVsbG8gd29ybGQ=\n')
- eq(msg.get_payload(decode=True), 'hello world')
+ eq(msg.get_payload(decode=True), b'hello world')
eq(msg['content-transfer-encoding'], 'base64')
# Try another one
msg = Message()
@@ -2450,7 +2796,7 @@ Foo
eq(len(charsets), 1)
eq(charsets[0], 'us-ascii')
charset = Charset(charsets[0])
- eq(charset.get_body_encoding(), Encoders.encode_7or8bit)
+ eq(charset.get_body_encoding(), encoders.encode_7or8bit)
msg.set_payload('hello world', charset=charset)
eq(msg.get_payload(), 'hello world')
eq(msg['content-transfer-encoding'], '7bit')
@@ -2468,7 +2814,7 @@ Foo
# unreadline() of NeedMoreData.
msg = self._msgobj('msg_43.txt')
sfp = StringIO()
- Iterators._structure(msg, sfp)
+ iterators._structure(msg, sfp)
eq(sfp.getvalue(), """\
multipart/report
text/plain
@@ -2502,6 +2848,10 @@ multipart/report
text/rfc822-headers
""")
+ def test_make_msgid_domain(self):
+ self.assertEqual(
+ email.utils.make_msgid(domain='testdomain-string')[-19:],
+ '@testdomain-string>')
# Test the iterator/generators
@@ -2511,25 +2861,22 @@ class TestIterators(TestEmailBase):
neq = self.ndiffAssertEqual
# First a simple non-multipart message
msg = self._msgobj('msg_01.txt')
- it = Iterators.body_line_iterator(msg)
+ it = iterators.body_line_iterator(msg)
lines = list(it)
eq(len(lines), 6)
neq(EMPTYSTRING.join(lines), msg.get_payload())
# Now a more complicated multipart
msg = self._msgobj('msg_02.txt')
- it = Iterators.body_line_iterator(msg)
+ it = iterators.body_line_iterator(msg)
lines = list(it)
eq(len(lines), 43)
- fp = openfile('msg_19.txt')
- try:
+ with openfile('msg_19.txt') as fp:
neq(EMPTYSTRING.join(lines), fp.read())
- finally:
- fp.close()
def test_typed_subpart_iterator(self):
eq = self.assertEqual
msg = self._msgobj('msg_04.txt')
- it = Iterators.typed_subpart_iterator(msg, 'text')
+ it = iterators.typed_subpart_iterator(msg, 'text')
lines = []
subparts = 0
for subpart in it:
@@ -2546,7 +2893,7 @@ to reflect upon our own
def test_typed_subpart_iterator_default_type(self):
eq = self.assertEqual
msg = self._msgobj('msg_03.txt')
- it = Iterators.typed_subpart_iterator(msg, 'text', 'plain')
+ it = iterators.typed_subpart_iterator(msg, 'text', 'plain')
lines = []
subparts = 0
for subpart in it:
@@ -2601,11 +2948,8 @@ class TestParsers(TestEmailBase):
def test_header_parser(self):
eq = self.assertEqual
# Parse only the headers of a complex multipart MIME document
- fp = openfile('msg_02.txt')
- try:
+ with openfile('msg_02.txt') as fp:
msg = HeaderParser().parse(fp)
- finally:
- fp.close()
eq(msg['from'], 'ppp-request@zzz.org')
eq(msg['to'], 'ppp@zzz.org')
eq(msg.get_content_type(), 'multipart/mixed')
@@ -2650,11 +2994,8 @@ Here's the message body
def test_crlf_separation(self):
eq = self.assertEqual
- fp = openfile('msg_26.txt', mode='rb')
- try:
+ with openfile('msg_26.txt', newline='\n') as fp:
msg = Parser().parse(fp)
- finally:
- fp.close()
eq(len(msg.get_payload()), 2)
part1 = msg.get_payload(0)
eq(part1.get_content_type(), 'text/plain')
@@ -2662,14 +3003,23 @@ Here's the message body
part2 = msg.get_payload(1)
eq(part2.get_content_type(), 'application/riscos')
+ def test_crlf_flatten(self):
+ # Using newline='\n' preserves the crlfs in this input file.
+ with openfile('msg_26.txt', newline='\n') as fp:
+ text = fp.read()
+ msg = email.message_from_string(text)
+ s = StringIO()
+ g = Generator(s)
+ g.flatten(msg, linesep='\r\n')
+ self.assertEqual(s.getvalue(), text)
+
+ maxDiff = None
+
def test_multipart_digest_with_extra_mime_headers(self):
eq = self.assertEqual
neq = self.ndiffAssertEqual
- fp = openfile('msg_28.txt')
- try:
+ with openfile('msg_28.txt') as fp:
msg = email.message_from_file(fp)
- finally:
- fp.close()
# Structure is:
# multipart/digest
# message/rfc822
@@ -2719,10 +3069,8 @@ Here's the message body
eq = self.assertEqual
m = '>From: foo\nFrom: bar\n!"#QUX;~: zoo\n\nbody'
msg = email.message_from_string(m)
- eq(len(msg.keys()), 3)
- keys = msg.keys()
- keys.sort()
- eq(keys, ['!"#QUX;~', '>From', 'From'])
+ eq(len(msg), 3)
+ eq(sorted(field for field in msg), ['!"#QUX;~', '>From', 'From'])
eq(msg.get_payload(), 'body')
def test_rfc2822_space_not_allowed_in_header(self):
@@ -2760,11 +3108,446 @@ Here's the message body
self.assertTrue(msg.get_payload(0).get_payload().endswith('\r\n'))
+class Test8BitBytesHandling(unittest.TestCase):
+ # In Python3 all input is string, but that doesn't work if the actual input
+ # uses an 8bit transfer encoding. To hack around that, in email 5.1 we
+ # decode byte streams using the surrogateescape error handler, and
+ # reconvert to binary at appropriate places if we detect surrogates. This
+ # doesn't allow us to transform headers with 8bit bytes (they get munged),
+ # but it does allow us to parse and preserve them, and to decode body
+ # parts that use an 8bit CTE.
+
+ bodytest_msg = textwrap.dedent("""\
+ From: foo@bar.com
+ To: baz
+ Mime-Version: 1.0
+ Content-Type: text/plain; charset={charset}
+ Content-Transfer-Encoding: {cte}
+
+ {bodyline}
+ """)
+
+ def test_known_8bit_CTE(self):
+ m = self.bodytest_msg.format(charset='utf-8',
+ cte='8bit',
+ bodyline='pöstal').encode('utf-8')
+ msg = email.message_from_bytes(m)
+ self.assertEqual(msg.get_payload(), "pöstal\n")
+ self.assertEqual(msg.get_payload(decode=True),
+ "pöstal\n".encode('utf-8'))
+
+ def test_unknown_8bit_CTE(self):
+ m = self.bodytest_msg.format(charset='notavalidcharset',
+ cte='8bit',
+ bodyline='pöstal').encode('utf-8')
+ msg = email.message_from_bytes(m)
+ self.assertEqual(msg.get_payload(), "p\uFFFD\uFFFDstal\n")
+ self.assertEqual(msg.get_payload(decode=True),
+ "pöstal\n".encode('utf-8'))
+
+ def test_8bit_in_quopri_body(self):
+ # This is non-RFC compliant data...without 'decode' the library code
+ # decodes the body using the charset from the headers, and because the
+ # source byte really is utf-8 this works. This is likely to fail
+ # against real dirty data (ie: produce mojibake), but the data is
+ # invalid anyway so it is as good a guess as any. But this means that
+ # this test just confirms the current behavior; that behavior is not
+ # necessarily the best possible behavior. With 'decode' it is
+ # returning the raw bytes, so that test should be of correct behavior,
+ # or at least produce the same result that email4 did.
+ m = self.bodytest_msg.format(charset='utf-8',
+ cte='quoted-printable',
+ bodyline='p=C3=B6stál').encode('utf-8')
+ msg = email.message_from_bytes(m)
+ self.assertEqual(msg.get_payload(), 'p=C3=B6stál\n')
+ self.assertEqual(msg.get_payload(decode=True),
+ 'pöstál\n'.encode('utf-8'))
+
+ def test_invalid_8bit_in_non_8bit_cte_uses_replace(self):
+ # This is similar to the previous test, but proves that if the 8bit
+ # byte is undecodable in the specified charset, it gets replaced
+ # by the unicode 'unknown' character. Again, this may or may not
+ # be the ideal behavior. Note that if decode=False none of the
+ # decoders will get involved, so this is the only test we need
+ # for this behavior.
+ m = self.bodytest_msg.format(charset='ascii',
+ cte='quoted-printable',
+ bodyline='p=C3=B6stál').encode('utf-8')
+ msg = email.message_from_bytes(m)
+ self.assertEqual(msg.get_payload(), 'p=C3=B6st\uFFFD\uFFFDl\n')
+ self.assertEqual(msg.get_payload(decode=True),
+ 'pöstál\n'.encode('utf-8'))
+
+ def test_8bit_in_base64_body(self):
+ # Sticking an 8bit byte in a base64 block makes it undecodable by
+ # normal means, so the block is returned undecoded, but as bytes.
+ m = self.bodytest_msg.format(charset='utf-8',
+ cte='base64',
+ bodyline='cMO2c3RhbAá=').encode('utf-8')
+ msg = email.message_from_bytes(m)
+ self.assertEqual(msg.get_payload(decode=True),
+ 'cMO2c3RhbAá=\n'.encode('utf-8'))
+
+ def test_8bit_in_uuencode_body(self):
+ # Sticking an 8bit byte in a uuencode block makes it undecodable by
+ # normal means, so the block is returned undecoded, but as bytes.
+ m = self.bodytest_msg.format(charset='utf-8',
+ cte='uuencode',
+ bodyline='<,.V<W1A; á ').encode('utf-8')
+ msg = email.message_from_bytes(m)
+ self.assertEqual(msg.get_payload(decode=True),
+ '<,.V<W1A; á \n'.encode('utf-8'))
+
+
+ headertest_headers = (
+ ('From: foo@bar.com', ('From', 'foo@bar.com')),
+ ('To: báz', ('To', '=?unknown-8bit?q?b=C3=A1z?=')),
+ ('Subject: Maintenant je vous présente mon collègue, le pouf célèbre\n'
+ '\tJean de Baddie',
+ ('Subject', '=?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_'
+ 'coll=C3=A8gue=2C_le_pouf_c=C3=A9l=C3=A8bre?=\n'
+ ' =?unknown-8bit?q?_Jean_de_Baddie?=')),
+ ('From: göst', ('From', '=?unknown-8bit?b?Z8O2c3Q=?=')),
+ )
+ headertest_msg = ('\n'.join([src for (src, _) in headertest_headers]) +
+ '\nYes, they are flying.\n').encode('utf-8')
+
+ def test_get_8bit_header(self):
+ msg = email.message_from_bytes(self.headertest_msg)
+ self.assertEqual(str(msg.get('to')), 'b\uFFFD\uFFFDz')
+ self.assertEqual(str(msg['to']), 'b\uFFFD\uFFFDz')
+
+ def test_print_8bit_headers(self):
+ msg = email.message_from_bytes(self.headertest_msg)
+ self.assertEqual(str(msg),
+ textwrap.dedent("""\
+ From: {}
+ To: {}
+ Subject: {}
+ From: {}
+
+ Yes, they are flying.
+ """).format(*[expected[1] for (_, expected) in
+ self.headertest_headers]))
+
+ def test_values_with_8bit_headers(self):
+ msg = email.message_from_bytes(self.headertest_msg)
+ self.assertListEqual([str(x) for x in msg.values()],
+ ['foo@bar.com',
+ 'b\uFFFD\uFFFDz',
+ 'Maintenant je vous pr\uFFFD\uFFFDsente mon '
+ 'coll\uFFFD\uFFFDgue, le pouf '
+ 'c\uFFFD\uFFFDl\uFFFD\uFFFDbre\n'
+ '\tJean de Baddie',
+ "g\uFFFD\uFFFDst"])
+
+ def test_items_with_8bit_headers(self):
+ msg = email.message_from_bytes(self.headertest_msg)
+ self.assertListEqual([(str(x), str(y)) for (x, y) in msg.items()],
+ [('From', 'foo@bar.com'),
+ ('To', 'b\uFFFD\uFFFDz'),
+ ('Subject', 'Maintenant je vous '
+ 'pr\uFFFD\uFFFDsente '
+ 'mon coll\uFFFD\uFFFDgue, le pouf '
+ 'c\uFFFD\uFFFDl\uFFFD\uFFFDbre\n'
+ '\tJean de Baddie'),
+ ('From', 'g\uFFFD\uFFFDst')])
+
+ def test_get_all_with_8bit_headers(self):
+ msg = email.message_from_bytes(self.headertest_msg)
+ self.assertListEqual([str(x) for x in msg.get_all('from')],
+ ['foo@bar.com',
+ 'g\uFFFD\uFFFDst'])
+
+ def test_get_content_type_with_8bit(self):
+ msg = email.message_from_bytes(textwrap.dedent("""\
+ Content-Type: text/pl\xA7in; charset=utf-8
+ """).encode('latin-1'))
+ self.assertEqual(msg.get_content_type(), "text/pl\uFFFDin")
+ self.assertEqual(msg.get_content_maintype(), "text")
+ self.assertEqual(msg.get_content_subtype(), "pl\uFFFDin")
+
+ def test_get_params_with_8bit(self):
+ msg = email.message_from_bytes(
+ 'X-Header: foo=\xa7ne; b\xa7r=two; baz=three\n'.encode('latin-1'))
+ self.assertEqual(msg.get_params(header='x-header'),
+ [('foo', '\uFFFDne'), ('b\uFFFDr', 'two'), ('baz', 'three')])
+ self.assertEqual(msg.get_param('Foo', header='x-header'), '\uFFFdne')
+ # XXX: someday you might be able to get 'b\xa7r', for now you can't.
+ self.assertEqual(msg.get_param('b\xa7r', header='x-header'), None)
+
+ def test_get_rfc2231_params_with_8bit(self):
+ msg = email.message_from_bytes(textwrap.dedent("""\
+ Content-Type: text/plain; charset=us-ascii;
+ title*=us-ascii'en'This%20is%20not%20f\xa7n"""
+ ).encode('latin-1'))
+ self.assertEqual(msg.get_param('title'),
+ ('us-ascii', 'en', 'This is not f\uFFFDn'))
+
+ def test_set_rfc2231_params_with_8bit(self):
+ msg = email.message_from_bytes(textwrap.dedent("""\
+ Content-Type: text/plain; charset=us-ascii;
+ title*=us-ascii'en'This%20is%20not%20f\xa7n"""
+ ).encode('latin-1'))
+ msg.set_param('title', 'test')
+ self.assertEqual(msg.get_param('title'), 'test')
+
+ def test_del_rfc2231_params_with_8bit(self):
+ msg = email.message_from_bytes(textwrap.dedent("""\
+ Content-Type: text/plain; charset=us-ascii;
+ title*=us-ascii'en'This%20is%20not%20f\xa7n"""
+ ).encode('latin-1'))
+ msg.del_param('title')
+ self.assertEqual(msg.get_param('title'), None)
+ self.assertEqual(msg.get_content_maintype(), 'text')
+
+ def test_get_payload_with_8bit_cte_header(self):
+ msg = email.message_from_bytes(textwrap.dedent("""\
+ Content-Transfer-Encoding: b\xa7se64
+ Content-Type: text/plain; charset=latin-1
+
+ payload
+ """).encode('latin-1'))
+ self.assertEqual(msg.get_payload(), 'payload\n')
+ self.assertEqual(msg.get_payload(decode=True), b'payload\n')
+
+ non_latin_bin_msg = textwrap.dedent("""\
+ From: foo@bar.com
+ To: báz
+ Subject: Maintenant je vous présente mon collègue, le pouf célèbre
+ \tJean de Baddie
+ Mime-Version: 1.0
+ Content-Type: text/plain; charset="utf-8"
+ Content-Transfer-Encoding: 8bit
+
+ Да, они летят.
+ """).encode('utf-8')
+
+ def test_bytes_generator(self):
+ msg = email.message_from_bytes(self.non_latin_bin_msg)
+ out = BytesIO()
+ email.generator.BytesGenerator(out).flatten(msg)
+ self.assertEqual(out.getvalue(), self.non_latin_bin_msg)
+
+ def test_bytes_generator_handles_None_body(self):
+ #Issue 11019
+ msg = email.message.Message()
+ out = BytesIO()
+ email.generator.BytesGenerator(out).flatten(msg)
+ self.assertEqual(out.getvalue(), b"\n")
+
+ non_latin_bin_msg_as7bit_wrapped = textwrap.dedent("""\
+ From: foo@bar.com
+ To: =?unknown-8bit?q?b=C3=A1z?=
+ Subject: =?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_coll=C3=A8gue?=
+ =?unknown-8bit?q?=2C_le_pouf_c=C3=A9l=C3=A8bre?=
+ =?unknown-8bit?q?_Jean_de_Baddie?=
+ Mime-Version: 1.0
+ Content-Type: text/plain; charset="utf-8"
+ Content-Transfer-Encoding: base64
+
+ 0JTQsCwg0L7QvdC4INC70LXRgtGP0YIuCg==
+ """)
+
+ def test_generator_handles_8bit(self):
+ msg = email.message_from_bytes(self.non_latin_bin_msg)
+ out = StringIO()
+ email.generator.Generator(out).flatten(msg)
+ self.assertEqual(out.getvalue(), self.non_latin_bin_msg_as7bit_wrapped)
+
+ def test_bytes_generator_with_unix_from(self):
+ # The unixfrom contains a current date, so we can't check it
+ # literally. Just make sure the first word is 'From' and the
+ # rest of the message matches the input.
+ msg = email.message_from_bytes(self.non_latin_bin_msg)
+ out = BytesIO()
+ email.generator.BytesGenerator(out).flatten(msg, unixfrom=True)
+ lines = out.getvalue().split(b'\n')
+ self.assertEqual(lines[0].split()[0], b'From')
+ self.assertEqual(b'\n'.join(lines[1:]), self.non_latin_bin_msg)
+
+ non_latin_bin_msg_as7bit = non_latin_bin_msg_as7bit_wrapped.split('\n')
+ non_latin_bin_msg_as7bit[2:4] = [
+ 'Subject: =?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_'
+ 'coll=C3=A8gue=2C_le_pouf_c=C3=A9l=C3=A8bre?=']
+ non_latin_bin_msg_as7bit = '\n'.join(non_latin_bin_msg_as7bit)
+
+ def test_message_from_binary_file(self):
+ fn = 'test.msg'
+ self.addCleanup(unlink, fn)
+ with open(fn, 'wb') as testfile:
+ testfile.write(self.non_latin_bin_msg)
+ with open(fn, 'rb') as testfile:
+ m = email.parser.BytesParser().parse(testfile)
+ self.assertEqual(str(m), self.non_latin_bin_msg_as7bit)
+
+ latin_bin_msg = textwrap.dedent("""\
+ From: foo@bar.com
+ To: Dinsdale
+ Subject: Nudge nudge, wink, wink
+ Mime-Version: 1.0
+ Content-Type: text/plain; charset="latin-1"
+ Content-Transfer-Encoding: 8bit
+
+ oh là là, know what I mean, know what I mean?
+ """).encode('latin-1')
+
+ latin_bin_msg_as7bit = textwrap.dedent("""\
+ From: foo@bar.com
+ To: Dinsdale
+ Subject: Nudge nudge, wink, wink
+ Mime-Version: 1.0
+ Content-Type: text/plain; charset="iso-8859-1"
+ Content-Transfer-Encoding: quoted-printable
+
+ oh l=E0 l=E0, know what I mean, know what I mean?
+ """)
+
+ def test_string_generator_reencodes_to_quopri_when_appropriate(self):
+ m = email.message_from_bytes(self.latin_bin_msg)
+ self.assertEqual(str(m), self.latin_bin_msg_as7bit)
+
+ def test_decoded_generator_emits_unicode_body(self):
+ m = email.message_from_bytes(self.latin_bin_msg)
+ out = StringIO()
+ email.generator.DecodedGenerator(out).flatten(m)
+ #DecodedGenerator output contains an extra blank line compared
+ #to the input message. RDM: not sure if this is a bug or not,
+ #but it is not specific to the 8bit->7bit conversion.
+ self.assertEqual(out.getvalue(),
+ self.latin_bin_msg.decode('latin-1')+'\n')
+
+ def test_bytes_feedparser(self):
+ bfp = email.feedparser.BytesFeedParser()
+ for i in range(0, len(self.latin_bin_msg), 10):
+ bfp.feed(self.latin_bin_msg[i:i+10])
+ m = bfp.close()
+ self.assertEqual(str(m), self.latin_bin_msg_as7bit)
+
+ def test_crlf_flatten(self):
+ with openfile('msg_26.txt', 'rb') as fp:
+ text = fp.read()
+ msg = email.message_from_bytes(text)
+ s = BytesIO()
+ g = email.generator.BytesGenerator(s)
+ g.flatten(msg, linesep='\r\n')
+ self.assertEqual(s.getvalue(), text)
+
+ def test_8bit_multipart(self):
+ # Issue 11605
+ source = textwrap.dedent("""\
+ Date: Fri, 18 Mar 2011 17:15:43 +0100
+ To: foo@example.com
+ From: foodwatch-Newsletter <bar@example.com>
+ Subject: Aktuelles zu Japan, Klonfleisch und Smiley-System
+ Message-ID: <76a486bee62b0d200f33dc2ca08220ad@localhost.localdomain>
+ MIME-Version: 1.0
+ Content-Type: multipart/alternative;
+ boundary="b1_76a486bee62b0d200f33dc2ca08220ad"
+
+ --b1_76a486bee62b0d200f33dc2ca08220ad
+ Content-Type: text/plain; charset="utf-8"
+ Content-Transfer-Encoding: 8bit
+
+ Guten Tag, ,
+
+ mit großer Betroffenheit verfolgen auch wir im foodwatch-Team die
+ Nachrichten aus Japan.
+
+
+ --b1_76a486bee62b0d200f33dc2ca08220ad
+ Content-Type: text/html; charset="utf-8"
+ Content-Transfer-Encoding: 8bit
+
+ <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
+ "http://www.w3.org/TR/html4/loose.dtd">
+ <html lang="de">
+ <head>
+ <title>foodwatch - Newsletter</title>
+ </head>
+ <body>
+ <p>mit gro&szlig;er Betroffenheit verfolgen auch wir im foodwatch-Team
+ die Nachrichten aus Japan.</p>
+ </body>
+ </html>
+ --b1_76a486bee62b0d200f33dc2ca08220ad--
+
+ """).encode('utf-8')
+ msg = email.message_from_bytes(source)
+ s = BytesIO()
+ g = email.generator.BytesGenerator(s)
+ g.flatten(msg)
+ self.assertEqual(s.getvalue(), source)
+
+ def test_bytes_generator_b_encoding_linesep(self):
+ # Issue 14062: b encoding was tacking on an extra \n.
+ m = Message()
+ # This has enough non-ascii that it should always end up b encoded.
+ m['Subject'] = Header('žluťoučký kůň')
+ s = BytesIO()
+ g = email.generator.BytesGenerator(s)
+ g.flatten(m, linesep='\r\n')
+ self.assertEqual(
+ s.getvalue(),
+ b'Subject: =?utf-8?b?xb5sdcWlb3XEjWvDvSBrxa/FiA==?=\r\n\r\n')
+
+ def test_generator_b_encoding_linesep(self):
+ # Since this broke in BytesGenerator, test Generator for completeness.
+ m = Message()
+ # This has enough non-ascii that it should always end up b encoded.
+ m['Subject'] = Header('žluťoučký kůň')
+ s = StringIO()
+ g = email.generator.Generator(s)
+ g.flatten(m, linesep='\r\n')
+ self.assertEqual(
+ s.getvalue(),
+ 'Subject: =?utf-8?b?xb5sdcWlb3XEjWvDvSBrxa/FiA==?=\r\n\r\n')
+
+ maxDiff = None
+
+
+class BaseTestBytesGeneratorIdempotent:
+
+ maxDiff = None
+
+ def _msgobj(self, filename):
+ with openfile(filename, 'rb') as fp:
+ data = fp.read()
+ data = self.normalize_linesep_regex.sub(self.blinesep, data)
+ msg = email.message_from_bytes(data)
+ return msg, data
+
+ def _idempotent(self, msg, data, unixfrom=False):
+ b = BytesIO()
+ g = email.generator.BytesGenerator(b, maxheaderlen=0)
+ g.flatten(msg, unixfrom=unixfrom, linesep=self.linesep)
+ self.assertByteStringsEqual(data, b.getvalue())
+
+ def assertByteStringsEqual(self, str1, str2):
+ # Not using self.blinesep here is intentional. This way the output
+ # is more useful when the failure results in mixed line endings.
+ self.assertListEqual(str1.split(b'\n'), str2.split(b'\n'))
+
+
+class TestBytesGeneratorIdempotentNL(BaseTestBytesGeneratorIdempotent,
+ TestIdempotent):
+ linesep = '\n'
+ blinesep = b'\n'
+ normalize_linesep_regex = re.compile(br'\r\n')
+
+
+class TestBytesGeneratorIdempotentCRLF(BaseTestBytesGeneratorIdempotent,
+ TestIdempotent):
+ linesep = '\r\n'
+ blinesep = b'\r\n'
+ normalize_linesep_regex = re.compile(br'(?<!\r)\n')
+
+
class TestBase64(unittest.TestCase):
def test_len(self):
eq = self.assertEqual
- eq(base64MIME.base64_len('hello'),
- len(base64MIME.encode('hello', eol='')))
+ eq(base64mime.header_length('hello'),
+ len(base64mime.body_encode(b'hello', eol='')))
for size in range(15):
if size == 0 : bsize = 0
elif size <= 3 : bsize = 4
@@ -2772,31 +3555,29 @@ class TestBase64(unittest.TestCase):
elif size <= 9 : bsize = 12
elif size <= 12: bsize = 16
else : bsize = 20
- eq(base64MIME.base64_len('x'*size), bsize)
+ eq(base64mime.header_length('x' * size), bsize)
def test_decode(self):
eq = self.assertEqual
- eq(base64MIME.decode(''), '')
- eq(base64MIME.decode('aGVsbG8='), 'hello')
- eq(base64MIME.decode('aGVsbG8=', 'X'), 'hello')
- eq(base64MIME.decode('aGVsbG8NCndvcmxk\n', 'X'), 'helloXworld')
+ eq(base64mime.decode(''), b'')
+ eq(base64mime.decode('aGVsbG8='), b'hello')
def test_encode(self):
eq = self.assertEqual
- eq(base64MIME.encode(''), '')
- eq(base64MIME.encode('hello'), 'aGVsbG8=\n')
+ eq(base64mime.body_encode(b''), b'')
+ eq(base64mime.body_encode(b'hello'), 'aGVsbG8=\n')
# Test the binary flag
- eq(base64MIME.encode('hello\n'), 'aGVsbG8K\n')
- eq(base64MIME.encode('hello\n', 0), 'aGVsbG8NCg==\n')
+ eq(base64mime.body_encode(b'hello\n'), 'aGVsbG8K\n')
# Test the maxlinelen arg
- eq(base64MIME.encode('xxxx ' * 20, maxlinelen=40), """\
+ eq(base64mime.body_encode(b'xxxx ' * 20, maxlinelen=40), """\
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg
eHh4eCB4eHh4IA==
""")
# Test the eol argument
- eq(base64MIME.encode('xxxx ' * 20, maxlinelen=40, eol='\r\n'), """\
+ eq(base64mime.body_encode(b'xxxx ' * 20, maxlinelen=40, eol='\r\n'),
+ """\
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r
@@ -2805,134 +3586,322 @@ eHh4eCB4eHh4IA==\r
def test_header_encode(self):
eq = self.assertEqual
- he = base64MIME.header_encode
+ he = base64mime.header_encode
eq(he('hello'), '=?iso-8859-1?b?aGVsbG8=?=')
- eq(he('hello\nworld'), '=?iso-8859-1?b?aGVsbG8NCndvcmxk?=')
+ eq(he('hello\r\nworld'), '=?iso-8859-1?b?aGVsbG8NCndvcmxk?=')
+ eq(he('hello\nworld'), '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?=')
# Test the charset option
eq(he('hello', charset='iso-8859-2'), '=?iso-8859-2?b?aGVsbG8=?=')
- # Test the keep_eols flag
- eq(he('hello\nworld', keep_eols=True),
- '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?=')
- # Test the maxlinelen argument
- eq(he('xxxx ' * 20, maxlinelen=40), """\
-=?iso-8859-1?b?eHh4eCB4eHh4IHh4eHggeHg=?=
- =?iso-8859-1?b?eHggeHh4eCB4eHh4IHh4eHg=?=
- =?iso-8859-1?b?IHh4eHggeHh4eCB4eHh4IHg=?=
- =?iso-8859-1?b?eHh4IHh4eHggeHh4eCB4eHg=?=
- =?iso-8859-1?b?eCB4eHh4IHh4eHggeHh4eCA=?=
- =?iso-8859-1?b?eHh4eCB4eHh4IHh4eHgg?=""")
- # Test the eol argument
- eq(he('xxxx ' * 20, maxlinelen=40, eol='\r\n'), """\
-=?iso-8859-1?b?eHh4eCB4eHh4IHh4eHggeHg=?=\r
- =?iso-8859-1?b?eHggeHh4eCB4eHh4IHh4eHg=?=\r
- =?iso-8859-1?b?IHh4eHggeHh4eCB4eHh4IHg=?=\r
- =?iso-8859-1?b?eHh4IHh4eHggeHh4eCB4eHg=?=\r
- =?iso-8859-1?b?eCB4eHh4IHh4eHggeHh4eCA=?=\r
- =?iso-8859-1?b?eHh4eCB4eHh4IHh4eHgg?=""")
+ eq(he('hello\nworld'), '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?=')
class TestQuopri(unittest.TestCase):
def setUp(self):
- self.hlit = [chr(x) for x in range(ord('a'), ord('z')+1)] + \
- [chr(x) for x in range(ord('A'), ord('Z')+1)] + \
- [chr(x) for x in range(ord('0'), ord('9')+1)] + \
- ['!', '*', '+', '-', '/', ' ']
- self.hnon = [chr(x) for x in range(256) if chr(x) not in self.hlit]
+ # Set of characters (as byte integers) that don't need to be encoded
+ # in headers.
+ self.hlit = list(chain(
+ range(ord('a'), ord('z') + 1),
+ range(ord('A'), ord('Z') + 1),
+ range(ord('0'), ord('9') + 1),
+ (c for c in b'!*+-/')))
+ # Set of characters (as byte integers) that do need to be encoded in
+ # headers.
+ self.hnon = [c for c in range(256) if c not in self.hlit]
assert len(self.hlit) + len(self.hnon) == 256
- self.blit = [chr(x) for x in range(ord(' '), ord('~')+1)] + ['\t']
- self.blit.remove('=')
- self.bnon = [chr(x) for x in range(256) if chr(x) not in self.blit]
+ # Set of characters (as byte integers) that don't need to be encoded
+ # in bodies.
+ self.blit = list(range(ord(' '), ord('~') + 1))
+ self.blit.append(ord('\t'))
+ self.blit.remove(ord('='))
+ # Set of characters (as byte integers) that do need to be encoded in
+ # bodies.
+ self.bnon = [c for c in range(256) if c not in self.blit]
assert len(self.blit) + len(self.bnon) == 256
- def test_header_quopri_check(self):
+ def test_quopri_header_check(self):
for c in self.hlit:
- self.assertFalse(quopriMIME.header_quopri_check(c))
+ self.assertFalse(quoprimime.header_check(c),
+ 'Should not be header quopri encoded: %s' % chr(c))
for c in self.hnon:
- self.assertTrue(quopriMIME.header_quopri_check(c))
+ self.assertTrue(quoprimime.header_check(c),
+ 'Should be header quopri encoded: %s' % chr(c))
- def test_body_quopri_check(self):
+ def test_quopri_body_check(self):
for c in self.blit:
- self.assertFalse(quopriMIME.body_quopri_check(c))
+ self.assertFalse(quoprimime.body_check(c),
+ 'Should not be body quopri encoded: %s' % chr(c))
for c in self.bnon:
- self.assertTrue(quopriMIME.body_quopri_check(c))
+ self.assertTrue(quoprimime.body_check(c),
+ 'Should be body quopri encoded: %s' % chr(c))
def test_header_quopri_len(self):
eq = self.assertEqual
- hql = quopriMIME.header_quopri_len
- enc = quopriMIME.header_encode
- for s in ('hello', 'h@e@l@l@o@'):
- # Empty charset and no line-endings. 7 == RFC chrome
- eq(hql(s), len(enc(s, charset='', eol=''))-7)
+ eq(quoprimime.header_length(b'hello'), 5)
+ # RFC 2047 chrome is not included in header_length().
+ eq(len(quoprimime.header_encode(b'hello', charset='xxx')),
+ quoprimime.header_length(b'hello') +
+ # =?xxx?q?...?= means 10 extra characters
+ 10)
+ eq(quoprimime.header_length(b'h@e@l@l@o@'), 20)
+ # RFC 2047 chrome is not included in header_length().
+ eq(len(quoprimime.header_encode(b'h@e@l@l@o@', charset='xxx')),
+ quoprimime.header_length(b'h@e@l@l@o@') +
+ # =?xxx?q?...?= means 10 extra characters
+ 10)
for c in self.hlit:
- eq(hql(c), 1)
+ eq(quoprimime.header_length(bytes([c])), 1,
+ 'expected length 1 for %r' % chr(c))
for c in self.hnon:
- eq(hql(c), 3)
+ # Space is special; it's encoded to _
+ if c == ord(' '):
+ continue
+ eq(quoprimime.header_length(bytes([c])), 3,
+ 'expected length 3 for %r' % chr(c))
+ eq(quoprimime.header_length(b' '), 1)
def test_body_quopri_len(self):
eq = self.assertEqual
- bql = quopriMIME.body_quopri_len
for c in self.blit:
- eq(bql(c), 1)
+ eq(quoprimime.body_length(bytes([c])), 1)
for c in self.bnon:
- eq(bql(c), 3)
+ eq(quoprimime.body_length(bytes([c])), 3)
def test_quote_unquote_idempotent(self):
for x in range(256):
c = chr(x)
- self.assertEqual(quopriMIME.unquote(quopriMIME.quote(c)), c)
+ self.assertEqual(quoprimime.unquote(quoprimime.quote(c)), c)
- def test_header_encode(self):
- eq = self.assertEqual
- he = quopriMIME.header_encode
- eq(he('hello'), '=?iso-8859-1?q?hello?=')
- eq(he('hello\nworld'), '=?iso-8859-1?q?hello=0D=0Aworld?=')
- # Test the charset option
- eq(he('hello', charset='iso-8859-2'), '=?iso-8859-2?q?hello?=')
- # Test the keep_eols flag
- eq(he('hello\nworld', keep_eols=True), '=?iso-8859-1?q?hello=0Aworld?=')
- # Test a non-ASCII character
- eq(he('hello\xc7there'), '=?iso-8859-1?q?hello=C7there?=')
- # Test the maxlinelen argument
- eq(he('xxxx ' * 20, maxlinelen=40), """\
-=?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xx?=
- =?iso-8859-1?q?xx_xxxx_xxxx_xxxx_xxxx?=
- =?iso-8859-1?q?_xxxx_xxxx_xxxx_xxxx_x?=
- =?iso-8859-1?q?xxx_xxxx_xxxx_xxxx_xxx?=
- =?iso-8859-1?q?x_xxxx_xxxx_?=""")
- # Test the eol argument
- eq(he('xxxx ' * 20, maxlinelen=40, eol='\r\n'), """\
-=?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xx?=\r
- =?iso-8859-1?q?xx_xxxx_xxxx_xxxx_xxxx?=\r
- =?iso-8859-1?q?_xxxx_xxxx_xxxx_xxxx_x?=\r
- =?iso-8859-1?q?xxx_xxxx_xxxx_xxxx_xxx?=\r
- =?iso-8859-1?q?x_xxxx_xxxx_?=""")
+ def _test_header_encode(self, header, expected_encoded_header, charset=None):
+ if charset is None:
+ encoded_header = quoprimime.header_encode(header)
+ else:
+ encoded_header = quoprimime.header_encode(header, charset)
+ self.assertEqual(encoded_header, expected_encoded_header)
- def test_decode(self):
- eq = self.assertEqual
- eq(quopriMIME.decode(''), '')
- eq(quopriMIME.decode('hello'), 'hello')
- eq(quopriMIME.decode('hello', 'X'), 'hello')
- eq(quopriMIME.decode('hello\nworld', 'X'), 'helloXworld')
+ def test_header_encode_null(self):
+ self._test_header_encode(b'', '')
+
+ def test_header_encode_one_word(self):
+ self._test_header_encode(b'hello', '=?iso-8859-1?q?hello?=')
+
+ def test_header_encode_two_lines(self):
+ self._test_header_encode(b'hello\nworld',
+ '=?iso-8859-1?q?hello=0Aworld?=')
+
+ def test_header_encode_non_ascii(self):
+ self._test_header_encode(b'hello\xc7there',
+ '=?iso-8859-1?q?hello=C7there?=')
+
+ def test_header_encode_alt_charset(self):
+ self._test_header_encode(b'hello', '=?iso-8859-2?q?hello?=',
+ charset='iso-8859-2')
+
+ def _test_header_decode(self, encoded_header, expected_decoded_header):
+ decoded_header = quoprimime.header_decode(encoded_header)
+ self.assertEqual(decoded_header, expected_decoded_header)
+
+ def test_header_decode_null(self):
+ self._test_header_decode('', '')
+
+ def test_header_decode_one_word(self):
+ self._test_header_decode('hello', 'hello')
+
+ def test_header_decode_two_lines(self):
+ self._test_header_decode('hello=0Aworld', 'hello\nworld')
+
+ def test_header_decode_non_ascii(self):
+ self._test_header_decode('hello=C7there', 'hello\xc7there')
+
+ def _test_decode(self, encoded, expected_decoded, eol=None):
+ if eol is None:
+ decoded = quoprimime.decode(encoded)
+ else:
+ decoded = quoprimime.decode(encoded, eol=eol)
+ self.assertEqual(decoded, expected_decoded)
+
+ def test_decode_null_word(self):
+ self._test_decode('', '')
+
+ def test_decode_null_line_null_word(self):
+ self._test_decode('\r\n', '\n')
+
+ def test_decode_one_word(self):
+ self._test_decode('hello', 'hello')
+
+ def test_decode_one_word_eol(self):
+ self._test_decode('hello', 'hello', eol='X')
+
+ def test_decode_one_line(self):
+ self._test_decode('hello\r\n', 'hello\n')
+
+ def test_decode_one_line_lf(self):
+ self._test_decode('hello\n', 'hello\n')
+
+ def test_decode_one_line_cr(self):
+ self._test_decode('hello\r', 'hello\n')
+
+ def test_decode_one_line_nl(self):
+ self._test_decode('hello\n', 'helloX', eol='X')
+
+ def test_decode_one_line_crnl(self):
+ self._test_decode('hello\r\n', 'helloX', eol='X')
+
+ def test_decode_one_line_one_word(self):
+ self._test_decode('hello\r\nworld', 'hello\nworld')
+
+ def test_decode_one_line_one_word_eol(self):
+ self._test_decode('hello\r\nworld', 'helloXworld', eol='X')
+
+ def test_decode_two_lines(self):
+ self._test_decode('hello\r\nworld\r\n', 'hello\nworld\n')
+
+ def test_decode_two_lines_eol(self):
+ self._test_decode('hello\r\nworld\r\n', 'helloXworldX', eol='X')
+
+ def test_decode_one_long_line(self):
+ self._test_decode('Spam' * 250, 'Spam' * 250)
+
+ def test_decode_one_space(self):
+ self._test_decode(' ', '')
+
+ def test_decode_multiple_spaces(self):
+ self._test_decode(' ' * 5, '')
+
+ def test_decode_one_line_trailing_spaces(self):
+ self._test_decode('hello \r\n', 'hello\n')
+
+ def test_decode_two_lines_trailing_spaces(self):
+ self._test_decode('hello \r\nworld \r\n', 'hello\nworld\n')
+
+ def test_decode_quoted_word(self):
+ self._test_decode('=22quoted=20words=22', '"quoted words"')
+
+ def test_decode_uppercase_quoting(self):
+ self._test_decode('ab=CD=EF', 'ab\xcd\xef')
+
+ def test_decode_lowercase_quoting(self):
+ self._test_decode('ab=cd=ef', 'ab\xcd\xef')
+
+ def test_decode_soft_line_break(self):
+ self._test_decode('soft line=\r\nbreak', 'soft linebreak')
+
+ def test_decode_false_quoting(self):
+ self._test_decode('A=1,B=A ==> A+B==2', 'A=1,B=A ==> A+B==2')
+
+ def _test_encode(self, body, expected_encoded_body, maxlinelen=None, eol=None):
+ kwargs = {}
+ if maxlinelen is None:
+ # Use body_encode's default.
+ maxlinelen = 76
+ else:
+ kwargs['maxlinelen'] = maxlinelen
+ if eol is None:
+ # Use body_encode's default.
+ eol = '\n'
+ else:
+ kwargs['eol'] = eol
+ encoded_body = quoprimime.body_encode(body, **kwargs)
+ self.assertEqual(encoded_body, expected_encoded_body)
+ if eol == '\n' or eol == '\r\n':
+ # We know how to split the result back into lines, so maxlinelen
+ # can be checked.
+ for line in encoded_body.splitlines():
+ self.assertLessEqual(len(line), maxlinelen)
+
+ def test_encode_null(self):
+ self._test_encode('', '')
+
+ def test_encode_null_lines(self):
+ self._test_encode('\n\n', '\n\n')
+
+ def test_encode_one_line(self):
+ self._test_encode('hello\n', 'hello\n')
+
+ def test_encode_one_line_crlf(self):
+ self._test_encode('hello\r\n', 'hello\n')
+
+ def test_encode_one_line_eol(self):
+ self._test_encode('hello\n', 'hello\r\n', eol='\r\n')
+
+ def test_encode_one_space(self):
+ self._test_encode(' ', '=20')
+
+ def test_encode_one_line_one_space(self):
+ self._test_encode(' \n', '=20\n')
+
+# XXX: body_encode() expects strings, but uses ord(char) from these strings
+# to index into a 256-entry list. For code points above 255, this will fail.
+# Should there be a check for 8-bit only ord() values in body, or at least
+# a comment about the expected input?
+
+ def test_encode_two_lines_one_space(self):
+ self._test_encode(' \n \n', '=20\n=20\n')
+
+ def test_encode_one_word_trailing_spaces(self):
+ self._test_encode('hello ', 'hello =20')
+
+ def test_encode_one_line_trailing_spaces(self):
+ self._test_encode('hello \n', 'hello =20\n')
+
+ def test_encode_one_word_trailing_tab(self):
+ self._test_encode('hello \t', 'hello =09')
+
+ def test_encode_one_line_trailing_tab(self):
+ self._test_encode('hello \t\n', 'hello =09\n')
+
+ def test_encode_trailing_space_before_maxlinelen(self):
+ self._test_encode('abcd \n1234', 'abcd =\n\n1234', maxlinelen=6)
+
+ def test_encode_trailing_space_at_maxlinelen(self):
+ self._test_encode('abcd \n1234', 'abcd=\n=20\n1234', maxlinelen=5)
+
+ def test_encode_trailing_space_beyond_maxlinelen(self):
+ self._test_encode('abcd \n1234', 'abc=\nd=20\n1234', maxlinelen=4)
+
+ def test_encode_whitespace_lines(self):
+ self._test_encode(' \n' * 5, '=20\n' * 5)
+
+ def test_encode_quoted_equals(self):
+ self._test_encode('a = b', 'a =3D b')
+
+ def test_encode_one_long_string(self):
+ self._test_encode('x' * 100, 'x' * 75 + '=\n' + 'x' * 25)
+
+ def test_encode_one_long_line(self):
+ self._test_encode('x' * 100 + '\n', 'x' * 75 + '=\n' + 'x' * 25 + '\n')
+
+ def test_encode_one_very_long_line(self):
+ self._test_encode('x' * 200 + '\n',
+ 2 * ('x' * 75 + '=\n') + 'x' * 50 + '\n')
+
+ def test_encode_one_long_line(self):
+ self._test_encode('x' * 100 + '\n', 'x' * 75 + '=\n' + 'x' * 25 + '\n')
+
+ def test_encode_shortest_maxlinelen(self):
+ self._test_encode('=' * 5, '=3D=\n' * 4 + '=3D', maxlinelen=4)
+
+ def test_encode_maxlinelen_too_small(self):
+ self.assertRaises(ValueError, self._test_encode, '', '', maxlinelen=3)
def test_encode(self):
eq = self.assertEqual
- eq(quopriMIME.encode(''), '')
- eq(quopriMIME.encode('hello'), 'hello')
+ eq(quoprimime.body_encode(''), '')
+ eq(quoprimime.body_encode('hello'), 'hello')
# Test the binary flag
- eq(quopriMIME.encode('hello\r\nworld'), 'hello\nworld')
- eq(quopriMIME.encode('hello\r\nworld', 0), 'hello\nworld')
+ eq(quoprimime.body_encode('hello\r\nworld'), 'hello\nworld')
# Test the maxlinelen arg
- eq(quopriMIME.encode('xxxx ' * 20, maxlinelen=40), """\
+ eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40), """\
xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=
xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=
x xxxx xxxx xxxx xxxx=20""")
# Test the eol argument
- eq(quopriMIME.encode('xxxx ' * 20, maxlinelen=40, eol='\r\n'), """\
+ eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40, eol='\r\n'),
+ """\
xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=\r
xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=\r
x xxxx xxxx xxxx xxxx=20""")
- eq(quopriMIME.encode("""\
+ eq(quoprimime.body_encode("""\
one line
two line"""), """\
@@ -2945,23 +3914,22 @@ two line""")
# Test the Charset class
class TestCharset(unittest.TestCase):
def tearDown(self):
- from email import Charset as CharsetModule
+ from email import charset as CharsetModule
try:
del CharsetModule.CHARSETS['fake']
except KeyError:
pass
- def test_idempotent(self):
+ def test_codec_encodeable(self):
eq = self.assertEqual
# Make sure us-ascii = no Unicode conversion
c = Charset('us-ascii')
- s = 'Hello World!'
- sp = c.to_splittable(s)
- eq(s, c.from_splittable(sp))
- # test 8-bit idempotency with us-ascii
+ eq(c.header_encode('Hello World!'), 'Hello World!')
+ # Test 8-bit idempotency with us-ascii
s = '\xa4\xa2\xa4\xa4\xa4\xa6\xa4\xa8\xa4\xaa'
- sp = c.to_splittable(s)
- eq(s, c.from_splittable(sp))
+ self.assertRaises(UnicodeError, c.header_encode, s)
+ c = Charset('utf-8')
+ eq(c.header_encode(s), '=?utf-8?b?wqTCosKkwqTCpMKmwqTCqMKkwqo=?=')
def test_body_encode(self):
eq = self.assertEqual
@@ -2970,37 +3938,35 @@ class TestCharset(unittest.TestCase):
eq('hello w=F6rld', c.body_encode('hello w\xf6rld'))
# Try a charset with Base64 body encoding
c = Charset('utf-8')
- eq('aGVsbG8gd29ybGQ=\n', c.body_encode('hello world'))
+ eq('aGVsbG8gd29ybGQ=\n', c.body_encode(b'hello world'))
# Try a charset with None body encoding
c = Charset('us-ascii')
eq('hello world', c.body_encode('hello world'))
# Try the convert argument, where input codec != output codec
c = Charset('euc-jp')
# With apologies to Tokio Kikuchi ;)
- try:
- eq('\x1b$B5FCO;~IW\x1b(B',
- c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7'))
- eq('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7',
- c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7', False))
- except LookupError:
- # We probably don't have the Japanese codecs installed
- pass
+ # XXX FIXME
+## try:
+## eq('\x1b$B5FCO;~IW\x1b(B',
+## c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7'))
+## eq('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7',
+## c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7', False))
+## except LookupError:
+## # We probably don't have the Japanese codecs installed
+## pass
# Testing SF bug #625509, which we have to fake, since there are no
# built-in encodings where the header encoding is QP but the body
# encoding is not.
- from email import Charset as CharsetModule
- CharsetModule.add_charset('fake', CharsetModule.QP, None)
+ from email import charset as CharsetModule
+ CharsetModule.add_charset('fake', CharsetModule.QP, None, 'utf-8')
c = Charset('fake')
- eq('hello w\xf6rld', c.body_encode('hello w\xf6rld'))
+ eq('hello world', c.body_encode('hello world'))
def test_unicode_charset_name(self):
- charset = Charset(u'us-ascii')
+ charset = Charset('us-ascii')
self.assertEqual(str(charset), 'us-ascii')
- self.assertRaises(Errors.CharsetError, Charset, 'asc\xffii')
+ self.assertRaises(errors.CharsetError, Charset, 'asc\xffii')
- def test_codecs_aliases_accepted(self):
- charset = Charset('utf8')
- self.assertEqual(str(charset), 'utf-8')
# Test multilingual MIME headers.
@@ -3034,70 +4000,216 @@ class TestHeader(TestEmailBase):
g = Charset("iso-8859-1")
cz = Charset("iso-8859-2")
utf8 = Charset("utf-8")
- g_head = "Die Mieter treten hier ein werden mit einem Foerderband komfortabel den Korridor entlang, an s\xfcdl\xfcndischen Wandgem\xe4lden vorbei, gegen die rotierenden Klingen bef\xf6rdert. "
- cz_head = "Finan\xe8ni metropole se hroutily pod tlakem jejich d\xf9vtipu.. "
- utf8_head = u"\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066\u3044\u307e\u3059\u3002".encode("utf-8")
+ g_head = (b'Die Mieter treten hier ein werden mit einem '
+ b'Foerderband komfortabel den Korridor entlang, '
+ b'an s\xfcdl\xfcndischen Wandgem\xe4lden vorbei, '
+ b'gegen die rotierenden Klingen bef\xf6rdert. ')
+ cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
+ b'd\xf9vtipu.. ')
+ utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
+ '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
+ '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
+ '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
+ '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
+ 'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
+ 'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
+ '\u3044\u307e\u3059\u3002')
h = Header(g_head, g)
h.append(cz_head, cz)
h.append(utf8_head, utf8)
- enc = h.encode()
+ enc = h.encode(maxlinelen=76)
eq(enc, """\
-=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderband_ko?=
- =?iso-8859-1?q?mfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen_Wan?=
- =?iso-8859-1?q?dgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef=F6?=
- =?iso-8859-1?q?rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hroutily?=
+=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderband_kom?=
+ =?iso-8859-1?q?fortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen_Wand?=
+ =?iso-8859-1?q?gem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef=F6r?=
+ =?iso-8859-1?q?dert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hroutily?=
=?iso-8859-2?q?_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
=?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC?=
=?utf-8?b?5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn?=
=?utf-8?b?44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFz?=
- =?utf-8?q?_Nunstuck_git_und_Slotermeyer=3F_Ja!_Beiherhund_das_Oder_die_Fl?=
- =?utf-8?b?aXBwZXJ3YWxkdCBnZXJzcHV0LuOAjeOBqOiogOOBo+OBpuOBhOOBvuOBmQ==?=
- =?utf-8?b?44CC?=""")
- eq(decode_header(enc),
- [(g_head, "iso-8859-1"), (cz_head, "iso-8859-2"),
- (utf8_head, "utf-8")])
- ustr = unicode(h)
- eq(ustr.encode('utf-8'),
- 'Die Mieter treten hier ein werden mit einem Foerderband '
- 'komfortabel den Korridor entlang, an s\xc3\xbcdl\xc3\xbcndischen '
- 'Wandgem\xc3\xa4lden vorbei, gegen die rotierenden Klingen '
- 'bef\xc3\xb6rdert. Finan\xc4\x8dni metropole se hroutily pod '
- 'tlakem jejich d\xc5\xafvtipu.. \xe6\xad\xa3\xe7\xa2\xba\xe3\x81'
- '\xab\xe8\xa8\x80\xe3\x81\x86\xe3\x81\xa8\xe7\xbf\xbb\xe8\xa8\xb3'
- '\xe3\x81\xaf\xe3\x81\x95\xe3\x82\x8c\xe3\x81\xa6\xe3\x81\x84\xe3'
- '\x81\xbe\xe3\x81\x9b\xe3\x82\x93\xe3\x80\x82\xe4\xb8\x80\xe9\x83'
- '\xa8\xe3\x81\xaf\xe3\x83\x89\xe3\x82\xa4\xe3\x83\x84\xe8\xaa\x9e'
- '\xe3\x81\xa7\xe3\x81\x99\xe3\x81\x8c\xe3\x80\x81\xe3\x81\x82\xe3'
- '\x81\xa8\xe3\x81\xaf\xe3\x81\xa7\xe3\x81\x9f\xe3\x82\x89\xe3\x82'
- '\x81\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\xe5\xae\x9f\xe9\x9a\x9b'
- '\xe3\x81\xab\xe3\x81\xaf\xe3\x80\x8cWenn ist das Nunstuck git '
- 'und Slotermeyer? Ja! Beiherhund das Oder die Flipperwaldt '
- 'gersput.\xe3\x80\x8d\xe3\x81\xa8\xe8\xa8\x80\xe3\x81\xa3\xe3\x81'
- '\xa6\xe3\x81\x84\xe3\x81\xbe\xe3\x81\x99\xe3\x80\x82')
+ =?utf-8?b?IE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5k?=
+ =?utf-8?b?IGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIA=?=
+ =?utf-8?b?44Gj44Gm44GE44G+44GZ44CC?=""")
+ decoded = decode_header(enc)
+ eq(len(decoded), 3)
+ eq(decoded[0], (g_head, 'iso-8859-1'))
+ eq(decoded[1], (cz_head, 'iso-8859-2'))
+ eq(decoded[2], (utf8_head.encode('utf-8'), 'utf-8'))
+ ustr = str(h)
+ eq(ustr,
+ (b'Die Mieter treten hier ein werden mit einem Foerderband '
+ b'komfortabel den Korridor entlang, an s\xc3\xbcdl\xc3\xbcndischen '
+ b'Wandgem\xc3\xa4lden vorbei, gegen die rotierenden Klingen '
+ b'bef\xc3\xb6rdert. Finan\xc4\x8dni metropole se hroutily pod '
+ b'tlakem jejich d\xc5\xafvtipu.. \xe6\xad\xa3\xe7\xa2\xba\xe3\x81'
+ b'\xab\xe8\xa8\x80\xe3\x81\x86\xe3\x81\xa8\xe7\xbf\xbb\xe8\xa8\xb3'
+ b'\xe3\x81\xaf\xe3\x81\x95\xe3\x82\x8c\xe3\x81\xa6\xe3\x81\x84\xe3'
+ b'\x81\xbe\xe3\x81\x9b\xe3\x82\x93\xe3\x80\x82\xe4\xb8\x80\xe9\x83'
+ b'\xa8\xe3\x81\xaf\xe3\x83\x89\xe3\x82\xa4\xe3\x83\x84\xe8\xaa\x9e'
+ b'\xe3\x81\xa7\xe3\x81\x99\xe3\x81\x8c\xe3\x80\x81\xe3\x81\x82\xe3'
+ b'\x81\xa8\xe3\x81\xaf\xe3\x81\xa7\xe3\x81\x9f\xe3\x82\x89\xe3\x82'
+ b'\x81\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\xe5\xae\x9f\xe9\x9a\x9b'
+ b'\xe3\x81\xab\xe3\x81\xaf\xe3\x80\x8cWenn ist das Nunstuck git '
+ b'und Slotermeyer? Ja! Beiherhund das Oder die Flipperwaldt '
+ b'gersput.\xe3\x80\x8d\xe3\x81\xa8\xe8\xa8\x80\xe3\x81\xa3\xe3\x81'
+ b'\xa6\xe3\x81\x84\xe3\x81\xbe\xe3\x81\x99\xe3\x80\x82'
+ ).decode('utf-8'))
# Test make_header()
newh = make_header(decode_header(enc))
- eq(newh, enc)
+ eq(newh, h)
+
+ def test_empty_header_encode(self):
+ h = Header()
+ self.assertEqual(h.encode(), '')
def test_header_ctor_default_args(self):
eq = self.ndiffAssertEqual
h = Header()
eq(h, '')
h.append('foo', Charset('iso-8859-1'))
- eq(h, '=?iso-8859-1?q?foo?=')
+ eq(h, 'foo')
def test_explicit_maxlinelen(self):
eq = self.ndiffAssertEqual
- hstr = 'A very long line that must get split to something other than at the 76th character boundary to test the non-default behavior'
+ hstr = ('A very long line that must get split to something other '
+ 'than at the 76th character boundary to test the non-default '
+ 'behavior')
h = Header(hstr)
eq(h.encode(), '''\
A very long line that must get split to something other than at the 76th
character boundary to test the non-default behavior''')
+ eq(str(h), hstr)
h = Header(hstr, header_name='Subject')
eq(h.encode(), '''\
A very long line that must get split to something other than at the
76th character boundary to test the non-default behavior''')
+ eq(str(h), hstr)
h = Header(hstr, maxlinelen=1024, header_name='Subject')
eq(h.encode(), hstr)
+ eq(str(h), hstr)
+
+ def test_quopri_splittable(self):
+ eq = self.ndiffAssertEqual
+ h = Header(charset='iso-8859-1', maxlinelen=20)
+ x = 'xxxx ' * 20
+ h.append(x)
+ s = h.encode()
+ eq(s, """\
+=?iso-8859-1?q?xxx?=
+ =?iso-8859-1?q?x_?=
+ =?iso-8859-1?q?xx?=
+ =?iso-8859-1?q?xx?=
+ =?iso-8859-1?q?_x?=
+ =?iso-8859-1?q?xx?=
+ =?iso-8859-1?q?x_?=
+ =?iso-8859-1?q?xx?=
+ =?iso-8859-1?q?xx?=
+ =?iso-8859-1?q?_x?=
+ =?iso-8859-1?q?xx?=
+ =?iso-8859-1?q?x_?=
+ =?iso-8859-1?q?xx?=
+ =?iso-8859-1?q?xx?=
+ =?iso-8859-1?q?_x?=
+ =?iso-8859-1?q?xx?=
+ =?iso-8859-1?q?x_?=
+ =?iso-8859-1?q?xx?=
+ =?iso-8859-1?q?xx?=
+ =?iso-8859-1?q?_x?=
+ =?iso-8859-1?q?xx?=
+ =?iso-8859-1?q?x_?=
+ =?iso-8859-1?q?xx?=
+ =?iso-8859-1?q?xx?=
+ =?iso-8859-1?q?_x?=
+ =?iso-8859-1?q?xx?=
+ =?iso-8859-1?q?x_?=
+ =?iso-8859-1?q?xx?=
+ =?iso-8859-1?q?xx?=
+ =?iso-8859-1?q?_x?=
+ =?iso-8859-1?q?xx?=
+ =?iso-8859-1?q?x_?=
+ =?iso-8859-1?q?xx?=
+ =?iso-8859-1?q?xx?=
+ =?iso-8859-1?q?_x?=
+ =?iso-8859-1?q?xx?=
+ =?iso-8859-1?q?x_?=
+ =?iso-8859-1?q?xx?=
+ =?iso-8859-1?q?xx?=
+ =?iso-8859-1?q?_x?=
+ =?iso-8859-1?q?xx?=
+ =?iso-8859-1?q?x_?=
+ =?iso-8859-1?q?xx?=
+ =?iso-8859-1?q?xx?=
+ =?iso-8859-1?q?_x?=
+ =?iso-8859-1?q?xx?=
+ =?iso-8859-1?q?x_?=
+ =?iso-8859-1?q?xx?=
+ =?iso-8859-1?q?xx?=
+ =?iso-8859-1?q?_?=""")
+ eq(x, str(make_header(decode_header(s))))
+ h = Header(charset='iso-8859-1', maxlinelen=40)
+ h.append('xxxx ' * 20)
+ s = h.encode()
+ eq(s, """\
+=?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xxx?=
+ =?iso-8859-1?q?x_xxxx_xxxx_xxxx_xxxx_?=
+ =?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xx?=
+ =?iso-8859-1?q?xx_xxxx_xxxx_xxxx_xxxx?=
+ =?iso-8859-1?q?_xxxx_xxxx_?=""")
+ eq(x, str(make_header(decode_header(s))))
+
+ def test_base64_splittable(self):
+ eq = self.ndiffAssertEqual
+ h = Header(charset='koi8-r', maxlinelen=20)
+ x = 'xxxx ' * 20
+ h.append(x)
+ s = h.encode()
+ eq(s, """\
+=?koi8-r?b?eHh4?=
+ =?koi8-r?b?eCB4?=
+ =?koi8-r?b?eHh4?=
+ =?koi8-r?b?IHh4?=
+ =?koi8-r?b?eHgg?=
+ =?koi8-r?b?eHh4?=
+ =?koi8-r?b?eCB4?=
+ =?koi8-r?b?eHh4?=
+ =?koi8-r?b?IHh4?=
+ =?koi8-r?b?eHgg?=
+ =?koi8-r?b?eHh4?=
+ =?koi8-r?b?eCB4?=
+ =?koi8-r?b?eHh4?=
+ =?koi8-r?b?IHh4?=
+ =?koi8-r?b?eHgg?=
+ =?koi8-r?b?eHh4?=
+ =?koi8-r?b?eCB4?=
+ =?koi8-r?b?eHh4?=
+ =?koi8-r?b?IHh4?=
+ =?koi8-r?b?eHgg?=
+ =?koi8-r?b?eHh4?=
+ =?koi8-r?b?eCB4?=
+ =?koi8-r?b?eHh4?=
+ =?koi8-r?b?IHh4?=
+ =?koi8-r?b?eHgg?=
+ =?koi8-r?b?eHh4?=
+ =?koi8-r?b?eCB4?=
+ =?koi8-r?b?eHh4?=
+ =?koi8-r?b?IHh4?=
+ =?koi8-r?b?eHgg?=
+ =?koi8-r?b?eHh4?=
+ =?koi8-r?b?eCB4?=
+ =?koi8-r?b?eHh4?=
+ =?koi8-r?b?IA==?=""")
+ eq(x, str(make_header(decode_header(s))))
+ h = Header(charset='koi8-r', maxlinelen=40)
+ h.append(x)
+ s = h.encode()
+ eq(s, """\
+=?koi8-r?b?eHh4eCB4eHh4IHh4eHggeHh4?=
+ =?koi8-r?b?eCB4eHh4IHh4eHggeHh4eCB4?=
+ =?koi8-r?b?eHh4IHh4eHggeHh4eCB4eHh4?=
+ =?koi8-r?b?IHh4eHggeHh4eCB4eHh4IHh4?=
+ =?koi8-r?b?eHggeHh4eCB4eHh4IHh4eHgg?=
+ =?koi8-r?b?eHh4eCB4eHh4IA==?=""")
+ eq(x, str(make_header(decode_header(s))))
def test_us_ascii_header(self):
eq = self.assertEqual
@@ -3111,7 +4223,7 @@ A very long line that must get split to something other than at the
eq = self.assertEqual
h = Header()
h.append('hello', 'iso-8859-1')
- eq(h, '=?iso-8859-1?q?hello?=')
+ eq(h, 'hello')
## def test_unicode_error(self):
## raises = self.assertRaises
@@ -3124,21 +4236,51 @@ A very long line that must get split to something other than at the
def test_utf8_shortest(self):
eq = self.assertEqual
- h = Header(u'p\xf6stal', 'utf-8')
+ h = Header('p\xf6stal', 'utf-8')
eq(h.encode(), '=?utf-8?q?p=C3=B6stal?=')
- h = Header(u'\u83ca\u5730\u6642\u592b', 'utf-8')
+ h = Header('\u83ca\u5730\u6642\u592b', 'utf-8')
eq(h.encode(), '=?utf-8?b?6I+K5Zyw5pmC5aSr?=')
def test_bad_8bit_header(self):
raises = self.assertRaises
eq = self.assertEqual
- x = 'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
+ x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
raises(UnicodeError, Header, x)
h = Header()
raises(UnicodeError, h.append, x)
- eq(str(Header(x, errors='replace')), x)
+ e = x.decode('utf-8', 'replace')
+ eq(str(Header(x, errors='replace')), e)
h.append(x, errors='replace')
- eq(str(h), x)
+ eq(str(h), e)
+
+ def test_escaped_8bit_header(self):
+ x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
+ e = x.decode('ascii', 'surrogateescape')
+ h = Header(e, charset=email.charset.UNKNOWN8BIT)
+ self.assertEqual(str(h),
+ 'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
+ self.assertEqual(email.header.decode_header(h), [(x, 'unknown-8bit')])
+
+ def test_header_handles_binary_unknown8bit(self):
+ x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
+ h = Header(x, charset=email.charset.UNKNOWN8BIT)
+ self.assertEqual(str(h),
+ 'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
+ self.assertEqual(email.header.decode_header(h), [(x, 'unknown-8bit')])
+
+ def test_make_header_handles_binary_unknown8bit(self):
+ x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
+ h = Header(x, charset=email.charset.UNKNOWN8BIT)
+ h2 = email.header.make_header(email.header.decode_header(h))
+ self.assertEqual(str(h2),
+ 'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
+ self.assertEqual(email.header.decode_header(h2), [(x, 'unknown-8bit')])
+
+ def test_modify_returned_list_does_not_change_header(self):
+ h = Header('test')
+ chunks = email.header.decode_header(h)
+ chunks.append(('ascii', 'test2'))
+ self.assertEqual(str(h), 'test')
def test_encoded_adjacent_nonencoded(self):
eq = self.assertEqual
@@ -3154,7 +4296,7 @@ A very long line that must get split to something other than at the
eq = self.assertEqual
s = 'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztk=?= =?koi8-r?q?=CA?= zz.'
parts = decode_header(s)
- eq(parts, [('Subject:', None), ('\xf0\xd2\xcf\xd7\xc5\xd2\xcb\xc1 \xce\xc1 \xc6\xc9\xce\xc1\xcc\xd8\xce\xd9\xca', 'koi8-r'), ('zz.', None)])
+ eq(parts, [(b'Subject:', None), (b'\xf0\xd2\xcf\xd7\xc5\xd2\xcb\xc1 \xce\xc1 \xc6\xc9\xce\xc1\xcc\xd8\xce\xd9\xca', 'koi8-r'), (b'zz.', None)])
hdr = make_header(parts)
eq(hdr.encode(),
'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztnK?= zz.')
@@ -3162,30 +4304,24 @@ A very long line that must get split to something other than at the
def test_broken_base64_header(self):
raises = self.assertRaises
s = 'Subject: =?EUC-KR?B?CSixpLDtKSC/7Liuvsax4iC6uLmwMcijIKHaILzSwd/H0SC8+LCjwLsgv7W/+Mj3I ?='
- raises(Errors.HeaderParseError, decode_header, s)
+ raises(errors.HeaderParseError, decode_header, s)
- # Issue 1078919
- def test_ascii_add_header(self):
- msg = Message()
- msg.add_header('Content-Disposition', 'attachment',
- filename='bud.gif')
- self.assertEqual('attachment; filename="bud.gif"',
- msg['Content-Disposition'])
+ def test_shift_jis_charset(self):
+ h = Header('文', charset='shift_jis')
+ self.assertEqual(h.encode(), '=?iso-2022-jp?b?GyRCSjgbKEI=?=')
- def test_nonascii_add_header_via_triple(self):
+ def test_flatten_header_with_no_value(self):
+ # Issue 11401 (regression from email 4.x) Note that the space after
+ # the header doesn't reflect the input, but this is also the way
+ # email 4.x behaved. At some point it would be nice to fix that.
+ msg = email.message_from_string("EmptyHeader:")
+ self.assertEqual(str(msg), "EmptyHeader: \n\n")
+
+ def test_encode_preserves_leading_ws_on_value(self):
msg = Message()
- msg.add_header('Content-Disposition', 'attachment',
- filename=('iso-8859-1', '', 'Fu\xdfballer.ppt'))
- self.assertEqual(
- 'attachment; filename*="iso-8859-1\'\'Fu%DFballer.ppt"',
- msg['Content-Disposition'])
+ msg['SomeHeader'] = ' value with leading ws'
+ self.assertEqual(str(msg), "SomeHeader: value with leading ws\n\n")
- def test_encode_unaliased_charset(self):
- # Issue 1379416: when the charset has no output conversion,
- # output was accidentally getting coerced to unicode.
- res = Header('abc','iso-8859-2').encode()
- self.assertEqual(res, '=?iso-8859-2?q?abc?=')
- self.assertIsInstance(res, str)
# Test RFC 2231 header parameters (en/de)coding
@@ -3199,7 +4335,7 @@ class TestRFC2231(TestEmailBase):
('us-ascii', 'en', '"This is even more ***fun*** isn\'t it!"'))
def test_set_param(self):
- eq = self.assertEqual
+ eq = self.ndiffAssertEqual
msg = Message()
msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
charset='us-ascii')
@@ -3212,11 +4348,11 @@ class TestRFC2231(TestEmailBase):
msg = self._msgobj('msg_01.txt')
msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
charset='us-ascii', language='en')
- self.ndiffAssertEqual(msg.as_string(), """\
+ eq(msg.as_string(maxheaderlen=78), """\
Return-Path: <bbb@zzz.org>
Delivered-To: bbb@zzz.org
Received: by mail.zzz.org (Postfix, from userid 889)
- id 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT)
+\tid 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT)
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
@@ -3225,7 +4361,7 @@ To: bbb@zzz.org
Subject: This is a test message
Date: Fri, 4 May 2001 14:05:44 -0400
Content-Type: text/plain; charset=us-ascii;
- title*="us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21"
+ title*=us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21
Hi,
@@ -3235,6 +4371,16 @@ Do you like this message?
-Me
""")
+ def test_set_param_requote(self):
+ msg = Message()
+ msg.set_param('title', 'foo')
+ self.assertEqual(msg['content-type'], 'text/plain; title="foo"')
+ msg.set_param('title', 'bar', requote=False)
+ self.assertEqual(msg['content-type'], 'text/plain; title=bar')
+ # tspecial is still quoted.
+ msg.set_param('title', "(bar)bell", requote=False)
+ self.assertEqual(msg['content-type'], 'text/plain; title="(bar)bell"')
+
def test_del_param(self):
eq = self.ndiffAssertEqual
msg = self._msgobj('msg_01.txt')
@@ -3242,11 +4388,11 @@ Do you like this message?
msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
charset='us-ascii', language='en')
msg.del_param('foo', header='Content-Type')
- eq(msg.as_string(), """\
+ eq(msg.as_string(maxheaderlen=78), """\
Return-Path: <bbb@zzz.org>
Delivered-To: bbb@zzz.org
Received: by mail.zzz.org (Postfix, from userid 889)
- id 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT)
+\tid 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT)
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
@@ -3255,7 +4401,7 @@ To: bbb@zzz.org
Subject: This is a test message
Date: Fri, 4 May 2001 14:05:44 -0400
Content-Type: text/plain; charset="us-ascii";
- title*="us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21"
+ title*=us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21
Hi,
@@ -3270,6 +4416,32 @@ Do you like this message?
msg = self._msgobj('msg_32.txt')
eq(msg.get_content_charset(), 'us-ascii')
+ def test_rfc2231_parse_rfc_quoting(self):
+ m = textwrap.dedent('''\
+ Content-Disposition: inline;
+ \tfilename*0*=''This%20is%20even%20more%20;
+ \tfilename*1*=%2A%2A%2Afun%2A%2A%2A%20;
+ \tfilename*2="is it not.pdf"
+
+ ''')
+ msg = email.message_from_string(m)
+ self.assertEqual(msg.get_filename(),
+ 'This is even more ***fun*** is it not.pdf')
+ self.assertEqual(m, msg.as_string())
+
+ def test_rfc2231_parse_extra_quoting(self):
+ m = textwrap.dedent('''\
+ Content-Disposition: inline;
+ \tfilename*0*="''This%20is%20even%20more%20";
+ \tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
+ \tfilename*2="is it not.pdf"
+
+ ''')
+ msg = email.message_from_string(m)
+ self.assertEqual(msg.get_filename(),
+ 'This is even more ***fun*** is it not.pdf')
+ self.assertEqual(m, msg.as_string())
+
def test_rfc2231_no_language_or_charset(self):
m = '''\
Content-Transfer-Encoding: 8bit
@@ -3401,7 +4573,7 @@ Content-Disposition: inline;
'''
msg = email.message_from_string(m)
self.assertEqual(msg.get_filename(),
- u'This is even more ***fun*** is it not.pdf\ufffd')
+ 'This is even more ***fun*** is it not.pdf\ufffd')
def test_rfc2231_unknown_encoding(self):
m = """\
@@ -3506,12 +4678,9 @@ Content-Type: application/x-foo;
class TestSigned(TestEmailBase):
def _msg_and_obj(self, filename):
- fp = openfile(findfile(filename))
- try:
+ with openfile(findfile(filename)) as fp:
original = fp.read()
msg = email.message_from_string(original)
- finally:
- fp.close()
return original, msg
def _signed_parts_eq(self, original, result):
@@ -3527,6 +4696,11 @@ class TestSigned(TestEmailBase):
result = msg.as_string()
self._signed_parts_eq(original, result)
+ def test_long_headers_as_string_maxheaderlen(self):
+ original, msg = self._msg_and_obj('msg_45.txt')
+ result = msg.as_string(maxheaderlen=60)
+ self._signed_parts_eq(original, result)
+
def test_long_headers_flatten(self):
original, msg = self._msg_and_obj('msg_45.txt')
fp = StringIO()
diff --git a/Lib/email/test/test_email_codecs.py b/Lib/email/test/test_email_codecs.py
index 532750a..ca85f57 100644
--- a/Lib/email/test/test_email_codecs.py
+++ b/Lib/email/test/test_email_codecs.py
@@ -3,7 +3,7 @@
# email package unit tests for (optional) Asian codecs
import unittest
-from test.test_support import run_unittest
+from test.support import run_unittest
from email.test.test_email import TestEmailBase
from email.charset import Charset
@@ -13,7 +13,7 @@ from email.message import Message
# We're compatible with Python 2.3, but it doesn't have the built-in Asian
# codecs, so we have to skip all these tests.
try:
- unicode('foo', 'euc-jp')
+ str(b'foo', 'euc-jp')
except LookupError:
raise unittest.SkipTest
@@ -22,11 +22,14 @@ except LookupError:
class TestEmailAsianCodecs(TestEmailBase):
def test_japanese_codecs(self):
eq = self.ndiffAssertEqual
- j = Charset("euc-jp")
- g = Charset("iso-8859-1")
+ jcode = "euc-jp"
+ gcode = "iso-8859-1"
+ j = Charset(jcode)
+ g = Charset(gcode)
h = Header("Hello World!")
- jhello = '\xa5\xcf\xa5\xed\xa1\xbc\xa5\xef\xa1\xbc\xa5\xeb\xa5\xc9\xa1\xaa'
- ghello = 'Gr\xfc\xdf Gott!'
+ jhello = str(b'\xa5\xcf\xa5\xed\xa1\xbc\xa5\xef\xa1\xbc'
+ b'\xa5\xeb\xa5\xc9\xa1\xaa', jcode)
+ ghello = str(b'Gr\xfc\xdf Gott!', gcode)
h.append(jhello, j)
h.append(ghello, g)
# BAW: This used to -- and maybe should -- fold the two iso-8859-1
@@ -36,13 +39,17 @@ class TestEmailAsianCodecs(TestEmailBase):
# encoded word.
eq(h.encode(), """\
Hello World! =?iso-2022-jp?b?GyRCJU8lbSE8JW8hPCVrJUkhKhsoQg==?=
- =?iso-8859-1?q?Gr=FC=DF?= =?iso-8859-1?q?_Gott!?=""")
+ =?iso-8859-1?q?Gr=FC=DF_Gott!?=""")
eq(decode_header(h.encode()),
- [('Hello World!', None),
- ('\x1b$B%O%m!<%o!<%k%I!*\x1b(B', 'iso-2022-jp'),
- ('Gr\xfc\xdf Gott!', 'iso-8859-1')])
- long = 'test-ja \xa4\xd8\xc5\xea\xb9\xc6\xa4\xb5\xa4\xec\xa4\xbf\xa5\xe1\xa1\xbc\xa5\xeb\xa4\xcf\xbb\xca\xb2\xf1\xbc\xd4\xa4\xce\xbe\xb5\xc7\xa7\xa4\xf2\xc2\xd4\xa4\xc3\xa4\xc6\xa4\xa4\xa4\xde\xa4\xb9'
- h = Header(long, j, header_name="Subject")
+ [(b'Hello World!', None),
+ (b'\x1b$B%O%m!<%o!<%k%I!*\x1b(B', 'iso-2022-jp'),
+ (b'Gr\xfc\xdf Gott!', gcode)])
+ subject_bytes = (b'test-ja \xa4\xd8\xc5\xea\xb9\xc6\xa4\xb5'
+ b'\xa4\xec\xa4\xbf\xa5\xe1\xa1\xbc\xa5\xeb\xa4\xcf\xbb\xca\xb2'
+ b'\xf1\xbc\xd4\xa4\xce\xbe\xb5\xc7\xa7\xa4\xf2\xc2\xd4\xa4\xc3'
+ b'\xa4\xc6\xa4\xa4\xa4\xde\xa4\xb9')
+ subject = str(subject_bytes, jcode)
+ h = Header(subject, j, header_name="Subject")
# test a very long header
enc = h.encode()
# TK: splitting point may differ by codec design and/or Header encoding
@@ -50,15 +57,24 @@ Hello World! =?iso-2022-jp?b?GyRCJU8lbSE8JW8hPCVrJUkhKhsoQg==?=
=?iso-2022-jp?b?dGVzdC1qYSAbJEIkWEVqOUYkNSRsJD8lYSE8JWskTztKGyhC?=
=?iso-2022-jp?b?GyRCMnE8VCROPjVHJyRyQlQkQyRGJCQkXiQ5GyhC?=""")
# TK: full decode comparison
- eq(h.__unicode__().encode('euc-jp'), long)
+ eq(str(h).encode(jcode), subject_bytes)
+
+ def test_payload_encoding_utf8(self):
+ jhello = str(b'\xa5\xcf\xa5\xed\xa1\xbc\xa5\xef\xa1\xbc'
+ b'\xa5\xeb\xa5\xc9\xa1\xaa', 'euc-jp')
+ msg = Message()
+ msg.set_payload(jhello, 'utf-8')
+ ustr = msg.get_payload(decode=True).decode(msg.get_content_charset())
+ self.assertEqual(jhello, ustr)
def test_payload_encoding(self):
- jhello = '\xa5\xcf\xa5\xed\xa1\xbc\xa5\xef\xa1\xbc\xa5\xeb\xa5\xc9\xa1\xaa'
jcode = 'euc-jp'
+ jhello = str(b'\xa5\xcf\xa5\xed\xa1\xbc\xa5\xef\xa1\xbc'
+ b'\xa5\xeb\xa5\xc9\xa1\xaa', jcode)
msg = Message()
msg.set_payload(jhello, jcode)
- ustr = unicode(msg.get_payload(), msg.get_content_charset())
- self.assertEqual(jhello, ustr.encode(jcode))
+ ustr = msg.get_payload(decode=True).decode(msg.get_content_charset())
+ self.assertEqual(jhello, ustr)
diff --git a/Lib/email/test/test_email_codecs_renamed.py b/Lib/email/test/test_email_codecs_renamed.py
deleted file mode 100644
index 532750a..0000000
--- a/Lib/email/test/test_email_codecs_renamed.py
+++ /dev/null
@@ -1,77 +0,0 @@
-# Copyright (C) 2002-2006 Python Software Foundation
-# Contact: email-sig@python.org
-# email package unit tests for (optional) Asian codecs
-
-import unittest
-from test.test_support import run_unittest
-
-from email.test.test_email import TestEmailBase
-from email.charset import Charset
-from email.header import Header, decode_header
-from email.message import Message
-
-# We're compatible with Python 2.3, but it doesn't have the built-in Asian
-# codecs, so we have to skip all these tests.
-try:
- unicode('foo', 'euc-jp')
-except LookupError:
- raise unittest.SkipTest
-
-
-
-class TestEmailAsianCodecs(TestEmailBase):
- def test_japanese_codecs(self):
- eq = self.ndiffAssertEqual
- j = Charset("euc-jp")
- g = Charset("iso-8859-1")
- h = Header("Hello World!")
- jhello = '\xa5\xcf\xa5\xed\xa1\xbc\xa5\xef\xa1\xbc\xa5\xeb\xa5\xc9\xa1\xaa'
- ghello = 'Gr\xfc\xdf Gott!'
- h.append(jhello, j)
- h.append(ghello, g)
- # BAW: This used to -- and maybe should -- fold the two iso-8859-1
- # chunks into a single encoded word. However it doesn't violate the
- # standard to have them as two encoded chunks and maybe it's
- # reasonable <wink> for each .append() call to result in a separate
- # encoded word.
- eq(h.encode(), """\
-Hello World! =?iso-2022-jp?b?GyRCJU8lbSE8JW8hPCVrJUkhKhsoQg==?=
- =?iso-8859-1?q?Gr=FC=DF?= =?iso-8859-1?q?_Gott!?=""")
- eq(decode_header(h.encode()),
- [('Hello World!', None),
- ('\x1b$B%O%m!<%o!<%k%I!*\x1b(B', 'iso-2022-jp'),
- ('Gr\xfc\xdf Gott!', 'iso-8859-1')])
- long = 'test-ja \xa4\xd8\xc5\xea\xb9\xc6\xa4\xb5\xa4\xec\xa4\xbf\xa5\xe1\xa1\xbc\xa5\xeb\xa4\xcf\xbb\xca\xb2\xf1\xbc\xd4\xa4\xce\xbe\xb5\xc7\xa7\xa4\xf2\xc2\xd4\xa4\xc3\xa4\xc6\xa4\xa4\xa4\xde\xa4\xb9'
- h = Header(long, j, header_name="Subject")
- # test a very long header
- enc = h.encode()
- # TK: splitting point may differ by codec design and/or Header encoding
- eq(enc , """\
-=?iso-2022-jp?b?dGVzdC1qYSAbJEIkWEVqOUYkNSRsJD8lYSE8JWskTztKGyhC?=
- =?iso-2022-jp?b?GyRCMnE8VCROPjVHJyRyQlQkQyRGJCQkXiQ5GyhC?=""")
- # TK: full decode comparison
- eq(h.__unicode__().encode('euc-jp'), long)
-
- def test_payload_encoding(self):
- jhello = '\xa5\xcf\xa5\xed\xa1\xbc\xa5\xef\xa1\xbc\xa5\xeb\xa5\xc9\xa1\xaa'
- jcode = 'euc-jp'
- msg = Message()
- msg.set_payload(jhello, jcode)
- ustr = unicode(msg.get_payload(), msg.get_content_charset())
- self.assertEqual(jhello, ustr.encode(jcode))
-
-
-
-def suite():
- suite = unittest.TestSuite()
- suite.addTest(unittest.makeSuite(TestEmailAsianCodecs))
- return suite
-
-
-def test_main():
- run_unittest(TestEmailAsianCodecs)
-
-
-
-if __name__ == '__main__':
- unittest.main(defaultTest='suite')
diff --git a/Lib/email/test/test_email_renamed.py b/Lib/email/test/test_email_renamed.py
deleted file mode 100644
index 497b66b..0000000
--- a/Lib/email/test/test_email_renamed.py
+++ /dev/null
@@ -1,3297 +0,0 @@
-# Copyright (C) 2001-2007 Python Software Foundation
-# Contact: email-sig@python.org
-# email package unit tests
-
-import os
-import sys
-import time
-import base64
-import difflib
-import unittest
-import warnings
-from cStringIO import StringIO
-
-import email
-
-from email.charset import Charset
-from email.header import Header, decode_header, make_header
-from email.parser import Parser, HeaderParser
-from email.generator import Generator, DecodedGenerator
-from email.message import Message
-from email.mime.application import MIMEApplication
-from email.mime.audio import MIMEAudio
-from email.mime.text import MIMEText
-from email.mime.image import MIMEImage
-from email.mime.base import MIMEBase
-from email.mime.message import MIMEMessage
-from email.mime.multipart import MIMEMultipart
-from email import utils
-from email import errors
-from email import encoders
-from email import iterators
-from email import base64mime
-from email import quoprimime
-
-from test.test_support import findfile, run_unittest
-from email.test import __file__ as landmark
-
-
-NL = '\n'
-EMPTYSTRING = ''
-SPACE = ' '
-
-
-
-def openfile(filename, mode='r'):
- path = os.path.join(os.path.dirname(landmark), 'data', filename)
- return open(path, mode)
-
-
-
-# Base test class
-class TestEmailBase(unittest.TestCase):
- def ndiffAssertEqual(self, first, second):
- """Like assertEqual except use ndiff for readable output."""
- if first != second:
- sfirst = str(first)
- ssecond = str(second)
- diff = difflib.ndiff(sfirst.splitlines(), ssecond.splitlines())
- fp = StringIO()
- print >> fp, NL, NL.join(diff)
- raise self.failureException, fp.getvalue()
-
- def _msgobj(self, filename):
- fp = openfile(findfile(filename))
- try:
- msg = email.message_from_file(fp)
- finally:
- fp.close()
- return msg
-
-
-
-# Test various aspects of the Message class's API
-class TestMessageAPI(TestEmailBase):
- def test_get_all(self):
- eq = self.assertEqual
- msg = self._msgobj('msg_20.txt')
- eq(msg.get_all('cc'), ['ccc@zzz.org', 'ddd@zzz.org', 'eee@zzz.org'])
- eq(msg.get_all('xx', 'n/a'), 'n/a')
-
- def test_getset_charset(self):
- eq = self.assertEqual
- msg = Message()
- eq(msg.get_charset(), None)
- charset = Charset('iso-8859-1')
- msg.set_charset(charset)
- eq(msg['mime-version'], '1.0')
- eq(msg.get_content_type(), 'text/plain')
- eq(msg['content-type'], 'text/plain; charset="iso-8859-1"')
- eq(msg.get_param('charset'), 'iso-8859-1')
- eq(msg['content-transfer-encoding'], 'quoted-printable')
- eq(msg.get_charset().input_charset, 'iso-8859-1')
- # Remove the charset
- msg.set_charset(None)
- eq(msg.get_charset(), None)
- eq(msg['content-type'], 'text/plain')
- # Try adding a charset when there's already MIME headers present
- msg = Message()
- msg['MIME-Version'] = '2.0'
- msg['Content-Type'] = 'text/x-weird'
- msg['Content-Transfer-Encoding'] = 'quinted-puntable'
- msg.set_charset(charset)
- eq(msg['mime-version'], '2.0')
- eq(msg['content-type'], 'text/x-weird; charset="iso-8859-1"')
- eq(msg['content-transfer-encoding'], 'quinted-puntable')
-
- def test_set_charset_from_string(self):
- eq = self.assertEqual
- msg = Message()
- msg.set_charset('us-ascii')
- eq(msg.get_charset().input_charset, 'us-ascii')
- eq(msg['content-type'], 'text/plain; charset="us-ascii"')
-
- def test_set_payload_with_charset(self):
- msg = Message()
- charset = Charset('iso-8859-1')
- msg.set_payload('This is a string payload', charset)
- self.assertEqual(msg.get_charset().input_charset, 'iso-8859-1')
-
- def test_get_charsets(self):
- eq = self.assertEqual
-
- msg = self._msgobj('msg_08.txt')
- charsets = msg.get_charsets()
- eq(charsets, [None, 'us-ascii', 'iso-8859-1', 'iso-8859-2', 'koi8-r'])
-
- msg = self._msgobj('msg_09.txt')
- charsets = msg.get_charsets('dingbat')
- eq(charsets, ['dingbat', 'us-ascii', 'iso-8859-1', 'dingbat',
- 'koi8-r'])
-
- msg = self._msgobj('msg_12.txt')
- charsets = msg.get_charsets()
- eq(charsets, [None, 'us-ascii', 'iso-8859-1', None, 'iso-8859-2',
- 'iso-8859-3', 'us-ascii', 'koi8-r'])
-
- def test_get_filename(self):
- eq = self.assertEqual
-
- msg = self._msgobj('msg_04.txt')
- filenames = [p.get_filename() for p in msg.get_payload()]
- eq(filenames, ['msg.txt', 'msg.txt'])
-
- msg = self._msgobj('msg_07.txt')
- subpart = msg.get_payload(1)
- eq(subpart.get_filename(), 'dingusfish.gif')
-
- def test_get_filename_with_name_parameter(self):
- eq = self.assertEqual
-
- msg = self._msgobj('msg_44.txt')
- filenames = [p.get_filename() for p in msg.get_payload()]
- eq(filenames, ['msg.txt', 'msg.txt'])
-
- def test_get_boundary(self):
- eq = self.assertEqual
- msg = self._msgobj('msg_07.txt')
- # No quotes!
- eq(msg.get_boundary(), 'BOUNDARY')
-
- def test_set_boundary(self):
- eq = self.assertEqual
- # This one has no existing boundary parameter, but the Content-Type:
- # header appears fifth.
- msg = self._msgobj('msg_01.txt')
- msg.set_boundary('BOUNDARY')
- header, value = msg.items()[4]
- eq(header.lower(), 'content-type')
- eq(value, 'text/plain; charset="us-ascii"; boundary="BOUNDARY"')
- # This one has a Content-Type: header, with a boundary, stuck in the
- # middle of its headers. Make sure the order is preserved; it should
- # be fifth.
- msg = self._msgobj('msg_04.txt')
- msg.set_boundary('BOUNDARY')
- header, value = msg.items()[4]
- eq(header.lower(), 'content-type')
- eq(value, 'multipart/mixed; boundary="BOUNDARY"')
- # And this one has no Content-Type: header at all.
- msg = self._msgobj('msg_03.txt')
- self.assertRaises(errors.HeaderParseError,
- msg.set_boundary, 'BOUNDARY')
-
- def test_get_decoded_payload(self):
- eq = self.assertEqual
- msg = self._msgobj('msg_10.txt')
- # The outer message is a multipart
- eq(msg.get_payload(decode=True), None)
- # Subpart 1 is 7bit encoded
- eq(msg.get_payload(0).get_payload(decode=True),
- 'This is a 7bit encoded message.\n')
- # Subpart 2 is quopri
- eq(msg.get_payload(1).get_payload(decode=True),
- '\xa1This is a Quoted Printable encoded message!\n')
- # Subpart 3 is base64
- eq(msg.get_payload(2).get_payload(decode=True),
- 'This is a Base64 encoded message.')
- # Subpart 4 is base64 with a trailing newline, which
- # used to be stripped (issue 7143).
- eq(msg.get_payload(3).get_payload(decode=True),
- 'This is a Base64 encoded message.\n')
- # Subpart 5 has no Content-Transfer-Encoding: header.
- eq(msg.get_payload(4).get_payload(decode=True),
- 'This has no Content-Transfer-Encoding: header.\n')
-
- def test_get_decoded_uu_payload(self):
- eq = self.assertEqual
- msg = Message()
- msg.set_payload('begin 666 -\n+:&5L;&\\@=V]R;&0 \n \nend\n')
- for cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
- msg['content-transfer-encoding'] = cte
- eq(msg.get_payload(decode=True), 'hello world')
- # Now try some bogus data
- msg.set_payload('foo')
- eq(msg.get_payload(decode=True), 'foo')
-
- def test_decoded_generator(self):
- eq = self.assertEqual
- msg = self._msgobj('msg_07.txt')
- fp = openfile('msg_17.txt')
- try:
- text = fp.read()
- finally:
- fp.close()
- s = StringIO()
- g = DecodedGenerator(s)
- g.flatten(msg)
- eq(s.getvalue(), text)
-
- def test__contains__(self):
- msg = Message()
- msg['From'] = 'Me'
- msg['to'] = 'You'
- # Check for case insensitivity
- self.assertTrue('from' in msg)
- self.assertTrue('From' in msg)
- self.assertTrue('FROM' in msg)
- self.assertTrue('to' in msg)
- self.assertTrue('To' in msg)
- self.assertTrue('TO' in msg)
-
- def test_as_string(self):
- eq = self.assertEqual
- msg = self._msgobj('msg_01.txt')
- fp = openfile('msg_01.txt')
- try:
- # BAW 30-Mar-2009 Evil be here. So, the generator is broken with
- # respect to long line breaking. It's also not idempotent when a
- # header from a parsed message is continued with tabs rather than
- # spaces. Before we fixed bug 1974 it was reversedly broken,
- # i.e. headers that were continued with spaces got continued with
- # tabs. For Python 2.x there's really no good fix and in Python
- # 3.x all this stuff is re-written to be right(er). Chris Withers
- # convinced me that using space as the default continuation
- # character is less bad for more applications.
- text = fp.read().replace('\t', ' ')
- finally:
- fp.close()
- self.ndiffAssertEqual(text, msg.as_string())
- fullrepr = str(msg)
- lines = fullrepr.split('\n')
- self.assertTrue(lines[0].startswith('From '))
- eq(text, NL.join(lines[1:]))
-
- def test_bad_param(self):
- msg = email.message_from_string("Content-Type: blarg; baz; boo\n")
- self.assertEqual(msg.get_param('baz'), '')
-
- def test_missing_filename(self):
- msg = email.message_from_string("From: foo\n")
- self.assertEqual(msg.get_filename(), None)
-
- def test_bogus_filename(self):
- msg = email.message_from_string(
- "Content-Disposition: blarg; filename\n")
- self.assertEqual(msg.get_filename(), '')
-
- def test_missing_boundary(self):
- msg = email.message_from_string("From: foo\n")
- self.assertEqual(msg.get_boundary(), None)
-
- def test_get_params(self):
- eq = self.assertEqual
- msg = email.message_from_string(
- 'X-Header: foo=one; bar=two; baz=three\n')
- eq(msg.get_params(header='x-header'),
- [('foo', 'one'), ('bar', 'two'), ('baz', 'three')])
- msg = email.message_from_string(
- 'X-Header: foo; bar=one; baz=two\n')
- eq(msg.get_params(header='x-header'),
- [('foo', ''), ('bar', 'one'), ('baz', 'two')])
- eq(msg.get_params(), None)
- msg = email.message_from_string(
- 'X-Header: foo; bar="one"; baz=two\n')
- eq(msg.get_params(header='x-header'),
- [('foo', ''), ('bar', 'one'), ('baz', 'two')])
-
- def test_get_param_liberal(self):
- msg = Message()
- msg['Content-Type'] = 'Content-Type: Multipart/mixed; boundary = "CPIMSSMTPC06p5f3tG"'
- self.assertEqual(msg.get_param('boundary'), 'CPIMSSMTPC06p5f3tG')
-
- def test_get_param(self):
- eq = self.assertEqual
- msg = email.message_from_string(
- "X-Header: foo=one; bar=two; baz=three\n")
- eq(msg.get_param('bar', header='x-header'), 'two')
- eq(msg.get_param('quuz', header='x-header'), None)
- eq(msg.get_param('quuz'), None)
- msg = email.message_from_string(
- 'X-Header: foo; bar="one"; baz=two\n')
- eq(msg.get_param('foo', header='x-header'), '')
- eq(msg.get_param('bar', header='x-header'), 'one')
- eq(msg.get_param('baz', header='x-header'), 'two')
- # XXX: We are not RFC-2045 compliant! We cannot parse:
- # msg["Content-Type"] = 'text/plain; weird="hey; dolly? [you] @ <\\"home\\">?"'
- # msg.get_param("weird")
- # yet.
-
- def test_get_param_funky_continuation_lines(self):
- msg = self._msgobj('msg_22.txt')
- self.assertEqual(msg.get_payload(1).get_param('name'), 'wibble.JPG')
-
- def test_get_param_with_semis_in_quotes(self):
- msg = email.message_from_string(
- 'Content-Type: image/pjpeg; name="Jim&amp;&amp;Jill"\n')
- self.assertEqual(msg.get_param('name'), 'Jim&amp;&amp;Jill')
- self.assertEqual(msg.get_param('name', unquote=False),
- '"Jim&amp;&amp;Jill"')
-
- def test_has_key(self):
- msg = email.message_from_string('Header: exists')
- self.assertTrue(msg.has_key('header'))
- self.assertTrue(msg.has_key('Header'))
- self.assertTrue(msg.has_key('HEADER'))
- self.assertFalse(msg.has_key('headeri'))
-
- def test_set_param(self):
- eq = self.assertEqual
- msg = Message()
- msg.set_param('charset', 'iso-2022-jp')
- eq(msg.get_param('charset'), 'iso-2022-jp')
- msg.set_param('importance', 'high value')
- eq(msg.get_param('importance'), 'high value')
- eq(msg.get_param('importance', unquote=False), '"high value"')
- eq(msg.get_params(), [('text/plain', ''),
- ('charset', 'iso-2022-jp'),
- ('importance', 'high value')])
- eq(msg.get_params(unquote=False), [('text/plain', ''),
- ('charset', '"iso-2022-jp"'),
- ('importance', '"high value"')])
- msg.set_param('charset', 'iso-9999-xx', header='X-Jimmy')
- eq(msg.get_param('charset', header='X-Jimmy'), 'iso-9999-xx')
-
- def test_del_param(self):
- eq = self.assertEqual
- msg = self._msgobj('msg_05.txt')
- eq(msg.get_params(),
- [('multipart/report', ''), ('report-type', 'delivery-status'),
- ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
- old_val = msg.get_param("report-type")
- msg.del_param("report-type")
- eq(msg.get_params(),
- [('multipart/report', ''),
- ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
- msg.set_param("report-type", old_val)
- eq(msg.get_params(),
- [('multipart/report', ''),
- ('boundary', 'D1690A7AC1.996856090/mail.example.com'),
- ('report-type', old_val)])
-
- def test_del_param_on_other_header(self):
- msg = Message()
- msg.add_header('Content-Disposition', 'attachment', filename='bud.gif')
- msg.del_param('filename', 'content-disposition')
- self.assertEqual(msg['content-disposition'], 'attachment')
-
- def test_set_type(self):
- eq = self.assertEqual
- msg = Message()
- self.assertRaises(ValueError, msg.set_type, 'text')
- msg.set_type('text/plain')
- eq(msg['content-type'], 'text/plain')
- msg.set_param('charset', 'us-ascii')
- eq(msg['content-type'], 'text/plain; charset="us-ascii"')
- msg.set_type('text/html')
- eq(msg['content-type'], 'text/html; charset="us-ascii"')
-
- def test_set_type_on_other_header(self):
- msg = Message()
- msg['X-Content-Type'] = 'text/plain'
- msg.set_type('application/octet-stream', 'X-Content-Type')
- self.assertEqual(msg['x-content-type'], 'application/octet-stream')
-
- def test_get_content_type_missing(self):
- msg = Message()
- self.assertEqual(msg.get_content_type(), 'text/plain')
-
- def test_get_content_type_missing_with_default_type(self):
- msg = Message()
- msg.set_default_type('message/rfc822')
- self.assertEqual(msg.get_content_type(), 'message/rfc822')
-
- def test_get_content_type_from_message_implicit(self):
- msg = self._msgobj('msg_30.txt')
- self.assertEqual(msg.get_payload(0).get_content_type(),
- 'message/rfc822')
-
- def test_get_content_type_from_message_explicit(self):
- msg = self._msgobj('msg_28.txt')
- self.assertEqual(msg.get_payload(0).get_content_type(),
- 'message/rfc822')
-
- def test_get_content_type_from_message_text_plain_implicit(self):
- msg = self._msgobj('msg_03.txt')
- self.assertEqual(msg.get_content_type(), 'text/plain')
-
- def test_get_content_type_from_message_text_plain_explicit(self):
- msg = self._msgobj('msg_01.txt')
- self.assertEqual(msg.get_content_type(), 'text/plain')
-
- def test_get_content_maintype_missing(self):
- msg = Message()
- self.assertEqual(msg.get_content_maintype(), 'text')
-
- def test_get_content_maintype_missing_with_default_type(self):
- msg = Message()
- msg.set_default_type('message/rfc822')
- self.assertEqual(msg.get_content_maintype(), 'message')
-
- def test_get_content_maintype_from_message_implicit(self):
- msg = self._msgobj('msg_30.txt')
- self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')
-
- def test_get_content_maintype_from_message_explicit(self):
- msg = self._msgobj('msg_28.txt')
- self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')
-
- def test_get_content_maintype_from_message_text_plain_implicit(self):
- msg = self._msgobj('msg_03.txt')
- self.assertEqual(msg.get_content_maintype(), 'text')
-
- def test_get_content_maintype_from_message_text_plain_explicit(self):
- msg = self._msgobj('msg_01.txt')
- self.assertEqual(msg.get_content_maintype(), 'text')
-
- def test_get_content_subtype_missing(self):
- msg = Message()
- self.assertEqual(msg.get_content_subtype(), 'plain')
-
- def test_get_content_subtype_missing_with_default_type(self):
- msg = Message()
- msg.set_default_type('message/rfc822')
- self.assertEqual(msg.get_content_subtype(), 'rfc822')
-
- def test_get_content_subtype_from_message_implicit(self):
- msg = self._msgobj('msg_30.txt')
- self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')
-
- def test_get_content_subtype_from_message_explicit(self):
- msg = self._msgobj('msg_28.txt')
- self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')
-
- def test_get_content_subtype_from_message_text_plain_implicit(self):
- msg = self._msgobj('msg_03.txt')
- self.assertEqual(msg.get_content_subtype(), 'plain')
-
- def test_get_content_subtype_from_message_text_plain_explicit(self):
- msg = self._msgobj('msg_01.txt')
- self.assertEqual(msg.get_content_subtype(), 'plain')
-
- def test_get_content_maintype_error(self):
- msg = Message()
- msg['Content-Type'] = 'no-slash-in-this-string'
- self.assertEqual(msg.get_content_maintype(), 'text')
-
- def test_get_content_subtype_error(self):
- msg = Message()
- msg['Content-Type'] = 'no-slash-in-this-string'
- self.assertEqual(msg.get_content_subtype(), 'plain')
-
- def test_replace_header(self):
- eq = self.assertEqual
- msg = Message()
- msg.add_header('First', 'One')
- msg.add_header('Second', 'Two')
- msg.add_header('Third', 'Three')
- eq(msg.keys(), ['First', 'Second', 'Third'])
- eq(msg.values(), ['One', 'Two', 'Three'])
- msg.replace_header('Second', 'Twenty')
- eq(msg.keys(), ['First', 'Second', 'Third'])
- eq(msg.values(), ['One', 'Twenty', 'Three'])
- msg.add_header('First', 'Eleven')
- msg.replace_header('First', 'One Hundred')
- eq(msg.keys(), ['First', 'Second', 'Third', 'First'])
- eq(msg.values(), ['One Hundred', 'Twenty', 'Three', 'Eleven'])
- self.assertRaises(KeyError, msg.replace_header, 'Fourth', 'Missing')
-
- def test_broken_base64_payload(self):
- x = 'AwDp0P7//y6LwKEAcPa/6Q=9'
- msg = Message()
- msg['content-type'] = 'audio/x-midi'
- msg['content-transfer-encoding'] = 'base64'
- msg.set_payload(x)
- self.assertEqual(msg.get_payload(decode=True), x)
-
-
-
-# Test the email.encoders module
-class TestEncoders(unittest.TestCase):
- def test_encode_empty_payload(self):
- eq = self.assertEqual
- msg = Message()
- msg.set_charset('us-ascii')
- eq(msg['content-transfer-encoding'], '7bit')
-
- def test_default_cte(self):
- eq = self.assertEqual
- msg = MIMEText('hello world')
- eq(msg['content-transfer-encoding'], '7bit')
-
- def test_default_cte(self):
- eq = self.assertEqual
- # With no explicit _charset its us-ascii, and all are 7-bit
- msg = MIMEText('hello world')
- eq(msg['content-transfer-encoding'], '7bit')
- # Similar, but with 8-bit data
- msg = MIMEText('hello \xf8 world')
- eq(msg['content-transfer-encoding'], '8bit')
- # And now with a different charset
- msg = MIMEText('hello \xf8 world', _charset='iso-8859-1')
- eq(msg['content-transfer-encoding'], 'quoted-printable')
-
-
-
-# Test long header wrapping
-class TestLongHeaders(TestEmailBase):
- def test_split_long_continuation(self):
- eq = self.ndiffAssertEqual
- msg = email.message_from_string("""\
-Subject: bug demonstration
-\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
-\tmore text
-
-test
-""")
- sfp = StringIO()
- g = Generator(sfp)
- g.flatten(msg)
- eq(sfp.getvalue(), """\
-Subject: bug demonstration
- 12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
- more text
-
-test
-""")
-
- def test_another_long_almost_unsplittable_header(self):
- eq = self.ndiffAssertEqual
- hstr = """\
-bug demonstration
-\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
-\tmore text"""
- h = Header(hstr, continuation_ws='\t')
- eq(h.encode(), """\
-bug demonstration
-\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
-\tmore text""")
- h = Header(hstr)
- eq(h.encode(), """\
-bug demonstration
- 12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
- more text""")
-
- def test_long_nonstring(self):
- eq = self.ndiffAssertEqual
- g = Charset("iso-8859-1")
- cz = Charset("iso-8859-2")
- utf8 = Charset("utf-8")
- g_head = "Die Mieter treten hier ein werden mit einem Foerderband komfortabel den Korridor entlang, an s\xfcdl\xfcndischen Wandgem\xe4lden vorbei, gegen die rotierenden Klingen bef\xf6rdert. "
- cz_head = "Finan\xe8ni metropole se hroutily pod tlakem jejich d\xf9vtipu.. "
- utf8_head = u"\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066\u3044\u307e\u3059\u3002".encode("utf-8")
- h = Header(g_head, g, header_name='Subject')
- h.append(cz_head, cz)
- h.append(utf8_head, utf8)
- msg = Message()
- msg['Subject'] = h
- sfp = StringIO()
- g = Generator(sfp)
- g.flatten(msg)
- eq(sfp.getvalue(), """\
-Subject: =?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerd?=
- =?iso-8859-1?q?erband_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndi?=
- =?iso-8859-1?q?schen_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Kling?=
- =?iso-8859-1?q?en_bef=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_met?=
- =?iso-8859-2?q?ropole_se_hroutily_pod_tlakem_jejich_d=F9vtipu=2E=2E_?=
- =?utf-8?b?5q2j56K644Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE?=
- =?utf-8?b?44G+44Gb44KT44CC5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB?=
- =?utf-8?b?44GC44Go44Gv44Gn44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CM?=
- =?utf-8?q?Wenn_ist_das_Nunstuck_git_und_Slotermeyer=3F_Ja!_Beiherhund_das?=
- =?utf-8?b?IE9kZXIgZGllIEZsaXBwZXJ3YWxkdCBnZXJzcHV0LuOAjeOBqOiogOOBow==?=
- =?utf-8?b?44Gm44GE44G+44GZ44CC?=
-
-""")
- eq(h.encode(), """\
-=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerd?=
- =?iso-8859-1?q?erband_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndi?=
- =?iso-8859-1?q?schen_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Kling?=
- =?iso-8859-1?q?en_bef=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_met?=
- =?iso-8859-2?q?ropole_se_hroutily_pod_tlakem_jejich_d=F9vtipu=2E=2E_?=
- =?utf-8?b?5q2j56K644Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE?=
- =?utf-8?b?44G+44Gb44KT44CC5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB?=
- =?utf-8?b?44GC44Go44Gv44Gn44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CM?=
- =?utf-8?q?Wenn_ist_das_Nunstuck_git_und_Slotermeyer=3F_Ja!_Beiherhund_das?=
- =?utf-8?b?IE9kZXIgZGllIEZsaXBwZXJ3YWxkdCBnZXJzcHV0LuOAjeOBqOiogOOBow==?=
- =?utf-8?b?44Gm44GE44G+44GZ44CC?=""")
-
- def test_long_header_encode(self):
- eq = self.ndiffAssertEqual
- h = Header('wasnipoop; giraffes="very-long-necked-animals"; '
- 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
- header_name='X-Foobar-Spoink-Defrobnit')
- eq(h.encode(), '''\
-wasnipoop; giraffes="very-long-necked-animals";
- spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
-
- def test_long_header_encode_with_tab_continuation(self):
- eq = self.ndiffAssertEqual
- h = Header('wasnipoop; giraffes="very-long-necked-animals"; '
- 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
- header_name='X-Foobar-Spoink-Defrobnit',
- continuation_ws='\t')
- eq(h.encode(), '''\
-wasnipoop; giraffes="very-long-necked-animals";
-\tspooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
-
- def test_header_splitter(self):
- eq = self.ndiffAssertEqual
- msg = MIMEText('')
- # It'd be great if we could use add_header() here, but that doesn't
- # guarantee an order of the parameters.
- msg['X-Foobar-Spoink-Defrobnit'] = (
- 'wasnipoop; giraffes="very-long-necked-animals"; '
- 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"')
- sfp = StringIO()
- g = Generator(sfp)
- g.flatten(msg)
- eq(sfp.getvalue(), '''\
-Content-Type: text/plain; charset="us-ascii"
-MIME-Version: 1.0
-Content-Transfer-Encoding: 7bit
-X-Foobar-Spoink-Defrobnit: wasnipoop; giraffes="very-long-necked-animals";
- spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"
-
-''')
-
- def test_no_semis_header_splitter(self):
- eq = self.ndiffAssertEqual
- msg = Message()
- msg['From'] = 'test@dom.ain'
- msg['References'] = SPACE.join(['<%d@dom.ain>' % i for i in range(10)])
- msg.set_payload('Test')
- sfp = StringIO()
- g = Generator(sfp)
- g.flatten(msg)
- eq(sfp.getvalue(), """\
-From: test@dom.ain
-References: <0@dom.ain> <1@dom.ain> <2@dom.ain> <3@dom.ain> <4@dom.ain>
- <5@dom.ain> <6@dom.ain> <7@dom.ain> <8@dom.ain> <9@dom.ain>
-
-Test""")
-
- def test_no_split_long_header(self):
- eq = self.ndiffAssertEqual
- hstr = 'References: ' + 'x' * 80
- h = Header(hstr, continuation_ws='\t')
- eq(h.encode(), """\
-References: xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx""")
-
- def test_splitting_multiple_long_lines(self):
- eq = self.ndiffAssertEqual
- hstr = """\
-from babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
-\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
-\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
-"""
- h = Header(hstr, continuation_ws='\t')
- eq(h.encode(), """\
-from babylon.socal-raves.org (localhost [127.0.0.1]);
-\tby babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
-\tfor <mailman-admin@babylon.socal-raves.org>;
-\tSat, 2 Feb 2002 17:00:06 -0800 (PST)
-\tfrom babylon.socal-raves.org (localhost [127.0.0.1]);
-\tby babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
-\tfor <mailman-admin@babylon.socal-raves.org>;
-\tSat, 2 Feb 2002 17:00:06 -0800 (PST)
-\tfrom babylon.socal-raves.org (localhost [127.0.0.1]);
-\tby babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
-\tfor <mailman-admin@babylon.socal-raves.org>;
-\tSat, 2 Feb 2002 17:00:06 -0800 (PST)""")
-
- def test_splitting_first_line_only_is_long(self):
- eq = self.ndiffAssertEqual
- hstr = """\
-from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93] helo=cthulhu.gerg.ca)
-\tby kronos.mems-exchange.org with esmtp (Exim 4.05)
-\tid 17k4h5-00034i-00
-\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400"""
- h = Header(hstr, maxlinelen=78, header_name='Received',
- continuation_ws='\t')
- eq(h.encode(), """\
-from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93]
-\thelo=cthulhu.gerg.ca)
-\tby kronos.mems-exchange.org with esmtp (Exim 4.05)
-\tid 17k4h5-00034i-00
-\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400""")
-
- def test_long_8bit_header(self):
- eq = self.ndiffAssertEqual
- msg = Message()
- h = Header('Britische Regierung gibt', 'iso-8859-1',
- header_name='Subject')
- h.append('gr\xfcnes Licht f\xfcr Offshore-Windkraftprojekte')
- msg['Subject'] = h
- eq(msg.as_string(), """\
-Subject: =?iso-8859-1?q?Britische_Regierung_gibt?= =?iso-8859-1?q?gr=FCnes?=
- =?iso-8859-1?q?_Licht_f=FCr_Offshore-Windkraftprojekte?=
-
-""")
-
- def test_long_8bit_header_no_charset(self):
- eq = self.ndiffAssertEqual
- msg = Message()
- msg['Reply-To'] = 'Britische Regierung gibt gr\xfcnes Licht f\xfcr Offshore-Windkraftprojekte <a-very-long-address@example.com>'
- eq(msg.as_string(), """\
-Reply-To: Britische Regierung gibt gr\xfcnes Licht f\xfcr Offshore-Windkraftprojekte <a-very-long-address@example.com>
-
-""")
-
- def test_long_to_header(self):
- eq = self.ndiffAssertEqual
- to = '"Someone Test #A" <someone@eecs.umich.edu>,<someone@eecs.umich.edu>,"Someone Test #B" <someone@umich.edu>, "Someone Test #C" <someone@eecs.umich.edu>, "Someone Test #D" <someone@eecs.umich.edu>'
- msg = Message()
- msg['To'] = to
- eq(msg.as_string(0), '''\
-To: "Someone Test #A" <someone@eecs.umich.edu>, <someone@eecs.umich.edu>,
- "Someone Test #B" <someone@umich.edu>,
- "Someone Test #C" <someone@eecs.umich.edu>,
- "Someone Test #D" <someone@eecs.umich.edu>
-
-''')
-
- def test_long_line_after_append(self):
- eq = self.ndiffAssertEqual
- s = 'This is an example of string which has almost the limit of header length.'
- h = Header(s)
- h.append('Add another line.')
- eq(h.encode(), """\
-This is an example of string which has almost the limit of header length.
- Add another line.""")
-
- def test_shorter_line_with_append(self):
- eq = self.ndiffAssertEqual
- s = 'This is a shorter line.'
- h = Header(s)
- h.append('Add another sentence. (Surprise?)')
- eq(h.encode(),
- 'This is a shorter line. Add another sentence. (Surprise?)')
-
- def test_long_field_name(self):
- eq = self.ndiffAssertEqual
- fn = 'X-Very-Very-Very-Long-Header-Name'
- gs = "Die Mieter treten hier ein werden mit einem Foerderband komfortabel den Korridor entlang, an s\xfcdl\xfcndischen Wandgem\xe4lden vorbei, gegen die rotierenden Klingen bef\xf6rdert. "
- h = Header(gs, 'iso-8859-1', header_name=fn)
- # BAW: this seems broken because the first line is too long
- eq(h.encode(), """\
-=?iso-8859-1?q?Die_Mieter_treten_hier_?=
- =?iso-8859-1?q?ein_werden_mit_einem_Foerderband_komfortabel_den_Korridor_?=
- =?iso-8859-1?q?entlang=2C_an_s=FCdl=FCndischen_Wandgem=E4lden_vorbei=2C_g?=
- =?iso-8859-1?q?egen_die_rotierenden_Klingen_bef=F6rdert=2E_?=""")
-
- def test_long_received_header(self):
- h = 'from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP; Wed, 05 Mar 2003 18:10:18 -0700'
- msg = Message()
- msg['Received-1'] = Header(h, continuation_ws='\t')
- msg['Received-2'] = h
- self.ndiffAssertEqual(msg.as_string(), """\
-Received-1: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by
-\throthgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
-\tWed, 05 Mar 2003 18:10:18 -0700
-Received-2: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by
- hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
- Wed, 05 Mar 2003 18:10:18 -0700
-
-""")
-
- def test_string_headerinst_eq(self):
- h = '<15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David Bremner\'s message of "Thu, 6 Mar 2003 13:58:21 +0100")'
- msg = Message()
- msg['Received'] = Header(h, header_name='Received-1',
- continuation_ws='\t')
- msg['Received'] = h
- self.ndiffAssertEqual(msg.as_string(), """\
-Received: <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de>
-\t(David Bremner's message of "Thu, 6 Mar 2003 13:58:21 +0100")
-Received: <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de>
- (David Bremner's message of "Thu, 6 Mar 2003 13:58:21 +0100")
-
-""")
-
- def test_long_unbreakable_lines_with_continuation(self):
- eq = self.ndiffAssertEqual
- msg = Message()
- t = """\
- iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
- locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp"""
- msg['Face-1'] = t
- msg['Face-2'] = Header(t, header_name='Face-2')
- eq(msg.as_string(), """\
-Face-1: iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
- locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
-Face-2: iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
- locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
-
-""")
-
- def test_another_long_multiline_header(self):
- eq = self.ndiffAssertEqual
- m = '''\
-Received: from siimage.com ([172.25.1.3]) by zima.siliconimage.com with Microsoft SMTPSVC(5.0.2195.4905);
- Wed, 16 Oct 2002 07:41:11 -0700'''
- msg = email.message_from_string(m)
- eq(msg.as_string(), '''\
-Received: from siimage.com ([172.25.1.3]) by zima.siliconimage.com with
- Microsoft SMTPSVC(5.0.2195.4905); Wed, 16 Oct 2002 07:41:11 -0700
-
-''')
-
- def test_long_lines_with_different_header(self):
- eq = self.ndiffAssertEqual
- h = """\
-List-Unsubscribe: <https://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
- <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>"""
- msg = Message()
- msg['List'] = h
- msg['List'] = Header(h, header_name='List')
- self.ndiffAssertEqual(msg.as_string(), """\
-List: List-Unsubscribe: <https://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
- <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>
-List: List-Unsubscribe: <https://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
- <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>
-
-""")
-
-
-
-# Test mangling of "From " lines in the body of a message
-class TestFromMangling(unittest.TestCase):
- def setUp(self):
- self.msg = Message()
- self.msg['From'] = 'aaa@bbb.org'
- self.msg.set_payload("""\
-From the desk of A.A.A.:
-Blah blah blah
-""")
-
- def test_mangled_from(self):
- s = StringIO()
- g = Generator(s, mangle_from_=True)
- g.flatten(self.msg)
- self.assertEqual(s.getvalue(), """\
-From: aaa@bbb.org
-
->From the desk of A.A.A.:
-Blah blah blah
-""")
-
- def test_dont_mangle_from(self):
- s = StringIO()
- g = Generator(s, mangle_from_=False)
- g.flatten(self.msg)
- self.assertEqual(s.getvalue(), """\
-From: aaa@bbb.org
-
-From the desk of A.A.A.:
-Blah blah blah
-""")
-
-
-
-# Test the basic MIMEAudio class
-class TestMIMEAudio(unittest.TestCase):
- def setUp(self):
- # Make sure we pick up the audiotest.au that lives in email/test/data.
- # In Python, there's an audiotest.au living in Lib/test but that isn't
- # included in some binary distros that don't include the test
- # package. The trailing empty string on the .join() is significant
- # since findfile() will do a dirname().
- datadir = os.path.join(os.path.dirname(landmark), 'data', '')
- fp = open(findfile('audiotest.au', datadir), 'rb')
- try:
- self._audiodata = fp.read()
- finally:
- fp.close()
- self._au = MIMEAudio(self._audiodata)
-
- def test_guess_minor_type(self):
- self.assertEqual(self._au.get_content_type(), 'audio/basic')
-
- def test_encoding(self):
- payload = self._au.get_payload()
- self.assertEqual(base64.decodestring(payload), self._audiodata)
-
- def test_checkSetMinor(self):
- au = MIMEAudio(self._audiodata, 'fish')
- self.assertEqual(au.get_content_type(), 'audio/fish')
-
- def test_add_header(self):
- eq = self.assertEqual
- unless = self.assertTrue
- self._au.add_header('Content-Disposition', 'attachment',
- filename='audiotest.au')
- eq(self._au['content-disposition'],
- 'attachment; filename="audiotest.au"')
- eq(self._au.get_params(header='content-disposition'),
- [('attachment', ''), ('filename', 'audiotest.au')])
- eq(self._au.get_param('filename', header='content-disposition'),
- 'audiotest.au')
- missing = []
- eq(self._au.get_param('attachment', header='content-disposition'), '')
- unless(self._au.get_param('foo', failobj=missing,
- header='content-disposition') is missing)
- # Try some missing stuff
- unless(self._au.get_param('foobar', missing) is missing)
- unless(self._au.get_param('attachment', missing,
- header='foobar') is missing)
-
-
-
-# Test the basic MIMEImage class
-class TestMIMEImage(unittest.TestCase):
- def setUp(self):
- fp = openfile('PyBanner048.gif')
- try:
- self._imgdata = fp.read()
- finally:
- fp.close()
- self._im = MIMEImage(self._imgdata)
-
- def test_guess_minor_type(self):
- self.assertEqual(self._im.get_content_type(), 'image/gif')
-
- def test_encoding(self):
- payload = self._im.get_payload()
- self.assertEqual(base64.decodestring(payload), self._imgdata)
-
- def test_checkSetMinor(self):
- im = MIMEImage(self._imgdata, 'fish')
- self.assertEqual(im.get_content_type(), 'image/fish')
-
- def test_add_header(self):
- eq = self.assertEqual
- unless = self.assertTrue
- self._im.add_header('Content-Disposition', 'attachment',
- filename='dingusfish.gif')
- eq(self._im['content-disposition'],
- 'attachment; filename="dingusfish.gif"')
- eq(self._im.get_params(header='content-disposition'),
- [('attachment', ''), ('filename', 'dingusfish.gif')])
- eq(self._im.get_param('filename', header='content-disposition'),
- 'dingusfish.gif')
- missing = []
- eq(self._im.get_param('attachment', header='content-disposition'), '')
- unless(self._im.get_param('foo', failobj=missing,
- header='content-disposition') is missing)
- # Try some missing stuff
- unless(self._im.get_param('foobar', missing) is missing)
- unless(self._im.get_param('attachment', missing,
- header='foobar') is missing)
-
-
-
-# Test the basic MIMEApplication class
-class TestMIMEApplication(unittest.TestCase):
- def test_headers(self):
- eq = self.assertEqual
- msg = MIMEApplication('\xfa\xfb\xfc\xfd\xfe\xff')
- eq(msg.get_content_type(), 'application/octet-stream')
- eq(msg['content-transfer-encoding'], 'base64')
-
- def test_body(self):
- eq = self.assertEqual
- bytes = '\xfa\xfb\xfc\xfd\xfe\xff'
- msg = MIMEApplication(bytes)
- eq(msg.get_payload(), '+vv8/f7/')
- eq(msg.get_payload(decode=True), bytes)
-
-
-
-# Test the basic MIMEText class
-class TestMIMEText(unittest.TestCase):
- def setUp(self):
- self._msg = MIMEText('hello there')
-
- def test_types(self):
- eq = self.assertEqual
- unless = self.assertTrue
- eq(self._msg.get_content_type(), 'text/plain')
- eq(self._msg.get_param('charset'), 'us-ascii')
- missing = []
- unless(self._msg.get_param('foobar', missing) is missing)
- unless(self._msg.get_param('charset', missing, header='foobar')
- is missing)
-
- def test_payload(self):
- self.assertEqual(self._msg.get_payload(), 'hello there')
- self.assertTrue(not self._msg.is_multipart())
-
- def test_charset(self):
- eq = self.assertEqual
- msg = MIMEText('hello there', _charset='us-ascii')
- eq(msg.get_charset().input_charset, 'us-ascii')
- eq(msg['content-type'], 'text/plain; charset="us-ascii"')
-
-
-
-# Test complicated multipart/* messages
-class TestMultipart(TestEmailBase):
- def setUp(self):
- fp = openfile('PyBanner048.gif')
- try:
- data = fp.read()
- finally:
- fp.close()
-
- container = MIMEBase('multipart', 'mixed', boundary='BOUNDARY')
- image = MIMEImage(data, name='dingusfish.gif')
- image.add_header('content-disposition', 'attachment',
- filename='dingusfish.gif')
- intro = MIMEText('''\
-Hi there,
-
-This is the dingus fish.
-''')
- container.attach(intro)
- container.attach(image)
- container['From'] = 'Barry <barry@digicool.com>'
- container['To'] = 'Dingus Lovers <cravindogs@cravindogs.com>'
- container['Subject'] = 'Here is your dingus fish'
-
- now = 987809702.54848599
- timetuple = time.localtime(now)
- if timetuple[-1] == 0:
- tzsecs = time.timezone
- else:
- tzsecs = time.altzone
- if tzsecs > 0:
- sign = '-'
- else:
- sign = '+'
- tzoffset = ' %s%04d' % (sign, tzsecs // 36)
- container['Date'] = time.strftime(
- '%a, %d %b %Y %H:%M:%S',
- time.localtime(now)) + tzoffset
- self._msg = container
- self._im = image
- self._txt = intro
-
- def test_hierarchy(self):
- # convenience
- eq = self.assertEqual
- unless = self.assertTrue
- raises = self.assertRaises
- # tests
- m = self._msg
- unless(m.is_multipart())
- eq(m.get_content_type(), 'multipart/mixed')
- eq(len(m.get_payload()), 2)
- raises(IndexError, m.get_payload, 2)
- m0 = m.get_payload(0)
- m1 = m.get_payload(1)
- unless(m0 is self._txt)
- unless(m1 is self._im)
- eq(m.get_payload(), [m0, m1])
- unless(not m0.is_multipart())
- unless(not m1.is_multipart())
-
- def test_empty_multipart_idempotent(self):
- text = """\
-Content-Type: multipart/mixed; boundary="BOUNDARY"
-MIME-Version: 1.0
-Subject: A subject
-To: aperson@dom.ain
-From: bperson@dom.ain
-
-
---BOUNDARY
-
-
---BOUNDARY--
-"""
- msg = Parser().parsestr(text)
- self.ndiffAssertEqual(text, msg.as_string())
-
- def test_no_parts_in_a_multipart_with_none_epilogue(self):
- outer = MIMEBase('multipart', 'mixed')
- outer['Subject'] = 'A subject'
- outer['To'] = 'aperson@dom.ain'
- outer['From'] = 'bperson@dom.ain'
- outer.set_boundary('BOUNDARY')
- self.ndiffAssertEqual(outer.as_string(), '''\
-Content-Type: multipart/mixed; boundary="BOUNDARY"
-MIME-Version: 1.0
-Subject: A subject
-To: aperson@dom.ain
-From: bperson@dom.ain
-
---BOUNDARY
-
---BOUNDARY--''')
-
- def test_no_parts_in_a_multipart_with_empty_epilogue(self):
- outer = MIMEBase('multipart', 'mixed')
- outer['Subject'] = 'A subject'
- outer['To'] = 'aperson@dom.ain'
- outer['From'] = 'bperson@dom.ain'
- outer.preamble = ''
- outer.epilogue = ''
- outer.set_boundary('BOUNDARY')
- self.ndiffAssertEqual(outer.as_string(), '''\
-Content-Type: multipart/mixed; boundary="BOUNDARY"
-MIME-Version: 1.0
-Subject: A subject
-To: aperson@dom.ain
-From: bperson@dom.ain
-
-
---BOUNDARY
-
---BOUNDARY--
-''')
-
- def test_one_part_in_a_multipart(self):
- eq = self.ndiffAssertEqual
- outer = MIMEBase('multipart', 'mixed')
- outer['Subject'] = 'A subject'
- outer['To'] = 'aperson@dom.ain'
- outer['From'] = 'bperson@dom.ain'
- outer.set_boundary('BOUNDARY')
- msg = MIMEText('hello world')
- outer.attach(msg)
- eq(outer.as_string(), '''\
-Content-Type: multipart/mixed; boundary="BOUNDARY"
-MIME-Version: 1.0
-Subject: A subject
-To: aperson@dom.ain
-From: bperson@dom.ain
-
---BOUNDARY
-Content-Type: text/plain; charset="us-ascii"
-MIME-Version: 1.0
-Content-Transfer-Encoding: 7bit
-
-hello world
---BOUNDARY--''')
-
- def test_seq_parts_in_a_multipart_with_empty_preamble(self):
- eq = self.ndiffAssertEqual
- outer = MIMEBase('multipart', 'mixed')
- outer['Subject'] = 'A subject'
- outer['To'] = 'aperson@dom.ain'
- outer['From'] = 'bperson@dom.ain'
- outer.preamble = ''
- msg = MIMEText('hello world')
- outer.attach(msg)
- outer.set_boundary('BOUNDARY')
- eq(outer.as_string(), '''\
-Content-Type: multipart/mixed; boundary="BOUNDARY"
-MIME-Version: 1.0
-Subject: A subject
-To: aperson@dom.ain
-From: bperson@dom.ain
-
-
---BOUNDARY
-Content-Type: text/plain; charset="us-ascii"
-MIME-Version: 1.0
-Content-Transfer-Encoding: 7bit
-
-hello world
---BOUNDARY--''')
-
-
- def test_seq_parts_in_a_multipart_with_none_preamble(self):
- eq = self.ndiffAssertEqual
- outer = MIMEBase('multipart', 'mixed')
- outer['Subject'] = 'A subject'
- outer['To'] = 'aperson@dom.ain'
- outer['From'] = 'bperson@dom.ain'
- outer.preamble = None
- msg = MIMEText('hello world')
- outer.attach(msg)
- outer.set_boundary('BOUNDARY')
- eq(outer.as_string(), '''\
-Content-Type: multipart/mixed; boundary="BOUNDARY"
-MIME-Version: 1.0
-Subject: A subject
-To: aperson@dom.ain
-From: bperson@dom.ain
-
---BOUNDARY
-Content-Type: text/plain; charset="us-ascii"
-MIME-Version: 1.0
-Content-Transfer-Encoding: 7bit
-
-hello world
---BOUNDARY--''')
-
-
- def test_seq_parts_in_a_multipart_with_none_epilogue(self):
- eq = self.ndiffAssertEqual
- outer = MIMEBase('multipart', 'mixed')
- outer['Subject'] = 'A subject'
- outer['To'] = 'aperson@dom.ain'
- outer['From'] = 'bperson@dom.ain'
- outer.epilogue = None
- msg = MIMEText('hello world')
- outer.attach(msg)
- outer.set_boundary('BOUNDARY')
- eq(outer.as_string(), '''\
-Content-Type: multipart/mixed; boundary="BOUNDARY"
-MIME-Version: 1.0
-Subject: A subject
-To: aperson@dom.ain
-From: bperson@dom.ain
-
---BOUNDARY
-Content-Type: text/plain; charset="us-ascii"
-MIME-Version: 1.0
-Content-Transfer-Encoding: 7bit
-
-hello world
---BOUNDARY--''')
-
-
- def test_seq_parts_in_a_multipart_with_empty_epilogue(self):
- eq = self.ndiffAssertEqual
- outer = MIMEBase('multipart', 'mixed')
- outer['Subject'] = 'A subject'
- outer['To'] = 'aperson@dom.ain'
- outer['From'] = 'bperson@dom.ain'
- outer.epilogue = ''
- msg = MIMEText('hello world')
- outer.attach(msg)
- outer.set_boundary('BOUNDARY')
- eq(outer.as_string(), '''\
-Content-Type: multipart/mixed; boundary="BOUNDARY"
-MIME-Version: 1.0
-Subject: A subject
-To: aperson@dom.ain
-From: bperson@dom.ain
-
---BOUNDARY
-Content-Type: text/plain; charset="us-ascii"
-MIME-Version: 1.0
-Content-Transfer-Encoding: 7bit
-
-hello world
---BOUNDARY--
-''')
-
-
- def test_seq_parts_in_a_multipart_with_nl_epilogue(self):
- eq = self.ndiffAssertEqual
- outer = MIMEBase('multipart', 'mixed')
- outer['Subject'] = 'A subject'
- outer['To'] = 'aperson@dom.ain'
- outer['From'] = 'bperson@dom.ain'
- outer.epilogue = '\n'
- msg = MIMEText('hello world')
- outer.attach(msg)
- outer.set_boundary('BOUNDARY')
- eq(outer.as_string(), '''\
-Content-Type: multipart/mixed; boundary="BOUNDARY"
-MIME-Version: 1.0
-Subject: A subject
-To: aperson@dom.ain
-From: bperson@dom.ain
-
---BOUNDARY
-Content-Type: text/plain; charset="us-ascii"
-MIME-Version: 1.0
-Content-Transfer-Encoding: 7bit
-
-hello world
---BOUNDARY--
-
-''')
-
- def test_message_external_body(self):
- eq = self.assertEqual
- msg = self._msgobj('msg_36.txt')
- eq(len(msg.get_payload()), 2)
- msg1 = msg.get_payload(1)
- eq(msg1.get_content_type(), 'multipart/alternative')
- eq(len(msg1.get_payload()), 2)
- for subpart in msg1.get_payload():
- eq(subpart.get_content_type(), 'message/external-body')
- eq(len(subpart.get_payload()), 1)
- subsubpart = subpart.get_payload(0)
- eq(subsubpart.get_content_type(), 'text/plain')
-
- def test_double_boundary(self):
- # msg_37.txt is a multipart that contains two dash-boundary's in a
- # row. Our interpretation of RFC 2046 calls for ignoring the second
- # and subsequent boundaries.
- msg = self._msgobj('msg_37.txt')
- self.assertEqual(len(msg.get_payload()), 3)
-
- def test_nested_inner_contains_outer_boundary(self):
- eq = self.ndiffAssertEqual
- # msg_38.txt has an inner part that contains outer boundaries. My
- # interpretation of RFC 2046 (based on sections 5.1 and 5.1.2) say
- # these are illegal and should be interpreted as unterminated inner
- # parts.
- msg = self._msgobj('msg_38.txt')
- sfp = StringIO()
- iterators._structure(msg, sfp)
- eq(sfp.getvalue(), """\
-multipart/mixed
- multipart/mixed
- multipart/alternative
- text/plain
- text/plain
- text/plain
- text/plain
-""")
-
- def test_nested_with_same_boundary(self):
- eq = self.ndiffAssertEqual
- # msg 39.txt is similarly evil in that it's got inner parts that use
- # the same boundary as outer parts. Again, I believe the way this is
- # parsed is closest to the spirit of RFC 2046
- msg = self._msgobj('msg_39.txt')
- sfp = StringIO()
- iterators._structure(msg, sfp)
- eq(sfp.getvalue(), """\
-multipart/mixed
- multipart/mixed
- multipart/alternative
- application/octet-stream
- application/octet-stream
- text/plain
-""")
-
- def test_boundary_in_non_multipart(self):
- msg = self._msgobj('msg_40.txt')
- self.assertEqual(msg.as_string(), '''\
-MIME-Version: 1.0
-Content-Type: text/html; boundary="--961284236552522269"
-
-----961284236552522269
-Content-Type: text/html;
-Content-Transfer-Encoding: 7Bit
-
-<html></html>
-
-----961284236552522269--
-''')
-
- def test_boundary_with_leading_space(self):
- eq = self.assertEqual
- msg = email.message_from_string('''\
-MIME-Version: 1.0
-Content-Type: multipart/mixed; boundary=" XXXX"
-
--- XXXX
-Content-Type: text/plain
-
-
--- XXXX
-Content-Type: text/plain
-
--- XXXX--
-''')
- self.assertTrue(msg.is_multipart())
- eq(msg.get_boundary(), ' XXXX')
- eq(len(msg.get_payload()), 2)
-
- def test_boundary_without_trailing_newline(self):
- m = Parser().parsestr("""\
-Content-Type: multipart/mixed; boundary="===============0012394164=="
-MIME-Version: 1.0
-
---===============0012394164==
-Content-Type: image/file1.jpg
-MIME-Version: 1.0
-Content-Transfer-Encoding: base64
-
-YXNkZg==
---===============0012394164==--""")
- self.assertEqual(m.get_payload(0).get_payload(), 'YXNkZg==')
-
-
-
-# Test some badly formatted messages
-class TestNonConformant(TestEmailBase):
- def test_parse_missing_minor_type(self):
- eq = self.assertEqual
- msg = self._msgobj('msg_14.txt')
- eq(msg.get_content_type(), 'text/plain')
- eq(msg.get_content_maintype(), 'text')
- eq(msg.get_content_subtype(), 'plain')
-
- def test_same_boundary_inner_outer(self):
- unless = self.assertTrue
- msg = self._msgobj('msg_15.txt')
- # XXX We can probably eventually do better
- inner = msg.get_payload(0)
- unless(hasattr(inner, 'defects'))
- self.assertEqual(len(inner.defects), 1)
- unless(isinstance(inner.defects[0],
- errors.StartBoundaryNotFoundDefect))
-
- def test_multipart_no_boundary(self):
- unless = self.assertTrue
- msg = self._msgobj('msg_25.txt')
- unless(isinstance(msg.get_payload(), str))
- self.assertEqual(len(msg.defects), 2)
- unless(isinstance(msg.defects[0], errors.NoBoundaryInMultipartDefect))
- unless(isinstance(msg.defects[1],
- errors.MultipartInvariantViolationDefect))
-
- def test_invalid_content_type(self):
- eq = self.assertEqual
- neq = self.ndiffAssertEqual
- msg = Message()
- # RFC 2045, $5.2 says invalid yields text/plain
- msg['Content-Type'] = 'text'
- eq(msg.get_content_maintype(), 'text')
- eq(msg.get_content_subtype(), 'plain')
- eq(msg.get_content_type(), 'text/plain')
- # Clear the old value and try something /really/ invalid
- del msg['content-type']
- msg['Content-Type'] = 'foo'
- eq(msg.get_content_maintype(), 'text')
- eq(msg.get_content_subtype(), 'plain')
- eq(msg.get_content_type(), 'text/plain')
- # Still, make sure that the message is idempotently generated
- s = StringIO()
- g = Generator(s)
- g.flatten(msg)
- neq(s.getvalue(), 'Content-Type: foo\n\n')
-
- def test_no_start_boundary(self):
- eq = self.ndiffAssertEqual
- msg = self._msgobj('msg_31.txt')
- eq(msg.get_payload(), """\
---BOUNDARY
-Content-Type: text/plain
-
-message 1
-
---BOUNDARY
-Content-Type: text/plain
-
-message 2
-
---BOUNDARY--
-""")
-
- def test_no_separating_blank_line(self):
- eq = self.ndiffAssertEqual
- msg = self._msgobj('msg_35.txt')
- eq(msg.as_string(), """\
-From: aperson@dom.ain
-To: bperson@dom.ain
-Subject: here's something interesting
-
-counter to RFC 2822, there's no separating newline here
-""")
-
- def test_lying_multipart(self):
- unless = self.assertTrue
- msg = self._msgobj('msg_41.txt')
- unless(hasattr(msg, 'defects'))
- self.assertEqual(len(msg.defects), 2)
- unless(isinstance(msg.defects[0], errors.NoBoundaryInMultipartDefect))
- unless(isinstance(msg.defects[1],
- errors.MultipartInvariantViolationDefect))
-
- def test_missing_start_boundary(self):
- outer = self._msgobj('msg_42.txt')
- # The message structure is:
- #
- # multipart/mixed
- # text/plain
- # message/rfc822
- # multipart/mixed [*]
- #
- # [*] This message is missing its start boundary
- bad = outer.get_payload(1).get_payload(0)
- self.assertEqual(len(bad.defects), 1)
- self.assertTrue(isinstance(bad.defects[0],
- errors.StartBoundaryNotFoundDefect))
-
- def test_first_line_is_continuation_header(self):
- eq = self.assertEqual
- m = ' Line 1\nLine 2\nLine 3'
- msg = email.message_from_string(m)
- eq(msg.keys(), [])
- eq(msg.get_payload(), 'Line 2\nLine 3')
- eq(len(msg.defects), 1)
- self.assertTrue(isinstance(msg.defects[0],
- errors.FirstHeaderLineIsContinuationDefect))
- eq(msg.defects[0].line, ' Line 1\n')
-
-
-
-# Test RFC 2047 header encoding and decoding
-class TestRFC2047(unittest.TestCase):
- def test_rfc2047_multiline(self):
- eq = self.assertEqual
- s = """Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz
- foo bar =?mac-iceland?q?r=8Aksm=9Arg=8Cs?="""
- dh = decode_header(s)
- eq(dh, [
- ('Re:', None),
- ('r\x8aksm\x9arg\x8cs', 'mac-iceland'),
- ('baz foo bar', None),
- ('r\x8aksm\x9arg\x8cs', 'mac-iceland')])
- eq(str(make_header(dh)),
- """Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz foo bar
- =?mac-iceland?q?r=8Aksm=9Arg=8Cs?=""")
-
- def test_whitespace_eater_unicode(self):
- eq = self.assertEqual
- s = '=?ISO-8859-1?Q?Andr=E9?= Pirard <pirard@dom.ain>'
- dh = decode_header(s)
- eq(dh, [('Andr\xe9', 'iso-8859-1'), ('Pirard <pirard@dom.ain>', None)])
- hu = unicode(make_header(dh)).encode('latin-1')
- eq(hu, 'Andr\xe9 Pirard <pirard@dom.ain>')
-
- def test_whitespace_eater_unicode_2(self):
- eq = self.assertEqual
- s = 'The =?iso-8859-1?b?cXVpY2sgYnJvd24gZm94?= jumped over the =?iso-8859-1?b?bGF6eSBkb2c=?='
- dh = decode_header(s)
- eq(dh, [('The', None), ('quick brown fox', 'iso-8859-1'),
- ('jumped over the', None), ('lazy dog', 'iso-8859-1')])
- hu = make_header(dh).__unicode__()
- eq(hu, u'The quick brown fox jumped over the lazy dog')
-
- def test_rfc2047_missing_whitespace(self):
- s = 'Sm=?ISO-8859-1?B?9g==?=rg=?ISO-8859-1?B?5Q==?=sbord'
- dh = decode_header(s)
- self.assertEqual(dh, [(s, None)])
-
- def test_rfc2047_with_whitespace(self):
- s = 'Sm =?ISO-8859-1?B?9g==?= rg =?ISO-8859-1?B?5Q==?= sbord'
- dh = decode_header(s)
- self.assertEqual(dh, [('Sm', None), ('\xf6', 'iso-8859-1'),
- ('rg', None), ('\xe5', 'iso-8859-1'),
- ('sbord', None)])
-
-
-
-# Test the MIMEMessage class
-class TestMIMEMessage(TestEmailBase):
- def setUp(self):
- fp = openfile('msg_11.txt')
- try:
- self._text = fp.read()
- finally:
- fp.close()
-
- def test_type_error(self):
- self.assertRaises(TypeError, MIMEMessage, 'a plain string')
-
- def test_valid_argument(self):
- eq = self.assertEqual
- unless = self.assertTrue
- subject = 'A sub-message'
- m = Message()
- m['Subject'] = subject
- r = MIMEMessage(m)
- eq(r.get_content_type(), 'message/rfc822')
- payload = r.get_payload()
- unless(isinstance(payload, list))
- eq(len(payload), 1)
- subpart = payload[0]
- unless(subpart is m)
- eq(subpart['subject'], subject)
-
- def test_bad_multipart(self):
- eq = self.assertEqual
- msg1 = Message()
- msg1['Subject'] = 'subpart 1'
- msg2 = Message()
- msg2['Subject'] = 'subpart 2'
- r = MIMEMessage(msg1)
- self.assertRaises(errors.MultipartConversionError, r.attach, msg2)
-
- def test_generate(self):
- # First craft the message to be encapsulated
- m = Message()
- m['Subject'] = 'An enclosed message'
- m.set_payload('Here is the body of the message.\n')
- r = MIMEMessage(m)
- r['Subject'] = 'The enclosing message'
- s = StringIO()
- g = Generator(s)
- g.flatten(r)
- self.assertEqual(s.getvalue(), """\
-Content-Type: message/rfc822
-MIME-Version: 1.0
-Subject: The enclosing message
-
-Subject: An enclosed message
-
-Here is the body of the message.
-""")
-
- def test_parse_message_rfc822(self):
- eq = self.assertEqual
- unless = self.assertTrue
- msg = self._msgobj('msg_11.txt')
- eq(msg.get_content_type(), 'message/rfc822')
- payload = msg.get_payload()
- unless(isinstance(payload, list))
- eq(len(payload), 1)
- submsg = payload[0]
- self.assertTrue(isinstance(submsg, Message))
- eq(submsg['subject'], 'An enclosed message')
- eq(submsg.get_payload(), 'Here is the body of the message.\n')
-
- def test_dsn(self):
- eq = self.assertEqual
- unless = self.assertTrue
- # msg 16 is a Delivery Status Notification, see RFC 1894
- msg = self._msgobj('msg_16.txt')
- eq(msg.get_content_type(), 'multipart/report')
- unless(msg.is_multipart())
- eq(len(msg.get_payload()), 3)
- # Subpart 1 is a text/plain, human readable section
- subpart = msg.get_payload(0)
- eq(subpart.get_content_type(), 'text/plain')
- eq(subpart.get_payload(), """\
-This report relates to a message you sent with the following header fields:
-
- Message-id: <002001c144a6$8752e060$56104586@oxy.edu>
- Date: Sun, 23 Sep 2001 20:10:55 -0700
- From: "Ian T. Henry" <henryi@oxy.edu>
- To: SoCal Raves <scr@socal-raves.org>
- Subject: [scr] yeah for Ians!!
-
-Your message cannot be delivered to the following recipients:
-
- Recipient address: jangel1@cougar.noc.ucla.edu
- Reason: recipient reached disk quota
-
-""")
- # Subpart 2 contains the machine parsable DSN information. It
- # consists of two blocks of headers, represented by two nested Message
- # objects.
- subpart = msg.get_payload(1)
- eq(subpart.get_content_type(), 'message/delivery-status')
- eq(len(subpart.get_payload()), 2)
- # message/delivery-status should treat each block as a bunch of
- # headers, i.e. a bunch of Message objects.
- dsn1 = subpart.get_payload(0)
- unless(isinstance(dsn1, Message))
- eq(dsn1['original-envelope-id'], '0GK500B4HD0888@cougar.noc.ucla.edu')
- eq(dsn1.get_param('dns', header='reporting-mta'), '')
- # Try a missing one <wink>
- eq(dsn1.get_param('nsd', header='reporting-mta'), None)
- dsn2 = subpart.get_payload(1)
- unless(isinstance(dsn2, Message))
- eq(dsn2['action'], 'failed')
- eq(dsn2.get_params(header='original-recipient'),
- [('rfc822', ''), ('jangel1@cougar.noc.ucla.edu', '')])
- eq(dsn2.get_param('rfc822', header='final-recipient'), '')
- # Subpart 3 is the original message
- subpart = msg.get_payload(2)
- eq(subpart.get_content_type(), 'message/rfc822')
- payload = subpart.get_payload()
- unless(isinstance(payload, list))
- eq(len(payload), 1)
- subsubpart = payload[0]
- unless(isinstance(subsubpart, Message))
- eq(subsubpart.get_content_type(), 'text/plain')
- eq(subsubpart['message-id'],
- '<002001c144a6$8752e060$56104586@oxy.edu>')
-
- def test_epilogue(self):
- eq = self.ndiffAssertEqual
- fp = openfile('msg_21.txt')
- try:
- text = fp.read()
- finally:
- fp.close()
- msg = Message()
- msg['From'] = 'aperson@dom.ain'
- msg['To'] = 'bperson@dom.ain'
- msg['Subject'] = 'Test'
- msg.preamble = 'MIME message'
- msg.epilogue = 'End of MIME message\n'
- msg1 = MIMEText('One')
- msg2 = MIMEText('Two')
- msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY')
- msg.attach(msg1)
- msg.attach(msg2)
- sfp = StringIO()
- g = Generator(sfp)
- g.flatten(msg)
- eq(sfp.getvalue(), text)
-
- def test_no_nl_preamble(self):
- eq = self.ndiffAssertEqual
- msg = Message()
- msg['From'] = 'aperson@dom.ain'
- msg['To'] = 'bperson@dom.ain'
- msg['Subject'] = 'Test'
- msg.preamble = 'MIME message'
- msg.epilogue = ''
- msg1 = MIMEText('One')
- msg2 = MIMEText('Two')
- msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY')
- msg.attach(msg1)
- msg.attach(msg2)
- eq(msg.as_string(), """\
-From: aperson@dom.ain
-To: bperson@dom.ain
-Subject: Test
-Content-Type: multipart/mixed; boundary="BOUNDARY"
-
-MIME message
---BOUNDARY
-Content-Type: text/plain; charset="us-ascii"
-MIME-Version: 1.0
-Content-Transfer-Encoding: 7bit
-
-One
---BOUNDARY
-Content-Type: text/plain; charset="us-ascii"
-MIME-Version: 1.0
-Content-Transfer-Encoding: 7bit
-
-Two
---BOUNDARY--
-""")
-
- def test_default_type(self):
- eq = self.assertEqual
- fp = openfile('msg_30.txt')
- try:
- msg = email.message_from_file(fp)
- finally:
- fp.close()
- container1 = msg.get_payload(0)
- eq(container1.get_default_type(), 'message/rfc822')
- eq(container1.get_content_type(), 'message/rfc822')
- container2 = msg.get_payload(1)
- eq(container2.get_default_type(), 'message/rfc822')
- eq(container2.get_content_type(), 'message/rfc822')
- container1a = container1.get_payload(0)
- eq(container1a.get_default_type(), 'text/plain')
- eq(container1a.get_content_type(), 'text/plain')
- container2a = container2.get_payload(0)
- eq(container2a.get_default_type(), 'text/plain')
- eq(container2a.get_content_type(), 'text/plain')
-
- def test_default_type_with_explicit_container_type(self):
- eq = self.assertEqual
- fp = openfile('msg_28.txt')
- try:
- msg = email.message_from_file(fp)
- finally:
- fp.close()
- container1 = msg.get_payload(0)
- eq(container1.get_default_type(), 'message/rfc822')
- eq(container1.get_content_type(), 'message/rfc822')
- container2 = msg.get_payload(1)
- eq(container2.get_default_type(), 'message/rfc822')
- eq(container2.get_content_type(), 'message/rfc822')
- container1a = container1.get_payload(0)
- eq(container1a.get_default_type(), 'text/plain')
- eq(container1a.get_content_type(), 'text/plain')
- container2a = container2.get_payload(0)
- eq(container2a.get_default_type(), 'text/plain')
- eq(container2a.get_content_type(), 'text/plain')
-
- def test_default_type_non_parsed(self):
- eq = self.assertEqual
- neq = self.ndiffAssertEqual
- # Set up container
- container = MIMEMultipart('digest', 'BOUNDARY')
- container.epilogue = ''
- # Set up subparts
- subpart1a = MIMEText('message 1\n')
- subpart2a = MIMEText('message 2\n')
- subpart1 = MIMEMessage(subpart1a)
- subpart2 = MIMEMessage(subpart2a)
- container.attach(subpart1)
- container.attach(subpart2)
- eq(subpart1.get_content_type(), 'message/rfc822')
- eq(subpart1.get_default_type(), 'message/rfc822')
- eq(subpart2.get_content_type(), 'message/rfc822')
- eq(subpart2.get_default_type(), 'message/rfc822')
- neq(container.as_string(0), '''\
-Content-Type: multipart/digest; boundary="BOUNDARY"
-MIME-Version: 1.0
-
---BOUNDARY
-Content-Type: message/rfc822
-MIME-Version: 1.0
-
-Content-Type: text/plain; charset="us-ascii"
-MIME-Version: 1.0
-Content-Transfer-Encoding: 7bit
-
-message 1
-
---BOUNDARY
-Content-Type: message/rfc822
-MIME-Version: 1.0
-
-Content-Type: text/plain; charset="us-ascii"
-MIME-Version: 1.0
-Content-Transfer-Encoding: 7bit
-
-message 2
-
---BOUNDARY--
-''')
- del subpart1['content-type']
- del subpart1['mime-version']
- del subpart2['content-type']
- del subpart2['mime-version']
- eq(subpart1.get_content_type(), 'message/rfc822')
- eq(subpart1.get_default_type(), 'message/rfc822')
- eq(subpart2.get_content_type(), 'message/rfc822')
- eq(subpart2.get_default_type(), 'message/rfc822')
- neq(container.as_string(0), '''\
-Content-Type: multipart/digest; boundary="BOUNDARY"
-MIME-Version: 1.0
-
---BOUNDARY
-
-Content-Type: text/plain; charset="us-ascii"
-MIME-Version: 1.0
-Content-Transfer-Encoding: 7bit
-
-message 1
-
---BOUNDARY
-
-Content-Type: text/plain; charset="us-ascii"
-MIME-Version: 1.0
-Content-Transfer-Encoding: 7bit
-
-message 2
-
---BOUNDARY--
-''')
-
- def test_mime_attachments_in_constructor(self):
- eq = self.assertEqual
- text1 = MIMEText('')
- text2 = MIMEText('')
- msg = MIMEMultipart(_subparts=(text1, text2))
- eq(len(msg.get_payload()), 2)
- eq(msg.get_payload(0), text1)
- eq(msg.get_payload(1), text2)
-
-
-
-# A general test of parser->model->generator idempotency. IOW, read a message
-# in, parse it into a message object tree, then without touching the tree,
-# regenerate the plain text. The original text and the transformed text
-# should be identical. Note: that we ignore the Unix-From since that may
-# contain a changed date.
-class TestIdempotent(TestEmailBase):
- def _msgobj(self, filename):
- fp = openfile(filename)
- try:
- data = fp.read()
- finally:
- fp.close()
- msg = email.message_from_string(data)
- return msg, data
-
- def _idempotent(self, msg, text):
- eq = self.ndiffAssertEqual
- s = StringIO()
- g = Generator(s, maxheaderlen=0)
- g.flatten(msg)
- eq(text, s.getvalue())
-
- def test_parse_text_message(self):
- eq = self.assertEqual
- msg, text = self._msgobj('msg_01.txt')
- eq(msg.get_content_type(), 'text/plain')
- eq(msg.get_content_maintype(), 'text')
- eq(msg.get_content_subtype(), 'plain')
- eq(msg.get_params()[1], ('charset', 'us-ascii'))
- eq(msg.get_param('charset'), 'us-ascii')
- eq(msg.preamble, None)
- eq(msg.epilogue, None)
- self._idempotent(msg, text)
-
- def test_parse_untyped_message(self):
- eq = self.assertEqual
- msg, text = self._msgobj('msg_03.txt')
- eq(msg.get_content_type(), 'text/plain')
- eq(msg.get_params(), None)
- eq(msg.get_param('charset'), None)
- self._idempotent(msg, text)
-
- def test_simple_multipart(self):
- msg, text = self._msgobj('msg_04.txt')
- self._idempotent(msg, text)
-
- def test_MIME_digest(self):
- msg, text = self._msgobj('msg_02.txt')
- self._idempotent(msg, text)
-
- def test_long_header(self):
- msg, text = self._msgobj('msg_27.txt')
- self._idempotent(msg, text)
-
- def test_MIME_digest_with_part_headers(self):
- msg, text = self._msgobj('msg_28.txt')
- self._idempotent(msg, text)
-
- def test_mixed_with_image(self):
- msg, text = self._msgobj('msg_06.txt')
- self._idempotent(msg, text)
-
- def test_multipart_report(self):
- msg, text = self._msgobj('msg_05.txt')
- self._idempotent(msg, text)
-
- def test_dsn(self):
- msg, text = self._msgobj('msg_16.txt')
- self._idempotent(msg, text)
-
- def test_preamble_epilogue(self):
- msg, text = self._msgobj('msg_21.txt')
- self._idempotent(msg, text)
-
- def test_multipart_one_part(self):
- msg, text = self._msgobj('msg_23.txt')
- self._idempotent(msg, text)
-
- def test_multipart_no_parts(self):
- msg, text = self._msgobj('msg_24.txt')
- self._idempotent(msg, text)
-
- def test_no_start_boundary(self):
- msg, text = self._msgobj('msg_31.txt')
- self._idempotent(msg, text)
-
- def test_rfc2231_charset(self):
- msg, text = self._msgobj('msg_32.txt')
- self._idempotent(msg, text)
-
- def test_more_rfc2231_parameters(self):
- msg, text = self._msgobj('msg_33.txt')
- self._idempotent(msg, text)
-
- def test_text_plain_in_a_multipart_digest(self):
- msg, text = self._msgobj('msg_34.txt')
- self._idempotent(msg, text)
-
- def test_nested_multipart_mixeds(self):
- msg, text = self._msgobj('msg_12a.txt')
- self._idempotent(msg, text)
-
- def test_message_external_body_idempotent(self):
- msg, text = self._msgobj('msg_36.txt')
- self._idempotent(msg, text)
-
- def test_content_type(self):
- eq = self.assertEqual
- unless = self.assertTrue
- # Get a message object and reset the seek pointer for other tests
- msg, text = self._msgobj('msg_05.txt')
- eq(msg.get_content_type(), 'multipart/report')
- # Test the Content-Type: parameters
- params = {}
- for pk, pv in msg.get_params():
- params[pk] = pv
- eq(params['report-type'], 'delivery-status')
- eq(params['boundary'], 'D1690A7AC1.996856090/mail.example.com')
- eq(msg.preamble, 'This is a MIME-encapsulated message.\n')
- eq(msg.epilogue, '\n')
- eq(len(msg.get_payload()), 3)
- # Make sure the subparts are what we expect
- msg1 = msg.get_payload(0)
- eq(msg1.get_content_type(), 'text/plain')
- eq(msg1.get_payload(), 'Yadda yadda yadda\n')
- msg2 = msg.get_payload(1)
- eq(msg2.get_content_type(), 'text/plain')
- eq(msg2.get_payload(), 'Yadda yadda yadda\n')
- msg3 = msg.get_payload(2)
- eq(msg3.get_content_type(), 'message/rfc822')
- self.assertTrue(isinstance(msg3, Message))
- payload = msg3.get_payload()
- unless(isinstance(payload, list))
- eq(len(payload), 1)
- msg4 = payload[0]
- unless(isinstance(msg4, Message))
- eq(msg4.get_payload(), 'Yadda yadda yadda\n')
-
- def test_parser(self):
- eq = self.assertEqual
- unless = self.assertTrue
- msg, text = self._msgobj('msg_06.txt')
- # Check some of the outer headers
- eq(msg.get_content_type(), 'message/rfc822')
- # Make sure the payload is a list of exactly one sub-Message, and that
- # that submessage has a type of text/plain
- payload = msg.get_payload()
- unless(isinstance(payload, list))
- eq(len(payload), 1)
- msg1 = payload[0]
- self.assertTrue(isinstance(msg1, Message))
- eq(msg1.get_content_type(), 'text/plain')
- self.assertTrue(isinstance(msg1.get_payload(), str))
- eq(msg1.get_payload(), '\n')
-
-
-
-# Test various other bits of the package's functionality
-class TestMiscellaneous(TestEmailBase):
- def test_message_from_string(self):
- fp = openfile('msg_01.txt')
- try:
- text = fp.read()
- finally:
- fp.close()
- msg = email.message_from_string(text)
- s = StringIO()
- # Don't wrap/continue long headers since we're trying to test
- # idempotency.
- g = Generator(s, maxheaderlen=0)
- g.flatten(msg)
- self.assertEqual(text, s.getvalue())
-
- def test_message_from_file(self):
- fp = openfile('msg_01.txt')
- try:
- text = fp.read()
- fp.seek(0)
- msg = email.message_from_file(fp)
- s = StringIO()
- # Don't wrap/continue long headers since we're trying to test
- # idempotency.
- g = Generator(s, maxheaderlen=0)
- g.flatten(msg)
- self.assertEqual(text, s.getvalue())
- finally:
- fp.close()
-
- def test_message_from_string_with_class(self):
- unless = self.assertTrue
- fp = openfile('msg_01.txt')
- try:
- text = fp.read()
- finally:
- fp.close()
- # Create a subclass
- class MyMessage(Message):
- pass
-
- msg = email.message_from_string(text, MyMessage)
- unless(isinstance(msg, MyMessage))
- # Try something more complicated
- fp = openfile('msg_02.txt')
- try:
- text = fp.read()
- finally:
- fp.close()
- msg = email.message_from_string(text, MyMessage)
- for subpart in msg.walk():
- unless(isinstance(subpart, MyMessage))
-
- def test_message_from_file_with_class(self):
- unless = self.assertTrue
- # Create a subclass
- class MyMessage(Message):
- pass
-
- fp = openfile('msg_01.txt')
- try:
- msg = email.message_from_file(fp, MyMessage)
- finally:
- fp.close()
- unless(isinstance(msg, MyMessage))
- # Try something more complicated
- fp = openfile('msg_02.txt')
- try:
- msg = email.message_from_file(fp, MyMessage)
- finally:
- fp.close()
- for subpart in msg.walk():
- unless(isinstance(subpart, MyMessage))
-
- def test__all__(self):
- module = __import__('email')
- # Can't use sorted() here due to Python 2.3 compatibility
- all = module.__all__[:]
- all.sort()
- self.assertEqual(all, [
- # Old names
- 'Charset', 'Encoders', 'Errors', 'Generator',
- 'Header', 'Iterators', 'MIMEAudio', 'MIMEBase',
- 'MIMEImage', 'MIMEMessage', 'MIMEMultipart',
- 'MIMENonMultipart', 'MIMEText', 'Message',
- 'Parser', 'Utils', 'base64MIME',
- # new names
- 'base64mime', 'charset', 'encoders', 'errors', 'generator',
- 'header', 'iterators', 'message', 'message_from_file',
- 'message_from_string', 'mime', 'parser',
- 'quopriMIME', 'quoprimime', 'utils',
- ])
-
- def test_formatdate(self):
- now = time.time()
- self.assertEqual(utils.parsedate(utils.formatdate(now))[:6],
- time.gmtime(now)[:6])
-
- def test_formatdate_localtime(self):
- now = time.time()
- self.assertEqual(
- utils.parsedate(utils.formatdate(now, localtime=True))[:6],
- time.localtime(now)[:6])
-
- def test_formatdate_usegmt(self):
- now = time.time()
- self.assertEqual(
- utils.formatdate(now, localtime=False),
- time.strftime('%a, %d %b %Y %H:%M:%S -0000', time.gmtime(now)))
- self.assertEqual(
- utils.formatdate(now, localtime=False, usegmt=True),
- time.strftime('%a, %d %b %Y %H:%M:%S GMT', time.gmtime(now)))
-
- def test_parsedate_none(self):
- self.assertEqual(utils.parsedate(''), None)
-
- def test_parsedate_compact(self):
- # The FWS after the comma is optional
- self.assertEqual(utils.parsedate('Wed,3 Apr 2002 14:58:26 +0800'),
- utils.parsedate('Wed, 3 Apr 2002 14:58:26 +0800'))
-
- def test_parsedate_no_dayofweek(self):
- eq = self.assertEqual
- eq(utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'),
- (2003, 2, 25, 13, 47, 26, 0, 1, -1, -28800))
-
- def test_parsedate_compact_no_dayofweek(self):
- eq = self.assertEqual
- eq(utils.parsedate_tz('5 Feb 2003 13:47:26 -0800'),
- (2003, 2, 5, 13, 47, 26, 0, 1, -1, -28800))
-
- def test_parsedate_acceptable_to_time_functions(self):
- eq = self.assertEqual
- timetup = utils.parsedate('5 Feb 2003 13:47:26 -0800')
- t = int(time.mktime(timetup))
- eq(time.localtime(t)[:6], timetup[:6])
- eq(int(time.strftime('%Y', timetup)), 2003)
- timetup = utils.parsedate_tz('5 Feb 2003 13:47:26 -0800')
- t = int(time.mktime(timetup[:9]))
- eq(time.localtime(t)[:6], timetup[:6])
- eq(int(time.strftime('%Y', timetup[:9])), 2003)
-
- def test_parseaddr_empty(self):
- self.assertEqual(utils.parseaddr('<>'), ('', ''))
- self.assertEqual(utils.formataddr(utils.parseaddr('<>')), '')
-
- def test_noquote_dump(self):
- self.assertEqual(
- utils.formataddr(('A Silly Person', 'person@dom.ain')),
- 'A Silly Person <person@dom.ain>')
-
- def test_escape_dump(self):
- self.assertEqual(
- utils.formataddr(('A (Very) Silly Person', 'person@dom.ain')),
- r'"A \(Very\) Silly Person" <person@dom.ain>')
- a = r'A \(Special\) Person'
- b = 'person@dom.ain'
- self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))
-
- def test_escape_backslashes(self):
- self.assertEqual(
- utils.formataddr(('Arthur \Backslash\ Foobar', 'person@dom.ain')),
- r'"Arthur \\Backslash\\ Foobar" <person@dom.ain>')
- a = r'Arthur \Backslash\ Foobar'
- b = 'person@dom.ain'
- self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))
-
- def test_name_with_dot(self):
- x = 'John X. Doe <jxd@example.com>'
- y = '"John X. Doe" <jxd@example.com>'
- a, b = ('John X. Doe', 'jxd@example.com')
- self.assertEqual(utils.parseaddr(x), (a, b))
- self.assertEqual(utils.parseaddr(y), (a, b))
- # formataddr() quotes the name if there's a dot in it
- self.assertEqual(utils.formataddr((a, b)), y)
-
- def test_multiline_from_comment(self):
- x = """\
-Foo
-\tBar <foo@example.com>"""
- self.assertEqual(utils.parseaddr(x), ('Foo Bar', 'foo@example.com'))
-
- def test_quote_dump(self):
- self.assertEqual(
- utils.formataddr(('A Silly; Person', 'person@dom.ain')),
- r'"A Silly; Person" <person@dom.ain>')
-
- def test_fix_eols(self):
- eq = self.assertEqual
- eq(utils.fix_eols('hello'), 'hello')
- eq(utils.fix_eols('hello\n'), 'hello\r\n')
- eq(utils.fix_eols('hello\r'), 'hello\r\n')
- eq(utils.fix_eols('hello\r\n'), 'hello\r\n')
- eq(utils.fix_eols('hello\n\r'), 'hello\r\n\r\n')
-
- def test_charset_richcomparisons(self):
- eq = self.assertEqual
- ne = self.assertNotEqual
- cset1 = Charset()
- cset2 = Charset()
- eq(cset1, 'us-ascii')
- eq(cset1, 'US-ASCII')
- eq(cset1, 'Us-AsCiI')
- eq('us-ascii', cset1)
- eq('US-ASCII', cset1)
- eq('Us-AsCiI', cset1)
- ne(cset1, 'usascii')
- ne(cset1, 'USASCII')
- ne(cset1, 'UsAsCiI')
- ne('usascii', cset1)
- ne('USASCII', cset1)
- ne('UsAsCiI', cset1)
- eq(cset1, cset2)
- eq(cset2, cset1)
-
- def test_getaddresses(self):
- eq = self.assertEqual
- eq(utils.getaddresses(['aperson@dom.ain (Al Person)',
- 'Bud Person <bperson@dom.ain>']),
- [('Al Person', 'aperson@dom.ain'),
- ('Bud Person', 'bperson@dom.ain')])
-
- def test_getaddresses_nasty(self):
- eq = self.assertEqual
- eq(utils.getaddresses(['foo: ;']), [('', '')])
- eq(utils.getaddresses(
- ['[]*-- =~$']),
- [('', ''), ('', ''), ('', '*--')])
- eq(utils.getaddresses(
- ['foo: ;', '"Jason R. Mastaler" <jason@dom.ain>']),
- [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')])
-
- def test_getaddresses_embedded_comment(self):
- """Test proper handling of a nested comment"""
- eq = self.assertEqual
- addrs = utils.getaddresses(['User ((nested comment)) <foo@bar.com>'])
- eq(addrs[0][1], 'foo@bar.com')
-
- def test_utils_quote_unquote(self):
- eq = self.assertEqual
- msg = Message()
- msg.add_header('content-disposition', 'attachment',
- filename='foo\\wacky"name')
- eq(msg.get_filename(), 'foo\\wacky"name')
-
- def test_get_body_encoding_with_bogus_charset(self):
- charset = Charset('not a charset')
- self.assertEqual(charset.get_body_encoding(), 'base64')
-
- def test_get_body_encoding_with_uppercase_charset(self):
- eq = self.assertEqual
- msg = Message()
- msg['Content-Type'] = 'text/plain; charset=UTF-8'
- eq(msg['content-type'], 'text/plain; charset=UTF-8')
- charsets = msg.get_charsets()
- eq(len(charsets), 1)
- eq(charsets[0], 'utf-8')
- charset = Charset(charsets[0])
- eq(charset.get_body_encoding(), 'base64')
- msg.set_payload('hello world', charset=charset)
- eq(msg.get_payload(), 'aGVsbG8gd29ybGQ=\n')
- eq(msg.get_payload(decode=True), 'hello world')
- eq(msg['content-transfer-encoding'], 'base64')
- # Try another one
- msg = Message()
- msg['Content-Type'] = 'text/plain; charset="US-ASCII"'
- charsets = msg.get_charsets()
- eq(len(charsets), 1)
- eq(charsets[0], 'us-ascii')
- charset = Charset(charsets[0])
- eq(charset.get_body_encoding(), encoders.encode_7or8bit)
- msg.set_payload('hello world', charset=charset)
- eq(msg.get_payload(), 'hello world')
- eq(msg['content-transfer-encoding'], '7bit')
-
- def test_charsets_case_insensitive(self):
- lc = Charset('us-ascii')
- uc = Charset('US-ASCII')
- self.assertEqual(lc.get_body_encoding(), uc.get_body_encoding())
-
- def test_partial_falls_inside_message_delivery_status(self):
- eq = self.ndiffAssertEqual
- # The Parser interface provides chunks of data to FeedParser in 8192
- # byte gulps. SF bug #1076485 found one of those chunks inside
- # message/delivery-status header block, which triggered an
- # unreadline() of NeedMoreData.
- msg = self._msgobj('msg_43.txt')
- sfp = StringIO()
- iterators._structure(msg, sfp)
- eq(sfp.getvalue(), """\
-multipart/report
- text/plain
- message/delivery-status
- text/plain
- text/plain
- text/plain
- text/plain
- text/plain
- text/plain
- text/plain
- text/plain
- text/plain
- text/plain
- text/plain
- text/plain
- text/plain
- text/plain
- text/plain
- text/plain
- text/plain
- text/plain
- text/plain
- text/plain
- text/plain
- text/plain
- text/plain
- text/plain
- text/plain
- text/plain
- text/rfc822-headers
-""")
-
-
-
-# Test the iterator/generators
-class TestIterators(TestEmailBase):
- def test_body_line_iterator(self):
- eq = self.assertEqual
- neq = self.ndiffAssertEqual
- # First a simple non-multipart message
- msg = self._msgobj('msg_01.txt')
- it = iterators.body_line_iterator(msg)
- lines = list(it)
- eq(len(lines), 6)
- neq(EMPTYSTRING.join(lines), msg.get_payload())
- # Now a more complicated multipart
- msg = self._msgobj('msg_02.txt')
- it = iterators.body_line_iterator(msg)
- lines = list(it)
- eq(len(lines), 43)
- fp = openfile('msg_19.txt')
- try:
- neq(EMPTYSTRING.join(lines), fp.read())
- finally:
- fp.close()
-
- def test_typed_subpart_iterator(self):
- eq = self.assertEqual
- msg = self._msgobj('msg_04.txt')
- it = iterators.typed_subpart_iterator(msg, 'text')
- lines = []
- subparts = 0
- for subpart in it:
- subparts += 1
- lines.append(subpart.get_payload())
- eq(subparts, 2)
- eq(EMPTYSTRING.join(lines), """\
-a simple kind of mirror
-to reflect upon our own
-a simple kind of mirror
-to reflect upon our own
-""")
-
- def test_typed_subpart_iterator_default_type(self):
- eq = self.assertEqual
- msg = self._msgobj('msg_03.txt')
- it = iterators.typed_subpart_iterator(msg, 'text', 'plain')
- lines = []
- subparts = 0
- for subpart in it:
- subparts += 1
- lines.append(subpart.get_payload())
- eq(subparts, 1)
- eq(EMPTYSTRING.join(lines), """\
-
-Hi,
-
-Do you like this message?
-
--Me
-""")
-
-
-
-class TestParsers(TestEmailBase):
- def test_header_parser(self):
- eq = self.assertEqual
- # Parse only the headers of a complex multipart MIME document
- fp = openfile('msg_02.txt')
- try:
- msg = HeaderParser().parse(fp)
- finally:
- fp.close()
- eq(msg['from'], 'ppp-request@zzz.org')
- eq(msg['to'], 'ppp@zzz.org')
- eq(msg.get_content_type(), 'multipart/mixed')
- self.assertFalse(msg.is_multipart())
- self.assertTrue(isinstance(msg.get_payload(), str))
-
- def test_whitespace_continuation(self):
- eq = self.assertEqual
- # This message contains a line after the Subject: header that has only
- # whitespace, but it is not empty!
- msg = email.message_from_string("""\
-From: aperson@dom.ain
-To: bperson@dom.ain
-Subject: the next line has a space on it
-\x20
-Date: Mon, 8 Apr 2002 15:09:19 -0400
-Message-ID: spam
-
-Here's the message body
-""")
- eq(msg['subject'], 'the next line has a space on it\n ')
- eq(msg['message-id'], 'spam')
- eq(msg.get_payload(), "Here's the message body\n")
-
- def test_whitespace_continuation_last_header(self):
- eq = self.assertEqual
- # Like the previous test, but the subject line is the last
- # header.
- msg = email.message_from_string("""\
-From: aperson@dom.ain
-To: bperson@dom.ain
-Date: Mon, 8 Apr 2002 15:09:19 -0400
-Message-ID: spam
-Subject: the next line has a space on it
-\x20
-
-Here's the message body
-""")
- eq(msg['subject'], 'the next line has a space on it\n ')
- eq(msg['message-id'], 'spam')
- eq(msg.get_payload(), "Here's the message body\n")
-
- def test_crlf_separation(self):
- eq = self.assertEqual
- fp = openfile('msg_26.txt', mode='rb')
- try:
- msg = Parser().parse(fp)
- finally:
- fp.close()
- eq(len(msg.get_payload()), 2)
- part1 = msg.get_payload(0)
- eq(part1.get_content_type(), 'text/plain')
- eq(part1.get_payload(), 'Simple email with attachment.\r\n\r\n')
- part2 = msg.get_payload(1)
- eq(part2.get_content_type(), 'application/riscos')
-
- def test_multipart_digest_with_extra_mime_headers(self):
- eq = self.assertEqual
- neq = self.ndiffAssertEqual
- fp = openfile('msg_28.txt')
- try:
- msg = email.message_from_file(fp)
- finally:
- fp.close()
- # Structure is:
- # multipart/digest
- # message/rfc822
- # text/plain
- # message/rfc822
- # text/plain
- eq(msg.is_multipart(), 1)
- eq(len(msg.get_payload()), 2)
- part1 = msg.get_payload(0)
- eq(part1.get_content_type(), 'message/rfc822')
- eq(part1.is_multipart(), 1)
- eq(len(part1.get_payload()), 1)
- part1a = part1.get_payload(0)
- eq(part1a.is_multipart(), 0)
- eq(part1a.get_content_type(), 'text/plain')
- neq(part1a.get_payload(), 'message 1\n')
- # next message/rfc822
- part2 = msg.get_payload(1)
- eq(part2.get_content_type(), 'message/rfc822')
- eq(part2.is_multipart(), 1)
- eq(len(part2.get_payload()), 1)
- part2a = part2.get_payload(0)
- eq(part2a.is_multipart(), 0)
- eq(part2a.get_content_type(), 'text/plain')
- neq(part2a.get_payload(), 'message 2\n')
-
- def test_three_lines(self):
- # A bug report by Andrew McNamara
- lines = ['From: Andrew Person <aperson@dom.ain',
- 'Subject: Test',
- 'Date: Tue, 20 Aug 2002 16:43:45 +1000']
- msg = email.message_from_string(NL.join(lines))
- self.assertEqual(msg['date'], 'Tue, 20 Aug 2002 16:43:45 +1000')
-
- def test_strip_line_feed_and_carriage_return_in_headers(self):
- eq = self.assertEqual
- # For [ 1002475 ] email message parser doesn't handle \r\n correctly
- value1 = 'text'
- value2 = 'more text'
- m = 'Header: %s\r\nNext-Header: %s\r\n\r\nBody\r\n\r\n' % (
- value1, value2)
- msg = email.message_from_string(m)
- eq(msg.get('Header'), value1)
- eq(msg.get('Next-Header'), value2)
-
- def test_rfc2822_header_syntax(self):
- eq = self.assertEqual
- m = '>From: foo\nFrom: bar\n!"#QUX;~: zoo\n\nbody'
- msg = email.message_from_string(m)
- eq(len(msg.keys()), 3)
- keys = msg.keys()
- keys.sort()
- eq(keys, ['!"#QUX;~', '>From', 'From'])
- eq(msg.get_payload(), 'body')
-
- def test_rfc2822_space_not_allowed_in_header(self):
- eq = self.assertEqual
- m = '>From foo@example.com 11:25:53\nFrom: bar\n!"#QUX;~: zoo\n\nbody'
- msg = email.message_from_string(m)
- eq(len(msg.keys()), 0)
-
- def test_rfc2822_one_character_header(self):
- eq = self.assertEqual
- m = 'A: first header\nB: second header\nCC: third header\n\nbody'
- msg = email.message_from_string(m)
- headers = msg.keys()
- headers.sort()
- eq(headers, ['A', 'B', 'CC'])
- eq(msg.get_payload(), 'body')
-
-
-
-class TestBase64(unittest.TestCase):
- def test_len(self):
- eq = self.assertEqual
- eq(base64mime.base64_len('hello'),
- len(base64mime.encode('hello', eol='')))
- for size in range(15):
- if size == 0 : bsize = 0
- elif size <= 3 : bsize = 4
- elif size <= 6 : bsize = 8
- elif size <= 9 : bsize = 12
- elif size <= 12: bsize = 16
- else : bsize = 20
- eq(base64mime.base64_len('x'*size), bsize)
-
- def test_decode(self):
- eq = self.assertEqual
- eq(base64mime.decode(''), '')
- eq(base64mime.decode('aGVsbG8='), 'hello')
- eq(base64mime.decode('aGVsbG8=', 'X'), 'hello')
- eq(base64mime.decode('aGVsbG8NCndvcmxk\n', 'X'), 'helloXworld')
-
- def test_encode(self):
- eq = self.assertEqual
- eq(base64mime.encode(''), '')
- eq(base64mime.encode('hello'), 'aGVsbG8=\n')
- # Test the binary flag
- eq(base64mime.encode('hello\n'), 'aGVsbG8K\n')
- eq(base64mime.encode('hello\n', 0), 'aGVsbG8NCg==\n')
- # Test the maxlinelen arg
- eq(base64mime.encode('xxxx ' * 20, maxlinelen=40), """\
-eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg
-eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg
-eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg
-eHh4eCB4eHh4IA==
-""")
- # Test the eol argument
- eq(base64mime.encode('xxxx ' * 20, maxlinelen=40, eol='\r\n'), """\
-eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r
-eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r
-eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r
-eHh4eCB4eHh4IA==\r
-""")
-
- def test_header_encode(self):
- eq = self.assertEqual
- he = base64mime.header_encode
- eq(he('hello'), '=?iso-8859-1?b?aGVsbG8=?=')
- eq(he('hello\nworld'), '=?iso-8859-1?b?aGVsbG8NCndvcmxk?=')
- # Test the charset option
- eq(he('hello', charset='iso-8859-2'), '=?iso-8859-2?b?aGVsbG8=?=')
- # Test the keep_eols flag
- eq(he('hello\nworld', keep_eols=True),
- '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?=')
- # Test the maxlinelen argument
- eq(he('xxxx ' * 20, maxlinelen=40), """\
-=?iso-8859-1?b?eHh4eCB4eHh4IHh4eHggeHg=?=
- =?iso-8859-1?b?eHggeHh4eCB4eHh4IHh4eHg=?=
- =?iso-8859-1?b?IHh4eHggeHh4eCB4eHh4IHg=?=
- =?iso-8859-1?b?eHh4IHh4eHggeHh4eCB4eHg=?=
- =?iso-8859-1?b?eCB4eHh4IHh4eHggeHh4eCA=?=
- =?iso-8859-1?b?eHh4eCB4eHh4IHh4eHgg?=""")
- # Test the eol argument
- eq(he('xxxx ' * 20, maxlinelen=40, eol='\r\n'), """\
-=?iso-8859-1?b?eHh4eCB4eHh4IHh4eHggeHg=?=\r
- =?iso-8859-1?b?eHggeHh4eCB4eHh4IHh4eHg=?=\r
- =?iso-8859-1?b?IHh4eHggeHh4eCB4eHh4IHg=?=\r
- =?iso-8859-1?b?eHh4IHh4eHggeHh4eCB4eHg=?=\r
- =?iso-8859-1?b?eCB4eHh4IHh4eHggeHh4eCA=?=\r
- =?iso-8859-1?b?eHh4eCB4eHh4IHh4eHgg?=""")
-
-
-
-class TestQuopri(unittest.TestCase):
- def setUp(self):
- self.hlit = [chr(x) for x in range(ord('a'), ord('z')+1)] + \
- [chr(x) for x in range(ord('A'), ord('Z')+1)] + \
- [chr(x) for x in range(ord('0'), ord('9')+1)] + \
- ['!', '*', '+', '-', '/', ' ']
- self.hnon = [chr(x) for x in range(256) if chr(x) not in self.hlit]
- assert len(self.hlit) + len(self.hnon) == 256
- self.blit = [chr(x) for x in range(ord(' '), ord('~')+1)] + ['\t']
- self.blit.remove('=')
- self.bnon = [chr(x) for x in range(256) if chr(x) not in self.blit]
- assert len(self.blit) + len(self.bnon) == 256
-
- def test_header_quopri_check(self):
- for c in self.hlit:
- self.assertFalse(quoprimime.header_quopri_check(c))
- for c in self.hnon:
- self.assertTrue(quoprimime.header_quopri_check(c))
-
- def test_body_quopri_check(self):
- for c in self.blit:
- self.assertFalse(quoprimime.body_quopri_check(c))
- for c in self.bnon:
- self.assertTrue(quoprimime.body_quopri_check(c))
-
- def test_header_quopri_len(self):
- eq = self.assertEqual
- hql = quoprimime.header_quopri_len
- enc = quoprimime.header_encode
- for s in ('hello', 'h@e@l@l@o@'):
- # Empty charset and no line-endings. 7 == RFC chrome
- eq(hql(s), len(enc(s, charset='', eol=''))-7)
- for c in self.hlit:
- eq(hql(c), 1)
- for c in self.hnon:
- eq(hql(c), 3)
-
- def test_body_quopri_len(self):
- eq = self.assertEqual
- bql = quoprimime.body_quopri_len
- for c in self.blit:
- eq(bql(c), 1)
- for c in self.bnon:
- eq(bql(c), 3)
-
- def test_quote_unquote_idempotent(self):
- for x in range(256):
- c = chr(x)
- self.assertEqual(quoprimime.unquote(quoprimime.quote(c)), c)
-
- def test_header_encode(self):
- eq = self.assertEqual
- he = quoprimime.header_encode
- eq(he('hello'), '=?iso-8859-1?q?hello?=')
- eq(he('hello\nworld'), '=?iso-8859-1?q?hello=0D=0Aworld?=')
- # Test the charset option
- eq(he('hello', charset='iso-8859-2'), '=?iso-8859-2?q?hello?=')
- # Test the keep_eols flag
- eq(he('hello\nworld', keep_eols=True), '=?iso-8859-1?q?hello=0Aworld?=')
- # Test a non-ASCII character
- eq(he('hello\xc7there'), '=?iso-8859-1?q?hello=C7there?=')
- # Test the maxlinelen argument
- eq(he('xxxx ' * 20, maxlinelen=40), """\
-=?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xx?=
- =?iso-8859-1?q?xx_xxxx_xxxx_xxxx_xxxx?=
- =?iso-8859-1?q?_xxxx_xxxx_xxxx_xxxx_x?=
- =?iso-8859-1?q?xxx_xxxx_xxxx_xxxx_xxx?=
- =?iso-8859-1?q?x_xxxx_xxxx_?=""")
- # Test the eol argument
- eq(he('xxxx ' * 20, maxlinelen=40, eol='\r\n'), """\
-=?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xx?=\r
- =?iso-8859-1?q?xx_xxxx_xxxx_xxxx_xxxx?=\r
- =?iso-8859-1?q?_xxxx_xxxx_xxxx_xxxx_x?=\r
- =?iso-8859-1?q?xxx_xxxx_xxxx_xxxx_xxx?=\r
- =?iso-8859-1?q?x_xxxx_xxxx_?=""")
-
- def test_decode(self):
- eq = self.assertEqual
- eq(quoprimime.decode(''), '')
- eq(quoprimime.decode('hello'), 'hello')
- eq(quoprimime.decode('hello', 'X'), 'hello')
- eq(quoprimime.decode('hello\nworld', 'X'), 'helloXworld')
-
- def test_encode(self):
- eq = self.assertEqual
- eq(quoprimime.encode(''), '')
- eq(quoprimime.encode('hello'), 'hello')
- # Test the binary flag
- eq(quoprimime.encode('hello\r\nworld'), 'hello\nworld')
- eq(quoprimime.encode('hello\r\nworld', 0), 'hello\nworld')
- # Test the maxlinelen arg
- eq(quoprimime.encode('xxxx ' * 20, maxlinelen=40), """\
-xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=
- xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=
-x xxxx xxxx xxxx xxxx=20""")
- # Test the eol argument
- eq(quoprimime.encode('xxxx ' * 20, maxlinelen=40, eol='\r\n'), """\
-xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=\r
- xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=\r
-x xxxx xxxx xxxx xxxx=20""")
- eq(quoprimime.encode("""\
-one line
-
-two line"""), """\
-one line
-
-two line""")
-
-
-
-# Test the Charset class
-class TestCharset(unittest.TestCase):
- def tearDown(self):
- from email import charset as CharsetModule
- try:
- del CharsetModule.CHARSETS['fake']
- except KeyError:
- pass
-
- def test_idempotent(self):
- eq = self.assertEqual
- # Make sure us-ascii = no Unicode conversion
- c = Charset('us-ascii')
- s = 'Hello World!'
- sp = c.to_splittable(s)
- eq(s, c.from_splittable(sp))
- # test 8-bit idempotency with us-ascii
- s = '\xa4\xa2\xa4\xa4\xa4\xa6\xa4\xa8\xa4\xaa'
- sp = c.to_splittable(s)
- eq(s, c.from_splittable(sp))
-
- def test_body_encode(self):
- eq = self.assertEqual
- # Try a charset with QP body encoding
- c = Charset('iso-8859-1')
- eq('hello w=F6rld', c.body_encode('hello w\xf6rld'))
- # Try a charset with Base64 body encoding
- c = Charset('utf-8')
- eq('aGVsbG8gd29ybGQ=\n', c.body_encode('hello world'))
- # Try a charset with None body encoding
- c = Charset('us-ascii')
- eq('hello world', c.body_encode('hello world'))
- # Try the convert argument, where input codec != output codec
- c = Charset('euc-jp')
- # With apologies to Tokio Kikuchi ;)
- try:
- eq('\x1b$B5FCO;~IW\x1b(B',
- c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7'))
- eq('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7',
- c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7', False))
- except LookupError:
- # We probably don't have the Japanese codecs installed
- pass
- # Testing SF bug #625509, which we have to fake, since there are no
- # built-in encodings where the header encoding is QP but the body
- # encoding is not.
- from email import charset as CharsetModule
- CharsetModule.add_charset('fake', CharsetModule.QP, None)
- c = Charset('fake')
- eq('hello w\xf6rld', c.body_encode('hello w\xf6rld'))
-
- def test_unicode_charset_name(self):
- charset = Charset(u'us-ascii')
- self.assertEqual(str(charset), 'us-ascii')
- self.assertRaises(errors.CharsetError, Charset, 'asc\xffii')
-
-
-
-# Test multilingual MIME headers.
-class TestHeader(TestEmailBase):
- def test_simple(self):
- eq = self.ndiffAssertEqual
- h = Header('Hello World!')
- eq(h.encode(), 'Hello World!')
- h.append(' Goodbye World!')
- eq(h.encode(), 'Hello World! Goodbye World!')
-
- def test_simple_surprise(self):
- eq = self.ndiffAssertEqual
- h = Header('Hello World!')
- eq(h.encode(), 'Hello World!')
- h.append('Goodbye World!')
- eq(h.encode(), 'Hello World! Goodbye World!')
-
- def test_header_needs_no_decoding(self):
- h = 'no decoding needed'
- self.assertEqual(decode_header(h), [(h, None)])
-
- def test_long(self):
- h = Header("I am the very model of a modern Major-General; I've information vegetable, animal, and mineral; I know the kings of England, and I quote the fights historical from Marathon to Waterloo, in order categorical; I'm very well acquainted, too, with matters mathematical; I understand equations, both the simple and quadratical; about binomial theorem I'm teeming with a lot o' news, with many cheerful facts about the square of the hypotenuse.",
- maxlinelen=76)
- for l in h.encode(splitchars=' ').split('\n '):
- self.assertTrue(len(l) <= 76)
-
- def test_multilingual(self):
- eq = self.ndiffAssertEqual
- g = Charset("iso-8859-1")
- cz = Charset("iso-8859-2")
- utf8 = Charset("utf-8")
- g_head = "Die Mieter treten hier ein werden mit einem Foerderband komfortabel den Korridor entlang, an s\xfcdl\xfcndischen Wandgem\xe4lden vorbei, gegen die rotierenden Klingen bef\xf6rdert. "
- cz_head = "Finan\xe8ni metropole se hroutily pod tlakem jejich d\xf9vtipu.. "
- utf8_head = u"\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066\u3044\u307e\u3059\u3002".encode("utf-8")
- h = Header(g_head, g)
- h.append(cz_head, cz)
- h.append(utf8_head, utf8)
- enc = h.encode()
- eq(enc, """\
-=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderband_ko?=
- =?iso-8859-1?q?mfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen_Wan?=
- =?iso-8859-1?q?dgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef=F6?=
- =?iso-8859-1?q?rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hroutily?=
- =?iso-8859-2?q?_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
- =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC?=
- =?utf-8?b?5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn?=
- =?utf-8?b?44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFz?=
- =?utf-8?q?_Nunstuck_git_und_Slotermeyer=3F_Ja!_Beiherhund_das_Oder_die_Fl?=
- =?utf-8?b?aXBwZXJ3YWxkdCBnZXJzcHV0LuOAjeOBqOiogOOBo+OBpuOBhOOBvuOBmQ==?=
- =?utf-8?b?44CC?=""")
- eq(decode_header(enc),
- [(g_head, "iso-8859-1"), (cz_head, "iso-8859-2"),
- (utf8_head, "utf-8")])
- ustr = unicode(h)
- eq(ustr.encode('utf-8'),
- 'Die Mieter treten hier ein werden mit einem Foerderband '
- 'komfortabel den Korridor entlang, an s\xc3\xbcdl\xc3\xbcndischen '
- 'Wandgem\xc3\xa4lden vorbei, gegen die rotierenden Klingen '
- 'bef\xc3\xb6rdert. Finan\xc4\x8dni metropole se hroutily pod '
- 'tlakem jejich d\xc5\xafvtipu.. \xe6\xad\xa3\xe7\xa2\xba\xe3\x81'
- '\xab\xe8\xa8\x80\xe3\x81\x86\xe3\x81\xa8\xe7\xbf\xbb\xe8\xa8\xb3'
- '\xe3\x81\xaf\xe3\x81\x95\xe3\x82\x8c\xe3\x81\xa6\xe3\x81\x84\xe3'
- '\x81\xbe\xe3\x81\x9b\xe3\x82\x93\xe3\x80\x82\xe4\xb8\x80\xe9\x83'
- '\xa8\xe3\x81\xaf\xe3\x83\x89\xe3\x82\xa4\xe3\x83\x84\xe8\xaa\x9e'
- '\xe3\x81\xa7\xe3\x81\x99\xe3\x81\x8c\xe3\x80\x81\xe3\x81\x82\xe3'
- '\x81\xa8\xe3\x81\xaf\xe3\x81\xa7\xe3\x81\x9f\xe3\x82\x89\xe3\x82'
- '\x81\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\xe5\xae\x9f\xe9\x9a\x9b'
- '\xe3\x81\xab\xe3\x81\xaf\xe3\x80\x8cWenn ist das Nunstuck git '
- 'und Slotermeyer? Ja! Beiherhund das Oder die Flipperwaldt '
- 'gersput.\xe3\x80\x8d\xe3\x81\xa8\xe8\xa8\x80\xe3\x81\xa3\xe3\x81'
- '\xa6\xe3\x81\x84\xe3\x81\xbe\xe3\x81\x99\xe3\x80\x82')
- # Test make_header()
- newh = make_header(decode_header(enc))
- eq(newh, enc)
-
- def test_header_ctor_default_args(self):
- eq = self.ndiffAssertEqual
- h = Header()
- eq(h, '')
- h.append('foo', Charset('iso-8859-1'))
- eq(h, '=?iso-8859-1?q?foo?=')
-
- def test_explicit_maxlinelen(self):
- eq = self.ndiffAssertEqual
- hstr = 'A very long line that must get split to something other than at the 76th character boundary to test the non-default behavior'
- h = Header(hstr)
- eq(h.encode(), '''\
-A very long line that must get split to something other than at the 76th
- character boundary to test the non-default behavior''')
- h = Header(hstr, header_name='Subject')
- eq(h.encode(), '''\
-A very long line that must get split to something other than at the
- 76th character boundary to test the non-default behavior''')
- h = Header(hstr, maxlinelen=1024, header_name='Subject')
- eq(h.encode(), hstr)
-
- def test_us_ascii_header(self):
- eq = self.assertEqual
- s = 'hello'
- x = decode_header(s)
- eq(x, [('hello', None)])
- h = make_header(x)
- eq(s, h.encode())
-
- def test_string_charset(self):
- eq = self.assertEqual
- h = Header()
- h.append('hello', 'iso-8859-1')
- eq(h, '=?iso-8859-1?q?hello?=')
-
-## def test_unicode_error(self):
-## raises = self.assertRaises
-## raises(UnicodeError, Header, u'[P\xf6stal]', 'us-ascii')
-## raises(UnicodeError, Header, '[P\xf6stal]', 'us-ascii')
-## h = Header()
-## raises(UnicodeError, h.append, u'[P\xf6stal]', 'us-ascii')
-## raises(UnicodeError, h.append, '[P\xf6stal]', 'us-ascii')
-## raises(UnicodeError, Header, u'\u83ca\u5730\u6642\u592b', 'iso-8859-1')
-
- def test_utf8_shortest(self):
- eq = self.assertEqual
- h = Header(u'p\xf6stal', 'utf-8')
- eq(h.encode(), '=?utf-8?q?p=C3=B6stal?=')
- h = Header(u'\u83ca\u5730\u6642\u592b', 'utf-8')
- eq(h.encode(), '=?utf-8?b?6I+K5Zyw5pmC5aSr?=')
-
- def test_bad_8bit_header(self):
- raises = self.assertRaises
- eq = self.assertEqual
- x = 'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
- raises(UnicodeError, Header, x)
- h = Header()
- raises(UnicodeError, h.append, x)
- eq(str(Header(x, errors='replace')), x)
- h.append(x, errors='replace')
- eq(str(h), x)
-
- def test_encoded_adjacent_nonencoded(self):
- eq = self.assertEqual
- h = Header()
- h.append('hello', 'iso-8859-1')
- h.append('world')
- s = h.encode()
- eq(s, '=?iso-8859-1?q?hello?= world')
- h = make_header(decode_header(s))
- eq(h.encode(), s)
-
- def test_whitespace_eater(self):
- eq = self.assertEqual
- s = 'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztk=?= =?koi8-r?q?=CA?= zz.'
- parts = decode_header(s)
- eq(parts, [('Subject:', None), ('\xf0\xd2\xcf\xd7\xc5\xd2\xcb\xc1 \xce\xc1 \xc6\xc9\xce\xc1\xcc\xd8\xce\xd9\xca', 'koi8-r'), ('zz.', None)])
- hdr = make_header(parts)
- eq(hdr.encode(),
- 'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztnK?= zz.')
-
- def test_broken_base64_header(self):
- raises = self.assertRaises
- s = 'Subject: =?EUC-KR?B?CSixpLDtKSC/7Liuvsax4iC6uLmwMcijIKHaILzSwd/H0SC8+LCjwLsgv7W/+Mj3I ?='
- raises(errors.HeaderParseError, decode_header, s)
-
-
-
-# Test RFC 2231 header parameters (en/de)coding
-class TestRFC2231(TestEmailBase):
- def test_get_param(self):
- eq = self.assertEqual
- msg = self._msgobj('msg_29.txt')
- eq(msg.get_param('title'),
- ('us-ascii', 'en', 'This is even more ***fun*** isn\'t it!'))
- eq(msg.get_param('title', unquote=False),
- ('us-ascii', 'en', '"This is even more ***fun*** isn\'t it!"'))
-
- def test_set_param(self):
- eq = self.assertEqual
- msg = Message()
- msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
- charset='us-ascii')
- eq(msg.get_param('title'),
- ('us-ascii', '', 'This is even more ***fun*** isn\'t it!'))
- msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
- charset='us-ascii', language='en')
- eq(msg.get_param('title'),
- ('us-ascii', 'en', 'This is even more ***fun*** isn\'t it!'))
- msg = self._msgobj('msg_01.txt')
- msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
- charset='us-ascii', language='en')
- self.ndiffAssertEqual(msg.as_string(), """\
-Return-Path: <bbb@zzz.org>
-Delivered-To: bbb@zzz.org
-Received: by mail.zzz.org (Postfix, from userid 889)
- id 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT)
-MIME-Version: 1.0
-Content-Transfer-Encoding: 7bit
-Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
-From: bbb@ddd.com (John X. Doe)
-To: bbb@zzz.org
-Subject: This is a test message
-Date: Fri, 4 May 2001 14:05:44 -0400
-Content-Type: text/plain; charset=us-ascii;
- title*="us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21"
-
-
-Hi,
-
-Do you like this message?
-
--Me
-""")
-
- def test_del_param(self):
- eq = self.ndiffAssertEqual
- msg = self._msgobj('msg_01.txt')
- msg.set_param('foo', 'bar', charset='us-ascii', language='en')
- msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
- charset='us-ascii', language='en')
- msg.del_param('foo', header='Content-Type')
- eq(msg.as_string(), """\
-Return-Path: <bbb@zzz.org>
-Delivered-To: bbb@zzz.org
-Received: by mail.zzz.org (Postfix, from userid 889)
- id 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT)
-MIME-Version: 1.0
-Content-Transfer-Encoding: 7bit
-Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
-From: bbb@ddd.com (John X. Doe)
-To: bbb@zzz.org
-Subject: This is a test message
-Date: Fri, 4 May 2001 14:05:44 -0400
-Content-Type: text/plain; charset="us-ascii";
- title*="us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21"
-
-
-Hi,
-
-Do you like this message?
-
--Me
-""")
-
- def test_rfc2231_get_content_charset(self):
- eq = self.assertEqual
- msg = self._msgobj('msg_32.txt')
- eq(msg.get_content_charset(), 'us-ascii')
-
- def test_rfc2231_no_language_or_charset(self):
- m = '''\
-Content-Transfer-Encoding: 8bit
-Content-Disposition: inline; filename="file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm"
-Content-Type: text/html; NAME*0=file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEM; NAME*1=P_nsmail.htm
-
-'''
- msg = email.message_from_string(m)
- param = msg.get_param('NAME')
- self.assertFalse(isinstance(param, tuple))
- self.assertEqual(
- param,
- 'file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm')
-
- def test_rfc2231_no_language_or_charset_in_filename(self):
- m = '''\
-Content-Disposition: inline;
-\tfilename*0*="''This%20is%20even%20more%20";
-\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
-\tfilename*2="is it not.pdf"
-
-'''
- msg = email.message_from_string(m)
- self.assertEqual(msg.get_filename(),
- 'This is even more ***fun*** is it not.pdf')
-
- def test_rfc2231_no_language_or_charset_in_filename_encoded(self):
- m = '''\
-Content-Disposition: inline;
-\tfilename*0*="''This%20is%20even%20more%20";
-\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
-\tfilename*2="is it not.pdf"
-
-'''
- msg = email.message_from_string(m)
- self.assertEqual(msg.get_filename(),
- 'This is even more ***fun*** is it not.pdf')
-
- def test_rfc2231_partly_encoded(self):
- m = '''\
-Content-Disposition: inline;
-\tfilename*0="''This%20is%20even%20more%20";
-\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
-\tfilename*2="is it not.pdf"
-
-'''
- msg = email.message_from_string(m)
- self.assertEqual(
- msg.get_filename(),
- 'This%20is%20even%20more%20***fun*** is it not.pdf')
-
- def test_rfc2231_partly_nonencoded(self):
- m = '''\
-Content-Disposition: inline;
-\tfilename*0="This%20is%20even%20more%20";
-\tfilename*1="%2A%2A%2Afun%2A%2A%2A%20";
-\tfilename*2="is it not.pdf"
-
-'''
- msg = email.message_from_string(m)
- self.assertEqual(
- msg.get_filename(),
- 'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20is it not.pdf')
-
- def test_rfc2231_no_language_or_charset_in_boundary(self):
- m = '''\
-Content-Type: multipart/alternative;
-\tboundary*0*="''This%20is%20even%20more%20";
-\tboundary*1*="%2A%2A%2Afun%2A%2A%2A%20";
-\tboundary*2="is it not.pdf"
-
-'''
- msg = email.message_from_string(m)
- self.assertEqual(msg.get_boundary(),
- 'This is even more ***fun*** is it not.pdf')
-
- def test_rfc2231_no_language_or_charset_in_charset(self):
- # This is a nonsensical charset value, but tests the code anyway
- m = '''\
-Content-Type: text/plain;
-\tcharset*0*="This%20is%20even%20more%20";
-\tcharset*1*="%2A%2A%2Afun%2A%2A%2A%20";
-\tcharset*2="is it not.pdf"
-
-'''
- msg = email.message_from_string(m)
- self.assertEqual(msg.get_content_charset(),
- 'this is even more ***fun*** is it not.pdf')
-
- def test_rfc2231_bad_encoding_in_filename(self):
- m = '''\
-Content-Disposition: inline;
-\tfilename*0*="bogus'xx'This%20is%20even%20more%20";
-\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
-\tfilename*2="is it not.pdf"
-
-'''
- msg = email.message_from_string(m)
- self.assertEqual(msg.get_filename(),
- 'This is even more ***fun*** is it not.pdf')
-
- def test_rfc2231_bad_encoding_in_charset(self):
- m = """\
-Content-Type: text/plain; charset*=bogus''utf-8%E2%80%9D
-
-"""
- msg = email.message_from_string(m)
- # This should return None because non-ascii characters in the charset
- # are not allowed.
- self.assertEqual(msg.get_content_charset(), None)
-
- def test_rfc2231_bad_character_in_charset(self):
- m = """\
-Content-Type: text/plain; charset*=ascii''utf-8%E2%80%9D
-
-"""
- msg = email.message_from_string(m)
- # This should return None because non-ascii characters in the charset
- # are not allowed.
- self.assertEqual(msg.get_content_charset(), None)
-
- def test_rfc2231_bad_character_in_filename(self):
- m = '''\
-Content-Disposition: inline;
-\tfilename*0*="ascii'xx'This%20is%20even%20more%20";
-\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
-\tfilename*2*="is it not.pdf%E2"
-
-'''
- msg = email.message_from_string(m)
- self.assertEqual(msg.get_filename(),
- u'This is even more ***fun*** is it not.pdf\ufffd')
-
- def test_rfc2231_unknown_encoding(self):
- m = """\
-Content-Transfer-Encoding: 8bit
-Content-Disposition: inline; filename*=X-UNKNOWN''myfile.txt
-
-"""
- msg = email.message_from_string(m)
- self.assertEqual(msg.get_filename(), 'myfile.txt')
-
- def test_rfc2231_single_tick_in_filename_extended(self):
- eq = self.assertEqual
- m = """\
-Content-Type: application/x-foo;
-\tname*0*=\"Frank's\"; name*1*=\" Document\"
-
-"""
- msg = email.message_from_string(m)
- charset, language, s = msg.get_param('name')
- eq(charset, None)
- eq(language, None)
- eq(s, "Frank's Document")
-
- def test_rfc2231_single_tick_in_filename(self):
- m = """\
-Content-Type: application/x-foo; name*0=\"Frank's\"; name*1=\" Document\"
-
-"""
- msg = email.message_from_string(m)
- param = msg.get_param('name')
- self.assertFalse(isinstance(param, tuple))
- self.assertEqual(param, "Frank's Document")
-
- def test_rfc2231_tick_attack_extended(self):
- eq = self.assertEqual
- m = """\
-Content-Type: application/x-foo;
-\tname*0*=\"us-ascii'en-us'Frank's\"; name*1*=\" Document\"
-
-"""
- msg = email.message_from_string(m)
- charset, language, s = msg.get_param('name')
- eq(charset, 'us-ascii')
- eq(language, 'en-us')
- eq(s, "Frank's Document")
-
- def test_rfc2231_tick_attack(self):
- m = """\
-Content-Type: application/x-foo;
-\tname*0=\"us-ascii'en-us'Frank's\"; name*1=\" Document\"
-
-"""
- msg = email.message_from_string(m)
- param = msg.get_param('name')
- self.assertFalse(isinstance(param, tuple))
- self.assertEqual(param, "us-ascii'en-us'Frank's Document")
-
- def test_rfc2231_no_extended_values(self):
- eq = self.assertEqual
- m = """\
-Content-Type: application/x-foo; name=\"Frank's Document\"
-
-"""
- msg = email.message_from_string(m)
- eq(msg.get_param('name'), "Frank's Document")
-
- def test_rfc2231_encoded_then_unencoded_segments(self):
- eq = self.assertEqual
- m = """\
-Content-Type: application/x-foo;
-\tname*0*=\"us-ascii'en-us'My\";
-\tname*1=\" Document\";
-\tname*2*=\" For You\"
-
-"""
- msg = email.message_from_string(m)
- charset, language, s = msg.get_param('name')
- eq(charset, 'us-ascii')
- eq(language, 'en-us')
- eq(s, 'My Document For You')
-
- def test_rfc2231_unencoded_then_encoded_segments(self):
- eq = self.assertEqual
- m = """\
-Content-Type: application/x-foo;
-\tname*0=\"us-ascii'en-us'My\";
-\tname*1*=\" Document\";
-\tname*2*=\" For You\"
-
-"""
- msg = email.message_from_string(m)
- charset, language, s = msg.get_param('name')
- eq(charset, 'us-ascii')
- eq(language, 'en-us')
- eq(s, 'My Document For You')
-
-
-
-def _testclasses():
- mod = sys.modules[__name__]
- return [getattr(mod, name) for name in dir(mod) if name.startswith('Test')]
-
-
-def suite():
- suite = unittest.TestSuite()
- for testclass in _testclasses():
- suite.addTest(unittest.makeSuite(testclass))
- return suite
-
-
-def test_main():
- for testclass in _testclasses():
- run_unittest(testclass)
-
-
-
-if __name__ == '__main__':
- unittest.main(defaultTest='suite')
diff --git a/Lib/email/test/test_email_torture.py b/Lib/email/test/test_email_torture.py
index 9dfa602..544b1bb 100644
--- a/Lib/email/test/test_email_torture.py
+++ b/Lib/email/test/test_email_torture.py
@@ -9,11 +9,11 @@
import sys
import os
import unittest
-from cStringIO import StringIO
+from io import StringIO
from types import ListType
from email.test.test_email import TestEmailBase
-from test.test_support import TestSkipped, run_unittest
+from test.support import TestSkipped, run_unittest
import email
from email import __file__ as testfile
diff --git a/Lib/email/utils.py b/Lib/email/utils.py
index c976021..ac4da37 100644
--- a/Lib/email/utils.py
+++ b/Lib/email/utils.py
@@ -26,8 +26,9 @@ import time
import base64
import random
import socket
-import urllib
+import urllib.parse
import warnings
+from io import StringIO
from email._parseaddr import quote
from email._parseaddr import AddressList as _AddressList
@@ -44,7 +45,7 @@ from email.encoders import _bencode, _qencode
COMMASPACE = ', '
EMPTYSTRING = ''
-UEMPTYSTRING = u''
+UEMPTYSTRING = ''
CRLF = '\r\n'
TICK = "'"
@@ -52,36 +53,9 @@ specialsre = re.compile(r'[][\\()<>@,:;".]')
escapesre = re.compile(r'[][\\()"]')
-
-# Helpers
-
-def _identity(s):
- return s
-
-
-def _bdecode(s):
- """Decodes a base64 string.
-
- This function is equivalent to base64.decodestring and it's retained only
- for backward compatibility. It used to remove the last \\n of the decoded
- string, if it had any (see issue 7143).
- """
- if not s:
- return s
- return base64.decodestring(s)
-
-
-
-def fix_eols(s):
- """Replace all line-ending characters with \\r\\n."""
- # Fix newlines with no preceding carriage return
- s = re.sub(r'(?<!\r)\n', CRLF, s)
- # Fix carriage returns with no following newline
- s = re.sub(r'\r(?!\n)', CRLF, s)
- return s
+# Helpers
-
def formataddr(pair):
"""The inverse of parseaddr(), this takes a 2-tuple of the form
(realname, email_address) and returns the string value suitable
@@ -100,7 +74,7 @@ def formataddr(pair):
return address
-
+
def getaddresses(fieldvalues):
"""Return a list of (REALNAME, EMAIL) for each fieldvalue."""
all = COMMASPACE.join(fieldvalues)
@@ -108,7 +82,7 @@ def getaddresses(fieldvalues):
return a.addresslist
-
+
ecre = re.compile(r'''
=\? # literal =?
(?P<charset>[^?]*?) # non-greedy up to the next ? is the charset
@@ -120,7 +94,7 @@ ecre = re.compile(r'''
''', re.VERBOSE | re.IGNORECASE)
-
+
def formatdate(timeval=None, localtime=False, usegmt=False):
"""Returns a date string as specified by RFC 2822, e.g.:
@@ -173,14 +147,16 @@ def formatdate(timeval=None, localtime=False, usegmt=False):
zone)
-
-def make_msgid(idstring=None):
+
+def make_msgid(idstring=None, domain=None):
"""Returns a string suitable for RFC 2822 compliant Message-ID, e.g:
<20020201195627.33539.96671@nightshade.la.mastaler.com>
Optional idstring if given is a string used to strengthen the
- uniqueness of the message id.
+ uniqueness of the message id. Optional domain if given provides the
+ portion of the message id after the '@'. It defaults to the locally
+ defined hostname.
"""
timeval = time.time()
utcdate = time.strftime('%Y%m%d%H%M%S', time.gmtime(timeval))
@@ -190,12 +166,13 @@ def make_msgid(idstring=None):
idstring = ''
else:
idstring = '.' + idstring
- idhost = socket.getfqdn()
- msgid = '<%s.%s.%s%s@%s>' % (utcdate, pid, randint, idstring, idhost)
+ if domain is None:
+ domain = socket.getfqdn()
+ msgid = '<%s.%s.%s%s@%s>' % (utcdate, pid, randint, idstring, domain)
return msgid
-
+
# These functions are in the standalone mimelib version only because they've
# subsequently been fixed in the latest Python versions. We use this to worm
# around broken older Pythons.
@@ -229,7 +206,7 @@ def unquote(str):
return str
-
+
# RFC2231-related functions - parameter encoding and decoding
def decode_rfc2231(s):
"""Decode string according to RFC 2231"""
@@ -246,8 +223,7 @@ def encode_rfc2231(s, charset=None, language=None):
charset is given but not language, the string is encoded using the empty
string for language.
"""
- import urllib
- s = urllib.quote(s, safe='')
+ s = urllib.parse.quote(s, safe='', encoding=charset or 'ascii')
if charset is None and language is None:
return s
if language is None:
@@ -255,7 +231,8 @@ def encode_rfc2231(s, charset=None, language=None):
return "%s'%s'%s" % (charset, language, s)
-rfc2231_continuation = re.compile(r'^(?P<name>\w+)\*((?P<num>[0-9]+)\*?)?$')
+rfc2231_continuation = re.compile(r'^(?P<name>\w+)\*((?P<num>[0-9]+)\*?)?$',
+ re.ASCII)
def decode_params(params):
"""Decode parameters list according to RFC 2231.
@@ -299,7 +276,10 @@ def decode_params(params):
# language specifiers at the beginning of the string.
for num, s, encoded in continuations:
if encoded:
- s = urllib.unquote(s)
+ # Decode as "latin-1", so the characters in s directly
+ # represent the percent-encoded octet values.
+ # collapse_rfc2231_value treats this as an octet sequence.
+ s = urllib.parse.unquote(s, encoding="latin-1")
extended = True
value.append(s)
value = quote(EMPTYSTRING.join(value))
@@ -312,13 +292,15 @@ def decode_params(params):
def collapse_rfc2231_value(value, errors='replace',
fallback_charset='us-ascii'):
- if isinstance(value, tuple):
- rawval = unquote(value[2])
- charset = value[0] or 'us-ascii'
- try:
- return unicode(rawval, charset, errors)
- except LookupError:
- # XXX charset is unknown to Python.
- return unicode(rawval, fallback_charset, errors)
- else:
+ if not isinstance(value, tuple) or len(value) != 3:
return unquote(value)
+ # While value comes to us as a unicode string, we need it to be a bytes
+ # object. We do not want bytes() normal utf-8 decoder, we want a straight
+ # interpretation of the string as character bytes.
+ charset, language, text = value
+ rawbytes = bytes(text, 'raw-unicode-escape')
+ try:
+ return str(rawbytes, charset, errors)
+ except LookupError:
+ # charset is not a known codec.
+ return unquote(text)