diff options
author | Barry Warsaw <barry@python.org> | 2002-04-10 21:01:31 (GMT) |
---|---|---|
committer | Barry Warsaw <barry@python.org> | 2002-04-10 21:01:31 (GMT) |
commit | 409a4c08b545aa064cf8fe3b8de51404756a301e (patch) | |
tree | 06cf8fe44e1fe28fbc0147635ec41961f2df6515 | |
parent | 68e69338ae19c37bd3e69cb76e107bfa76231e06 (diff) | |
download | cpython-409a4c08b545aa064cf8fe3b8de51404756a301e.zip cpython-409a4c08b545aa064cf8fe3b8de51404756a301e.tar.gz cpython-409a4c08b545aa064cf8fe3b8de51404756a301e.tar.bz2 |
Sync'ing with standalone email package 2.0.1. This adds support for
non-us-ascii character sets in headers and bodies. Some API changes
(with DeprecationWarnings for the old APIs). Better RFC-compliant
implementations of base64 and quoted-printable.
Updated test cases. Documentation updates to follow (after I finish
writing them ;).
-rw-r--r-- | Lib/email/Charset.py | 327 | ||||
-rw-r--r-- | Lib/email/Encoders.py | 10 | ||||
-rw-r--r-- | Lib/email/Errors.py | 2 | ||||
-rw-r--r-- | Lib/email/Generator.py | 54 | ||||
-rw-r--r-- | Lib/email/Header.py | 210 | ||||
-rw-r--r-- | Lib/email/Iterators.py | 2 | ||||
-rw-r--r-- | Lib/email/MIMEBase.py | 2 | ||||
-rw-r--r-- | Lib/email/MIMEImage.py | 2 | ||||
-rw-r--r-- | Lib/email/MIMEMessage.py | 2 | ||||
-rw-r--r-- | Lib/email/MIMEText.py | 35 | ||||
-rw-r--r-- | Lib/email/Message.py | 223 | ||||
-rw-r--r-- | Lib/email/Parser.py | 26 | ||||
-rw-r--r-- | Lib/email/Utils.py | 143 | ||||
-rw-r--r-- | Lib/email/__init__.py | 10 | ||||
-rw-r--r-- | Lib/email/base64MIME.py | 174 | ||||
-rw-r--r-- | Lib/email/quopriMIME.py | 312 | ||||
-rw-r--r-- | Lib/test/data/msg_24.txt | 10 | ||||
-rw-r--r-- | Lib/test/data/msg_25.txt | 117 | ||||
-rw-r--r-- | Lib/test/test_email.py | 642 | ||||
-rw-r--r-- | Lib/test/test_email_codecs.py | 51 |
20 files changed, 2210 insertions, 144 deletions
diff --git a/Lib/email/Charset.py b/Lib/email/Charset.py new file mode 100644 index 0000000..4874597 --- /dev/null +++ b/Lib/email/Charset.py @@ -0,0 +1,327 @@ +# Copyright (C) 2001,2002 Python Software Foundation +# Author: che@debian.org (Ben Gertzfield) + +from types import UnicodeType +from email.Encoders import encode_7or8bit +import email.base64MIME +import email.quopriMIME + + + +# Flags for types of header encodings +QP = 1 # Quoted-Printable +BASE64 = 2 # Base64 + +# In "=?charset?q?hello_world?=", the =?, ?q?, and ?= add up to 7 +MISC_LEN = 7 + +DEFAULT_CHARSET = 'us-ascii' + + + +# Defaults +CHARSETS = { + # input header enc body enc output conv + 'iso-8859-1': (QP, QP, None), + 'iso-8859-2': (QP, QP, None), + 'us-ascii': (None, None, None), + 'big5': (BASE64, BASE64, None), + 'gb2312': (BASE64, BASE64, None), + 'euc-jp': (BASE64, None, 'iso-2022-jp'), + 'shift_jis': (BASE64, None, 'iso-2022-jp'), + 'iso-2022-jp': (BASE64, None, None), + 'koi8-r': (BASE64, BASE64, None), + 'utf-8': (BASE64, BASE64, 'utf-8'), + } + +# Aliases for other commonly-used names for character sets. Map +# them to the real ones used in email. +ALIASES = { + 'latin_1': 'iso-8859-1', + 'latin-1': 'iso-8859-1', + 'ascii': 'us-ascii', + } + +# Map charsets to their Unicode codec strings. Note that the Japanese +# examples included below do not (yet) come with Python! They are available +# from http://pseudo.grad.sccs.chukyo-u.ac.jp/~kajiyama/python/ + +# The Chinese and Korean codecs are available from SourceForge: +# +# http://sourceforge.net/projects/python-codecs/ +# +# although you'll need to check them out of cvs since they haven't been file +# released yet. You might also try to use +# +# http://www.freshports.org/port-description.php3?port=6702 +# +# if you can get logged in. AFAICT, both the Chinese and Korean codecs are +# fairly experimental at this point. 
+CODEC_MAP = { + 'euc-jp': 'japanese.euc-jp', + 'iso-2022-jp': 'japanese.iso-2022-jp', + 'shift_jis': 'japanese.shift_jis', + 'gb2132': 'eucgb2312_cn', + 'big5': 'big5_tw', + 'utf-8': 'utf-8', + # Hack: We don't want *any* conversion for stuff marked us-ascii, as all + # sorts of garbage might be sent to us in the guise of 7-bit us-ascii. + # Let that stuff pass through without conversion to/from Unicode. + 'us-ascii': None, + } + + + +# Convenience functions for extending the above mappings +def add_charset(charset, header_enc=None, body_enc=None, output_charset=None): + """Add charset properties to the global map. + + charset is the input character set, and must be the canonical name of a + character set. + + Optional header_enc and body_enc is either Charset.QP for + quoted-printable, Charset.BASE64 for base64 encoding, or None for no + encoding. It describes how message headers and message bodies in the + input charset are to be encoded. Default is no encoding. + + Optional output_charset is the character set that the output should be + in. Conversions will proceed from input charset, to Unicode, to the + output charset when the method Charset.convert() is called. The default + is to output in the same character set as the input. + + Both input_charset and output_charset must have Unicode codec entries in + the module's charset-to-codec mapping; use add_codec(charset, codecname) + to add codecs the module does not know about. See the codec module's + documentation for more information. + """ + CHARSETS[charset] = (header_enc, body_enc, output_charset) + + +def add_alias(alias, canonical): + """Add a character set alias. + + alias is the alias name, e.g. latin-1 + canonical is the character set's canonical name, e.g. iso-8859-1 + """ + ALIASES[alias] = canonical + + +def add_codec(charset, codecname): + """Add a codec that map characters in the given charset to/from Unicode. + + charset is the canonical name of a character set. 
codecname is the name + of a Python codec, as appropriate for the second argument to the unicode() + built-in, or to the .encode() method of a Unicode string. + """ + CODEC_MAP[charset] = codecname + + + +class Charset: + """Map character sets to their email properties. + + This class provides information about the requirements imposed on email + for a specific character set. It also provides convenience routines for + converting between character sets, given the availability of the + applicable codecs. Given a character set, it will do its best to provide + information on how to use that character set in an email. + + Certain character sets must be encoded with quoted-printable or base64 + when used in email headers or bodies. Certain character sets must be + converted outright, and are not allowed in email. Instances of this + class expose the following information about a character set: + + input_charset: The initial character set specified. Common aliases + are converted to their `official' email names (e.g. latin_1 + is converted to iso-8859-1). Defaults to 7-bit us-ascii. + + header_encoding: If the character set must be encoded before it can be + used in an email header, this attribute will be set to + Charset.QP (for quoted-printable) or Charset.BASE64 (for + base64 encoding). Otherwise, it will be None. + + body_encoding: Same as header_encoding, but describes the encoding for the + mail message's body, which indeed may be different than the + header encoding. + + output_charset: Some character sets must be converted before they can be + used in email headers or bodies. If the input_charset is + one of them, this attribute will contain the name of the + charset output will be converted to. Otherwise, it will + be None. + + input_codec: The name of the Python codec used to convert the + input_charset to Unicode. If no conversion codec is + necessary, this attribute will be None. 
+ + output_codec: The name of the Python codec used to convert Unicode + to the output_charset. If no conversion codec is necessary, + this attribute will have the same value as the input_codec. + """ + def __init__(self, input_charset=DEFAULT_CHARSET): + # Set the input charset after filtering through the aliases + self.input_charset = ALIASES.get(input_charset, input_charset) + # We can try to guess which encoding and conversion to use by the + # charset_map dictionary. Try that first, but let the user override + # it. + henc, benc, conv = CHARSETS.get(self.input_charset, + (BASE64, BASE64, None)) + # Set the attributes, allowing the arguments to override the default. + self.header_encoding = henc + self.body_encoding = benc + self.output_charset = ALIASES.get(conv, conv) + # Now set the codecs. If one isn't defined for input_charset, + # guess and try a Unicode codec with the same name as input_codec. + self.input_codec = CODEC_MAP.get(self.input_charset, + self.input_charset) + self.output_codec = CODEC_MAP.get(self.output_charset, + self.input_codec) + + def __str__(self): + return self.input_charset.lower() + + def __eq__(self, other): + return str(self) == str(other).lower() + + def __ne__(self, other): + return not self.__eq__(other) + + def get_body_encoding(self): + """Return the content-transfer-encoding used for body encoding. + + This is either the string `quoted-printable' or `base64' depending on + the encoding used, or it is a function in which case you should call + the function with a single argument, the Message object being + encoded. The function should then set the Content-Transfer-Encoding: + header itself to whatever is appropriate. + + Returns "quoted-printable" if self.body_encoding is QP. + Returns "base64" if self.body_encoding is BASE64. + Returns "7bit" otherwise. 
+ """ + if self.body_encoding == QP: + return 'quoted-printable' + elif self.body_encoding == BASE64: + return 'base64' + else: + return encode_7or8bit + + def convert(self, s): + """Convert a string from the input_codec to the output_codec.""" + if self.input_codec <> self.output_codec: + return unicode(s, self.input_codec).encode(self.output_codec) + else: + return s + + def to_splittable(self, s): + """Convert a possibly multibyte string to a safely splittable format. + + Uses the input_codec to try and convert the string to Unicode, so it + can be safely split on character boundaries (even for double-byte + characters). + + Returns the string untouched if we don't know how to convert it to + Unicode with the input_charset. + + Characters that could not be converted to Unicode will be replaced + with the Unicode replacement character U+FFFD. + """ + if isinstance(s, UnicodeType) or self.input_codec is None: + return s + try: + return unicode(s, self.input_codec, 'replace') + except LookupError: + # Input codec not installed on system, so return the original + # string unchanged. + return s + + def from_splittable(self, ustr, to_output=1): + """Convert a splittable string back into an encoded string. + + Uses the proper codec to try and convert the string from + Unicode back into an encoded format. Return the string as-is + if it is not Unicode, or if it could not be encoded from + Unicode. + + Characters that could not be converted from Unicode will be replaced + with an appropriate character (usually '?'). + + If to_output is true, uses output_codec to convert to an encoded + format. If to_output is false, uses input_codec. to_output defaults + to 1. 
+ """ + if to_output: + codec = self.output_codec + else: + codec = self.input_codec + if not isinstance(ustr, UnicodeType) or codec is None: + return ustr + try: + return ustr.encode(codec, 'replace') + except LookupError: + # Output codec not installed + return ustr + + def get_output_charset(self): + """Return the output character set. + + This is self.output_charset if that is set, otherwise it is + self.input_charset. + """ + return self.output_charset or self.input_charset + + def encoded_header_len(self, s): + """Return the length of the encoded header string.""" + cset = self.get_output_charset() + # The len(s) of a 7bit encoding is len(s) + if self.header_encoding is BASE64: + return email.base64MIME.base64_len(s) + len(cset) + MISC_LEN + elif self.header_encoding is QP: + return email.quopriMIME.header_quopri_len(s) + len(cset) + MISC_LEN + else: + return len(s) + + def header_encode(self, s, convert=0): + """Header-encode a string, optionally converting it to output_charset. + + If convert is true, the string will be converted from the input + charset to the output charset automatically. This is not useful for + multibyte character sets, which have line length issues (multibyte + characters must be split on a character, not a byte boundary); use the + high-level Header class to deal with these issues. convert defaults + to 0. + + The type of encoding (base64 or quoted-printable) will be based on + self.header_encoding. + """ + cset = self.get_output_charset() + if convert: + s = self.convert(s) + # 7bit/8bit encodings return the string unchanged (modulo conversions) + if self.header_encoding is BASE64: + return email.base64MIME.header_encode(s, cset) + elif self.header_encoding is QP: + return email.quopriMIME.header_encode(s, cset) + else: + return s + + def body_encode(self, s, convert=1): + """Body-encode a string and convert it to output_charset. 
+ + If convert is true (the default), the string will be converted from + the input charset to output charset automatically. Unlike + header_encode(), there are no issues with byte boundaries and + multibyte charsets in email bodies, so this is usually pretty safe. + + The type of encoding (base64 or quoted-printable) will be based on + self.body_encoding. + """ + if convert: + s = self.convert(s) + # 7bit/8bit encodings return the string unchanged (module conversions) + if self.body_encoding is BASE64: + return email.base64MIME.body_encode(s) + elif self.header_encoding is QP: + return email.quopriMIME.body_encode(s) + else: + return s diff --git a/Lib/email/Encoders.py b/Lib/email/Encoders.py index d9cd42d..f09affa 100644 --- a/Lib/email/Encoders.py +++ b/Lib/email/Encoders.py @@ -1,4 +1,4 @@ -# Copyright (C) 2001 Python Software Foundation +# Copyright (C) 2001,2002 Python Software Foundation # Author: barry@zope.com (Barry Warsaw) """Module containing encoding functions for Image.Image and Text.Text. @@ -11,7 +11,9 @@ from quopri import encodestring as _encodestring # Helpers def _qencode(s): - return _encodestring(s, quotetabs=1) + enc = _encodestring(s, quotetabs=1) + # Must encode spaces, which quopri.encodestring() doesn't do + return enc.replace(' ', '=20') def _bencode(s): @@ -54,6 +56,10 @@ def encode_quopri(msg): def encode_7or8bit(msg): """Set the Content-Transfer-Encoding: header to 7bit or 8bit.""" orig = msg.get_payload() + if orig is None: + # There's no payload. For backwards compatibility we use 7bit + msg['Content-Transfer-Encoding'] = '7bit' + return # We play a trick to make this go fast. If encoding to ASCII succeeds, we # know the data must be 7bit, otherwise treat it as 8bit. 
try: diff --git a/Lib/email/Errors.py b/Lib/email/Errors.py index 71d7663..e3a3666 100644 --- a/Lib/email/Errors.py +++ b/Lib/email/Errors.py @@ -1,4 +1,4 @@ -# Copyright (C) 2001 Python Software Foundation +# Copyright (C) 2001,2002 Python Software Foundation # Author: barry@zope.com (Barry Warsaw) """email package exception classes. diff --git a/Lib/email/Generator.py b/Lib/email/Generator.py index 981e0ff..dbbcabc 100644 --- a/Lib/email/Generator.py +++ b/Lib/email/Generator.py @@ -1,4 +1,4 @@ -# Copyright (C) 2001 Python Software Foundation +# Copyright (C) 2001,2002 Python Software Foundation # Author: barry@zope.com (Barry Warsaw) """Classes to generate plain text from a message object tree. @@ -166,30 +166,33 @@ class Generator: return text rtn = [] for line in text.split('\n'): + splitline = [] # Short lines can remain unchanged if len(line.replace('\t', SPACE8)) <= maxheaderlen: - rtn.append(line) - SEMINLTAB.join(rtn) + splitline.append(line) + rtn.append(SEMINLTAB.join(splitline)) else: - oldlen = len(text) + oldlen = len(line) # Try to break the line on semicolons, but if that doesn't # work, try to split on folding whitespace. - while len(text) > maxheaderlen: - i = text.rfind(';', 0, maxheaderlen) + while len(line) > maxheaderlen: + i = line.rfind(';', 0, maxheaderlen) if i < 0: break - rtn.append(text[:i]) - text = text[i+1:].lstrip() - if len(text) <> oldlen: + splitline.append(line[:i]) + line = line[i+1:].lstrip() + if len(line) <> oldlen: # Splitting on semis worked - rtn.append(text) - return SEMINLTAB.join(rtn) + splitline.append(line) + rtn.append(SEMINLTAB.join(splitline)) + continue # Splitting on semis didn't help, so try to split on # whitespace. 
- parts = re.split(r'(\s+)', text) + parts = re.split(r'(\s+)', line) # Watch out though for "Header: longnonsplittableline" if parts[0].endswith(':') and len(parts) == 3: - return text + rtn.append(line) + continue first = parts.pop(0) sublines = [first] acc = len(first) @@ -203,13 +206,14 @@ class Generator: else: # Split it here, but don't forget to ignore the # next whitespace-only part - rtn.append(EMPTYSTRING.join(sublines)) + splitline.append(EMPTYSTRING.join(sublines)) del parts[0] first = parts.pop(0) sublines = [first] acc = len(first) - rtn.append(EMPTYSTRING.join(sublines)) - return NLTAB.join(rtn) + splitline.append(EMPTYSTRING.join(sublines)) + rtn.append(NLTAB.join(splitline)) + return NL.join(rtn) # # Handlers for writing types and subtypes @@ -219,6 +223,9 @@ class Generator: payload = msg.get_payload() if payload is None: return + cset = msg.get_charset() + if cset is not None: + payload = cset.body_encode(payload) if not isinstance(payload, StringType): raise TypeError, 'string payload expected: %s' % type(payload) if self._mangle_from_: @@ -233,7 +240,18 @@ class Generator: # together, and then make sure that the boundary we've chosen isn't # present in the payload. msgtexts = [] - for part in msg.get_payload(): + subparts = msg.get_payload() + if subparts is None: + # Nothing has ever been attached + boundary = msg.get_boundary(failobj=_make_boundary()) + print >> self._fp, '--' + boundary + print >> self._fp, '\n' + print >> self._fp, '--' + boundary + '--' + return + elif not isinstance(subparts, ListType): + # Scalar payload + subparts = [subparts] + for part in subparts: s = StringIO() g = self.__class__(s, self._mangle_from_, self.__maxheaderlen) g(part, unixfrom=0) @@ -365,7 +383,7 @@ class DecodedGenerator(Generator): # Helper -def _make_boundary(self, text=None): +def _make_boundary(text=None): # Craft a random boundary. If text is given, ensure that the chosen # boundary doesn't appear in the text. 
boundary = ('=' * 15) + repr(random.random()).split('.')[1] + '==' diff --git a/Lib/email/Header.py b/Lib/email/Header.py new file mode 100644 index 0000000..097b978 --- /dev/null +++ b/Lib/email/Header.py @@ -0,0 +1,210 @@ +# Copyright (C) 2002 Python Software Foundation +# Author: che@debian.org (Ben Gertzfield) + +"""Header encoding and decoding functionality.""" + +import re +import email.quopriMIME +import email.base64MIME +from email.Charset import Charset + +CRLFSPACE = '\r\n ' +CRLF = '\r\n' +NLSPACE = '\n ' + +MAXLINELEN = 76 + +ENCODE = 1 +DECODE = 2 + +# Match encoded-word strings in the form =?charset?q?Hello_World?= +ecre = re.compile(r''' + =\? # literal =? + (?P<charset>[^?]*?) # non-greedy up to the next ? is the charset + \? # literal ? + (?P<encoding>[qb]) # either a "q" or a "b", case insensitive + \? # literal ? + (?P<encoded>.*?) # non-greedy up to the next ?= is the encoded string + \?= # literal ?= + ''', re.VERBOSE | re.IGNORECASE) + + + +# Helpers +_max_append = email.quopriMIME._max_append + + + +def decode_header(header): + """Decode a message header value without converting charset. + + Returns a list of (decoded_string, charset) pairs containing each of the + decoded parts of the header. Charset is None for non-encoded parts of the + header, otherwise a lower-case string containing the name of the character + set specified in the encoded string. + """ + # If no encoding, just return the header + header = str(header) + if not ecre.search(header): + return [(header, None)] + + decoded = [] + dec = '' + for line in header.splitlines(): + # This line might not have an encoding in it + if not ecre.search(line): + decoded.append((line, None)) + continue + + parts = ecre.split(line) + while parts: + unenc = parts.pop(0).strip() + if unenc: + # Should we continue a long line? 
+ if decoded and decoded[-1][1] is None: + decoded[-1] = (decoded[-1][0] + dec, None) + else: + decoded.append((unenc, None)) + if parts: + charset, encoding = [s.lower() for s in parts[0:2]] + encoded = parts[2] + dec = '' + if encoding == 'q': + dec = email.quopriMIME.header_decode(encoded) + elif encoding == 'b': + dec = email.base64MIME.decode(encoded) + else: + dec = encoded + + if decoded and decoded[-1][1] == charset: + decoded[-1] = (decoded[-1][0] + dec, decoded[-1][1]) + else: + decoded.append((dec, charset)) + del parts[0:3] + return decoded + + + +class Header: + def __init__(self, s, charset=None, maxlinelen=MAXLINELEN, + header_name=None): + """Create a MIME-compliant header that can contain many languages. + + Specify the initial header value in s. Specify its character set as a + Charset object in the charset argument. If none, a default Charset + instance will be used. + + You can later append to the header with append(s, charset) below; + charset does not have to be the same as the one initially specified + here. In fact, it's optional, and if not given, defaults to the + charset specified in the constructor. + + The maximum line length can either be specified by maxlinelen, or you + can pass in the name of the header field (e.g. "Subject") to let this + class guess the best line length to use to prevent wrapping. The + default maxlinelen is 76. + """ + if charset is None: + charset = Charset() + self._charset = charset + # BAW: I believe `chunks' and `maxlinelen' should be non-public. + self._chunks = [] + self.append(s, charset) + self._maxlinelen = maxlinelen + if header_name is not None: + self.guess_maxlinelen(header_name) + + def __str__(self): + """A synonym for self.encode().""" + return self.encode() + + def guess_maxlinelen(self, s=None): + """Guess the maximum length to make each header line. + + Given a header name (e.g. "Subject"), set this header's maximum line + length to an appropriate length to avoid line wrapping. 
If s is not + given, return the previous maximum line length and don't set it. + + Returns the new maximum line length. + """ + # BAW: is this semantic necessary? + if s is not None: + self._maxlinelen = MAXLINELEN - len(s) - 2 + return self._maxlinelen + + def append(self, s, charset=None): + """Append string s with Charset charset to the MIME header. + + charset defaults to the one given in the class constructor. + """ + if charset is None: + charset = self._charset + self._chunks.append((s, charset)) + + def _split(self, s, charset): + # Split up a header safely for use with encode_chunks. BAW: this + # appears to be a private convenience method. + splittable = charset.to_splittable(s) + encoded = charset.from_splittable(splittable) + + if charset.encoded_header_len(encoded) < self._maxlinelen: + return [(encoded, charset)] + else: + # Divide and conquer. BAW: halfway depends on integer division. + # When porting to Python 2.2, use the // operator. + halfway = len(splittable) // 2 + first = charset.from_splittable(splittable[:halfway], 0) + last = charset.from_splittable(splittable[halfway:], 0) + return self._split(first, charset) + self._split(last, charset) + + def encode(self): + """Encode a message header, possibly converting charset and encoding. + + There are many issues involved in converting a given string for use in + an email header. Only certain character sets are readable in most + email clients, and as header strings can only contain a subset of + 7-bit ASCII, care must be taken to properly convert and encode (with + Base64 or quoted-printable) header strings. In addition, there is a + 75-character length limit on any given encoded header field, so + line-wrapping must be performed, even with double-byte character sets. + + This method will do its best to convert the string to the correct + character set used in email, and encode and line wrap it safely with + the appropriate scheme for that character set. 
+ + If the given charset is not known or an error occurs during + conversion, this function will return the header untouched. + """ + newchunks = [] + for s, charset in self._chunks: + newchunks += self._split(s, charset) + self._chunks = newchunks + return self.encode_chunks() + + def encode_chunks(self): + """MIME-encode a header with many different charsets and/or encodings. + + Given a list of pairs (string, charset), return a MIME-encoded string + suitable for use in a header field. Each pair may have different + charsets and/or encodings, and the resulting header will accurately + reflect each setting. + + Each encoding can be email.Utils.QP (quoted-printable, for ASCII-like + character sets like iso-8859-1), email.Utils.BASE64 (Base64, for + non-ASCII like character sets like KOI8-R and iso-2022-jp), or None + (no encoding). + + Each pair will be represented on a separate line; the resulting string + will be in the format: + + "=?charset1?q?Mar=EDa_Gonz=E1lez_Alonso?=\n + =?charset2?b?SvxyZ2VuIEL2aW5n?=" + """ + chunks = [] + for header, charset in self._chunks: + if charset is None: + _max_append(chunks, header, self._maxlinelen, ' ') + else: + _max_append(chunks, charset.header_encode(header, 0), + self._maxlinelen, ' ') + return NLSPACE.join(chunks) diff --git a/Lib/email/Iterators.py b/Lib/email/Iterators.py index a64495d..515bac9 100644 --- a/Lib/email/Iterators.py +++ b/Lib/email/Iterators.py @@ -1,4 +1,4 @@ -# Copyright (C) 2001 Python Software Foundation +# Copyright (C) 2001,2002 Python Software Foundation # Author: barry@zope.com (Barry Warsaw) """Various types of useful iterators and generators. diff --git a/Lib/email/MIMEBase.py b/Lib/email/MIMEBase.py index 33216f6..28816e8 100644 --- a/Lib/email/MIMEBase.py +++ b/Lib/email/MIMEBase.py @@ -1,4 +1,4 @@ -# Copyright (C) 2001 Python Software Foundation +# Copyright (C) 2001,2002 Python Software Foundation # Author: barry@zope.com (Barry Warsaw) """Base class for MIME specializations. 
diff --git a/Lib/email/MIMEImage.py b/Lib/email/MIMEImage.py index 963da23..f0e7931a 100644 --- a/Lib/email/MIMEImage.py +++ b/Lib/email/MIMEImage.py @@ -1,4 +1,4 @@ -# Copyright (C) 2001 Python Software Foundation +# Copyright (C) 2001,2002 Python Software Foundation # Author: barry@zope.com (Barry Warsaw) """Class representing image/* type MIME documents. diff --git a/Lib/email/MIMEMessage.py b/Lib/email/MIMEMessage.py index fc4b2c6..89da925 100644 --- a/Lib/email/MIMEMessage.py +++ b/Lib/email/MIMEMessage.py @@ -1,4 +1,4 @@ -# Copyright (C) 2001 Python Software Foundation +# Copyright (C) 2001,2002 Python Software Foundation # Author: barry@zope.com (Barry Warsaw) """Class representing message/* MIME documents. diff --git a/Lib/email/MIMEText.py b/Lib/email/MIMEText.py index ccce9fb..8669d28 100644 --- a/Lib/email/MIMEText.py +++ b/Lib/email/MIMEText.py @@ -1,9 +1,10 @@ -# Copyright (C) 2001 Python Software Foundation +# Copyright (C) 2001,2002 Python Software Foundation # Author: barry@zope.com (Barry Warsaw) """Class representing text/* type MIME documents. """ +import warnings import MIMEBase from Encoders import encode_7or8bit @@ -13,7 +14,7 @@ class MIMEText(MIMEBase.MIMEBase): """Class for generating text/* type MIME documents.""" def __init__(self, _text, _subtype='plain', _charset='us-ascii', - _encoder=encode_7or8bit): + _encoder=None): """Create a text/* type MIME document. _text is the string for this message object. If the text does not end @@ -22,20 +23,26 @@ class MIMEText(MIMEBase.MIMEBase): _subtype is the MIME sub content type, defaulting to "plain". _charset is the character set parameter added to the Content-Type: - header. This defaults to "us-ascii". - - _encoder is a function which will perform the actual encoding for - transport of the text data. It takes one argument, which is this - Text instance. It should use get_payload() and set_payload() to - change the payload to the encoded form. 
It should also add any - Content-Transfer-Encoding: or other headers to the message as - necessary. The default encoding doesn't actually modify the payload, - but it does set Content-Transfer-Encoding: to either `7bit' or `8bit' - as appropriate. + header. This defaults to "us-ascii". Note that as a side-effect, the + Content-Transfer-Encoding: header will also be set. + + The use of the _encoder is deprecated. The encoding of the payload, + and the setting of the character set parameter now happens implicitly + based on the _charset argument. If _encoder is supplied, then a + DeprecationWarning is used, and the _encoder functionality may + override any header settings indicated by _charset. This is probably + not what you want. """ MIMEBase.MIMEBase.__init__(self, 'text', _subtype, **{'charset': _charset}) if _text and _text[-1] <> '\n': _text += '\n' - self.set_payload(_text) - _encoder(self) + self.set_payload(_text, _charset) + if _encoder is not None: + warnings.warn('_encoder argument is obsolete.', + DeprecationWarning, 2) + # Because set_payload() with a _charset will set its own + # Content-Transfer-Encoding: header, we need to delete the + # existing one or will end up with two of them. :( + del self['content-transfer-encoding'] + _encoder(self) diff --git a/Lib/email/Message.py b/Lib/email/Message.py index 91931a1..71d10c4 100644 --- a/Lib/email/Message.py +++ b/Lib/email/Message.py @@ -1,23 +1,47 @@ -# Copyright (C) 2001 Python Software Foundation +# Copyright (C) 2001,2002 Python Software Foundation # Author: barry@zope.com (Barry Warsaw) """Basic message object for the email package object model. """ -from __future__ import generators - import re -import base64 -import quopri +import warnings from cStringIO import StringIO -from types import ListType +from types import ListType, StringType # Intrapackage imports import Errors import Utils +import Charset SEMISPACE = '; ' + +# Regular expression used to split header parameters. 
BAW: this may be too +# simple. It isn't strictly RFC 2045 (section 5.1) compliant, but it catches +# most headers found in the wild. We may eventually need a full-fledged +# parser. paramre = re.compile(r'\s*;\s*') +# Regular expression that matches `special' characters in parameters, the +# existence of which forces quoting of the parameter value. +tspecials = re.compile(r'[ \(\)<>@,;:\\"/\[\]\?=]') + + + +# Helper function +def _formatparam(param, value=None, quote=1): + """Convenience function to format and return a key=value pair. + + Will quote the value if needed or if quote is true. + """ + if value is not None and len(value) > 0: + # BAW: Please check this. I think that if quote is set it should + # force quoting even if not necessary. + if quote or tspecials.search(value): + return '%s="%s"' % (param, Utils.quote(value)) + else: + return '%s=%s' % (param, value) + else: + return param @@ -39,6 +63,7 @@ class Message: self._headers = [] self._unixfrom = None self._payload = None + self._charset = None # Defaults for multipart messages self.preamble = self.epilogue = None @@ -83,6 +108,8 @@ class Message: If the current payload is empty, then the current payload will be made a scalar, set to the given value. """ + warnings.warn('add_payload() is deprecated, use attach() instead.', + DeprecationWarning, 2) if self._payload is None: self._payload = payload elif type(self._payload) is ListType: @@ -93,8 +120,18 @@ class Message: else: self._payload = [self._payload, payload] - # A useful synonym - attach = add_payload + def attach(self, payload): + """Add the given payload to the current payload. + + The current payload will always be a list of objects after this method + is called. If you want to set the payload to a scalar object + (e.g. because you're attaching a message/rfc822 subpart), use + set_payload() instead. 
+ """ + if self._payload is None: + self._payload = [payload] + else: + self._payload.append(payload) def get_payload(self, i=None, decode=0): """Return the current payload exactly as is. @@ -128,10 +165,58 @@ class Message: return payload - def set_payload(self, payload): - """Set the payload to the given value.""" + def set_payload(self, payload, charset=None): + """Set the payload to the given value. + + Optionally set the charset, which must be a Charset instance.""" self._payload = payload + if charset is not None: + self.set_charset(charset) + + def set_charset(self, charset): + """Set the charset of the payload to a given character set. + + charset can be a string or a Charset object. If it is a string, it + will be converted to a Charset object by calling Charset's + constructor. If charset is None, the charset parameter will be + removed from the Content-Type: field. Anything else will generate a + TypeError. + + The message will be assumed to be a text message encoded with + charset.input_charset. It will be converted to charset.output_charset + and encoded properly, if needed, when generating the plain text + representation of the message. MIME headers (MIME-Version, + Content-Type, Content-Transfer-Encoding) will be added as needed. + """ + if charset is None: + self.del_param('charset') + self._charset = None + return + if isinstance(charset, StringType): + charset = Charset.Charset(charset) + if not isinstance(charset, Charset.Charset): + raise TypeError, charset + # BAW: should we accept strings that can serve as arguments to the + # Charset constructor? 
+ self._charset = charset + if not self.has_key('MIME-Version'): + self.add_header('MIME-Version', '1.0') + if not self.has_key('Content-Type'): + self.add_header('Content-Type', 'text/plain', + charset=charset.get_output_charset()) + else: + self.set_param('charset', charset.get_output_charset()) + if not self.has_key('Content-Transfer-Encoding'): + cte = charset.get_body_encoding() + if callable(cte): + cte(self) + else: + self.add_header('Content-Transfer-Encoding', cte) + def get_charset(self): + """Return the Charset object associated with the message's payload.""" + return self._charset + # # MAPPING INTERFACE (partial) # @@ -257,7 +342,7 @@ class Message: if v is None: parts.append(k.replace('_', '-')) else: - parts.append('%s="%s"' % (k.replace('_', '-'), v)) + parts.append(_formatparam(k.replace('_', '-'), v)) if _value is not None: parts.insert(0, _value) self._headers.append((_name, SEMISPACE.join(parts))) @@ -308,6 +393,8 @@ class Message: for p in paramre.split(value): try: name, val = p.split('=', 1) + name = name.rstrip() + val = val.lstrip() except ValueError: # Must have been a bare attribute name = p @@ -315,26 +402,29 @@ class Message: params.append((name, val)) return params - def get_params(self, failobj=None, header='content-type'): + def get_params(self, failobj=None, header='content-type', unquote=1): """Return the message's Content-Type: parameters, as a list. The elements of the returned list are 2-tuples of key/value pairs, as split on the `=' sign. The left hand side of the `=' is the key, while the right hand side is the value. If there is no `=' sign in the parameter the value is the empty string. The value is always - unquoted. + unquoted, unless unquote is set to a false value. Optional failobj is the object to return if there is no Content-Type: header. Optional header is the header to search instead of - Content-Type: + Content-Type:. 
""" missing = [] params = self._get_params_preserve(missing, header) if params is missing: return failobj - return [(k, Utils.unquote(v)) for k, v in params] + if unquote: + return [(k, Utils.unquote(v)) for k, v in params] + else: + return params - def get_param(self, param, failobj=None, header='content-type'): + def get_param(self, param, failobj=None, header='content-type', unquote=1): """Return the parameter value if found in the Content-Type: header. Optional failobj is the object to return if there is no Content-Type: @@ -342,15 +432,112 @@ class Message: Content-Type: Parameter keys are always compared case insensitively. Values are - always unquoted. + always unquoted, unless unquote is set to a false value. """ if not self.has_key(header): return failobj for k, v in self._get_params_preserve(failobj, header): if k.lower() == param.lower(): - return Utils.unquote(v) + if unquote: + return Utils.unquote(v) + else: + return v return failobj + def set_param(self, param, value, header='Content-Type', requote=1): + """Set a parameter in the Content-Type: header. + + If the parameter already exists in the header, its value will be + replaced with the new value. + + If header is Content-Type: and has not yet been defined in this + message, it will be set to "text/plain" and the new parameter and + value will be appended, as per RFC 2045. + + An alternate header can specified in the header argument, and + all parameters will be quoted as appropriate unless requote is + set to a false value. 
+ """ + if not self.has_key(header) and header.lower() == 'content-type': + ctype = 'text/plain' + else: + ctype = self.get(header) + if not self.get_param(param, header=header): + if not ctype: + ctype = _formatparam(param, value, requote) + else: + ctype = SEMISPACE.join( + [ctype, _formatparam(param, value, requote)]) + else: + ctype = '' + for old_param, old_value in self.get_params(header=header, + unquote=requote): + append_param = '' + if old_param.lower() == param.lower(): + append_param = _formatparam(param, value, requote) + else: + append_param = _formatparam(old_param, old_value, requote) + if not ctype: + ctype = append_param + else: + ctype = SEMISPACE.join([ctype, append_param]) + if ctype <> self.get(header): + del self[header] + self[header] = ctype + + def del_param(self, param, header='content-type', requote=1): + """Remove the given parameter completely from the Content-Type header. + + The header will be re-written in place without param or its value. + All values will be quoted as appropriate unless requote is set to a + false value. + """ + if not self.has_key(header): + return + new_ctype = '' + for p, v in self.get_params(header, unquote=requote): + if p.lower() <> param.lower(): + if not new_ctype: + new_ctype = _formatparam(p, v, requote) + else: + new_ctype = SEMISPACE.join([new_ctype, + _formatparam(p, v, requote)]) + if new_ctype <> self.get(header): + del self[header] + self[header] = new_ctype + + def set_type(self, type, header='Content-Type', requote=1): + """Set the main type and subtype for the Content-Type: header. + + type must be a string in the form "maintype/subtype", otherwise a + ValueError is raised. + + This method replaces the Content-Type: header, keeping all the + parameters in place. If requote is false, this leaves the existing + header's quoting as is. Otherwise, the parameters will be quoted (the + default). + + An alternate header can be specified in the header argument. 
When the + Content-Type: header is set, we'll always also add a MIME-Version: + header. + """ + # BAW: should we be strict? + if not type.count('/') == 1: + raise ValueError + # Set the Content-Type: you get a MIME-Version: + if header.lower() == 'content-type': + del self['mime-version'] + self['MIME-Version'] = '1.0' + if not self.has_key(header): + self[header] = type + return + params = self.get_params(header, unquote=requote) + del self[header] + self[header] = type + # Skip the first param; it's the old type. + for p, v in params[1:]: + self.set_param(p, v, header, requote) + def get_filename(self, failobj=None): """Return the filename associated with the payload if present. diff --git a/Lib/email/Parser.py b/Lib/email/Parser.py index 2f131d6..7177dfc 100644 --- a/Lib/email/Parser.py +++ b/Lib/email/Parser.py @@ -51,9 +51,16 @@ class Parser: lastvalue = [] lineno = 0 while 1: - line = fp.readline()[:-1] - if not line or not line.strip(): + # Don't strip the line before we test for the end condition, + # because whitespace-only header lines are RFC compliant + # continuation lines. + line = fp.readline() + if not line: break + line = line.splitlines()[0] + if not line: + break + # Ignore the trailing newline lineno += 1 # Check for initial Unix From_ line if line.startswith('From '): @@ -63,7 +70,6 @@ class Parser: else: raise Errors.HeaderParseError( 'Unix-from in headers after first rfc822 header') - # # Header continuation line if line[0] in ' \t': if not lastheader: @@ -134,11 +140,11 @@ class Parser: msgobj = self.parsestr(part) container.preamble = preamble container.epilogue = epilogue - # Ensure that the container's payload is a list - if not isinstance(container.get_payload(), ListType): - container.set_payload([msgobj]) - else: - container.add_payload(msgobj) + container.attach(msgobj) + elif container.get_main_type() == 'multipart': + # Very bad. A message is a multipart with no boundary! 
+ raise Errors.BoundaryError( + 'multipart message with no defined boundary') elif container.get_type() == 'message/delivery-status': # This special kind of type contains blocks of headers separated # by a blank line. We'll represent each header block as a @@ -160,9 +166,9 @@ class Parser: except Errors.HeaderParseError: msg = self._class() self._parsebody(msg, fp) - container.add_payload(msg) + container.set_payload(msg) else: - container.add_payload(fp.read()) + container.set_payload(fp.read()) diff --git a/Lib/email/Utils.py b/Lib/email/Utils.py index 3d48287..887be55 100644 --- a/Lib/email/Utils.py +++ b/Lib/email/Utils.py @@ -1,16 +1,26 @@ -# Copyright (C) 2001 Python Software Foundation +# Copyright (C) 2001,2002 Python Software Foundation # Author: barry@zope.com (Barry Warsaw) """Miscellaneous utilities. """ import time +import socket import re +import random +import os +import warnings +from cStringIO import StringIO +from types import ListType -from rfc822 import unquote, quote, parseaddr -from rfc822 import dump_address_pair +from rfc822 import unquote, quote from rfc822 import AddrlistClass as _AddrlistClass -from rfc822 import parsedate_tz, parsedate, mktime_tz +from rfc822 import mktime_tz + +# We need wormarounds for bugs in these methods in older Pythons (see below) +from rfc822 import parsedate as _parsedate +from rfc822 import parsedate_tz as _parsedate_tz +from rfc822 import parseaddr as _parseaddr from quopri import decodestring as _qdecode import base64 @@ -20,6 +30,10 @@ from Encoders import _bencode, _qencode COMMASPACE = ', ' UEMPTYSTRING = u'' +CRLF = '\r\n' + +specialsre = re.compile(r'[][\()<>@,:;".]') +escapesre = re.compile(r'[][\()"]') @@ -44,6 +58,41 @@ def _bdecode(s): +def fix_eols(s): + """Replace all line-ending characters with \r\n.""" + # Fix newlines with no preceding carriage return + s = re.sub(r'(?<!\r)\n', CRLF, s) + # Fix carriage returns with no following newline + s = re.sub(r'\r(?!\n)', CRLF, s) + return s + + + +def 
formataddr(pair): + """The inverse of parseaddr(), this takes a 2-tuple of the form + (realname, email_address) and returns the string value suitable + for an RFC 2822 From:, To: or Cc:. + + If the first element of pair is false, then the second element is + returned unmodified. + """ + name, address = pair + if name: + quotes = '' + if specialsre.search(name): + quotes = '"' + name = escapesre.sub(r'\\\g<0>', name) + return '%s%s%s <%s>' % (quotes, name, quotes, address) + return address + +# For backwards compatibility +def dump_address_pair(pair): + warnings.warn('Use email.Utils.formataddr() instead', + DeprecationWarning, 2) + return formataddr(pair) + + + def getaddresses(fieldvalues): """Return a list of (REALNAME, EMAIL) for each fieldvalue.""" all = COMMASPACE.join(fieldvalues) @@ -64,30 +113,26 @@ ecre = re.compile(r''' def decode(s): - """Return a decoded string according to RFC 2047, as a unicode string.""" + """Return a decoded string according to RFC 2047, as a unicode string. + + NOTE: This function is deprecated. Use Header.decode_header() instead. + """ + warnings.warn('Use Header.decode_header() instead.', DeprecationWarning, 2) + # Intra-package import here to avoid circular import problems. 
+ from Header import decode_header + L = decode_header(s) + if not isinstance(L, ListType): + # s wasn't decoded + return s + rtn = [] - parts = ecre.split(s, 1) - while parts: - # If there are less than 4 parts, it can't be encoded and we're done - if len(parts) < 5: - rtn.extend(parts) - break - # The first element is any non-encoded leading text - rtn.append(parts[0]) - charset = parts[1] - encoding = parts[2].lower() - atom = parts[3] - # The next chunk to decode should be in parts[4] - parts = ecre.split(parts[4]) - # The encoding must be either `q' or `b', case-insensitive - if encoding == 'q': - func = _qdecode - elif encoding == 'b': - func = _bdecode + for atom, charset in L: + if charset is None: + rtn.append(atom) else: - func = _identity - # Decode and get the unicode in the charset - rtn.append(unicode(func(atom), charset)) + # Convert the string to Unicode using the given encoding. Leave + # Unicode conversion errors to strict. + rtn.append(unicode(atom, charset)) # Now that we've decoded everything, we just need to join all the parts # together into the final string. return UEMPTYSTRING.join(rtn) @@ -96,6 +141,7 @@ def decode(s): def encode(s, charset='iso-8859-1', encoding='q'): """Encode a string according to RFC 2047.""" + warnings.warn('Use Header.Header.encode() instead.', DeprecationWarning, 2) encoding = encoding.lower() if encoding == 'q': estr = _qencode(s) @@ -150,3 +196,48 @@ def formatdate(timeval=None, localtime=0): 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'][now[1] - 1], now[0], now[3], now[4], now[5], zone) + + + +def make_msgid(idstring=None): + """Returns a string suitable for RFC 2822 compliant Message-ID:, e.g: + + <20020201195627.33539.96671@nightshade.la.mastaler.com> + + Optional idstring if given is a string used to strengthen the + uniqueness of the Message-ID, otherwise an empty string is used. 
+ """ + timeval = time.time() + utcdate = time.strftime('%Y%m%d%H%M%S', time.gmtime(timeval)) + pid = os.getpid() + randint = random.randrange(100000) + if idstring is None: + idstring = '' + else: + idstring = '.' + idstring + idhost = socket.getfqdn() + msgid = '<%s.%s.%s%s@%s>' % (utcdate, pid, randint, idstring, idhost) + return msgid + + + +# These functions are in the standalone mimelib version only because they've +# subsequently been fixed in the latest Python versions. We use this to worm +# around broken older Pythons. +def parsedate(data): + if not data: + return None + return _parsedate(data) + + +def parsedate_tz(data): + if not data: + return None + return _parsedate_tz(data) + + +def parseaddr(addr): + realname, emailaddr = _parseaddr(addr) + if realname == '' and emailaddr is None: + return '', '' + return realname, emailaddr diff --git a/Lib/email/__init__.py b/Lib/email/__init__.py index c13495b..f4a5b76 100644 --- a/Lib/email/__init__.py +++ b/Lib/email/__init__.py @@ -1,14 +1,16 @@ -# Copyright (C) 2001 Python Software Foundation +# Copyright (C) 2001,2002 Python Software Foundation # Author: barry@zope.com (Barry Warsaw) """A package for parsing, handling, and generating email messages. """ -__version__ = '1.0' +__version__ = '2.0' -__all__ = ['Encoders', +__all__ = ['Charset', + 'Encoders', 'Errors', 'Generator', + 'Header', 'Iterators', 'MIMEAudio', 'MIMEBase', @@ -18,6 +20,8 @@ __all__ = ['Encoders', 'Message', 'Parser', 'Utils', + 'base64MIME', + 'quopriMIME', 'message_from_string', 'message_from_file', ] diff --git a/Lib/email/base64MIME.py b/Lib/email/base64MIME.py new file mode 100644 index 0000000..08420b2 --- /dev/null +++ b/Lib/email/base64MIME.py @@ -0,0 +1,174 @@ +# Copyright (C) 2002 Python Software Foundation +# Author: che@debian.org (Ben Gertzfield) + +"""Base64 content transfer encoding per RFCs 2045-2047. 
+ +This module handles the content transfer encoding method defined in RFC 2045 +to encode arbitrary 8-bit data using the three 8-bit bytes in four 7-bit +characters encoding known as Base64. + +It is used in the MIME standards for email to attach images, audio, and text +using some 8-bit character sets to messages. + +This module provides an interface to encode and decode both headers and bodies +with Base64 encoding. + +RFC 2045 defines a method for including character set information in an +`encoded-word' in a header. This method is commonly used for 8-bit real names +in To:, From:, Cc:, etc. fields, as well as Subject: lines. + +This module does not do the line wrapping or end-of-line character conversion +necessary for proper internationalized headers; it only does dumb encoding and +decoding. To deal with the various line wrapping issues, use the email.Header +module. +""" + +import re +from binascii import b2a_base64, a2b_base64 +from email.Utils import fix_eols + +CRLF = '\r\n' +NL = '\n' +EMPTYSTRING = '' + +# See also Charset.py +MISC_LEN = 7 + + + +# Helpers +def base64_len(s): + """Return the length of s when it is encoded with base64.""" + groups_of_3, leftover = divmod(len(s), 3) + # 4 bytes out for each 3 bytes (or nonzero fraction thereof) in. + # Thanks, Tim! + n = groups_of_3 * 4 + if leftover: + n += 4 + return n + + + +def header_encode(header, charset='iso-8859-1', keep_eols=0, maxlinelen=76, + eol=NL): + """Encode a single header line with Base64 encoding in a given charset. + + Defined in RFC 2045, this Base64 encoding is identical to normal Base64 + encoding, except that each line must be intelligently wrapped (respecting + the Base64 encoding), and subsequent lines must start with a space. + + charset names the character set to use to encode the header. It defaults + to iso-8859-1. 
+ + End-of-line characters (\\r, \\n, \\r\\n) will be automatically converted + to the canonical email line separator \\r\\n unless the keep_eols + parameter is set to true (the default is false). + + Each line of the header will be terminated in the value of eol, which + defaults to "\\n". Set this to "\\r\\n" if you are using the result of + this function directly in email. + + The resulting string will be in the form: + + "=?charset?b?WW/5ciBtYXp66XLrIHf8eiBhIGhhbXBzdGHuciBBIFlv+XIgbWF6euly?=\\n + =?charset?b?6yB3/HogYSBoYW1wc3Rh7nIgQkMgWW/5ciBtYXp66XLrIHf8eiBhIGhh?=" + + with each line wrapped at, at most, maxlinelen characters (defaults to 76 + characters). + """ + # Return empty headers unchanged + if not header: + return header + + if not keep_eols: + header = fix_eols(header) + + # Base64 encode each line, in encoded chunks no greater than maxlinelen in + # length, after the RFC chrome is added in. + base64ed = [] + max_encoded = maxlinelen - len(charset) - MISC_LEN + max_unencoded = max_encoded * 3 / 4 + + # BAW: Ben's original code used a step of max_unencoded, but I think it + # ought to be max_encoded. Otherwise, where's max_encoded used? I'm + # still not sure what the + for i in range(0, len(header), max_unencoded): + base64ed.append(b2a_base64(header[i:i+max_unencoded])) + + # Now add the RFC chrome to each encoded chunk + lines = [] + for line in base64ed: + # Ignore the last character of each line if it is a newline + if line[-1] == NL: + line = line[:-1] + # Add the chrome + lines.append('=?%s?b?%s?=' % (charset, line)) + # Glue the lines together and return it. BAW: should we be able to + # specify the leading whitespace in the joiner? + joiner = eol + ' ' + return joiner.join(lines) + + + +def encode(s, binary=1, maxlinelen=76, eol=NL): + """Encode a string with base64. + + Each line will be wrapped at, at most, maxlinelen characters (defaults to + 76 characters). 
+ + If binary is false, end-of-line characters will be converted to the + canonical email end-of-line sequence \\r\\n. Otherwise they will be left + verbatim (this is the default). + + Each line of encoded text will end with eol, which defaults to "\\n". Set + this to "\r\n" if you will be using the result of this function directly + in an email. + """ + if not s: + return s + + if not binary: + s = fix_eols(s) + + encvec = [] + max_unencoded = maxlinelen * 3 / 4 + for i in range(0, len(s), max_unencoded): + # BAW: should encode() inherit b2a_base64()'s dubious behavior in + # adding a newline to the encoded string? + enc = b2a_base64(s[i:i + max_unencoded]) + if enc[-1] == NL and eol <> NL: + enc = enc[:-1] + eol + encvec.append(enc) + return EMPTYSTRING.join(encvec) + + +# For convenience and backwards compatibility w/ standard base64 module +body_encode = encode +encodestring = encode + + + +def decode(s, convert_eols=None): + """Decode a raw base64 string. + + If convert_eols is set to a string value, all canonical email linefeeds, + e.g. "\\r\\n", in the decoded text will be converted to the value of + convert_eols. os.linesep is a good choice for convert_eols if you are + decoding a text attachment. + + This function does not parse a full MIME header value encoded with + base64 (like =?iso-8895-1?b?bmloISBuaWgh?=) -- please use the high + level email.Header class for that functionality. + """ + if not s: + return s + + dec = a2b_base64(s) + if convert_eols: + return dec.replace(CRLF, convert_eols) + return dec + + +# For convenience and backwards compatibility w/ standard base64 module +body_decode = decode +decodestring = decode diff --git a/Lib/email/quopriMIME.py b/Lib/email/quopriMIME.py new file mode 100644 index 0000000..002034e --- /dev/null +++ b/Lib/email/quopriMIME.py @@ -0,0 +1,312 @@ +# Copyright (C) 2001,2002 Python Software Foundation +# Author: che@debian.org (Ben Gertzfield) + +"""Quoted-printable content transfer encoding per RFCs 2045-2047. 
+ +This module handles the content transfer encoding method defined in RFC 2045 +to encode US ASCII-like 8-bit data called `quoted-printable'. It is used to +safely encode text that is in a character set similar to the 7-bit US ASCII +character set, but that includes some 8-bit characters that are normally not +allowed in email bodies or headers. + +Quoted-printable is very space-inefficient for encoding binary files; use the +email.base64MIME module for that instead. + +This module provides an interface to encode and decode both headers and bodies +with quoted-printable encoding. + +RFC 2045 defines a method for including character set information in an +`encoded-word' in a header. This method is commonly used for 8-bit real names +in To:/From:/Cc: etc. fields, as well as Subject: lines. + +This module does not do the line wrapping or end-of-line character +conversion necessary for proper internationalized headers; it only +does dumb encoding and decoding. To deal with the various line +wrapping issues, use the email.Header module. 
+""" + +import re +from string import hexdigits +from email.Utils import fix_eols + +CRLF = '\r\n' +NL = '\n' + +# See also Charset.py +MISC_LEN = 7 + +hqre = re.compile(r'[^-a-zA-Z0-9!*+/ ]') +bqre = re.compile(r'[^ !-<>-~\t]') + + + +# Helpers +def header_quopri_check(c): + """Return true if the character should be escaped with header quopri.""" + return hqre.match(c) and 1 + + +def body_quopri_check(c): + """Return true if the character should be escaped with body quopri.""" + return bqre.match(c) and 1 + + +def header_quopri_len(s): + """Return the length of str when it is encoded with header quopri.""" + count = 0 + for c in s: + if hqre.match(c): + count += 3 + else: + count += 1 + return count + + +def body_quopri_len(str): + """Return the length of str when it is encoded with body quopri.""" + count = 0 + for c in str: + if bqre.match(c): + count += 3 + else: + count += 1 + return count + + +def _max_append(L, s, maxlen, extra=''): + if not L: + L.append(s) + elif len(L[-1]) + len(s) < maxlen: + L[-1] += extra + s + else: + L.append(s) + + +def unquote(s): + """Turn a string in the form =AB to the ASCII character with value 0xab""" + return chr(int(s[1:3], 16)) + + +def quote(c): + return "=%02X" % ord(c) + + + +def header_encode(header, charset="iso-8859-1", keep_eols=0, maxlinelen=76, + eol=NL): + """Encode a single header line with quoted-printable (like) encoding. + + Defined in RFC 2045, this `Q' encoding is similar to quoted-printable, but + used specifically for email header fields to allow charsets with mostly 7 + bit characters (and some 8 bit) to remain more or less readable in non-RFC + 2045 aware mail clients. + + charset names the character set to use to encode the header. It defaults + to iso-8859-1. 
+ + The resulting string will be in the form: + + "=?charset?q?I_f=E2rt_in_your_g=E8n=E8ral_dire=E7tion?\\n + =?charset?q?Silly_=C8nglish_Kn=EEghts?=" + + with each line wrapped safely at, at most, maxlinelen characters (defaults + to 76 characters). + + End-of-line characters (\\r, \\n, \\r\\n) will be automatically converted + to the canonical email line separator \\r\\n unless the keep_eols + parameter is set to true (the default is false). + + Each line of the header will be terminated in the value of eol, which + defaults to "\\n". Set this to "\\r\\n" if you are using the result of + this function directly in email. + """ + # Return empty headers unchanged + if not header: + return header + + if not keep_eols: + header = fix_eols(header) + + # Quopri encode each line, in encoded chunks no greater than maxlinelen in + # lenght, after the RFC chrome is added in. + quoted = [] + max_encoded = maxlinelen - len(charset) - MISC_LEN + + for c in header: + # Space may be represented as _ instead of =20 for readability + if c == ' ': + _max_append(quoted, '_', max_encoded) + # These characters can be included verbatim + elif not hqre.match(c): + _max_append(quoted, c, max_encoded) + # Otherwise, replace with hex value like =E2 + else: + _max_append(quoted, "=%02X" % ord(c), max_encoded) + + # Now add the RFC chrome to each encoded chunk and glue the chunks + # together. BAW: should we be able to specify the leading whitespace in + # the joiner? + joiner = eol + ' ' + return joiner.join(['=?%s?q?%s?=' % (charset, line) for line in quoted]) + + + +def encode(body, binary=0, maxlinelen=76, eol=NL): + """Encode with quoted-printable, wrapping at maxlinelen characters. + + If binary is false (the default), end-of-line characters will be converted + to the canonical email end-of-line sequence \\r\\n. Otherwise they will + be left verbatim. + + Each line of encoded text will end with eol, which defaults to "\\n". 
Set + this to "\\r\\n" if you will be using the result of this function directly + in an email. + + Each line will be wrapped at, at most, maxlinelen characters (defaults to + 76 characters). Long lines will have the `soft linefeed' quoted-printable + character "=" appended to them, so the decoded text will be identical to + the original text. + """ + if not body: + return body + + if not binary: + body = fix_eols(body) + + # BAW: We're accumulating the body text by string concatenation. That + # can't be very efficient, but I don't have time now to rewrite it. It + # just feels like this algorithm could be more efficient. + encoded_body = '' + lineno = -1 + # Preserve line endings here so we can check later to see an eol needs to + # be added to the output later. + lines = body.splitlines(1) + for line in lines: + # But strip off line-endings for processing this line. + if line.endswith(CRLF): + line = line[:-2] + elif line[-1] in CRLF: + line = line[:-1] + + lineno += 1 + encoded_line = '' + prev = None + linelen = len(line) + # Now we need to examine every character to see if it needs to be + # quopri encoded. BAW: again, string concatenation is inefficient. + for j in range(linelen): + c = line[j] + prev = c + if bqre.match(c): + c = quote(c) + elif j+1 == linelen: + # Check for whitespace at end of line; special case + if c not in ' \t': + encoded_line += c + prev = c + continue + # Check to see to see if the line has reached its maximum length + if len(encoded_line) + len(c) >= maxlinelen: + encoded_body += encoded_line + '=' + eol + encoded_line = '' + encoded_line += c + # Now at end of line.. 
+ if prev and prev in ' \t': + # Special case for whitespace at end of file + if lineno+1 == len(lines): + prev = quote(prev) + if len(encoded_line) + len(prev) > maxlinelen: + encoded_body += encoded_line + '=' + eol + prev + else: + encoded_body += encoded_line + prev + # Just normal whitespace at end of line + else: + encoded_body += encoded_line + prev + '=' + eol + encoded_line = '' + # Now look at the line we just finished and it has a line ending, we + # need to add eol to the end of the line. + if lines[lineno].endswith(CRLF) or lines[lineno][-1] in CRLF: + encoded_body += encoded_line + eol + else: + encoded_body += encoded_line + encoded_line = '' + return encoded_body + + +# For convenience and backwards compatibility w/ standard base64 module +body_encode = encode +encodestring = encode + + + +# BAW: I'm not sure if the intent was for the signature of this function to be +# the same as base64MIME.decode() or not... +def decode(encoded, eol=NL): + """Decode a quoted-printable string. + + Lines are separated with eol, which defaults to \\n. + """ + if not encoded: + return encoded + # BAW: see comment in encode() above. Again, we're building up the + # decoded string with string concatenation, which could be done much more + # efficiently. + decoded = '' + + for line in encoded.splitlines(): + line = line.rstrip() + if not line: + decoded += eol + continue + + i = 0 + n = len(line) + while i < n: + c = line[i] + if c <> '=': + decoded += c + i += 1 + # Otherwise, c == "=". Are we at the end of the line? If so, add + # a soft line break. 
+ elif i+1 == n: + i += 1 + continue + # Decode if in form =AB + elif i+2 < n and line[i+1] in hexdigits and line[i+2] in hexdigits: + decoded += unquote(line[i:i+3]) + i += 3 + # Otherwise, not in form =AB, pass literally + else: + decoded += c + i += 1 + + if i == n: + decoded += eol + # Special case if original string did not end with eol + if encoded[-1] <> eol and decoded[-1] == eol: + decoded = decoded[:-1] + return decoded + + +# For convenience and backwards compatibility w/ standard base64 module +body_decode = decode +decodestring = decode + + + +def _unquote_match(match): + """Turn a match in the form =AB to the ASCII character with value 0xab""" + s = match.group(0) + return unquote(s) + + +# Header decoding is done a bit differently +def header_decode(s): + """Decode a string encoded with RFC 2045 MIME header `Q' encoding. + + This function does not parse a full MIME header value encoded with + quoted-printable (like =?iso-8895-1?q?Hello_World?=) -- please use + the high level email.Header class for that functionality. 
+ """ + s = s.replace('_', ' ') + return re.sub(r'=\w{2}', _unquote_match, s) diff --git a/Lib/test/data/msg_24.txt b/Lib/test/data/msg_24.txt new file mode 100644 index 0000000..4e52339 --- /dev/null +++ b/Lib/test/data/msg_24.txt @@ -0,0 +1,10 @@ +Content-Type: multipart/mixed; boundary="BOUNDARY" +MIME-Version: 1.0 +Subject: A subject +To: aperson@dom.ain +From: bperson@dom.ain + +--BOUNDARY + + +--BOUNDARY-- diff --git a/Lib/test/data/msg_25.txt b/Lib/test/data/msg_25.txt new file mode 100644 index 0000000..9e35275 --- /dev/null +++ b/Lib/test/data/msg_25.txt @@ -0,0 +1,117 @@ +From MAILER-DAEMON Fri Apr 06 16:46:09 2001 +Received: from [204.245.199.98] (helo=zinfandel.lacita.com) + by www.linux.org.uk with esmtp (Exim 3.13 #1) + id 14lYR6-0008Iv-00 + for linuxuser-admin@www.linux.org.uk; Fri, 06 Apr 2001 16:46:09 +0100 +Received: from localhost (localhost) by zinfandel.lacita.com (8.7.3/8.6.10-MT4.00) with internal id JAB03225; Fri, 6 Apr 2001 09:23:06 -0800 (GMT-0800) +Date: Fri, 6 Apr 2001 09:23:06 -0800 (GMT-0800) +From: Mail Delivery Subsystem <MAILER-DAEMON@zinfandel.lacita.com> +Subject: Returned mail: Too many hops 19 (17 max): from <linuxuser-admin@www.linux.org.uk> via [199.164.235.226], to <scoffman@wellpartner.com> +Message-Id: <200104061723.JAB03225@zinfandel.lacita.com> +To: <linuxuser-admin@www.linux.org.uk> +To: postmaster@zinfandel.lacita.com +MIME-Version: 1.0 +Content-Type: multipart/report; report-type=delivery-status; + bo +Auto-Submitted: auto-generated (failure) + +This is a MIME-encapsulated message + +--JAB03225.986577786/zinfandel.lacita.com + +The original message was received at Fri, 6 Apr 2001 09:23:03 -0800 (GMT-0800) +from [199.164.235.226] + + ----- The following addresses have delivery notifications ----- +<scoffman@wellpartner.com> (unrecoverable error) + + ----- Transcript of session follows ----- +554 Too many hops 19 (17 max): from <linuxuser-admin@www.linux.org.uk> via [199.164.235.226], to <scoffman@wellpartner.com> + 
+--JAB03225.986577786/zinfandel.lacita.com +Content-Type: message/delivery-status + +Reporting-MTA: dns; zinfandel.lacita.com +Received-From-MTA: dns; [199.164.235.226] +Arrival-Date: Fri, 6 Apr 2001 09:23:03 -0800 (GMT-0800) + +Final-Recipient: rfc822; scoffman@wellpartner.com +Action: failed +Status: 5.4.6 +Last-Attempt-Date: Fri, 6 Apr 2001 09:23:06 -0800 (GMT-0800) + +--JAB03225.986577786/zinfandel.lacita.com +Content-Type: text/rfc822-headers + +Return-Path: linuxuser-admin@www.linux.org.uk +Received: from ns1.wellpartner.net ([199.164.235.226]) by zinfandel.lacita.com (8.7.3/8.6.10-MT4.00) with ESMTP id JAA03225 for <scoffman@wellpartner.com>; Fri, 6 Apr 2001 09:23:03 -0800 (GMT-0800) +Received: from zinfandel.lacita.com ([204.245.199.98]) + by + fo +Received: from ns1.wellpartner.net ([199.164.235.226]) by zinfandel.lacita.com (8.7.3/8.6.10-MT4.00) with ESMTP id JAA03221 for <scoffman@wellpartner.com>; Fri, 6 Apr 2001 09:22:18 -0800 (GMT-0800) +Received: from zinfandel.lacita.com ([204.245.199.98]) + by + fo +Received: from ns1.wellpartner.net ([199.164.235.226]) by zinfandel.lacita.com (8.7.3/8.6.10-MT4.00) with ESMTP id JAA03217 for <scoffman@wellpartner.com>; Fri, 6 Apr 2001 09:21:37 -0800 (GMT-0800) +Received: from zinfandel.lacita.com ([204.245.199.98]) + by + fo +Received: from ns1.wellpartner.net ([199.164.235.226]) by zinfandel.lacita.com (8.7.3/8.6.10-MT4.00) with ESMTP id JAA03213 for <scoffman@wellpartner.com>; Fri, 6 Apr 2001 09:20:56 -0800 (GMT-0800) +Received: from zinfandel.lacita.com ([204.245.199.98]) + by + fo +Received: from ns1.wellpartner.net ([199.164.235.226]) by zinfandel.lacita.com (8.7.3/8.6.10-MT4.00) with ESMTP id JAA03209 for <scoffman@wellpartner.com>; Fri, 6 Apr 2001 09:20:15 -0800 (GMT-0800) +Received: from zinfandel.lacita.com ([204.245.199.98]) + by + fo +Received: from ns1.wellpartner.net ([199.164.235.226]) by zinfandel.lacita.com (8.7.3/8.6.10-MT4.00) with ESMTP id JAA03205 for <scoffman@wellpartner.com>; Fri, 6 Apr 2001 
09:19:33 -0800 (GMT-0800) +Received: from zinfandel.lacita.com ([204.245.199.98]) + by + fo +Received: from ns1.wellpartner.net ([199.164.235.226]) by zinfandel.lacita.com (8.7.3/8.6.10-MT4.00) with ESMTP id JAA03201 for <scoffman@wellpartner.com>; Fri, 6 Apr 2001 09:18:52 -0800 (GMT-0800) +Received: from zinfandel.lacita.com ([204.245.199.98]) + by + fo +Received: from ns1.wellpartner.net ([199.164.235.226]) by zinfandel.lacita.com (8.7.3/8.6.10-MT4.00) with ESMTP id JAA03197 for <scoffman@wellpartner.com>; Fri, 6 Apr 2001 09:17:54 -0800 (GMT-0800) +Received: from www.linux.org.uk (parcelfarce.linux.theplanet.co.uk [195.92.249.252]) + by + fo +Received: from localhost.localdomain + ([ + by + id +Received: from [212.1.130.11] (helo=s1.uklinux.net ident=root) + by + id + fo +Received: from server (ppp-2-22.cvx4.telinco.net [212.1.149.22]) + by + fo +From: Daniel James <daniel@linuxuser.co.uk> +Organization: LinuxUser +To: linuxuser@www.linux.org.uk +X-Mailer: KMail [version 1.1.99] +Content-Type: text/plain; + c +MIME-Version: 1.0 +Message-Id: <01040616033903.00962@server> +Content-Transfer-Encoding: 8bit +Subject: [LinuxUser] bulletin no. 
45 +Sender: linuxuser-admin@www.linux.org.uk +Errors-To: linuxuser-admin@www.linux.org.uk +X-BeenThere: linuxuser@www.linux.org.uk +X-Mailman-Version: 2.0.3 +Precedence: bulk +List-Help: <mailto:linuxuser-request@www.linux.org.uk?subject=help> +List-Post: <mailto:linuxuser@www.linux.org.uk> +List-Subscribe: <http://www.linux.org.uk/mailman/listinfo/linuxuser>, + <m +List-Id: bulletins from LinuxUser magazine <linuxuser.www.linux.org.uk> +List-Unsubscribe: <http://www.linux.org.uk/mailman/listinfo/linuxuser>, + <m +List-Archive: <http://www.linux.org.uk/pipermail/linuxuser/> +Date: Fri, 6 Apr 2001 16:03:39 +0100 + +--JAB03225.986577786/zinfandel.lacita.com-- + + diff --git a/Lib/test/test_email.py b/Lib/test/test_email.py index 7105f7d..1322246 100644 --- a/Lib/test/test_email.py +++ b/Lib/test/test_email.py @@ -1,15 +1,19 @@ # Copyright (C) 2001,2002 Python Software Foundation # email package unit tests +import sys import os import time import unittest import base64 from cStringIO import StringIO from types import StringType +import warnings import email +from email.Charset import Charset +from email.Header import Header, decode_header from email.Parser import Parser, HeaderParser from email.Generator import Generator, DecodedGenerator from email.Message import Message @@ -22,14 +26,18 @@ from email import Utils from email import Errors from email import Encoders from email import Iterators +from email import base64MIME +from email import quopriMIME from test_support import findfile, __file__ as test_support_file - NL = '\n' EMPTYSTRING = '' SPACE = ' ' +# We don't care about DeprecationWarnings +warnings.filterwarnings('ignore', '', DeprecationWarning, __name__) + def openfile(filename): @@ -41,7 +49,7 @@ def openfile(filename): # Base test class class TestEmailBase(unittest.TestCase): def _msgobj(self, filename): - fp = openfile(filename) + fp = openfile(findfile(filename)) try: msg = email.message_from_file(fp) finally: @@ -58,6 +66,45 @@ class 
TestMessageAPI(TestEmailBase): eq(msg.get_all('cc'), ['ccc@zzz.org', 'ddd@zzz.org', 'eee@zzz.org']) eq(msg.get_all('xx', 'n/a'), 'n/a') + def test_getset_charset(self): + eq = self.assertEqual + msg = Message() + eq(msg.get_charset(), None) + charset = Charset('iso-8859-1') + msg.set_charset(charset) + eq(msg['mime-version'], '1.0') + eq(msg.get_type(), 'text/plain') + eq(msg['content-type'], 'text/plain; charset="iso-8859-1"') + eq(msg.get_param('charset'), 'iso-8859-1') + eq(msg['content-transfer-encoding'], 'quoted-printable') + eq(msg.get_charset().input_charset, 'iso-8859-1') + # Remove the charset + msg.set_charset(None) + eq(msg.get_charset(), None) + eq(msg['content-type'], 'text/plain') + # Try adding a charset when there's already MIME headers present + msg = Message() + msg['MIME-Version'] = '2.0' + msg['Content-Type'] = 'text/x-weird' + msg['Content-Transfer-Encoding'] = 'quinted-puntable' + msg.set_charset(charset) + eq(msg['mime-version'], '2.0') + eq(msg['content-type'], 'text/x-weird; charset="iso-8859-1"') + eq(msg['content-transfer-encoding'], 'quinted-puntable') + + def test_set_charset_from_string(self): + eq = self.assertEqual + msg = Message() + msg.set_charset('us-ascii') + eq(msg.get_charset().input_charset, 'us-ascii') + eq(msg['content-type'], 'text/plain; charset="us-ascii"') + + def test_set_payload_with_charset(self): + msg = Message() + charset = Charset('iso-8859-1') + msg.set_payload('This is a string payload', charset) + self.assertEqual(msg.get_charset().input_charset, 'iso-8859-1') + def test_get_charsets(self): eq = self.assertEqual @@ -204,6 +251,11 @@ class TestMessageAPI(TestEmailBase): eq(msg.get_params(header='x-header'), [('foo', ''), ('bar', 'one'), ('baz', 'two')]) + def test_get_param_liberal(self): + msg = Message() + msg['Content-Type'] = 'Content-Type: Multipart/mixed; boundary = "CPIMSSMTPC06p5f3tG"' + self.assertEqual(msg.get_param('boundary'), 'CPIMSSMTPC06p5f3tG') + def test_get_param(self): eq = self.assertEqual 
msg = email.message_from_string( @@ -216,6 +268,10 @@ class TestMessageAPI(TestEmailBase): eq(msg.get_param('foo', header='x-header'), '') eq(msg.get_param('bar', header='x-header'), 'one') eq(msg.get_param('baz', header='x-header'), 'two') + # XXX: We are not RFC-2045 compliant! We cannot parse: + # msg["Content-Type"] = 'text/plain; weird="hey; dolly? [you] @ <\\"home\\">?"' + # msg.get_param("weird") + # yet. def test_get_param_funky_continuation_lines(self): msg = self._msgobj('msg_22.txt') @@ -228,7 +284,52 @@ class TestMessageAPI(TestEmailBase): self.failUnless(msg.has_key('HEADER')) self.failIf(msg.has_key('headeri')) - + def test_set_param(self): + eq = self.assertEqual + msg = Message() + msg.set_param('charset', 'iso-2022-jp') + eq(msg.get_param('charset'), 'iso-2022-jp') + msg.set_param('importance', 'high value') + eq(msg.get_param('importance'), 'high value') + eq(msg.get_param('importance', unquote=0), '"high value"') + eq(msg.get_params(), [('text/plain', ''), + ('charset', 'iso-2022-jp'), + ('importance', 'high value')]) + eq(msg.get_params(unquote=0), [('text/plain', ''), + ('charset', '"iso-2022-jp"'), + ('importance', '"high value"')]) + msg.set_param('charset', 'iso-9999-xx', header='X-Jimmy') + eq(msg.get_param('charset', header='X-Jimmy'), 'iso-9999-xx') + + def test_del_param(self): + eq = self.assertEqual + msg = self._msgobj('msg_05.txt') + eq(msg.get_params(), + [('multipart/report', ''), ('report-type', 'delivery-status'), + ('boundary', 'D1690A7AC1.996856090/mail.example.com')]) + old_val = msg.get_param("report-type") + msg.del_param("report-type") + eq(msg.get_params(), + [('multipart/report', ''), + ('boundary', 'D1690A7AC1.996856090/mail.example.com')]) + msg.set_param("report-type", old_val) + eq(msg.get_params(), + [('multipart/report', ''), + ('boundary', 'D1690A7AC1.996856090/mail.example.com'), + ('report-type', old_val)]) + + def test_set_type(self): + eq = self.assertEqual + msg = Message() + self.assertRaises(ValueError, 
msg.set_type, 'text') + msg.set_type('text/plain') + eq(msg['content-type'], 'text/plain') + msg.set_param('charset', 'us-ascii') + eq(msg['content-type'], 'text/plain; charset="us-ascii"') + msg.set_type('text/html') + eq(msg['content-type'], 'text/html; charset="us-ascii"') + + # Test the email.Encoders module class TestEncoders(unittest.TestCase): @@ -236,7 +337,6 @@ class TestEncoders(unittest.TestCase): eq = self.assertEqual msg = MIMEText('hello world', _encoder=Encoders.encode_noop) eq(msg.get_payload(), 'hello world\n') - eq(msg['content-transfer-encoding'], None) def test_encode_7bit(self): eq = self.assertEqual @@ -253,6 +353,12 @@ class TestEncoders(unittest.TestCase): eq(msg.get_payload(), 'hello \x80 world\n') eq(msg['content-transfer-encoding'], '8bit') + def test_encode_empty_payload(self): + eq = self.assertEqual + msg = Message() + msg.set_charset('us-ascii') + eq(msg['content-transfer-encoding'], '7bit') + def test_encode_base64(self): eq = self.assertEqual msg = MIMEText('hello world', _encoder=Encoders.encode_base64) @@ -265,6 +371,23 @@ class TestEncoders(unittest.TestCase): eq(msg.get_payload(), 'hello=20world\n') eq(msg['content-transfer-encoding'], 'quoted-printable') + def test_default_cte(self): + eq = self.assertEqual + msg = MIMEText('hello world') + eq(msg['content-transfer-encoding'], '7bit') + + def test_default_cte(self): + eq = self.assertEqual + # With no explicit _charset its us-ascii, and all are 7-bit + msg = MIMEText('hello world') + eq(msg['content-transfer-encoding'], '7bit') + # Similar, but with 8-bit data + msg = MIMEText('hello \xf8 world') + eq(msg['content-transfer-encoding'], '8bit') + # And now with a different charset + msg = MIMEText('hello \xf8 world', _charset='iso-8859-1') + eq(msg['content-transfer-encoding'], 'quoted-printable') + # Test long header wrapping @@ -279,7 +402,14 @@ class TestLongHeaders(unittest.TestCase): sfp = StringIO() g = Generator(sfp) g(msg) - self.assertEqual(sfp.getvalue(), 
openfile('msg_18.txt').read()) + self.assertEqual(sfp.getvalue(), '''\ +Content-Type: text/plain; charset="us-ascii" +MIME-Version: 1.0 +Content-Transfer-Encoding: 7bit +X-Foobar-Spoink-Defrobnit: wasnipoop; giraffes="very-long-necked-animals"; + spooge="yummy"; hippos="gargantuan"; marshmallows="gooey" + +''') def test_no_semis_header_splitter(self): msg = Message() @@ -314,6 +444,30 @@ References: xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx Test""") + def test_splitting_multiple_long_lines(self): + msg = Message() + msg['Received'] = """\ +from babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST) + from babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST) + from babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST) +""" + self.assertEqual(msg.as_string(), """\ +Received: from babylon.socal-raves.org (localhost [127.0.0.1]); + by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; + for <mailman-admin@babylon.socal-raves.org>; + Sat, 2 Feb 2002 17:00:06 -0800 (PST) + from babylon.socal-raves.org (localhost [127.0.0.1]); + by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; + for <mailman-admin@babylon.socal-raves.org>; + Sat, 2 Feb 2002 17:00:06 -0800 (PST) + from babylon.socal-raves.org (localhost [127.0.0.1]); + by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; + for <mailman-admin@babylon.socal-raves.org>; + Sat, 2 Feb 2002 17:00:06 -0800 (PST) + + +""") + # Test mangling of "From " lines in the body of a message @@ -476,6 +630,12 @@ class TestMIMEText(unittest.TestCase): 
self.assertEqual(self._msg.get_payload(), 'hello there\n') self.failUnless(not self._msg.is_multipart()) + def test_charset(self): + eq = self.assertEqual + msg = MIMEText('hello there', _charset='us-ascii') + eq(msg.get_charset().input_charset, 'us-ascii') + eq(msg['content-type'], 'text/plain; charset="us-ascii"') + # Test a more complicated multipart/mixed type message @@ -539,6 +699,82 @@ This is the dingus fish. unless(not m0.is_multipart()) unless(not m1.is_multipart()) + def test_no_parts_in_a_multipart(self): + outer = MIMEBase('multipart', 'mixed') + outer['Subject'] = 'A subject' + outer['To'] = 'aperson@dom.ain' + outer['From'] = 'bperson@dom.ain' + outer.preamble = '' + outer.epilogue = '' + outer.set_boundary('BOUNDARY') + msg = MIMEText('hello world') + self.assertEqual(outer.as_string(), '''\ +Content-Type: multipart/mixed; boundary="BOUNDARY" +MIME-Version: 1.0 +Subject: A subject +To: aperson@dom.ain +From: bperson@dom.ain + +--BOUNDARY + + +--BOUNDARY-- +''') + + def test_one_part_in_a_multipart(self): + outer = MIMEBase('multipart', 'mixed') + outer['Subject'] = 'A subject' + outer['To'] = 'aperson@dom.ain' + outer['From'] = 'bperson@dom.ain' + outer.preamble = '' + outer.epilogue = '' + outer.set_boundary('BOUNDARY') + msg = MIMEText('hello world') + outer.attach(msg) + self.assertEqual(outer.as_string(), '''\ +Content-Type: multipart/mixed; boundary="BOUNDARY" +MIME-Version: 1.0 +Subject: A subject +To: aperson@dom.ain +From: bperson@dom.ain + +--BOUNDARY +Content-Type: text/plain; charset="us-ascii" +MIME-Version: 1.0 +Content-Transfer-Encoding: 7bit + +hello world + +--BOUNDARY-- +''') + + def test_seq_parts_in_a_multipart(self): + outer = MIMEBase('multipart', 'mixed') + outer['Subject'] = 'A subject' + outer['To'] = 'aperson@dom.ain' + outer['From'] = 'bperson@dom.ain' + outer.preamble = '' + outer.epilogue = '' + msg = MIMEText('hello world') + outer.attach(msg) + outer.set_boundary('BOUNDARY') + self.assertEqual(outer.as_string(), '''\ 
+Content-Type: multipart/mixed; boundary="BOUNDARY" +MIME-Version: 1.0 +Subject: A subject +To: aperson@dom.ain +From: bperson@dom.ain + +--BOUNDARY +Content-Type: text/plain; charset="us-ascii" +MIME-Version: 1.0 +Content-Transfer-Encoding: 7bit + +hello world + +--BOUNDARY-- +''') + # Test some badly formatted messages @@ -551,7 +787,7 @@ class TestNonConformant(TestEmailBase): self.failUnless(msg.get_subtype() is None) def test_bogus_boundary(self): - fp = openfile('msg_15.txt') + fp = openfile(findfile('msg_15.txt')) try: data = fp.read() finally: @@ -561,6 +797,10 @@ class TestNonConformant(TestEmailBase): # message into the intended message tree. self.assertRaises(Errors.BoundaryError, p.parsestr, data) + def test_multipart_no_boundary(self): + fp = openfile(findfile('msg_25.txt')) + self.assertRaises(Errors.BoundaryError, email.message_from_file, fp) + # Test RFC 2047 header encoding and decoding @@ -570,7 +810,7 @@ class TestRFC2047(unittest.TestCase): s = '=?iso-8859-1?q?this=20is=20some=20text?=' eq(Utils.decode(s), 'this is some text') s = '=?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?=' - eq(Utils.decode(s), u'Keld_J\xf8rn_Simonsen') + eq(Utils.decode(s), u'Keld J\xf8rn Simonsen') s = '=?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?=' \ '=?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?=' eq(Utils.decode(s), 'If you can read this you understand the example.') @@ -578,6 +818,8 @@ class TestRFC2047(unittest.TestCase): eq(Utils.decode(s), u'\u05dd\u05d5\u05dc\u05e9 \u05df\u05d1 \u05d9\u05dc\u05d8\u05e4\u05e0') s = '=?iso-8859-1?q?this=20is?= =?iso-8859-1?q?some=20text?=' + eq(Utils.decode(s), u'this issome text') + s = '=?iso-8859-1?q?this=20is_?= =?iso-8859-1?q?some=20text?=' eq(Utils.decode(s), u'this is some text') def test_encode_header(self): @@ -794,6 +1036,10 @@ class TestIdempotent(unittest.TestCase): msg, text = self._msgobj('msg_23.txt') self._idempotent(msg, text) + def test_multipart_no_parts(self): + msg, text = self._msgobj('msg_24.txt') + 
self._idempotent(msg, text) + def test_content_type(self): eq = self.assertEquals # Get a message object and reset the seek pointer for other tests @@ -835,7 +1081,6 @@ class TestIdempotent(unittest.TestCase): self.failUnless(isinstance(msg1.get_payload(), StringType)) eq(msg1.get_payload(), '\n') - # Test various other bits of the package's functionality class TestMiscellaneous(unittest.TestCase): @@ -916,49 +1161,77 @@ class TestMiscellaneous(unittest.TestCase): module = __import__('email') all = module.__all__ all.sort() - self.assertEqual(all, ['Encoders', 'Errors', 'Generator', 'Iterators', - 'MIMEAudio', 'MIMEBase', 'MIMEImage', - 'MIMEMessage', 'MIMEText', 'Message', 'Parser', - 'Utils', - 'message_from_file', 'message_from_string']) + self.assertEqual(all, ['Charset', 'Encoders', 'Errors', 'Generator', + 'Header', 'Iterators', 'MIMEAudio', + 'MIMEBase', 'MIMEImage', 'MIMEMessage', + 'MIMEText', 'Message', 'Parser', + 'Utils', 'base64MIME', + 'message_from_file', 'message_from_string', + 'quopriMIME']) def test_formatdate(self): - now = 1005327232.109884 - gm_epoch = time.gmtime(0)[0:3] - loc_epoch = time.localtime(0)[0:3] - # When does the epoch start? - if gm_epoch == (1970, 1, 1): - # traditional Unix epoch - matchdate = 'Fri, 09 Nov 2001 17:33:52 -0000' - elif loc_epoch == (1904, 1, 1): - # Mac epoch - matchdate = 'Sat, 09 Nov 1935 16:33:52 -0000' - else: - matchdate = "I don't understand your epoch" - gdate = Utils.formatdate(now) - self.assertEqual(gdate, matchdate) + now = time.time() + self.assertEqual(Utils.parsedate(Utils.formatdate(now))[:6], + time.gmtime(now)[:6]) def test_formatdate_localtime(self): - now = 1005327232.109884 - ldate = Utils.formatdate(now, localtime=1) - zone = ldate.split()[5] - offset = int(zone[1:3]) * 3600 + int(zone[-2:]) * 60 - # Remember offset is in seconds west of UTC, but the timezone is in - # minutes east of UTC, so the signs differ. 
- if zone[0] == '+': - offset = -offset - if time.daylight and time.localtime(now)[-1]: - toff = time.altzone - else: - toff = time.timezone - self.assertEqual(offset, toff) + now = time.time() + self.assertEqual( + Utils.parsedate(Utils.formatdate(now, localtime=1))[:6], + time.localtime(now)[:6]) def test_parsedate_none(self): self.assertEqual(Utils.parsedate(''), None) def test_parseaddr_empty(self): self.assertEqual(Utils.parseaddr('<>'), ('', '')) - self.assertEqual(Utils.dump_address_pair(Utils.parseaddr('<>')), '') + self.assertEqual(Utils.formataddr(Utils.parseaddr('<>')), '') + + def test_noquote_dump(self): + self.assertEqual( + Utils.formataddr(('A Silly Person', 'person@dom.ain')), + 'A Silly Person <person@dom.ain>') + + def test_escape_dump(self): + self.assertEqual( + Utils.formataddr(('A (Very) Silly Person', 'person@dom.ain')), + r'"A \(Very\) Silly Person" <person@dom.ain>') + a = r'A \(Special\) Person' + b = 'person@dom.ain' + self.assertEqual(Utils.parseaddr(Utils.formataddr((a, b))), (a, b)) + + def test_quote_dump(self): + self.assertEqual( + Utils.formataddr(('A Silly; Person', 'person@dom.ain')), + r'"A Silly; Person" <person@dom.ain>') + + def test_fix_eols(self): + eq = self.assertEqual + eq(Utils.fix_eols('hello'), 'hello') + eq(Utils.fix_eols('hello\n'), 'hello\r\n') + eq(Utils.fix_eols('hello\r'), 'hello\r\n') + eq(Utils.fix_eols('hello\r\n'), 'hello\r\n') + eq(Utils.fix_eols('hello\n\r'), 'hello\r\n\r\n') + + def test_charset_richcomparisons(self): + eq = self.assertEqual + ne = self.failIfEqual + cset1 = Charset() + cset2 = Charset() + eq(cset1, 'us-ascii') + eq(cset1, 'US-ASCII') + eq(cset1, 'Us-AsCiI') + eq('us-ascii', cset1) + eq('US-ASCII', cset1) + eq('Us-AsCiI', cset1) + ne(cset1, 'usascii') + ne(cset1, 'USASCII') + ne(cset1, 'UsAsCiI') + ne('usascii', cset1) + ne('USASCII', cset1) + ne('UsAsCiI', cset1) + eq(cset1, cset2) + eq(cset2, cset1) @@ -983,8 +1256,12 @@ class TestIterators(TestEmailBase): eq = self.assertEqual msg = 
self._msgobj('msg_04.txt') it = Iterators.typed_subpart_iterator(msg, 'text') - lines = [subpart.get_payload() for subpart in it] - eq(len(lines), 2) + lines = [] + subparts = 0 + for subpart in it: + subparts += 1 + lines.append(subpart.get_payload()) + eq(subparts, 2) eq(EMPTYSTRING.join(lines), """\ a simple kind of mirror to reflect upon our own @@ -1011,6 +1288,7 @@ Do you like this message? -Me """) + class TestParsers(unittest.TestCase): def test_header_parser(self): @@ -1025,6 +1303,274 @@ class TestParsers(unittest.TestCase): eq(msg.is_multipart(), 0) self.failUnless(isinstance(msg.get_payload(), StringType)) + def test_whitespace_continuaton(self): + eq = self.assertEqual + # This message contains a line after the Subject: header that has only + # whitespace, but it is not empty! + msg = email.message_from_string("""\ +From: aperson@dom.ain +To: bperson@dom.ain +Subject: the next line has a space on it + +Date: Mon, 8 Apr 2002 15:09:19 -0400 +Message-ID: spam + +Here's the message body +""") + eq(msg['subject'], 'the next line has a space on it\n ') + eq(msg['message-id'], 'spam') + eq(msg.get_payload(), "Here's the message body\n") + + + +class TestBase64(unittest.TestCase): + def test_len(self): + eq = self.assertEqual + eq(base64MIME.base64_len('hello'), + len(base64MIME.encode('hello', eol=''))) + for size in range(15): + if size == 0 : bsize = 0 + elif size <= 3 : bsize = 4 + elif size <= 6 : bsize = 8 + elif size <= 9 : bsize = 12 + elif size <= 12: bsize = 16 + else : bsize = 20 + eq(base64MIME.base64_len('x'*size), bsize) + + def test_decode(self): + eq = self.assertEqual + eq(base64MIME.decode(''), '') + eq(base64MIME.decode('aGVsbG8='), 'hello') + eq(base64MIME.decode('aGVsbG8=', 'X'), 'hello') + eq(base64MIME.decode('aGVsbG8NCndvcmxk\n', 'X'), 'helloXworld') + + def test_encode(self): + eq = self.assertEqual + eq(base64MIME.encode(''), '') + eq(base64MIME.encode('hello'), 'aGVsbG8=\n') + # Test the binary flag + eq(base64MIME.encode('hello\n'), 
'aGVsbG8K\n') + eq(base64MIME.encode('hello\n', 0), 'aGVsbG8NCg==\n') + # Test the maxlinelen arg + eq(base64MIME.encode('xxxx ' * 20, maxlinelen=40), """\ +eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg +eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg +eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg +eHh4eCB4eHh4IA== +""") + # Test the eol argument + eq(base64MIME.encode('xxxx ' * 20, maxlinelen=40, eol='\r\n'), """\ +eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r +eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r +eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r +eHh4eCB4eHh4IA==\r +""") + + def test_header_encode(self): + eq = self.assertEqual + he = base64MIME.header_encode + eq(he('hello'), '=?iso-8859-1?b?aGVsbG8=?=') + eq(he('hello\nworld'), '=?iso-8859-1?b?aGVsbG8NCndvcmxk?=') + # Test the charset option + eq(he('hello', charset='iso-8859-2'), '=?iso-8859-2?b?aGVsbG8=?=') + # Test the keep_eols flag + eq(he('hello\nworld', keep_eols=1), + '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?=') + # Test the maxlinelen argument + eq(he('xxxx ' * 20, maxlinelen=40), """\ +=?iso-8859-1?b?eHh4eCB4eHh4IHh4eHggeHg=?= + =?iso-8859-1?b?eHggeHh4eCB4eHh4IHh4eHg=?= + =?iso-8859-1?b?IHh4eHggeHh4eCB4eHh4IHg=?= + =?iso-8859-1?b?eHh4IHh4eHggeHh4eCB4eHg=?= + =?iso-8859-1?b?eCB4eHh4IHh4eHggeHh4eCA=?= + =?iso-8859-1?b?eHh4eCB4eHh4IHh4eHgg?=""") + # Test the eol argument + eq(he('xxxx ' * 20, maxlinelen=40, eol='\r\n'), """\ +=?iso-8859-1?b?eHh4eCB4eHh4IHh4eHggeHg=?=\r + =?iso-8859-1?b?eHggeHh4eCB4eHh4IHh4eHg=?=\r + =?iso-8859-1?b?IHh4eHggeHh4eCB4eHh4IHg=?=\r + =?iso-8859-1?b?eHh4IHh4eHggeHh4eCB4eHg=?=\r + =?iso-8859-1?b?eCB4eHh4IHh4eHggeHh4eCA=?=\r + =?iso-8859-1?b?eHh4eCB4eHh4IHh4eHgg?=""") + + + +class TestQuopri(unittest.TestCase): + def setUp(self): + self.hlit = [chr(x) for x in range(ord('a'), ord('z')+1)] + \ + [chr(x) for x in range(ord('A'), ord('Z')+1)] + \ + [chr(x) for x in range(ord('0'), ord('9')+1)] + \ + ['!', '*', '+', '-', '/', ' '] + self.hnon = [chr(x) for x in range(256) if chr(x) not in self.hlit] + assert 
len(self.hlit) + len(self.hnon) == 256 + self.blit = [chr(x) for x in range(ord(' '), ord('~')+1)] + ['\t'] + self.blit.remove('=') + self.bnon = [chr(x) for x in range(256) if chr(x) not in self.blit] + assert len(self.blit) + len(self.bnon) == 256 + + def test_header_quopri_check(self): + for c in self.hlit: + self.failIf(quopriMIME.header_quopri_check(c)) + for c in self.hnon: + self.failUnless(quopriMIME.header_quopri_check(c)) + + def test_body_quopri_check(self): + for c in self.blit: + self.failIf(quopriMIME.body_quopri_check(c)) + for c in self.bnon: + self.failUnless(quopriMIME.body_quopri_check(c)) + + def test_header_quopri_len(self): + eq = self.assertEqual + hql = quopriMIME.header_quopri_len + enc = quopriMIME.header_encode + for s in ('hello', 'h@e@l@l@o@'): + # Empty charset and no line-endings. 7 == RFC chrome + eq(hql(s), len(enc(s, charset='', eol=''))-7) + for c in self.hlit: + eq(hql(c), 1) + for c in self.hnon: + eq(hql(c), 3) + + def test_body_quopri_len(self): + eq = self.assertEqual + bql = quopriMIME.body_quopri_len + for c in self.blit: + eq(bql(c), 1) + for c in self.bnon: + eq(bql(c), 3) + + def test_quote_unquote_idempotent(self): + for x in range(256): + c = chr(x) + self.assertEqual(quopriMIME.unquote(quopriMIME.quote(c)), c) + + def test_header_encode(self): + eq = self.assertEqual + he = quopriMIME.header_encode + eq(he('hello'), '=?iso-8859-1?q?hello?=') + eq(he('hello\nworld'), '=?iso-8859-1?q?hello=0D=0Aworld?=') + # Test the charset option + eq(he('hello', charset='iso-8859-2'), '=?iso-8859-2?q?hello?=') + # Test the keep_eols flag + eq(he('hello\nworld', keep_eols=1), '=?iso-8859-1?q?hello=0Aworld?=') + # Test a non-ASCII character + eq(he('helloÇthere'), '=?iso-8859-1?q?hello=C7there?=') + # Test the maxlinelen argument + eq(he('xxxx ' * 20, maxlinelen=40), """\ +=?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xx?= + =?iso-8859-1?q?xx_xxxx_xxxx_xxxx_xxxx?= + =?iso-8859-1?q?_xxxx_xxxx_xxxx_xxxx_x?= + =?iso-8859-1?q?xxx_xxxx_xxxx_xxxx_xxx?= 
+ =?iso-8859-1?q?x_xxxx_xxxx_?=""") + # Test the eol argument + eq(he('xxxx ' * 20, maxlinelen=40, eol='\r\n'), """\ +=?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xx?=\r + =?iso-8859-1?q?xx_xxxx_xxxx_xxxx_xxxx?=\r + =?iso-8859-1?q?_xxxx_xxxx_xxxx_xxxx_x?=\r + =?iso-8859-1?q?xxx_xxxx_xxxx_xxxx_xxx?=\r + =?iso-8859-1?q?x_xxxx_xxxx_?=""") + + def test_decode(self): + eq = self.assertEqual + eq(quopriMIME.decode(''), '') + eq(quopriMIME.decode('hello'), 'hello') + eq(quopriMIME.decode('hello', 'X'), 'hello') + eq(quopriMIME.decode('hello\nworld', 'X'), 'helloXworld') + + def test_encode(self): + eq = self.assertEqual + eq(quopriMIME.encode(''), '') + eq(quopriMIME.encode('hello'), 'hello') + # Test the binary flag + eq(quopriMIME.encode('hello\r\nworld'), 'hello\nworld') + eq(quopriMIME.encode('hello\r\nworld', 0), 'hello\nworld') + # Test the maxlinelen arg + eq(quopriMIME.encode('xxxx ' * 20, maxlinelen=40), """\ +xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx= + xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx= +x xxxx xxxx xxxx xxxx=20""") + # Test the eol argument + eq(quopriMIME.encode('xxxx ' * 20, maxlinelen=40, eol='\r\n'), """\ +xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=\r + xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=\r +x xxxx xxxx xxxx xxxx=20""") + eq(quopriMIME.encode("""\ +one line + +two line"""), """\ +one line + +two line""") + + + +# Test the Charset class +class TestCharset(unittest.TestCase): + def test_idempotent(self): + eq = self.assertEqual + # Make sure us-ascii = no Unicode conversion + c = Charset('us-ascii') + s = 'Hello World!' + sp = c.to_splittable(s) + eq(s, c.from_splittable(sp)) + # test 8-bit idempotency with us-ascii + s = '\xa4\xa2\xa4\xa4\xa4\xa6\xa4\xa8\xa4\xaa' + sp = c.to_splittable(s) + eq(s, c.from_splittable(sp)) + + + +# Test multilingual MIME headers. 
+class TestHeader(unittest.TestCase): + def test_simple(self): + eq = self.assertEqual + h = Header('Hello World!') + eq(h.encode(), 'Hello World!') + h.append('Goodbye World!') + eq(h.encode(), 'Hello World! Goodbye World!') + + def test_header_needs_no_decoding(self): + h = 'no decoding needed' + self.assertEqual(decode_header(h), [(h, None)]) + + def test_long(self): + h = Header("I am the very model of a modern Major-General; I've information vegetable, animal, and mineral; I know the kings of England, and I quote the fights historical from Marathon to Waterloo, in order categorical; I'm very well acquainted, too, with matters mathematical; I understand equations, both the simple and quadratical; about binomial theorem I'm teeming with a lot o' news, with many cheerful facts about the square of the hypotenuse.", + maxlinelen=76) + for l in h.encode().split('\n '): + self.failUnless(len(l) <= 76) + + def test_multilingual(self): + eq = self.assertEqual + g = Charset("iso-8859-1") + cz = Charset("iso-8859-2") + utf8 = Charset("utf-8") + g_head = "Die Mieter treten hier ein werden mit einem Foerderband komfortabel den Korridor entlang, an s\xfcdl\xfcndischen Wandgem\xe4lden vorbei, gegen die rotierenden Klingen bef\xf6rdert. " + cz_head = "Finan\xe8ni metropole se hroutily pod tlakem jejich d\xf9vtipu.. " + utf8_head = u"\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das Nunstuck git und Slotermeyer? Ja! 
Beiherhund das Oder die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066\u3044\u307e\u3059\u3002".encode("utf-8") + h = Header(g_head, g) + h.append(cz_head, cz) + h.append(utf8_head, utf8) + enc = h.encode() + eq(enc, """=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_eine?= + =?iso-8859-1?q?m_Foerderband_komfortabel_den_Korridor_ent?= + =?iso-8859-1?q?lang=2C_an_s=FCdl=FCndischen_Wandgem=E4lden_vorbei?= + =?iso-8859-1?q?=2C_gegen_die_rotierenden_Klingen_bef=F6rdert=2E_?= + =?iso-8859-2?q?Finan=E8ni_metropole_se_hroutil?= + =?iso-8859-2?q?y_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= + =?utf-8?b?5q2j56K644Gr6KiA44GG44Go57+76Kiz44Gv?= + =?utf-8?b?44GV44KM44Gm44GE44G+44Gb44KT44CC5LiA?= + =?utf-8?b?6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM?= + =?utf-8?b?44CB44GC44Go44Gv44Gn44Gf44KJ44KB44Gn?= + =?utf-8?b?44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGE=?= + =?utf-8?b?cyBOdW5zdHVjayBnaXQgdW5k?= + =?utf-8?b?IFNsb3Rlcm1leWVyPyBKYSEgQmVpaGVyaHVuZCBkYXMgT2Rl?= + =?utf-8?b?ciBkaWUgRmxpcHBlcndhbGR0?= + =?utf-8?b?IGdlcnNwdXQu44CN44Go6KiA44Gj44Gm44GE44G+44GZ44CC?=""") + eq(decode_header(enc), + [(g_head, "iso-8859-1"), (cz_head, "iso-8859-2"), + (utf8_head, "utf-8")]) + def suite(): @@ -1044,13 +1590,13 @@ def suite(): suite.addTest(unittest.makeSuite(TestMiscellaneous)) suite.addTest(unittest.makeSuite(TestIterators)) suite.addTest(unittest.makeSuite(TestParsers)) + suite.addTest(unittest.makeSuite(TestBase64)) + suite.addTest(unittest.makeSuite(TestQuopri)) + suite.addTest(unittest.makeSuite(TestHeader)) + suite.addTest(unittest.makeSuite(TestCharset)) return suite -def test_main(): - from test_support import run_suite - run_suite(suite()) - if __name__ == '__main__': - test_main() + unittest.main(defaultTest='suite') diff --git a/Lib/test/test_email_codecs.py b/Lib/test/test_email_codecs.py new file mode 100644 index 0000000..d0451d1 --- /dev/null +++ b/Lib/test/test_email_codecs.py @@ -0,0 +1,51 @@ +# Copyright (C) 2002 Python Software Foundation +# email package unit tests for 
(optional) Asian codecs + +import unittest +from test_support import TestSkipped + +from email.Charset import Charset +from email.Header import Header, decode_header + + +# See if we have the Japanese codecs package installed +try: + unicode('foo', 'japanese.iso-2022-jp') +except LookupError: + raise TestSkipped, 'Optional Japanese codecs not installed' + + + +class TestEmailAsianCodecs(unittest.TestCase): + def test_japanese_codecs(self): + eq = self.assertEqual + j = Charset("euc-jp") + g = Charset("iso-8859-1") + h = Header("Hello World!") + jhello = '\xa5\xcf\xa5\xed\xa1\xbc\xa5\xef\xa1\xbc\xa5\xeb\xa5\xc9\xa1\xaa' + ghello = 'Gr\xfc\xdf Gott!' + h.append(jhello, j) + h.append(ghello, g) + eq(h.encode(), 'Hello World! =?iso-2022-jp?b?GyRCJU8lbSE8JW8hPCVrJUkhKhsoQg==?=\n =?iso-8859-1?q?Gr=FC=DF_Gott!?=') + eq(decode_header(h.encode()), + [('Hello World!', None), + ('\x1b$B%O%m!<%o!<%k%I!*\x1b(B', 'iso-2022-jp'), + ('Gr\xfc\xdf Gott!', 'iso-8859-1')]) + long = 'test-ja \xa4\xd8\xc5\xea\xb9\xc6\xa4\xb5\xa4\xec\xa4\xbf\xa5\xe1\xa1\xbc\xa5\xeb\xa4\xcf\xbb\xca\xb2\xf1\xbc\xd4\xa4\xce\xbe\xb5\xc7\xa7\xa4\xf2\xc2\xd4\xa4\xc3\xa4\xc6\xa4\xa4\xa4\xde\xa4\xb9' + h = Header(long, j, header_name="Subject") + # test a very long header + enc = h.encode() + eq(enc, '=?iso-2022-jp?b?dGVzdC1qYSAbJEIkWEVqOUYkNSRsJD8lYRsoQg==?=\n =?iso-2022-jp?b?GyRCITwlayRPO0oycTxUJE4+NRsoQg==?=\n =?iso-2022-jp?b?GyRCRyckckJUJEMkRiQkJF4kORsoQg==?=') + eq(decode_header(enc), [("test-ja \x1b$B$XEj9F$5$l$?%a\x1b(B\x1b$B!<%k$O;J2q<T$N>5\x1b(B\x1b$BG'$rBT$C$F$$$^$9\x1b(B", 'iso-2022-jp')]) + + + +def suite(): + suite = unittest.TestSuite() + suite.addTest(unittest.makeSuite(TestEmailAsianCodecs)) + return suite + + + +if __name__ == '__main__': + unittest.main(defaultTest='suite') |