diff options
Diffstat (limited to 'Lib/email')
68 files changed, 4951 insertions, 7032 deletions
diff --git a/Lib/email/_encoded_words.py b/Lib/email/_encoded_words.py new file mode 100644 index 0000000..e9f6e20 --- /dev/null +++ b/Lib/email/_encoded_words.py @@ -0,0 +1,221 @@ +""" Routines for manipulating RFC2047 encoded words. + +This is currently a package-private API, but will be considered for promotion +to a public API if there is demand. + +""" + +# An ecoded word looks like this: +# +# =?charset[*lang]?cte?encoded_string?= +# +# for more information about charset see the charset module. Here it is one +# of the preferred MIME charset names (hopefully; you never know when parsing). +# cte (Content Transfer Encoding) is either 'q' or 'b' (ignoring case). In +# theory other letters could be used for other encodings, but in practice this +# (almost?) never happens. There could be a public API for adding entries +# to to the CTE tables, but YAGNI for now. 'q' is Quoted Printable, 'b' is +# Base64. The meaning of encoded_string should be obvious. 'lang' is optional +# as indicated by the brackets (they are not part of the syntax) but is almost +# never encountered in practice. +# +# The general interface for a CTE decoder is that it takes the encoded_string +# as its argument, and returns a tuple (cte_decoded_string, defects). The +# cte_decoded_string is the original binary that was encoded using the +# specified cte. 'defects' is a list of MessageDefect instances indicating any +# problems encountered during conversion. 'charset' and 'lang' are the +# corresponding strings extracted from the EW, case preserved. +# +# The general interface for a CTE encoder is that it takes a binary sequence +# as input and returns the cte_encoded_string, which is an ascii-only string. +# +# Each decoder must also supply a length function that takes the binary +# sequence as its argument and returns the length of the resulting encoded +# string. +# +# The main API functions for the module are decode, which calls the decoder +# referenced by the cte specifier, and encode, which adds the appropriate +# RFC 2047 "chrome" to the encoded string, and can optionally automatically +# select the shortest possible encoding. See their docstrings below for +# details. + +import re +import base64 +import binascii +import functools +from string import ascii_letters, digits +from email import errors + +__all__ = ['decode_q', + 'encode_q', + 'decode_b', + 'encode_b', + 'len_q', + 'len_b', + 'decode', + 'encode', + ] + +# +# Quoted Printable +# + +# regex based decoder. +_q_byte_subber = functools.partial(re.compile(br'=([a-fA-F0-9]{2})').sub, + lambda m: bytes([int(m.group(1), 16)])) + +def decode_q(encoded): + encoded = encoded.replace(b'_', b' ') + return _q_byte_subber(encoded), [] + + +# dict mapping bytes to their encoded form +class _QByteMap(dict): + + safe = b'-!*+/' + ascii_letters.encode('ascii') + digits.encode('ascii') + + def __missing__(self, key): + if key in self.safe: + self[key] = chr(key) + else: + self[key] = "={:02X}".format(key) + return self[key] + +_q_byte_map = _QByteMap() + +# In headers spaces are mapped to '_'. +_q_byte_map[ord(' ')] = '_' + +def encode_q(bstring): + return ''.join(_q_byte_map[x] for x in bstring) + +def len_q(bstring): + return sum(len(_q_byte_map[x]) for x in bstring) + + +# +# Base64 +# + +def decode_b(encoded): + defects = [] + pad_err = len(encoded) % 4 + if pad_err: + defects.append(errors.InvalidBase64PaddingDefect()) + padded_encoded = encoded + b'==='[:4-pad_err] + else: + padded_encoded = encoded + try: + return base64.b64decode(padded_encoded, validate=True), defects + except binascii.Error: + # Since we had correct padding, this must an invalid char error. + defects = [errors.InvalidBase64CharactersDefect()] + # The non-alphabet characters are ignored as far as padding + # goes, but we don't know how many there are. So we'll just + # try various padding lengths until something works. + for i in 0, 1, 2, 3: + try: + return base64.b64decode(encoded+b'='*i, validate=False), defects + except binascii.Error: + if i==0: + defects.append(errors.InvalidBase64PaddingDefect()) + else: + # This should never happen. + raise AssertionError("unexpected binascii.Error") + +def encode_b(bstring): + return base64.b64encode(bstring).decode('ascii') + +def len_b(bstring): + groups_of_3, leftover = divmod(len(bstring), 3) + # 4 bytes out for each 3 bytes (or nonzero fraction thereof) in. + return groups_of_3 * 4 + (4 if leftover else 0) + + +_cte_decoders = { + 'q': decode_q, + 'b': decode_b, + } + +def decode(ew): + """Decode encoded word and return (string, charset, lang, defects) tuple. + + An RFC 2047/2243 encoded word has the form: + + =?charset*lang?cte?encoded_string?= + + where '*lang' may be omitted but the other parts may not be. + + This function expects exactly such a string (that is, it does not check the + syntax and may raise errors if the string is not well formed), and returns + the encoded_string decoded first from its Content Transfer Encoding and + then from the resulting bytes into unicode using the specified charset. If + the cte-decoded string does not successfully decode using the specified + character set, a defect is added to the defects list and the unknown octets + are replaced by the unicode 'unknown' character \uFDFF. + + The specified charset and language are returned. The default for language, + which is rarely if ever encountered, is the empty string. + + """ + _, charset, cte, cte_string, _ = ew.split('?') + charset, _, lang = charset.partition('*') + cte = cte.lower() + # Recover the original bytes and do CTE decoding. + bstring = cte_string.encode('ascii', 'surrogateescape') + bstring, defects = _cte_decoders[cte](bstring) + # Turn the CTE decoded bytes into unicode. + try: + string = bstring.decode(charset) + except UnicodeError: + defects.append(errors.UndecodableBytesDefect("Encoded word " + "contains bytes not decodable using {} charset".format(charset))) + string = bstring.decode(charset, 'surrogateescape') + except LookupError: + string = bstring.decode('ascii', 'surrogateescape') + if charset.lower() != 'unknown-8bit': + defects.append(errors.CharsetError("Unknown charset {} " + "in encoded word; decoded as unknown bytes".format(charset))) + return string, charset, lang, defects + + +_cte_encoders = { + 'q': encode_q, + 'b': encode_b, + } + +_cte_encode_length = { + 'q': len_q, + 'b': len_b, + } + +def encode(string, charset='utf-8', encoding=None, lang=''): + """Encode string using the CTE encoding that produces the shorter result. + + Produces an RFC 2047/2243 encoded word of the form: + + =?charset*lang?cte?encoded_string?= + + where '*lang' is omitted unless the 'lang' parameter is given a value. + Optional argument charset (defaults to utf-8) specifies the charset to use + to encode the string to binary before CTE encoding it. Optional argument + 'encoding' is the cte specifier for the encoding that should be used ('q' + or 'b'); if it is None (the default) the encoding which produces the + shortest encoded sequence is used, except that 'q' is preferred if it is up + to five characters longer. Optional argument 'lang' (default '') gives the + RFC 2243 language string to specify in the encoded word. + + """ + if charset == 'unknown-8bit': + bstring = string.encode('ascii', 'surrogateescape') + else: + bstring = string.encode(charset) + if encoding is None: + qlen = _cte_encode_length['q'](bstring) + blen = _cte_encode_length['b'](bstring) + # Bias toward q. 5 is arbitrary. + encoding = 'q' if qlen - blen < 5 else 'b' + encoded = _cte_encoders[encoding](bstring) + if lang: + lang = '*' + lang + return "=?{}{}?{}?{}?=".format(charset, lang, encoding, encoded) diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py new file mode 100644 index 0000000..1924ed1 --- /dev/null +++ b/Lib/email/_header_value_parser.py @@ -0,0 +1,2953 @@ +"""Header value parser implementing various email-related RFC parsing rules. + +The parsing methods defined in this module implement various email related +parsing rules. Principal among them is RFC 5322, which is the followon +to RFC 2822 and primarily a clarification of the former. It also implements +RFC 2047 encoded word decoding. + +RFC 5322 goes to considerable trouble to maintain backward compatibility with +RFC 822 in the parse phase, while cleaning up the structure on the generation +phase. This parser supports correct RFC 5322 generation by tagging white space +as folding white space only when folding is allowed in the non-obsolete rule +sets. Actually, the parser is even more generous when accepting input than RFC +5322 mandates, following the spirit of Postel's Law, which RFC 5322 encourages. +Where possible deviations from the standard are annotated on the 'defects' +attribute of tokens that deviate. + +The general structure of the parser follows RFC 5322, and uses its terminology +where there is a direct correspondence. Where the implementation requires a +somewhat different structure than that used by the formal grammar, new terms +that mimic the closest existing terms are used. Thus, it really helps to have +a copy of RFC 5322 handy when studying this code. + +Input to the parser is a string that has already been unfolded according to +RFC 5322 rules. According to the RFC this unfolding is the very first step, and +this parser leaves the unfolding step to a higher level message parser, which +will have already detected the line breaks that need unfolding while +determining the beginning and end of each header. + +The output of the parser is a TokenList object, which is a list subclass. A +TokenList is a recursive data structure. The terminal nodes of the structure +are Terminal objects, which are subclasses of str. These do not correspond +directly to terminal objects in the formal grammar, but are instead more +practical higher level combinations of true terminals. + +All TokenList and Terminal objects have a 'value' attribute, which produces the +semantically meaningful value of that part of the parse subtree. The value of +all whitespace tokens (no matter how many sub-tokens they may contain) is a +single space, as per the RFC rules. This includes 'CFWS', which is herein +included in the general class of whitespace tokens. There is one exception to +the rule that whitespace tokens are collapsed into single spaces in values: in +the value of a 'bare-quoted-string' (a quoted-string with no leading or +trailing whitespace), any whitespace that appeared between the quotation marks +is preserved in the returned value. Note that in all Terminal strings quoted +pairs are turned into their unquoted values. + +All TokenList and Terminal objects also have a string value, which attempts to +be a "canonical" representation of the RFC-compliant form of the substring that +produced the parsed subtree, including minimal use of quoted pair quoting. +Whitespace runs are not collapsed. + +Comment tokens also have a 'content' attribute providing the string found +between the parens (including any nested comments) with whitespace preserved. + +All TokenList and Terminal objects have a 'defects' attribute which is a +possibly empty list all of the defects found while creating the token. Defects +may appear on any token in the tree, and a composite list of all defects in the +subtree is available through the 'all_defects' attribute of any node. (For +Terminal notes x.defects == x.all_defects.) + +Each object in a parse tree is called a 'token', and each has a 'token_type' +attribute that gives the name from the RFC 5322 grammar that it represents. +Not all RFC 5322 nodes are produced, and there is one non-RFC 5322 node that +may be produced: 'ptext'. A 'ptext' is a string of printable ascii characters. +It is returned in place of lists of (ctext/quoted-pair) and +(qtext/quoted-pair). + +XXX: provide complete list of token types. +""" + +import re +import urllib # For urllib.parse.unquote +from collections import namedtuple, OrderedDict +from email import _encoded_words as _ew +from email import errors +from email import utils + +# +# Useful constants and functions +# + +WSP = set(' \t') +CFWS_LEADER = WSP | set('(') +SPECIALS = set(r'()<>@,:;.\"[]') +ATOM_ENDS = SPECIALS | WSP +DOT_ATOM_ENDS = ATOM_ENDS - set('.') +# '.', '"', and '(' do not end phrases in order to support obs-phrase +PHRASE_ENDS = SPECIALS - set('."(') +TSPECIALS = (SPECIALS | set('/?=')) - set('.') +TOKEN_ENDS = TSPECIALS | WSP +ASPECIALS = TSPECIALS | set("*'%") +ATTRIBUTE_ENDS = ASPECIALS | WSP +EXTENDED_ATTRIBUTE_ENDS = ATTRIBUTE_ENDS - set('%') + +def quote_string(value): + return '"'+str(value).replace('\\', '\\\\').replace('"', r'\"')+'"' + +# +# Accumulator for header folding +# + +class _Folded: + + def __init__(self, maxlen, policy): + self.maxlen = maxlen + self.policy = policy + self.lastlen = 0 + self.stickyspace = None + self.firstline = True + self.done = [] + self.current = [] + + def newline(self): + self.done.extend(self.current) + self.done.append(self.policy.linesep) + self.current.clear() + self.lastlen = 0 + + def finalize(self): + if self.current: + self.newline() + + def __str__(self): + return ''.join(self.done) + + def append(self, stoken): + self.current.append(stoken) + + def append_if_fits(self, token, stoken=None): + if stoken is None: + stoken = str(token) + l = len(stoken) + if self.stickyspace is not None: + stickyspace_len = len(self.stickyspace) + if self.lastlen + stickyspace_len + l <= self.maxlen: + self.current.append(self.stickyspace) + self.lastlen += stickyspace_len + self.current.append(stoken) + self.lastlen += l + self.stickyspace = None + self.firstline = False + return True + if token.has_fws: + ws = token.pop_leading_fws() + if ws is not None: + self.stickyspace += str(ws) + stickyspace_len += len(ws) + token._fold(self) + return True + if stickyspace_len and l + 1 <= self.maxlen: + margin = self.maxlen - l + if 0 < margin < stickyspace_len: + trim = stickyspace_len - margin + self.current.append(self.stickyspace[:trim]) + self.stickyspace = self.stickyspace[trim:] + stickyspace_len = trim + self.newline() + self.current.append(self.stickyspace) + self.current.append(stoken) + self.lastlen = l + stickyspace_len + self.stickyspace = None + self.firstline = False + return True + if not self.firstline: + self.newline() + self.current.append(self.stickyspace) + self.current.append(stoken) + self.stickyspace = None + self.firstline = False + return True + if self.lastlen + l <= self.maxlen: + self.current.append(stoken) + self.lastlen += l + return True + if l < self.maxlen: + self.newline() + self.current.append(stoken) + self.lastlen = l + return True + return False + +# +# TokenList and its subclasses +# + +class TokenList(list): + + token_type = None + + def __init__(self, *args, **kw): + super().__init__(*args, **kw) + self.defects = [] + + def __str__(self): + return ''.join(str(x) for x in self) + + def __repr__(self): + return '{}({})'.format(self.__class__.__name__, + super().__repr__()) + + @property + def value(self): + return ''.join(x.value for x in self if x.value) + + @property + def all_defects(self): + return sum((x.all_defects for x in self), self.defects) + + # + # Folding API + # + # parts(): + # + # return a list of objects that constitute the "higher level syntactic + # objects" specified by the RFC as the best places to fold a header line. + # The returned objects must include leading folding white space, even if + # this means mutating the underlying parse tree of the object. Each object + # is only responsible for returning *its* parts, and should not drill down + # to any lower level except as required to meet the leading folding white + # space constraint. + # + # _fold(folded): + # + # folded: the result accumulator. This is an instance of _Folded. + # (XXX: I haven't finished factoring this out yet, the folding code + # pretty much uses this as a state object.) When the folded.current + # contains as much text as will fit, the _fold method should call + # folded.newline. + # folded.lastlen: the current length of the test stored in folded.current. + # folded.maxlen: The maximum number of characters that may appear on a + # folded line. Differs from the policy setting in that "no limit" is + # represented by +inf, which means it can be used in the trivially + # logical fashion in comparisons. + # + # Currently no subclasses implement parts, and I think this will remain + # true. A subclass only needs to implement _fold when the generic version + # isn't sufficient. _fold will need to be implemented primarily when it is + # possible for encoded words to appear in the specialized token-list, since + # there is no generic algorithm that can know where exactly the encoded + # words are allowed. A _fold implementation is responsible for filling + # lines in the same general way that the top level _fold does. It may, and + # should, call the _fold method of sub-objects in a similar fashion to that + # of the top level _fold. + # + # XXX: I'm hoping it will be possible to factor the existing code further + # to reduce redundancy and make the logic clearer. + + @property + def parts(self): + klass = self.__class__ + this = [] + for token in self: + if token.startswith_fws(): + if this: + yield this[0] if len(this)==1 else klass(this) + this.clear() + end_ws = token.pop_trailing_ws() + this.append(token) + if end_ws: + yield klass(this) + this = [end_ws] + if this: + yield this[0] if len(this)==1 else klass(this) + + def startswith_fws(self): + return self[0].startswith_fws() + + def pop_leading_fws(self): + if self[0].token_type == 'fws': + return self.pop(0) + return self[0].pop_leading_fws() + + def pop_trailing_ws(self): + if self[-1].token_type == 'cfws': + return self.pop(-1) + return self[-1].pop_trailing_ws() + + @property + def has_fws(self): + for part in self: + if part.has_fws: + return True + return False + + def has_leading_comment(self): + return self[0].has_leading_comment() + + @property + def comments(self): + comments = [] + for token in self: + comments.extend(token.comments) + return comments + + def fold(self, *, policy): + # max_line_length 0/None means no limit, ie: infinitely long. + maxlen = policy.max_line_length or float("+inf") + folded = _Folded(maxlen, policy) + self._fold(folded) + folded.finalize() + return str(folded) + + def as_encoded_word(self, charset): + # This works only for things returned by 'parts', which include + # the leading fws, if any, that should be used. + res = [] + ws = self.pop_leading_fws() + if ws: + res.append(ws) + trailer = self.pop(-1) if self[-1].token_type=='fws' else '' + res.append(_ew.encode(str(self), charset)) + res.append(trailer) + return ''.join(res) + + def cte_encode(self, charset, policy): + res = [] + for part in self: + res.append(part.cte_encode(charset, policy)) + return ''.join(res) + + def _fold(self, folded): + for part in self.parts: + tstr = str(part) + tlen = len(tstr) + try: + str(part).encode('us-ascii') + except UnicodeEncodeError: + if any(isinstance(x, errors.UndecodableBytesDefect) + for x in part.all_defects): + charset = 'unknown-8bit' + else: + # XXX: this should be a policy setting + charset = 'utf-8' + tstr = part.cte_encode(charset, folded.policy) + tlen = len(tstr) + if folded.append_if_fits(part, tstr): + continue + # Peel off the leading whitespace if any and make it sticky, to + # avoid infinite recursion. + ws = part.pop_leading_fws() + if ws is not None: + # Peel off the leading whitespace and make it sticky, to + # avoid infinite recursion. + folded.stickyspace = str(part.pop(0)) + if folded.append_if_fits(part): + continue + if part.has_fws: + part._fold(folded) + continue + # There are no fold points in this one; it is too long for a single + # line and can't be split...we just have to put it on its own line. + folded.append(tstr) + folded.newline() + + def pprint(self, indent=''): + print('\n'.join(self._pp(indent=''))) + + def ppstr(self, indent=''): + return '\n'.join(self._pp(indent='')) + + def _pp(self, indent=''): + yield '{}{}/{}('.format( + indent, + self.__class__.__name__, + self.token_type) + for token in self: + if not hasattr(token, '_pp'): + yield (indent + ' !! invalid element in token ' + 'list: {!r}'.format(token)) + else: + for line in token._pp(indent+' '): + yield line + if self.defects: + extra = ' Defects: {}'.format(self.defects) + else: + extra = '' + yield '{}){}'.format(indent, extra) + + +class WhiteSpaceTokenList(TokenList): + + @property + def value(self): + return ' ' + + @property + def comments(self): + return [x.content for x in self if x.token_type=='comment'] + + +class UnstructuredTokenList(TokenList): + + token_type = 'unstructured' + + def _fold(self, folded): + if any(x.token_type=='encoded-word' for x in self): + return self._fold_encoded(folded) + # Here we can have either a pure ASCII string that may or may not + # have surrogateescape encoded bytes, or a unicode string. + last_ew = None + for part in self.parts: + tstr = str(part) + is_ew = False + try: + str(part).encode('us-ascii') + except UnicodeEncodeError: + if any(isinstance(x, errors.UndecodableBytesDefect) + for x in part.all_defects): + charset = 'unknown-8bit' + else: + charset = 'utf-8' + if last_ew is not None: + # We've already done an EW, combine this one with it + # if there's room. + chunk = get_unstructured( + ''.join(folded.current[last_ew:]+[tstr])).as_encoded_word(charset) + oldlastlen = sum(len(x) for x in folded.current[:last_ew]) + schunk = str(chunk) + lchunk = len(schunk) + if oldlastlen + lchunk <= folded.maxlen: + del folded.current[last_ew:] + folded.append(schunk) + folded.lastlen = oldlastlen + lchunk + continue + tstr = part.as_encoded_word(charset) + is_ew = True + if folded.append_if_fits(part, tstr): + if is_ew: + last_ew = len(folded.current) - 1 + continue + if is_ew or last_ew: + # It's too big to fit on the line, but since we've + # got encoded words we can use encoded word folding. + part._fold_as_ew(folded) + continue + # Peel off the leading whitespace if any and make it sticky, to + # avoid infinite recursion. + ws = part.pop_leading_fws() + if ws is not None: + folded.stickyspace = str(ws) + if folded.append_if_fits(part): + continue + if part.has_fws: + part.fold(folded) + continue + # It can't be split...we just have to put it on its own line. + folded.append(tstr) + folded.newline() + last_ew = None + + def cte_encode(self, charset, policy): + res = [] + last_ew = None + for part in self: + spart = str(part) + try: + spart.encode('us-ascii') + res.append(spart) + except UnicodeEncodeError: + if last_ew is None: + res.append(part.cte_encode(charset, policy)) + last_ew = len(res) + else: + tl = get_unstructured(''.join(res[last_ew:] + [spart])) + res.append(tl.as_encoded_word()) + return ''.join(res) + + +class Phrase(TokenList): + + token_type = 'phrase' + + def _fold(self, folded): + # As with Unstructured, we can have pure ASCII with or without + # surrogateescape encoded bytes, or we could have unicode. But this + # case is more complicated, since we have to deal with the various + # sub-token types and how they can be composed in the face of + # unicode-that-needs-CTE-encoding, and the fact that if a token a + # comment that becomes a barrier across which we can't compose encoded + # words. + last_ew = None + for part in self.parts: + tstr = str(part) + tlen = len(tstr) + has_ew = False + try: + str(part).encode('us-ascii') + except UnicodeEncodeError: + if any(isinstance(x, errors.UndecodableBytesDefect) + for x in part.all_defects): + charset = 'unknown-8bit' + else: + charset = 'utf-8' + if last_ew is not None and not part.has_leading_comment(): + # We've already done an EW, let's see if we can combine + # this one with it. The last_ew logic ensures that all we + # have at this point is atoms, no comments or quoted + # strings. So we can treat the text between the last + # encoded word and the content of this token as + # unstructured text, and things will work correctly. But + # we have to strip off any trailing comment on this token + # first, and if it is a quoted string we have to pull out + # the content (we're encoding it, so it no longer needs to + # be quoted). + if part[-1].token_type == 'cfws' and part.comments: + remainder = part.pop(-1) + else: + remainder = '' + for i, token in enumerate(part): + if token.token_type == 'bare-quoted-string': + part[i] = UnstructuredTokenList(token[:]) + chunk = get_unstructured( + ''.join(folded.current[last_ew:]+[tstr])).as_encoded_word(charset) + schunk = str(chunk) + lchunk = len(schunk) + if last_ew + lchunk <= folded.maxlen: + del folded.current[last_ew:] + folded.append(schunk) + folded.lastlen = sum(len(x) for x in folded.current) + continue + tstr = part.as_encoded_word(charset) + tlen = len(tstr) + has_ew = True + if folded.append_if_fits(part, tstr): + if has_ew and not part.comments: + last_ew = len(folded.current) - 1 + elif part.comments or part.token_type == 'quoted-string': + # If a comment is involved we can't combine EWs. And if a + # quoted string is involved, it's not worth the effort to + # try to combine them. + last_ew = None + continue + part._fold(folded) + + def cte_encode(self, charset, policy): + res = [] + last_ew = None + is_ew = False + for part in self: + spart = str(part) + try: + spart.encode('us-ascii') + res.append(spart) + except UnicodeEncodeError: + is_ew = True + if last_ew is None: + if not part.comments: + last_ew = len(res) + res.append(part.cte_encode(charset, policy)) + elif not part.has_leading_comment(): + if part[-1].token_type == 'cfws' and part.comments: + remainder = part.pop(-1) + else: + remainder = '' + for i, token in enumerate(part): + if token.token_type == 'bare-quoted-string': + part[i] = UnstructuredTokenList(token[:]) + tl = get_unstructured(''.join(res[last_ew:] + [spart])) + res[last_ew:] = [tl.as_encoded_word(charset)] + if part.comments or (not is_ew and part.token_type == 'quoted-string'): + last_ew = None + return ''.join(res) + +class Word(TokenList): + + token_type = 'word' + + +class CFWSList(WhiteSpaceTokenList): + + token_type = 'cfws' + + def has_leading_comment(self): + return bool(self.comments) + + +class Atom(TokenList): + + token_type = 'atom' + + +class Token(TokenList): + + token_type = 'token' + + +class EncodedWord(TokenList): + + token_type = 'encoded-word' + cte = None + charset = None + lang = None + + @property + def encoded(self): + if self.cte is not None: + return self.cte + _ew.encode(str(self), self.charset) + + + +class QuotedString(TokenList): + + token_type = 'quoted-string' + + @property + def content(self): + for x in self: + if x.token_type == 'bare-quoted-string': + return x.value + + @property + def quoted_value(self): + res = [] + for x in self: + if x.token_type == 'bare-quoted-string': + res.append(str(x)) + else: + res.append(x.value) + return ''.join(res) + + @property + def stripped_value(self): + for token in self: + if token.token_type == 'bare-quoted-string': + return token.value + + +class BareQuotedString(QuotedString): + + token_type = 'bare-quoted-string' + + def __str__(self): + return quote_string(''.join(str(x) for x in self)) + + @property + def value(self): + return ''.join(str(x) for x in self) + + +class Comment(WhiteSpaceTokenList): + + token_type = 'comment' + + def __str__(self): + return ''.join(sum([ + ["("], + [self.quote(x) for x in self], + [")"], + ], [])) + + def quote(self, value): + if value.token_type == 'comment': + return str(value) + return str(value).replace('\\', '\\\\').replace( + '(', '\(').replace( + ')', '\)') + + @property + def content(self): + return ''.join(str(x) for x in self) + + @property + def comments(self): + return [self.content] + +class AddressList(TokenList): + + token_type = 'address-list' + + @property + def addresses(self): + return [x for x in self if x.token_type=='address'] + + @property + def mailboxes(self): + return sum((x.mailboxes + for x in self if x.token_type=='address'), []) + + @property + def all_mailboxes(self): + return sum((x.all_mailboxes + for x in self if x.token_type=='address'), []) + + +class Address(TokenList): + + token_type = 'address' + + @property + def display_name(self): + if self[0].token_type == 'group': + return self[0].display_name + + @property + def mailboxes(self): + if self[0].token_type == 'mailbox': + return [self[0]] + elif self[0].token_type == 'invalid-mailbox': + return [] + return self[0].mailboxes + + @property + def all_mailboxes(self): + if self[0].token_type == 'mailbox': + return [self[0]] + elif self[0].token_type == 'invalid-mailbox': + return [self[0]] + return self[0].all_mailboxes + +class MailboxList(TokenList): + + token_type = 'mailbox-list' + + @property + def mailboxes(self): + return [x for x in self if x.token_type=='mailbox'] + + @property + def all_mailboxes(self): + return [x for x in self + if x.token_type in ('mailbox', 'invalid-mailbox')] + + +class GroupList(TokenList): + + token_type = 'group-list' + + @property + def mailboxes(self): + if not self or self[0].token_type != 'mailbox-list': + return [] + return self[0].mailboxes + + @property + def all_mailboxes(self): + if not self or self[0].token_type != 'mailbox-list': + return [] + return self[0].all_mailboxes + + +class Group(TokenList): + + token_type = "group" + + @property + def mailboxes(self): + if self[2].token_type != 'group-list': + return [] + return self[2].mailboxes + + @property + def all_mailboxes(self): + if self[2].token_type != 'group-list': + return [] + return self[2].all_mailboxes + + @property + def display_name(self): + return self[0].display_name + + +class NameAddr(TokenList): + + token_type = 'name-addr' + + @property + def display_name(self): + if len(self) == 1: + return None + return self[0].display_name + + @property + def local_part(self): + return self[-1].local_part + + @property + def domain(self): + return self[-1].domain + + @property + def route(self): + return self[-1].route + + @property + def addr_spec(self): + return self[-1].addr_spec + + +class AngleAddr(TokenList): + + token_type = 'angle-addr' + + @property + def local_part(self): + for x in self: + if x.token_type == 'addr-spec': + return x.local_part + + @property + def domain(self): + for x in self: + if x.token_type == 'addr-spec': + return x.domain + + @property + def route(self): + for x in self: + if x.token_type == 'obs-route': + return x.domains + + @property + def addr_spec(self): + for x in self: + if x.token_type == 'addr-spec': + return x.addr_spec + else: + return '<>' + + +class ObsRoute(TokenList): + + token_type = 'obs-route' + + @property + def domains(self): + return [x.domain for x in self if x.token_type == 'domain'] + + +class Mailbox(TokenList): + + token_type = 'mailbox' + + @property + def display_name(self): + if self[0].token_type == 'name-addr': + return self[0].display_name + + @property + def local_part(self): + return self[0].local_part + + @property + def domain(self): + return self[0].domain + + @property + def route(self): + if self[0].token_type == 'name-addr': + return self[0].route + + @property + def addr_spec(self): + return self[0].addr_spec + + +class InvalidMailbox(TokenList): + + token_type = 'invalid-mailbox' + + @property + def display_name(self): + return None + + local_part = domain = route = addr_spec = display_name + + +class Domain(TokenList): + + token_type = 'domain' + + @property + def domain(self): + return ''.join(super().value.split()) + + +class DotAtom(TokenList): + + token_type = 'dot-atom' + + +class DotAtomText(TokenList): + + token_type = 'dot-atom-text' + + +class AddrSpec(TokenList): + + token_type = 'addr-spec' + + @property + def local_part(self): + return self[0].local_part + + @property + def domain(self): + if len(self) < 3: + return None + return self[-1].domain + + @property + def value(self): + if len(self) < 3: + return self[0].value + return self[0].value.rstrip()+self[1].value+self[2].value.lstrip() + + @property + def addr_spec(self): + nameset = set(self.local_part) + if len(nameset) > len(nameset-DOT_ATOM_ENDS): + lp = quote_string(self.local_part) + else: + lp = self.local_part + if self.domain is not None: + return lp + '@' + self.domain + return lp + + +class ObsLocalPart(TokenList): + + token_type = 'obs-local-part' + + +class DisplayName(Phrase): + + token_type = 'display-name' + + @property + def display_name(self): + res = TokenList(self) + if res[0].token_type == 'cfws': + res.pop(0) + else: + if res[0][0].token_type == 'cfws': + res[0] = TokenList(res[0][1:]) + if res[-1].token_type == 'cfws': + res.pop() + else: + if res[-1][-1].token_type == 'cfws': + res[-1] = TokenList(res[-1][:-1]) + return res.value + + @property + def value(self): + quote = False + if self.defects: + quote = True + else: + for x in self: + if x.token_type == 'quoted-string': + quote = True + if quote: + pre = post = '' + if self[0].token_type=='cfws' or self[0][0].token_type=='cfws': + pre = ' ' + if self[-1].token_type=='cfws' or self[-1][-1].token_type=='cfws': + post = ' ' + return pre+quote_string(self.display_name)+post + else: + return super().value + + +class LocalPart(TokenList): + + token_type = 'local-part' + + @property + def value(self): + if self[0].token_type == "quoted-string": + return self[0].quoted_value + else: + return self[0].value + + @property + def local_part(self): + # Strip whitespace from front, back, and around dots. + res = [DOT] + last = DOT + last_is_tl = False + for tok in self[0] + [DOT]: + if tok.token_type == 'cfws': + continue + if (last_is_tl and tok.token_type == 'dot' and + last[-1].token_type == 'cfws'): + res[-1] = TokenList(last[:-1]) + is_tl = isinstance(tok, TokenList) + if (is_tl and last.token_type == 'dot' and + tok[0].token_type == 'cfws'): + res.append(TokenList(tok[1:])) + else: + res.append(tok) + last = res[-1] + last_is_tl = is_tl + res = TokenList(res[1:-1]) + return res.value + + +class DomainLiteral(TokenList): + + token_type = 'domain-literal' + + @property + def domain(self): + return ''.join(super().value.split()) + + @property + def ip(self): + for x in self: + if x.token_type == 'ptext': + return x.value + + +class MIMEVersion(TokenList): + + token_type = 'mime-version' + major = None + minor = None + + +class Parameter(TokenList): + + token_type = 'parameter' + sectioned = False + extended = False + charset = 'us-ascii' + + @property + def section_number(self): + # Because the first token, the attribute (name) eats CFWS, the second + # token is always the section if there is one. + return self[1].number if self.sectioned else 0 + + @property + def param_value(self): + # This is part of the "handle quoted extended parameters" hack. + for token in self: + if token.token_type == 'value': + return token.stripped_value + if token.token_type == 'quoted-string': + for token in token: + if token.token_type == 'bare-quoted-string': + for token in token: + if token.token_type == 'value': + return token.stripped_value + return '' + + +class InvalidParameter(Parameter): + + token_type = 'invalid-parameter' + + +class Attribute(TokenList): + + token_type = 'attribute' + + @property + def stripped_value(self): + for token in self: + if token.token_type.endswith('attrtext'): + return token.value + +class Section(TokenList): + + token_type = 'section' + number = None + + +class Value(TokenList): + + token_type = 'value' + + @property + def stripped_value(self): + token = self[0] + if token.token_type == 'cfws': + token = self[1] + if token.token_type.endswith( + ('quoted-string', 'attribute', 'extended-attribute')): + return token.stripped_value + return self.value + + +class MimeParameters(TokenList): + + token_type = 'mime-parameters' + + @property + def params(self): + # The RFC specifically states that the ordering of parameters is not + # guaranteed and may be reordered by the transport layer. So we have + # to assume the RFC 2231 pieces can come in any order. However, we + # output them in the order that we first see a given name, which gives + # us a stable __str__. + params = OrderedDict() + for token in self: + if not token.token_type.endswith('parameter'): + continue + if token[0].token_type != 'attribute': + continue + name = token[0].value.strip() + if name not in params: + params[name] = [] + params[name].append((token.section_number, token)) + for name, parts in params.items(): + parts = sorted(parts) + # XXX: there might be more recovery we could do here if, for + # example, this is really a case of a duplicate attribute name. + value_parts = [] + charset = parts[0][1].charset + for i, (section_number, param) in enumerate(parts): + if section_number != i: + param.defects.append(errors.InvalidHeaderDefect( + "inconsistent multipart parameter numbering")) + value = param.param_value + if param.extended: + try: + value = urllib.parse.unquote_to_bytes(value) + except UnicodeEncodeError: + # source had surrogate escaped bytes. What we do now + # is a bit of an open question. I'm not sure this is + # the best choice, but it is what the old algorithm did + value = urllib.parse.unquote(value, encoding='latin-1') + else: + try: + value = value.decode(charset, 'surrogateescape') + except LookupError: + # XXX: there should really be a custom defect for + # unknown character set to make it easy to find, + # because otherwise unknown charset is a silent + # failure. + value = value.decode('us-ascii', 'surrogateescape') + if utils._has_surrogates(value): + param.defects.append(errors.UndecodableBytesDefect()) + value_parts.append(value) + value = ''.join(value_parts) + yield name, value + + def __str__(self): + params = [] + for name, value in self.params: + if value: + params.append('{}={}'.format(name, quote_string(value))) + else: + params.append(name) + params = '; '.join(params) + return ' ' + params if params else '' + + +class ParameterizedHeaderValue(TokenList): + + @property + def params(self): + for token in reversed(self): + if token.token_type == 'mime-parameters': + return token.params + return {} + + @property + def parts(self): + if self and self[-1].token_type == 'mime-parameters': + # We don't want to start a new line if all of the params don't fit + # after the value, so unwrap the parameter list. + return TokenList(self[:-1] + self[-1]) + return TokenList(self).parts + + +class ContentType(ParameterizedHeaderValue): + + token_type = 'content-type' + maintype = 'text' + subtype = 'plain' + + +class ContentDisposition(ParameterizedHeaderValue): + + token_type = 'content-disposition' + content_disposition = None + + +class ContentTransferEncoding(TokenList): + + token_type = 'content-transfer-encoding' + cte = '7bit' + + +class HeaderLabel(TokenList): + + token_type = 'header-label' + + +class Header(TokenList): + + token_type = 'header' + + def _fold(self, folded): + folded.append(str(self.pop(0))) + folded.lastlen = len(folded.current[0]) + # The first line of the header is different from all others: we don't + # want to start a new object on a new line if it has any fold points in + # it that would allow part of it to be on the first header line. + # Further, if the first fold point would fit on the new line, we want + # to do that, but if it doesn't we want to put it on the first line. + # Folded supports this via the stickyspace attribute. If this + # attribute is not None, it does the special handling. + folded.stickyspace = str(self.pop(0)) if self[0].token_type == 'cfws' else '' + rest = self.pop(0) + if self: + raise ValueError("Malformed Header token list") + rest._fold(folded) + + +# +# Terminal classes and instances +# + +class Terminal(str): + + def __new__(cls, value, token_type): + self = super().__new__(cls, value) + self.token_type = token_type + self.defects = [] + return self + + def __repr__(self): + return "{}({})".format(self.__class__.__name__, super().__repr__()) + + @property + def all_defects(self): + return list(self.defects) + + def _pp(self, indent=''): + return ["{}{}/{}({}){}".format( + indent, + self.__class__.__name__, + self.token_type, + super().__repr__(), + '' if not self.defects else ' {}'.format(self.defects), + )] + + def cte_encode(self, charset, policy): + value = str(self) + try: + value.encode('us-ascii') + return value + except UnicodeEncodeError: + return _ew.encode(value, charset) + + def pop_trailing_ws(self): + # This terminates the recursion. + return None + + def pop_leading_fws(self): + # This terminates the recursion. + return None + + @property + def comments(self): + return [] + + def has_leading_comment(self): + return False + + def __getnewargs__(self): + return(str(self), self.token_type) + + +class WhiteSpaceTerminal(Terminal): + + @property + def value(self): + return ' ' + + def startswith_fws(self): + return True + + has_fws = True + + +class ValueTerminal(Terminal): + + @property + def value(self): + return self + + def startswith_fws(self): + return False + + has_fws = False + + def as_encoded_word(self, charset): + return _ew.encode(str(self), charset) + + +class EWWhiteSpaceTerminal(WhiteSpaceTerminal): + + @property + def value(self): + return '' + + @property + def encoded(self): + return self[:] + + def __str__(self): + return '' + + has_fws = True + + +# XXX these need to become classes and used as instances so +# that a program can't change them in a parse tree and screw +# up other parse trees. Maybe should have tests for that, too. +DOT = ValueTerminal('.', 'dot') +ListSeparator = ValueTerminal(',', 'list-separator') +RouteComponentMarker = ValueTerminal('@', 'route-component-marker') + +# +# Parser +# + +"""Parse strings according to RFC822/2047/2822/5322 rules. + +This is a stateless parser. Each get_XXX function accepts a string and +returns either a Terminal or a TokenList representing the RFC object named +by the method and a string containing the remaining unparsed characters +from the input. Thus a parser method consumes the next syntactic construct +of a given type and returns a token representing the construct plus the +unparsed remainder of the input string. + +For example, if the first element of a structured header is a 'phrase', +then: + + phrase, value = get_phrase(value) + +returns the complete phrase from the start of the string value, plus any +characters left in the string after the phrase is removed. + +""" + +_wsp_splitter = re.compile(r'([{}]+)'.format(''.join(WSP))).split +_non_atom_end_matcher = re.compile(r"[^{}]+".format( + ''.join(ATOM_ENDS).replace('\\','\\\\').replace(']','\]'))).match +_non_printable_finder = re.compile(r"[\x00-\x20\x7F]").findall +_non_token_end_matcher = re.compile(r"[^{}]+".format( + ''.join(TOKEN_ENDS).replace('\\','\\\\').replace(']','\]'))).match +_non_attribute_end_matcher = re.compile(r"[^{}]+".format( + ''.join(ATTRIBUTE_ENDS).replace('\\','\\\\').replace(']','\]'))).match +_non_extended_attribute_end_matcher = re.compile(r"[^{}]+".format( + ''.join(EXTENDED_ATTRIBUTE_ENDS).replace( + '\\','\\\\').replace(']','\]'))).match + +def _validate_xtext(xtext): + """If input token contains ASCII non-printables, register a defect.""" + + non_printables = _non_printable_finder(xtext) + if non_printables: + xtext.defects.append(errors.NonPrintableDefect(non_printables)) + if utils._has_surrogates(xtext): + xtext.defects.append(errors.UndecodableBytesDefect( + "Non-ASCII characters found in header token")) + +def _get_ptext_to_endchars(value, endchars): + """Scan printables/quoted-pairs until endchars and return unquoted ptext. + + This function turns a run of qcontent, ccontent-without-comments, or + dtext-with-quoted-printables into a single string by unquoting any + quoted printables. It returns the string, the remaining value, and + a flag that is True iff there were any quoted printables decoded. + + """ + fragment, *remainder = _wsp_splitter(value, 1) + vchars = [] + escape = False + had_qp = False + for pos in range(len(fragment)): + if fragment[pos] == '\\': + if escape: + escape = False + had_qp = True + else: + escape = True + continue + if escape: + escape = False + elif fragment[pos] in endchars: + break + vchars.append(fragment[pos]) + else: + pos = pos + 1 + return ''.join(vchars), ''.join([fragment[pos:]] + remainder), had_qp + +def _decode_ew_run(value): + """ Decode a run of RFC2047 encoded words. + + _decode_ew_run(value) -> (text, value, defects) + + Scans the supplied value for a run of tokens that look like they are RFC + 2047 encoded words, decodes those words into text according to RFC 2047 + rules (whitespace between encoded words is discarded), and returns the text + and the remaining value (including any leading whitespace on the remaining + value), as well as a list of any defects encountered while decoding. The + input value may not have any leading whitespace. + + """ + res = [] + defects = [] + last_ws = '' + while value: + try: + tok, ws, value = _wsp_splitter(value, 1) + except ValueError: + tok, ws, value = value, '', '' + if not (tok.startswith('=?') and tok.endswith('?=')): + return ''.join(res), last_ws + tok + ws + value, defects + text, charset, lang, new_defects = _ew.decode(tok) + res.append(text) + defects.extend(new_defects) + last_ws = ws + return ''.join(res), last_ws, defects + +def get_fws(value): + """FWS = 1*WSP + + This isn't the RFC definition. We're using fws to represent tokens where + folding can be done, but when we are parsing the *un*folding has already + been done so we don't need to watch out for CRLF. + + """ + newvalue = value.lstrip() + fws = WhiteSpaceTerminal(value[:len(value)-len(newvalue)], 'fws') + return fws, newvalue + +def get_encoded_word(value): + """ encoded-word = "=?" charset "?" encoding "?" encoded-text "?=" + + """ + ew = EncodedWord() + if not value.startswith('=?'): + raise errors.HeaderParseError( + "expected encoded word but found {}".format(value)) + tok, *remainder = value[2:].split('?=', 1) + if tok == value[2:]: + raise errors.HeaderParseError( + "expected encoded word but found {}".format(value)) + remstr = ''.join(remainder) + if remstr[:2].isdigit(): + rest, *remainder = remstr.split('?=', 1) + tok = tok + '?=' + rest + if len(tok.split()) > 1: + ew.defects.append(errors.InvalidHeaderDefect( + "whitespace inside encoded word")) + ew.cte = value + value = ''.join(remainder) + try: + text, charset, lang, defects = _ew.decode('=?' + tok + '?=') + except ValueError: + raise errors.HeaderParseError( + "encoded word format invalid: '{}'".format(ew.cte)) + ew.charset = charset + ew.lang = lang + ew.defects.extend(defects) + while text: + if text[0] in WSP: + token, text = get_fws(text) + ew.append(token) + continue + chars, *remainder = _wsp_splitter(text, 1) + vtext = ValueTerminal(chars, 'vtext') + _validate_xtext(vtext) + ew.append(vtext) + text = ''.join(remainder) + return ew, value + +def get_unstructured(value): + """unstructured = (*([FWS] vchar) *WSP) / obs-unstruct + obs-unstruct = *((*LF *CR *(obs-utext) *LF *CR)) / FWS) + obs-utext = %d0 / obs-NO-WS-CTL / LF / CR + + obs-NO-WS-CTL is control characters except WSP/CR/LF. + + So, basically, we have printable runs, plus control characters or nulls in + the obsolete syntax, separated by whitespace. Since RFC 2047 uses the + obsolete syntax in its specification, but requires whitespace on either + side of the encoded words, I can see no reason to need to separate the + non-printable-non-whitespace from the printable runs if they occur, so we + parse this into xtext tokens separated by WSP tokens. + + Because an 'unstructured' value must by definition constitute the entire + value, this 'get' routine does not return a remaining value, only the + parsed TokenList. + + """ + # XXX: but what about bare CR and LF? They might signal the start or + # end of an encoded word. YAGNI for now, since out current parsers + # will never send us strings with bard CR or LF. + + unstructured = UnstructuredTokenList() + while value: + if value[0] in WSP: + token, value = get_fws(value) + unstructured.append(token) + continue + if value.startswith('=?'): + try: + token, value = get_encoded_word(value) + except errors.HeaderParseError: + pass + else: + have_ws = True + if len(unstructured) > 0: + if unstructured[-1].token_type != 'fws': + unstructured.defects.append(errors.InvalidHeaderDefect( + "missing whitespace before encoded word")) + have_ws = False + if have_ws and len(unstructured) > 1: + if unstructured[-2].token_type == 'encoded-word': + unstructured[-1] = EWWhiteSpaceTerminal( + unstructured[-1], 'fws') + unstructured.append(token) + continue + tok, *remainder = _wsp_splitter(value, 1) + vtext = ValueTerminal(tok, 'vtext') + _validate_xtext(vtext) + unstructured.append(vtext) + value = ''.join(remainder) + return unstructured + +def get_qp_ctext(value): + """ctext = <printable ascii except \ ( )> + + This is not the RFC ctext, since we are handling nested comments in comment + and unquoting quoted-pairs here. We allow anything except the '()' + characters, but if we find any ASCII other than the RFC defined printable + ASCII an NonPrintableDefect is added to the token's defects list. Since + quoted pairs are converted to their unquoted values, what is returned is + a 'ptext' token. In this case it is a WhiteSpaceTerminal, so it's value + is ' '. + + """ + ptext, value, _ = _get_ptext_to_endchars(value, '()') + ptext = WhiteSpaceTerminal(ptext, 'ptext') + _validate_xtext(ptext) + return ptext, value + +def get_qcontent(value): + """qcontent = qtext / quoted-pair + + We allow anything except the DQUOTE character, but if we find any ASCII + other than the RFC defined printable ASCII an NonPrintableDefect is + added to the token's defects list. Any quoted pairs are converted to their + unquoted values, so what is returned is a 'ptext' token. In this case it + is a ValueTerminal. + + """ + ptext, value, _ = _get_ptext_to_endchars(value, '"') + ptext = ValueTerminal(ptext, 'ptext') + _validate_xtext(ptext) + return ptext, value + +def get_atext(value): + """atext = <matches _atext_matcher> + + We allow any non-ATOM_ENDS in atext, but add an InvalidATextDefect to + the token's defects list if we find non-atext characters. + """ + m = _non_atom_end_matcher(value) + if not m: + raise errors.HeaderParseError( + "expected atext but found '{}'".format(value)) + atext = m.group() + value = value[len(atext):] + atext = ValueTerminal(atext, 'atext') + _validate_xtext(atext) + return atext, value + +def get_bare_quoted_string(value): + """bare-quoted-string = DQUOTE *([FWS] qcontent) [FWS] DQUOTE + + A quoted-string without the leading or trailing white space. Its + value is the text between the quote marks, with whitespace + preserved and quoted pairs decoded. + """ + if value[0] != '"': + raise errors.HeaderParseError( + "expected '\"' but found '{}'".format(value)) + bare_quoted_string = BareQuotedString() + value = value[1:] + while value and value[0] != '"': + if value[0] in WSP: + token, value = get_fws(value) + else: + token, value = get_qcontent(value) + bare_quoted_string.append(token) + if not value: + bare_quoted_string.defects.append(errors.InvalidHeaderDefect( + "end of header inside quoted string")) + return bare_quoted_string, value + return bare_quoted_string, value[1:] + +def get_comment(value): + """comment = "(" *([FWS] ccontent) [FWS] ")" + ccontent = ctext / quoted-pair / comment + + We handle nested comments here, and quoted-pair in our qp-ctext routine. + """ + if value and value[0] != '(': + raise errors.HeaderParseError( + "expected '(' but found '{}'".format(value)) + comment = Comment() + value = value[1:] + while value and value[0] != ")": + if value[0] in WSP: + token, value = get_fws(value) + elif value[0] == '(': + token, value = get_comment(value) + else: + token, value = get_qp_ctext(value) + comment.append(token) + if not value: + comment.defects.append(errors.InvalidHeaderDefect( + "end of header inside comment")) + return comment, value + return comment, value[1:] + +def get_cfws(value): + """CFWS = (1*([FWS] comment) [FWS]) / FWS + + """ + cfws = CFWSList() + while value and value[0] in CFWS_LEADER: + if value[0] in WSP: + token, value = get_fws(value) + else: + token, value = get_comment(value) + cfws.append(token) + return cfws, value + +def get_quoted_string(value): + """quoted-string = [CFWS] <bare-quoted-string> [CFWS] + + 'bare-quoted-string' is an intermediate class defined by this + parser and not by the RFC grammar. It is the quoted string + without any attached CFWS. + """ + quoted_string = QuotedString() + if value and value[0] in CFWS_LEADER: + token, value = get_cfws(value) + quoted_string.append(token) + token, value = get_bare_quoted_string(value) + quoted_string.append(token) + if value and value[0] in CFWS_LEADER: + token, value = get_cfws(value) + quoted_string.append(token) + return quoted_string, value + +def get_atom(value): + """atom = [CFWS] 1*atext [CFWS] + + """ + atom = Atom() + if value and value[0] in CFWS_LEADER: + token, value = get_cfws(value) + atom.append(token) + if value and value[0] in ATOM_ENDS: + raise errors.HeaderParseError( + "expected atom but found '{}'".format(value)) + token, value = get_atext(value) + atom.append(token) + if value and value[0] in CFWS_LEADER: + token, value = get_cfws(value) + atom.append(token) + return atom, value + +def get_dot_atom_text(value): + """ dot-text = 1*atext *("." 1*atext) + + """ + dot_atom_text = DotAtomText() + if not value or value[0] in ATOM_ENDS: + raise errors.HeaderParseError("expected atom at a start of " + "dot-atom-text but found '{}'".format(value)) + while value and value[0] not in ATOM_ENDS: + token, value = get_atext(value) + dot_atom_text.append(token) + if value and value[0] == '.': + dot_atom_text.append(DOT) + value = value[1:] + if dot_atom_text[-1] is DOT: + raise errors.HeaderParseError("expected atom at end of dot-atom-text " + "but found '{}'".format('.'+value)) + return dot_atom_text, value + +def get_dot_atom(value): + """ dot-atom = [CFWS] dot-atom-text [CFWS] + + """ + dot_atom = DotAtom() + if value[0] in CFWS_LEADER: + token, value = get_cfws(value) + dot_atom.append(token) + token, value = get_dot_atom_text(value) + dot_atom.append(token) + if value and value[0] in CFWS_LEADER: + token, value = get_cfws(value) + dot_atom.append(token) + return dot_atom, value + +def get_word(value): + """word = atom / quoted-string + + Either atom or quoted-string may start with CFWS. We have to peel off this + CFWS first to determine which type of word to parse. Afterward we splice + the leading CFWS, if any, into the parsed sub-token. + + If neither an atom or a quoted-string is found before the next special, a + HeaderParseError is raised. + + The token returned is either an Atom or a QuotedString, as appropriate. + This means the 'word' level of the formal grammar is not represented in the + parse tree; this is because having that extra layer when manipulating the + parse tree is more confusing than it is helpful. + + """ + if value[0] in CFWS_LEADER: + leader, value = get_cfws(value) + else: + leader = None + if value[0]=='"': + token, value = get_quoted_string(value) + elif value[0] in SPECIALS: + raise errors.HeaderParseError("Expected 'atom' or 'quoted-string' " + "but found '{}'".format(value)) + else: + token, value = get_atom(value) + if leader is not None: + token[:0] = [leader] + return token, value + +def get_phrase(value): + """ phrase = 1*word / obs-phrase + obs-phrase = word *(word / "." / CFWS) + + This means a phrase can be a sequence of words, periods, and CFWS in any + order as long as it starts with at least one word. If anything other than + words is detected, an ObsoleteHeaderDefect is added to the token's defect + list. We also accept a phrase that starts with CFWS followed by a dot; + this is registered as an InvalidHeaderDefect, since it is not supported by + even the obsolete grammar. + + """ + phrase = Phrase() + try: + token, value = get_word(value) + phrase.append(token) + except errors.HeaderParseError: + phrase.defects.append(errors.InvalidHeaderDefect( + "phrase does not start with word")) + while value and value[0] not in PHRASE_ENDS: + if value[0]=='.': + phrase.append(DOT) + phrase.defects.append(errors.ObsoleteHeaderDefect( + "period in 'phrase'")) + value = value[1:] + else: + try: + token, value = get_word(value) + except errors.HeaderParseError: + if value[0] in CFWS_LEADER: + token, value = get_cfws(value) + phrase.defects.append(errors.ObsoleteHeaderDefect( + "comment found without atom")) + else: + raise + phrase.append(token) + return phrase, value + +def get_local_part(value): + """ local-part = dot-atom / quoted-string / obs-local-part + + """ + local_part = LocalPart() + leader = None + if value[0] in CFWS_LEADER: + leader, value = get_cfws(value) + if not value: + raise errors.HeaderParseError( + "expected local-part but found '{}'".format(value)) + try: + token, value = get_dot_atom(value) + except errors.HeaderParseError: + try: + token, value = get_word(value) + except errors.HeaderParseError: + if value[0] != '\\' and value[0] in PHRASE_ENDS: + raise + token = TokenList() + if leader is not None: + token[:0] = [leader] + local_part.append(token) + if value and (value[0]=='\\' or value[0] not in PHRASE_ENDS): + obs_local_part, value = get_obs_local_part(str(local_part) + value) + if obs_local_part.token_type == 'invalid-obs-local-part': + local_part.defects.append(errors.InvalidHeaderDefect( + "local-part is not dot-atom, quoted-string, or obs-local-part")) + else: + local_part.defects.append(errors.ObsoleteHeaderDefect( + "local-part is not a dot-atom (contains CFWS)")) + local_part[0] = obs_local_part + try: + local_part.value.encode('ascii') + except UnicodeEncodeError: + local_part.defects.append(errors.NonASCIILocalPartDefect( + "local-part contains non-ASCII characters)")) + return local_part, value + +def get_obs_local_part(value): + """ obs-local-part = word *("." word) + """ + obs_local_part = ObsLocalPart() + last_non_ws_was_dot = False + while value and (value[0]=='\\' or value[0] not in PHRASE_ENDS): + if value[0] == '.': + if last_non_ws_was_dot: + obs_local_part.defects.append(errors.InvalidHeaderDefect( + "invalid repeated '.'")) + obs_local_part.append(DOT) + last_non_ws_was_dot = True + value = value[1:] + continue + elif value[0]=='\\': + obs_local_part.append(ValueTerminal(value[0], + 'misplaced-special')) + value = value[1:] + obs_local_part.defects.append(errors.InvalidHeaderDefect( + "'\\' character outside of quoted-string/ccontent")) + last_non_ws_was_dot = False + continue + if obs_local_part and obs_local_part[-1].token_type != 'dot': + obs_local_part.defects.append(errors.InvalidHeaderDefect( + "missing '.' between words")) + try: + token, value = get_word(value) + last_non_ws_was_dot = False + except errors.HeaderParseError: + if value[0] not in CFWS_LEADER: + raise + token, value = get_cfws(value) + obs_local_part.append(token) + if (obs_local_part[0].token_type == 'dot' or + obs_local_part[0].token_type=='cfws' and + obs_local_part[1].token_type=='dot'): + obs_local_part.defects.append(errors.InvalidHeaderDefect( + "Invalid leading '.' in local part")) + if (obs_local_part[-1].token_type == 'dot' or + obs_local_part[-1].token_type=='cfws' and + obs_local_part[-2].token_type=='dot'): + obs_local_part.defects.append(errors.InvalidHeaderDefect( + "Invalid trailing '.' in local part")) + if obs_local_part.defects: + obs_local_part.token_type = 'invalid-obs-local-part' + return obs_local_part, value + +def get_dtext(value): + """ dtext = <printable ascii except \ [ ]> / obs-dtext + obs-dtext = obs-NO-WS-CTL / quoted-pair + + We allow anything except the excluded characters, but but if we find any + ASCII other than the RFC defined printable ASCII an NonPrintableDefect is + added to the token's defects list. Quoted pairs are converted to their + unquoted values, so what is returned is a ptext token, in this case a + ValueTerminal. If there were quoted-printables, an ObsoleteHeaderDefect is + added to the returned token's defect list. + + """ + ptext, value, had_qp = _get_ptext_to_endchars(value, '[]') + ptext = ValueTerminal(ptext, 'ptext') + if had_qp: + ptext.defects.append(errors.ObsoleteHeaderDefect( + "quoted printable found in domain-literal")) + _validate_xtext(ptext) + return ptext, value + +def _check_for_early_dl_end(value, domain_literal): + if value: + return False + domain_literal.append(errors.InvalidHeaderDefect( + "end of input inside domain-literal")) + domain_literal.append(ValueTerminal(']', 'domain-literal-end')) + return True + +def get_domain_literal(value): + """ domain-literal = [CFWS] "[" *([FWS] dtext) [FWS] "]" [CFWS] + + """ + domain_literal = DomainLiteral() + if value[0] in CFWS_LEADER: + token, value = get_cfws(value) + domain_literal.append(token) + if not value: + raise errors.HeaderParseError("expected domain-literal") + if value[0] != '[': + raise errors.HeaderParseError("expected '[' at start of domain-literal " + "but found '{}'".format(value)) + value = value[1:] + if _check_for_early_dl_end(value, domain_literal): + return domain_literal, value + domain_literal.append(ValueTerminal('[', 'domain-literal-start')) + if value[0] in WSP: + token, value = get_fws(value) + domain_literal.append(token) + token, value = get_dtext(value) + domain_literal.append(token) + if _check_for_early_dl_end(value, domain_literal): + return domain_literal, value + if value[0] in WSP: + token, value = get_fws(value) + domain_literal.append(token) + if _check_for_early_dl_end(value, domain_literal): + return domain_literal, value + if value[0] != ']': + raise errors.HeaderParseError("expected ']' at end of domain-literal " + "but found '{}'".format(value)) + domain_literal.append(ValueTerminal(']', 'domain-literal-end')) + value = value[1:] + if value and value[0] in CFWS_LEADER: + token, value = get_cfws(value) + domain_literal.append(token) + return domain_literal, value + +def get_domain(value): + """ domain = dot-atom / domain-literal / obs-domain + obs-domain = atom *("." atom)) + + """ + domain = Domain() + leader = None + if value[0] in CFWS_LEADER: + leader, value = get_cfws(value) + if not value: + raise errors.HeaderParseError( + "expected domain but found '{}'".format(value)) + if value[0] == '[': + token, value = get_domain_literal(value) + if leader is not None: + token[:0] = [leader] + domain.append(token) + return domain, value + try: + token, value = get_dot_atom(value) + except errors.HeaderParseError: + token, value = get_atom(value) + if leader is not None: + token[:0] = [leader] + domain.append(token) + if value and value[0] == '.': + domain.defects.append(errors.ObsoleteHeaderDefect( + "domain is not a dot-atom (contains CFWS)")) + if domain[0].token_type == 'dot-atom': + domain[:] = domain[0] + while value and value[0] == '.': + domain.append(DOT) + token, value = get_atom(value[1:]) + domain.append(token) + return domain, value + +def get_addr_spec(value): + """ addr-spec = local-part "@" domain + + """ + addr_spec = AddrSpec() + token, value = get_local_part(value) + addr_spec.append(token) + if not value or value[0] != '@': + addr_spec.defects.append(errors.InvalidHeaderDefect( + "add-spec local part with no domain")) + return addr_spec, value + addr_spec.append(ValueTerminal('@', 'address-at-symbol')) + token, value = get_domain(value[1:]) + addr_spec.append(token) + return addr_spec, value + +def get_obs_route(value): + """ obs-route = obs-domain-list ":" + obs-domain-list = *(CFWS / ",") "@" domain *("," [CFWS] ["@" domain]) + + Returns an obs-route token with the appropriate sub-tokens (that is, + there is no obs-domain-list in the parse tree). + """ + obs_route = ObsRoute() + while value and (value[0]==',' or value[0] in CFWS_LEADER): + if value[0] in CFWS_LEADER: + token, value = get_cfws(value) + obs_route.append(token) + elif value[0] == ',': + obs_route.append(ListSeparator) + value = value[1:] + if not value or value[0] != '@': + raise errors.HeaderParseError( + "expected obs-route domain but found '{}'".format(value)) + obs_route.append(RouteComponentMarker) + token, value = get_domain(value[1:]) + obs_route.append(token) + while value and value[0]==',': + obs_route.append(ListSeparator) + value = value[1:] + if not value: + break + if value[0] in CFWS_LEADER: + token, value = get_cfws(value) + obs_route.append(token) + if value[0] == '@': + obs_route.append(RouteComponentMarker) + token, value = get_domain(value[1:]) + obs_route.append(token) + if not value: + raise errors.HeaderParseError("end of header while parsing obs-route") + if value[0] != ':': + raise errors.HeaderParseError( "expected ':' marking end of " + "obs-route but found '{}'".format(value)) + obs_route.append(ValueTerminal(':', 'end-of-obs-route-marker')) + return obs_route, value[1:] + +def get_angle_addr(value): + """ angle-addr = [CFWS] "<" addr-spec ">" [CFWS] / obs-angle-addr + obs-angle-addr = [CFWS] "<" obs-route addr-spec ">" [CFWS] + + """ + angle_addr = AngleAddr() + if value[0] in CFWS_LEADER: + token, value = get_cfws(value) + angle_addr.append(token) + if not value or value[0] != '<': + raise errors.HeaderParseError( + "expected angle-addr but found '{}'".format(value)) + angle_addr.append(ValueTerminal('<', 'angle-addr-start')) + value = value[1:] + # Although it is not legal per RFC5322, SMTP uses '<>' in certain + # circumstances. + if value[0] == '>': + angle_addr.append(ValueTerminal('>', 'angle-addr-end')) + angle_addr.defects.append(errors.InvalidHeaderDefect( + "null addr-spec in angle-addr")) + value = value[1:] + return angle_addr, value + try: + token, value = get_addr_spec(value) + except errors.HeaderParseError: + try: + token, value = get_obs_route(value) + angle_addr.defects.append(errors.ObsoleteHeaderDefect( + "obsolete route specification in angle-addr")) + except errors.HeaderParseError: + raise errors.HeaderParseError( + "expected addr-spec or obs-route but found '{}'".format(value)) + angle_addr.append(token) + token, value = get_addr_spec(value) + angle_addr.append(token) + if value and value[0] == '>': + value = value[1:] + else: + angle_addr.defects.append(errors.InvalidHeaderDefect( + "missing trailing '>' on angle-addr")) + angle_addr.append(ValueTerminal('>', 'angle-addr-end')) + if value and value[0] in CFWS_LEADER: + token, value = get_cfws(value) + angle_addr.append(token) + return angle_addr, value + +def get_display_name(value): + """ display-name = phrase + + Because this is simply a name-rule, we don't return a display-name + token containing a phrase, but rather a display-name token with + the content of the phrase. + + """ + display_name = DisplayName() + token, value = get_phrase(value) + display_name.extend(token[:]) + display_name.defects = token.defects[:] + return display_name, value + + +def get_name_addr(value): + """ name-addr = [display-name] angle-addr + + """ + name_addr = NameAddr() + # Both the optional display name and the angle-addr can start with cfws. + leader = None + if value[0] in CFWS_LEADER: + leader, value = get_cfws(value) + if not value: + raise errors.HeaderParseError( + "expected name-addr but found '{}'".format(leader)) + if value[0] != '<': + if value[0] in PHRASE_ENDS: + raise errors.HeaderParseError( + "expected name-addr but found '{}'".format(value)) + token, value = get_display_name(value) + if not value: + raise errors.HeaderParseError( + "expected name-addr but found '{}'".format(token)) + if leader is not None: + token[0][:0] = [leader] + leader = None + name_addr.append(token) + token, value = get_angle_addr(value) + if leader is not None: + token[:0] = [leader] + name_addr.append(token) + return name_addr, value + +def get_mailbox(value): + """ mailbox = name-addr / addr-spec + + """ + # The only way to figure out if we are dealing with a name-addr or an + # addr-spec is to try parsing each one. + mailbox = Mailbox() + try: + token, value = get_name_addr(value) + except errors.HeaderParseError: + try: + token, value = get_addr_spec(value) + except errors.HeaderParseError: + raise errors.HeaderParseError( + "expected mailbox but found '{}'".format(value)) + if any(isinstance(x, errors.InvalidHeaderDefect) + for x in token.all_defects): + mailbox.token_type = 'invalid-mailbox' + mailbox.append(token) + return mailbox, value + +def get_invalid_mailbox(value, endchars): + """ Read everything up to one of the chars in endchars. + + This is outside the formal grammar. The InvalidMailbox TokenList that is + returned acts like a Mailbox, but the data attributes are None. + + """ + invalid_mailbox = InvalidMailbox() + while value and value[0] not in endchars: + if value[0] in PHRASE_ENDS: + invalid_mailbox.append(ValueTerminal(value[0], + 'misplaced-special')) + value = value[1:] + else: + token, value = get_phrase(value) + invalid_mailbox.append(token) + return invalid_mailbox, value + +def get_mailbox_list(value): + """ mailbox-list = (mailbox *("," mailbox)) / obs-mbox-list + obs-mbox-list = *([CFWS] ",") mailbox *("," [mailbox / CFWS]) + + For this routine we go outside the formal grammar in order to improve error + handling. We recognize the end of the mailbox list only at the end of the + value or at a ';' (the group terminator). This is so that we can turn + invalid mailboxes into InvalidMailbox tokens and continue parsing any + remaining valid mailboxes. We also allow all mailbox entries to be null, + and this condition is handled appropriately at a higher level. + + """ + mailbox_list = MailboxList() + while value and value[0] != ';': + try: + token, value = get_mailbox(value) + mailbox_list.append(token) + except errors.HeaderParseError: + leader = None + if value[0] in CFWS_LEADER: + leader, value = get_cfws(value) + if not value or value[0] in ',;': + mailbox_list.append(leader) + mailbox_list.defects.append(errors.ObsoleteHeaderDefect( + "empty element in mailbox-list")) + else: + token, value = get_invalid_mailbox(value, ',;') + if leader is not None: + token[:0] = [leader] + mailbox_list.append(token) + mailbox_list.defects.append(errors.InvalidHeaderDefect( + "invalid mailbox in mailbox-list")) + elif value[0] == ',': + mailbox_list.defects.append(errors.ObsoleteHeaderDefect( + "empty element in mailbox-list")) + else: + token, value = get_invalid_mailbox(value, ',;') + if leader is not None: + token[:0] = [leader] + mailbox_list.append(token) + mailbox_list.defects.append(errors.InvalidHeaderDefect( + "invalid mailbox in mailbox-list")) + if value and value[0] not in ',;': + # Crap after mailbox; treat it as an invalid mailbox. + # The mailbox info will still be available. + mailbox = mailbox_list[-1] + mailbox.token_type = 'invalid-mailbox' + token, value = get_invalid_mailbox(value, ',;') + mailbox.extend(token) + mailbox_list.defects.append(errors.InvalidHeaderDefect( + "invalid mailbox in mailbox-list")) + if value and value[0] == ',': + mailbox_list.append(ListSeparator) + value = value[1:] + return mailbox_list, value + + +def get_group_list(value): + """ group-list = mailbox-list / CFWS / obs-group-list + obs-group-list = 1*([CFWS] ",") [CFWS] + + """ + group_list = GroupList() + if not value: + group_list.defects.append(errors.InvalidHeaderDefect( + "end of header before group-list")) + return group_list, value + leader = None + if value and value[0] in CFWS_LEADER: + leader, value = get_cfws(value) + if not value: + # This should never happen in email parsing, since CFWS-only is a + # legal alternative to group-list in a group, which is the only + # place group-list appears. + group_list.defects.append(errors.InvalidHeaderDefect( + "end of header in group-list")) + group_list.append(leader) + return group_list, value + if value[0] == ';': + group_list.append(leader) + return group_list, value + token, value = get_mailbox_list(value) + if len(token.all_mailboxes)==0: + if leader is not None: + group_list.append(leader) + group_list.extend(token) + group_list.defects.append(errors.ObsoleteHeaderDefect( + "group-list with empty entries")) + return group_list, value + if leader is not None: + token[:0] = [leader] + group_list.append(token) + return group_list, value + +def get_group(value): + """ group = display-name ":" [group-list] ";" [CFWS] + + """ + group = Group() + token, value = get_display_name(value) + if not value or value[0] != ':': + raise errors.HeaderParseError("expected ':' at end of group " + "display name but found '{}'".format(value)) + group.append(token) + group.append(ValueTerminal(':', 'group-display-name-terminator')) + value = value[1:] + if value and value[0] == ';': + group.append(ValueTerminal(';', 'group-terminator')) + return group, value[1:] + token, value = get_group_list(value) + group.append(token) + if not value: + group.defects.append(errors.InvalidHeaderDefect( + "end of header in group")) + if value[0] != ';': + raise errors.HeaderParseError( + "expected ';' at end of group but found {}".format(value)) + group.append(ValueTerminal(';', 'group-terminator')) + value = value[1:] + if value and value[0] in CFWS_LEADER: + token, value = get_cfws(value) + group.append(token) + return group, value + +def get_address(value): + """ address = mailbox / group + + Note that counter-intuitively, an address can be either a single address or + a list of addresses (a group). This is why the returned Address object has + a 'mailboxes' attribute which treats a single address as a list of length + one. When you need to differentiate between to two cases, extract the single + element, which is either a mailbox or a group token. + + """ + # The formal grammar isn't very helpful when parsing an address. mailbox + # and group, especially when allowing for obsolete forms, start off very + # similarly. It is only when you reach one of @, <, or : that you know + # what you've got. So, we try each one in turn, starting with the more + # likely of the two. We could perhaps make this more efficient by looking + # for a phrase and then branching based on the next character, but that + # would be a premature optimization. + address = Address() + try: + token, value = get_group(value) + except errors.HeaderParseError: + try: + token, value = get_mailbox(value) + except errors.HeaderParseError: + raise errors.HeaderParseError( + "expected address but found '{}'".format(value)) + address.append(token) + return address, value + +def get_address_list(value): + """ address_list = (address *("," address)) / obs-addr-list + obs-addr-list = *([CFWS] ",") address *("," [address / CFWS]) + + We depart from the formal grammar here by continuing to parse until the end + of the input, assuming the input to be entirely composed of an + address-list. This is always true in email parsing, and allows us + to skip invalid addresses to parse additional valid ones. + + """ + address_list = AddressList() + while value: + try: + token, value = get_address(value) + address_list.append(token) + except errors.HeaderParseError as err: + leader = None + if value[0] in CFWS_LEADER: + leader, value = get_cfws(value) + if not value or value[0] == ',': + address_list.append(leader) + address_list.defects.append(errors.ObsoleteHeaderDefect( + "address-list entry with no content")) + else: + token, value = get_invalid_mailbox(value, ',') + if leader is not None: + token[:0] = [leader] + address_list.append(Address([token])) + address_list.defects.append(errors.InvalidHeaderDefect( + "invalid address in address-list")) + elif value[0] == ',': + address_list.defects.append(errors.ObsoleteHeaderDefect( + "empty element in address-list")) + else: + token, value = get_invalid_mailbox(value, ',') + if leader is not None: + token[:0] = [leader] + address_list.append(Address([token])) + address_list.defects.append(errors.InvalidHeaderDefect( + "invalid address in address-list")) + if value and value[0] != ',': + # Crap after address; treat it as an invalid mailbox. + # The mailbox info will still be available. + mailbox = address_list[-1][0] + mailbox.token_type = 'invalid-mailbox' + token, value = get_invalid_mailbox(value, ',') + mailbox.extend(token) + address_list.defects.append(errors.InvalidHeaderDefect( + "invalid address in address-list")) + if value: # Must be a , at this point. + address_list.append(ValueTerminal(',', 'list-separator')) + value = value[1:] + return address_list, value + +# +# XXX: As I begin to add additional header parsers, I'm realizing we probably +# have two level of parser routines: the get_XXX methods that get a token in +# the grammar, and parse_XXX methods that parse an entire field value. So +# get_address_list above should really be a parse_ method, as probably should +# be get_unstructured. +# + +def parse_mime_version(value): + """ mime-version = [CFWS] 1*digit [CFWS] "." [CFWS] 1*digit [CFWS] + + """ + # The [CFWS] is implicit in the RFC 2045 BNF. + # XXX: This routine is a bit verbose, should factor out a get_int method. + mime_version = MIMEVersion() + if not value: + mime_version.defects.append(errors.HeaderMissingRequiredValue( + "Missing MIME version number (eg: 1.0)")) + return mime_version + if value[0] in CFWS_LEADER: + token, value = get_cfws(value) + mime_version.append(token) + if not value: + mime_version.defects.append(errors.HeaderMissingRequiredValue( + "Expected MIME version number but found only CFWS")) + digits = '' + while value and value[0] != '.' and value[0] not in CFWS_LEADER: + digits += value[0] + value = value[1:] + if not digits.isdigit(): + mime_version.defects.append(errors.InvalidHeaderDefect( + "Expected MIME major version number but found {!r}".format(digits))) + mime_version.append(ValueTerminal(digits, 'xtext')) + else: + mime_version.major = int(digits) + mime_version.append(ValueTerminal(digits, 'digits')) + if value and value[0] in CFWS_LEADER: + token, value = get_cfws(value) + mime_version.append(token) + if not value or value[0] != '.': + if mime_version.major is not None: + mime_version.defects.append(errors.InvalidHeaderDefect( + "Incomplete MIME version; found only major number")) + if value: + mime_version.append(ValueTerminal(value, 'xtext')) + return mime_version + mime_version.append(ValueTerminal('.', 'version-separator')) + value = value[1:] + if value and value[0] in CFWS_LEADER: + token, value = get_cfws(value) + mime_version.append(token) + if not value: + if mime_version.major is not None: + mime_version.defects.append(errors.InvalidHeaderDefect( + "Incomplete MIME version; found only major number")) + return mime_version + digits = '' + while value and value[0] not in CFWS_LEADER: + digits += value[0] + value = value[1:] + if not digits.isdigit(): + mime_version.defects.append(errors.InvalidHeaderDefect( + "Expected MIME minor version number but found {!r}".format(digits))) + mime_version.append(ValueTerminal(digits, 'xtext')) + else: + mime_version.minor = int(digits) + mime_version.append(ValueTerminal(digits, 'digits')) + if value and value[0] in CFWS_LEADER: + token, value = get_cfws(value) + mime_version.append(token) + if value: + mime_version.defects.append(errors.InvalidHeaderDefect( + "Excess non-CFWS text after MIME version")) + mime_version.append(ValueTerminal(value, 'xtext')) + return mime_version + +def get_invalid_parameter(value): + """ Read everything up to the next ';'. + + This is outside the formal grammar. The InvalidParameter TokenList that is + returned acts like a Parameter, but the data attributes are None. + + """ + invalid_parameter = InvalidParameter() + while value and value[0] != ';': + if value[0] in PHRASE_ENDS: + invalid_parameter.append(ValueTerminal(value[0], + 'misplaced-special')) + value = value[1:] + else: + token, value = get_phrase(value) + invalid_parameter.append(token) + return invalid_parameter, value + +def get_ttext(value): + """ttext = <matches _ttext_matcher> + + We allow any non-TOKEN_ENDS in ttext, but add defects to the token's + defects list if we find non-ttext characters. We also register defects for + *any* non-printables even though the RFC doesn't exclude all of them, + because we follow the spirit of RFC 5322. + + """ + m = _non_token_end_matcher(value) + if not m: + raise errors.HeaderParseError( + "expected ttext but found '{}'".format(value)) + ttext = m.group() + value = value[len(ttext):] + ttext = ValueTerminal(ttext, 'ttext') + _validate_xtext(ttext) + return ttext, value + +def get_token(value): + """token = [CFWS] 1*ttext [CFWS] + + The RFC equivalent of ttext is any US-ASCII chars except space, ctls, or + tspecials. We also exclude tabs even though the RFC doesn't. + + The RFC implies the CFWS but is not explicit about it in the BNF. + + """ + mtoken = Token() + if value and value[0] in CFWS_LEADER: + token, value = get_cfws(value) + mtoken.append(token) + if value and value[0] in TOKEN_ENDS: + raise errors.HeaderParseError( + "expected token but found '{}'".format(value)) + token, value = get_ttext(value) + mtoken.append(token) + if value and value[0] in CFWS_LEADER: + token, value = get_cfws(value) + mtoken.append(token) + return mtoken, value + +def get_attrtext(value): + """attrtext = 1*(any non-ATTRIBUTE_ENDS character) + + We allow any non-ATTRIBUTE_ENDS in attrtext, but add defects to the + token's defects list if we find non-attrtext characters. We also register + defects for *any* non-printables even though the RFC doesn't exclude all of + them, because we follow the spirit of RFC 5322. + + """ + m = _non_attribute_end_matcher(value) + if not m: + raise errors.HeaderParseError( + "expected attrtext but found {!r}".format(value)) + attrtext = m.group() + value = value[len(attrtext):] + attrtext = ValueTerminal(attrtext, 'attrtext') + _validate_xtext(attrtext) + return attrtext, value + +def get_attribute(value): + """ [CFWS] 1*attrtext [CFWS] + + This version of the BNF makes the CFWS explicit, and as usual we use a + value terminal for the actual run of characters. The RFC equivalent of + attrtext is the token characters, with the subtraction of '*', "'", and '%'. + We include tab in the excluded set just as we do for token. + + """ + attribute = Attribute() + if value and value[0] in CFWS_LEADER: + token, value = get_cfws(value) + attribute.append(token) + if value and value[0] in ATTRIBUTE_ENDS: + raise errors.HeaderParseError( + "expected token but found '{}'".format(value)) + token, value = get_attrtext(value) + attribute.append(token) + if value and value[0] in CFWS_LEADER: + token, value = get_cfws(value) + attribute.append(token) + return attribute, value + +def get_extended_attrtext(value): + """attrtext = 1*(any non-ATTRIBUTE_ENDS character plus '%') + + This is a special parsing routine so that we get a value that + includes % escapes as a single string (which we decode as a single + string later). + + """ + m = _non_extended_attribute_end_matcher(value) + if not m: + raise errors.HeaderParseError( + "expected extended attrtext but found {!r}".format(value)) + attrtext = m.group() + value = value[len(attrtext):] + attrtext = ValueTerminal(attrtext, 'extended-attrtext') + _validate_xtext(attrtext) + return attrtext, value + +def get_extended_attribute(value): + """ [CFWS] 1*extended_attrtext [CFWS] + + This is like the non-extended version except we allow % characters, so that + we can pick up an encoded value as a single string. + + """ + # XXX: should we have an ExtendedAttribute TokenList? + attribute = Attribute() + if value and value[0] in CFWS_LEADER: + token, value = get_cfws(value) + attribute.append(token) + if value and value[0] in EXTENDED_ATTRIBUTE_ENDS: + raise errors.HeaderParseError( + "expected token but found '{}'".format(value)) + token, value = get_extended_attrtext(value) + attribute.append(token) + if value and value[0] in CFWS_LEADER: + token, value = get_cfws(value) + attribute.append(token) + return attribute, value + +def get_section(value): + """ '*' digits + + The formal BNF is more complicated because leading 0s are not allowed. We + check for that and add a defect. We also assume no CFWS is allowed between + the '*' and the digits, though the RFC is not crystal clear on that. + The caller should already have dealt with leading CFWS. + + """ + section = Section() + if not value or value[0] != '*': + raise errors.HeaderParseError("Expected section but found {}".format( + value)) + section.append(ValueTerminal('*', 'section-marker')) + value = value[1:] + if not value or not value[0].isdigit(): + raise errors.HeaderParseError("Expected section number but " + "found {}".format(value)) + digits = '' + while value and value[0].isdigit(): + digits += value[0] + value = value[1:] + if digits[0] == '0' and digits != '0': + section.defects.append(errors.InvalidHeaderError("section number" + "has an invalid leading 0")) + section.number = int(digits) + section.append(ValueTerminal(digits, 'digits')) + return section, value + + +def get_value(value): + """ quoted-string / attribute + + """ + v = Value() + if not value: + raise errors.HeaderParseError("Expected value but found end of string") + leader = None + if value[0] in CFWS_LEADER: + leader, value = get_cfws(value) + if not value: + raise errors.HeaderParseError("Expected value but found " + "only {}".format(leader)) + if value[0] == '"': + token, value = get_quoted_string(value) + else: + token, value = get_extended_attribute(value) + if leader is not None: + token[:0] = [leader] + v.append(token) + return v, value + +def get_parameter(value): + """ attribute [section] ["*"] [CFWS] "=" value + + The CFWS is implied by the RFC but not made explicit in the BNF. This + simplified form of the BNF from the RFC is made to conform with the RFC BNF + through some extra checks. We do it this way because it makes both error + recovery and working with the resulting parse tree easier. + """ + # It is possible CFWS would also be implicitly allowed between the section + # and the 'extended-attribute' marker (the '*') , but we've never seen that + # in the wild and we will therefore ignore the possibility. + param = Parameter() + token, value = get_attribute(value) + param.append(token) + if not value or value[0] == ';': + param.defects.append(errors.InvalidHeaderDefect("Parameter contains " + "name ({}) but no value".format(token))) + return param, value + if value[0] == '*': + try: + token, value = get_section(value) + param.sectioned = True + param.append(token) + except errors.HeaderParseError: + pass + if not value: + raise errors.HeaderParseError("Incomplete parameter") + if value[0] == '*': + param.append(ValueTerminal('*', 'extended-parameter-marker')) + value = value[1:] + param.extended = True + if value[0] != '=': + raise errors.HeaderParseError("Parameter not followed by '='") + param.append(ValueTerminal('=', 'parameter-separator')) + value = value[1:] + leader = None + if value and value[0] in CFWS_LEADER: + token, value = get_cfws(value) + param.append(token) + remainder = None + appendto = param + if param.extended and value and value[0] == '"': + # Now for some serious hackery to handle the common invalid case of + # double quotes around an extended value. We also accept (with defect) + # a value marked as encoded that isn't really. + qstring, remainder = get_quoted_string(value) + inner_value = qstring.stripped_value + semi_valid = False + if param.section_number == 0: + if inner_value and inner_value[0] == "'": + semi_valid = True + else: + token, rest = get_attrtext(inner_value) + if rest and rest[0] == "'": + semi_valid = True + else: + try: + token, rest = get_extended_attrtext(inner_value) + except: + pass + else: + if not rest: + semi_valid = True + if semi_valid: + param.defects.append(errors.InvalidHeaderDefect( + "Quoted string value for extended parameter is invalid")) + param.append(qstring) + for t in qstring: + if t.token_type == 'bare-quoted-string': + t[:] = [] + appendto = t + break + value = inner_value + else: + remainder = None + param.defects.append(errors.InvalidHeaderDefect( + "Parameter marked as extended but appears to have a " + "quoted string value that is non-encoded")) + if value and value[0] == "'": + token = None + else: + token, value = get_value(value) + if not param.extended or param.section_number > 0: + if not value or value[0] != "'": + appendto.append(token) + if remainder is not None: + assert not value, value + value = remainder + return param, value + param.defects.append(errors.InvalidHeaderDefect( + "Apparent initial-extended-value but attribute " + "was not marked as extended or was not initial section")) + if not value: + # Assume the charset/lang is missing and the token is the value. + param.defects.append(errors.InvalidHeaderDefect( + "Missing required charset/lang delimiters")) + appendto.append(token) + if remainder is None: + return param, value + else: + if token is not None: + for t in token: + if t.token_type == 'extended-attrtext': + break + t.token_type == 'attrtext' + appendto.append(t) + param.charset = t.value + if value[0] != "'": + raise errors.HeaderParseError("Expected RFC2231 char/lang encoding " + "delimiter, but found {!r}".format(value)) + appendto.append(ValueTerminal("'", 'RFC2231 delimiter')) + value = value[1:] + if value and value[0] != "'": + token, value = get_attrtext(value) + appendto.append(token) + param.lang = token.value + if not value or value[0] != "'": + raise errors.HeaderParseError("Expected RFC2231 char/lang encoding " + "delimiter, but found {}".format(value)) + appendto.append(ValueTerminal("'", 'RFC2231 delimiter')) + value = value[1:] + if remainder is not None: + # Treat the rest of value as bare quoted string content. + v = Value() + while value: + if value[0] in WSP: + token, value = get_fws(value) + else: + token, value = get_qcontent(value) + v.append(token) + token = v + else: + token, value = get_value(value) + appendto.append(token) + if remainder is not None: + assert not value, value + value = remainder + return param, value + +def parse_mime_parameters(value): + """ parameter *( ";" parameter ) + + That BNF is meant to indicate this routine should only be called after + finding and handling the leading ';'. There is no corresponding rule in + the formal RFC grammar, but it is more convenient for us for the set of + parameters to be treated as its own TokenList. + + This is 'parse' routine because it consumes the reminaing value, but it + would never be called to parse a full header. Instead it is called to + parse everything after the non-parameter value of a specific MIME header. + + """ + mime_parameters = MimeParameters() + while value: + try: + token, value = get_parameter(value) + mime_parameters.append(token) + except errors.HeaderParseError as err: + leader = None + if value[0] in CFWS_LEADER: + leader, value = get_cfws(value) + if not value: + mime_parameters.append(leader) + return mime_parameters + if value[0] == ';': + if leader is not None: + mime_parameters.append(leader) + mime_parameters.defects.append(errors.InvalidHeaderDefect( + "parameter entry with no content")) + else: + token, value = get_invalid_parameter(value) + if leader: + token[:0] = [leader] + mime_parameters.append(token) + mime_parameters.defects.append(errors.InvalidHeaderDefect( + "invalid parameter {!r}".format(token))) + if value and value[0] != ';': + # Junk after the otherwise valid parameter. Mark it as + # invalid, but it will have a value. + param = mime_parameters[-1] + param.token_type = 'invalid-parameter' + token, value = get_invalid_parameter(value) + param.extend(token) + mime_parameters.defects.append(errors.InvalidHeaderDefect( + "parameter with invalid trailing text {!r}".format(token))) + if value: + # Must be a ';' at this point. + mime_parameters.append(ValueTerminal(';', 'parameter-separator')) + value = value[1:] + return mime_parameters + +def _find_mime_parameters(tokenlist, value): + """Do our best to find the parameters in an invalid MIME header + + """ + while value and value[0] != ';': + if value[0] in PHRASE_ENDS: + tokenlist.append(ValueTerminal(value[0], 'misplaced-special')) + value = value[1:] + else: + token, value = get_phrase(value) + tokenlist.append(token) + if not value: + return + tokenlist.append(ValueTerminal(';', 'parameter-separator')) + tokenlist.append(parse_mime_parameters(value[1:])) + +def parse_content_type_header(value): + """ maintype "/" subtype *( ";" parameter ) + + The maintype and substype are tokens. Theoretically they could + be checked against the official IANA list + x-token, but we + don't do that. + """ + ctype = ContentType() + recover = False + if not value: + ctype.defects.append(errors.HeaderMissingRequiredValue( + "Missing content type specification")) + return ctype + try: + token, value = get_token(value) + except errors.HeaderParseError: + ctype.defects.append(errors.InvalidHeaderDefect( + "Expected content maintype but found {!r}".format(value))) + _find_mime_parameters(ctype, value) + return ctype + ctype.append(token) + # XXX: If we really want to follow the formal grammer we should make + # mantype and subtype specialized TokenLists here. Probably not worth it. + if not value or value[0] != '/': + ctype.defects.append(errors.InvalidHeaderDefect( + "Invalid content type")) + if value: + _find_mime_parameters(ctype, value) + return ctype + ctype.maintype = token.value.strip().lower() + ctype.append(ValueTerminal('/', 'content-type-separator')) + value = value[1:] + try: + token, value = get_token(value) + except errors.HeaderParseError: + ctype.defects.append(errors.InvalidHeaderDefect( + "Expected content subtype but found {!r}".format(value))) + _find_mime_parameters(ctype, value) + return ctype + ctype.append(token) + ctype.subtype = token.value.strip().lower() + if not value: + return ctype + if value[0] != ';': + ctype.defects.append(errors.InvalidHeaderDefect( + "Only parameters are valid after content type, but " + "found {!r}".format(value))) + # The RFC requires that a syntactically invalid content-type be treated + # as text/plain. Perhaps we should postel this, but we should probably + # only do that if we were checking the subtype value against IANA. + del ctype.maintype, ctype.subtype + _find_mime_parameters(ctype, value) + return ctype + ctype.append(ValueTerminal(';', 'parameter-separator')) + ctype.append(parse_mime_parameters(value[1:])) + return ctype + +def parse_content_disposition_header(value): + """ disposition-type *( ";" parameter ) + + """ + disp_header = ContentDisposition() + if not value: + disp_header.defects.append(errors.HeaderMissingRequiredValue( + "Missing content disposition")) + return disp_header + try: + token, value = get_token(value) + except errors.HeaderParseError: + ctype.defects.append(errors.InvalidHeaderDefect( + "Expected content disposition but found {!r}".format(value))) + _find_mime_parameters(disp_header, value) + return disp_header + disp_header.append(token) + disp_header.content_disposition = token.value.strip().lower() + if not value: + return disp_header + if value[0] != ';': + disp_header.defects.append(errors.InvalidHeaderDefect( + "Only parameters are valid after content disposition, but " + "found {!r}".format(value))) + _find_mime_parameters(disp_header, value) + return disp_header + disp_header.append(ValueTerminal(';', 'parameter-separator')) + disp_header.append(parse_mime_parameters(value[1:])) + return disp_header + +def parse_content_transfer_encoding_header(value): + """ mechanism + + """ + # We should probably validate the values, since the list is fixed. + cte_header = ContentTransferEncoding() + if not value: + cte_header.defects.append(errors.HeaderMissingRequiredValue( + "Missing content transfer encoding")) + return cte_header + try: + token, value = get_token(value) + except errors.HeaderParseError: + ctype.defects.append(errors.InvalidHeaderDefect( + "Expected content trnasfer encoding but found {!r}".format(value))) + else: + cte_header.append(token) + cte_header.cte = token.value.strip().lower() + if not value: + return cte_header + while value: + cte_header.defects.append(errors.InvalidHeaderDefect( + "Extra text after content transfer encoding")) + if value[0] in PHRASE_ENDS: + cte_header.append(ValueTerminal(value[0], 'misplaced-special')) + value = value[1:] + else: + token, value = get_phrase(value) + cte_header.append(token) + return cte_header diff --git a/Lib/email/_parseaddr.py b/Lib/email/_parseaddr.py index 79573c6..cdfa372 100644 --- a/Lib/email/_parseaddr.py +++ b/Lib/email/_parseaddr.py @@ -47,6 +47,25 @@ def parsedate_tz(data): Accounts for military timezones. """ + res = _parsedate_tz(data) + if not res: + return + if res[9] is None: + res[9] = 0 + return tuple(res) + +def _parsedate_tz(data): + """Convert date to extended time tuple. + + The last (additional) element is the time zone offset in seconds, except if + the timezone was specified as -0000. In that case the last element is + None. This indicates a UTC timestamp that explicitly declaims knowledge of + the source timezone, as opposed to a +0000 timestamp that indicates the + source timezone really was UTC. + + """ + if not data: + return data = data.split() # The FWS after the comma after the day-of-week is optional, so search and # adjust for this. @@ -99,6 +118,14 @@ def parsedate_tz(data): tss = '0' elif len(tm) == 3: [thh, tmm, tss] = tm + elif len(tm) == 1 and '.' in tm[0]: + # Some non-compliant MUAs use '.' to separate time elements. + tm = tm[0].split('.') + if len(tm) == 2: + [thh, tmm] = tm + tss = 0 + elif len(tm) == 3: + [thh, tmm, tss] = tm else: return None try: @@ -130,6 +157,8 @@ def parsedate_tz(data): tzoffset = int(tz) except ValueError: pass + if tzoffset==0 and tz.startswith('-'): + tzoffset = None # Convert a timezone offset into seconds ; -0500 -> -18000 if tzoffset: if tzoffset < 0: @@ -139,7 +168,7 @@ def parsedate_tz(data): tzsign = 1 tzoffset = tzsign * ( (tzoffset//100)*3600 + (tzoffset % 100)*60) # Daylight Saving Time flag is set to -1, since DST is unknown. - return yy, mm, dd, thh, tmm, tss, 0, 1, -1, tzoffset + return [yy, mm, dd, thh, tmm, tss, 0, 1, -1, tzoffset] def parsedate(data): diff --git a/Lib/email/_policybase.py b/Lib/email/_policybase.py new file mode 100644 index 0000000..8106114 --- /dev/null +++ b/Lib/email/_policybase.py @@ -0,0 +1,358 @@ +"""Policy framework for the email package. + +Allows fine grained feature control of how the package parses and emits data. +""" + +import abc +from email import header +from email import charset as _charset +from email.utils import _has_surrogates + +__all__ = [ + 'Policy', + 'Compat32', + 'compat32', + ] + + +class _PolicyBase: + + """Policy Object basic framework. + + This class is useless unless subclassed. A subclass should define + class attributes with defaults for any values that are to be + managed by the Policy object. The constructor will then allow + non-default values to be set for these attributes at instance + creation time. The instance will be callable, taking these same + attributes keyword arguments, and returning a new instance + identical to the called instance except for those values changed + by the keyword arguments. Instances may be added, yielding new + instances with any non-default values from the right hand + operand overriding those in the left hand operand. That is, + + A + B == A(<non-default values of B>) + + The repr of an instance can be used to reconstruct the object + if and only if the repr of the values can be used to reconstruct + those values. + + """ + + def __init__(self, **kw): + """Create new Policy, possibly overriding some defaults. + + See class docstring for a list of overridable attributes. + + """ + for name, value in kw.items(): + if hasattr(self, name): + super(_PolicyBase,self).__setattr__(name, value) + else: + raise TypeError( + "{!r} is an invalid keyword argument for {}".format( + name, self.__class__.__name__)) + + def __repr__(self): + args = [ "{}={!r}".format(name, value) + for name, value in self.__dict__.items() ] + return "{}({})".format(self.__class__.__name__, ', '.join(args)) + + def clone(self, **kw): + """Return a new instance with specified attributes changed. + + The new instance has the same attribute values as the current object, + except for the changes passed in as keyword arguments. + + """ + newpolicy = self.__class__.__new__(self.__class__) + for attr, value in self.__dict__.items(): + object.__setattr__(newpolicy, attr, value) + for attr, value in kw.items(): + if not hasattr(self, attr): + raise TypeError( + "{!r} is an invalid keyword argument for {}".format( + attr, self.__class__.__name__)) + object.__setattr__(newpolicy, attr, value) + return newpolicy + + def __setattr__(self, name, value): + if hasattr(self, name): + msg = "{!r} object attribute {!r} is read-only" + else: + msg = "{!r} object has no attribute {!r}" + raise AttributeError(msg.format(self.__class__.__name__, name)) + + def __add__(self, other): + """Non-default values from right operand override those from left. + + The object returned is a new instance of the subclass. + + """ + return self.clone(**other.__dict__) + + +def _append_doc(doc, added_doc): + doc = doc.rsplit('\n', 1)[0] + added_doc = added_doc.split('\n', 1)[1] + return doc + '\n' + added_doc + +def _extend_docstrings(cls): + if cls.__doc__ and cls.__doc__.startswith('+'): + cls.__doc__ = _append_doc(cls.__bases__[0].__doc__, cls.__doc__) + for name, attr in cls.__dict__.items(): + if attr.__doc__ and attr.__doc__.startswith('+'): + for c in (c for base in cls.__bases__ for c in base.mro()): + doc = getattr(getattr(c, name), '__doc__') + if doc: + attr.__doc__ = _append_doc(doc, attr.__doc__) + break + return cls + + +class Policy(_PolicyBase, metaclass=abc.ABCMeta): + + r"""Controls for how messages are interpreted and formatted. + + Most of the classes and many of the methods in the email package accept + Policy objects as parameters. A Policy object contains a set of values and + functions that control how input is interpreted and how output is rendered. + For example, the parameter 'raise_on_defect' controls whether or not an RFC + violation results in an error being raised or not, while 'max_line_length' + controls the maximum length of output lines when a Message is serialized. + + Any valid attribute may be overridden when a Policy is created by passing + it as a keyword argument to the constructor. Policy objects are immutable, + but a new Policy object can be created with only certain values changed by + calling the Policy instance with keyword arguments. Policy objects can + also be added, producing a new Policy object in which the non-default + attributes set in the right hand operand overwrite those specified in the + left operand. + + Settable attributes: + + raise_on_defect -- If true, then defects should be raised as errors. + Default: False. + + linesep -- string containing the value to use as separation + between output lines. Default '\n'. + + cte_type -- Type of allowed content transfer encodings + + 7bit -- ASCII only + 8bit -- Content-Transfer-Encoding: 8bit is allowed + + Default: 8bit. Also controls the disposition of + (RFC invalid) binary data in headers; see the + documentation of the binary_fold method. + + max_line_length -- maximum length of lines, excluding 'linesep', + during serialization. None or 0 means no line + wrapping is done. Default is 78. + + """ + + raise_on_defect = False + linesep = '\n' + cte_type = '8bit' + max_line_length = 78 + + def handle_defect(self, obj, defect): + """Based on policy, either raise defect or call register_defect. + + handle_defect(obj, defect) + + defect should be a Defect subclass, but in any case must be an + Exception subclass. obj is the object on which the defect should be + registered if it is not raised. If the raise_on_defect is True, the + defect is raised as an error, otherwise the object and the defect are + passed to register_defect. + + This method is intended to be called by parsers that discover defects. + The email package parsers always call it with Defect instances. + + """ + if self.raise_on_defect: + raise defect + self.register_defect(obj, defect) + + def register_defect(self, obj, defect): + """Record 'defect' on 'obj'. + + Called by handle_defect if raise_on_defect is False. This method is + part of the Policy API so that Policy subclasses can implement custom + defect handling. The default implementation calls the append method of + the defects attribute of obj. The objects used by the email package by + default that get passed to this method will always have a defects + attribute with an append method. + + """ + obj.defects.append(defect) + + def header_max_count(self, name): + """Return the maximum allowed number of headers named 'name'. + + Called when a header is added to a Message object. If the returned + value is not 0 or None, and there are already a number of headers with + the name 'name' equal to the value returned, a ValueError is raised. + + Because the default behavior of Message's __setitem__ is to append the + value to the list of headers, it is easy to create duplicate headers + without realizing it. This method allows certain headers to be limited + in the number of instances of that header that may be added to a + Message programmatically. (The limit is not observed by the parser, + which will faithfully produce as many headers as exist in the message + being parsed.) + + The default implementation returns None for all header names. + """ + return None + + @abc.abstractmethod + def header_source_parse(self, sourcelines): + """Given a list of linesep terminated strings constituting the lines of + a single header, return the (name, value) tuple that should be stored + in the model. The input lines should retain their terminating linesep + characters. The lines passed in by the email package may contain + surrogateescaped binary data. + """ + raise NotImplementedError + + @abc.abstractmethod + def header_store_parse(self, name, value): + """Given the header name and the value provided by the application + program, return the (name, value) that should be stored in the model. + """ + raise NotImplementedError + + @abc.abstractmethod + def header_fetch_parse(self, name, value): + """Given the header name and the value from the model, return the value + to be returned to the application program that is requesting that + header. The value passed in by the email package may contain + surrogateescaped binary data if the lines were parsed by a BytesParser. + The returned value should not contain any surrogateescaped data. + + """ + raise NotImplementedError + + @abc.abstractmethod + def fold(self, name, value): + """Given the header name and the value from the model, return a string + containing linesep characters that implement the folding of the header + according to the policy controls. The value passed in by the email + package may contain surrogateescaped binary data if the lines were + parsed by a BytesParser. The returned value should not contain any + surrogateescaped data. + + """ + raise NotImplementedError + + @abc.abstractmethod + def fold_binary(self, name, value): + """Given the header name and the value from the model, return binary + data containing linesep characters that implement the folding of the + header according to the policy controls. The value passed in by the + email package may contain surrogateescaped binary data. + + """ + raise NotImplementedError + + +@_extend_docstrings +class Compat32(Policy): + + """+ + This particular policy is the backward compatibility Policy. It + replicates the behavior of the email package version 5.1. + """ + + def _sanitize_header(self, name, value): + # If the header value contains surrogates, return a Header using + # the unknown-8bit charset to encode the bytes as encoded words. + if not isinstance(value, str): + # Assume it is already a header object + return value + if _has_surrogates(value): + return header.Header(value, charset=_charset.UNKNOWN8BIT, + header_name=name) + else: + return value + + def header_source_parse(self, sourcelines): + """+ + The name is parsed as everything up to the ':' and returned unmodified. + The value is determined by stripping leading whitespace off the + remainder of the first line, joining all subsequent lines together, and + stripping any trailing carriage return or linefeed characters. + + """ + name, value = sourcelines[0].split(':', 1) + value = value.lstrip(' \t') + ''.join(sourcelines[1:]) + return (name, value.rstrip('\r\n')) + + def header_store_parse(self, name, value): + """+ + The name and value are returned unmodified. + """ + return (name, value) + + def header_fetch_parse(self, name, value): + """+ + If the value contains binary data, it is converted into a Header object + using the unknown-8bit charset. Otherwise it is returned unmodified. + """ + return self._sanitize_header(name, value) + + def fold(self, name, value): + """+ + Headers are folded using the Header folding algorithm, which preserves + existing line breaks in the value, and wraps each resulting line to the + max_line_length. Non-ASCII binary data are CTE encoded using the + unknown-8bit charset. + + """ + return self._fold(name, value, sanitize=True) + + def fold_binary(self, name, value): + """+ + Headers are folded using the Header folding algorithm, which preserves + existing line breaks in the value, and wraps each resulting line to the + max_line_length. If cte_type is 7bit, non-ascii binary data is CTE + encoded using the unknown-8bit charset. Otherwise the original source + header is used, with its existing line breaks and/or binary data. + + """ + folded = self._fold(name, value, sanitize=self.cte_type=='7bit') + return folded.encode('ascii', 'surrogateescape') + + def _fold(self, name, value, sanitize): + parts = [] + parts.append('%s: ' % name) + if isinstance(value, str): + if _has_surrogates(value): + if sanitize: + h = header.Header(value, + charset=_charset.UNKNOWN8BIT, + header_name=name) + else: + # If we have raw 8bit data in a byte string, we have no idea + # what the encoding is. There is no safe way to split this + # string. If it's ascii-subset, then we could do a normal + # ascii split, but if it's multibyte then we could break the + # string. There's no way to know so the least harm seems to + # be to not split the string and risk it being too long. + parts.append(value) + h = None + else: + h = header.Header(value, header_name=name) + else: + # Assume it is a Header-like object. + h = value + if h is not None: + parts.append(h.encode(linesep=self.linesep, + maxlinelen=self.max_line_length)) + parts.append(self.linesep) + return ''.join(parts) + + +compat32 = Compat32() diff --git a/Lib/email/architecture.rst b/Lib/email/architecture.rst new file mode 100644 index 0000000..80d24fe --- /dev/null +++ b/Lib/email/architecture.rst @@ -0,0 +1,216 @@ +:mod:`email` Package Architecture +================================= + +Overview +-------- + +The email package consists of three major components: + + Model + An object structure that represents an email message, and provides an + API for creating, querying, and modifying a message. + + Parser + Takes a sequence of characters or bytes and produces a model of the + email message represented by those characters or bytes. + + Generator + Takes a model and turns it into a sequence of characters or bytes. The + sequence can either be intended for human consumption (a printable + unicode string) or bytes suitable for transmission over the wire. In + the latter case all data is properly encoded using the content transfer + encodings specified by the relevant RFCs. + +Conceptually the package is organized around the model. The model provides both +"external" APIs intended for use by application programs using the library, +and "internal" APIs intended for use by the Parser and Generator components. +This division is intentionally a bit fuzy; the API described by this documentation +is all a public, stable API. This allows for an application with special needs +to implement its own parser and/or generator. + +In addition to the three major functional components, there is a third key +component to the architecture: + + Policy + An object that specifies various behavioral settings and carries + implementations of various behavior-controlling methods. + +The Policy framework provides a simple and convenient way to control the +behavior of the library, making it possible for the library to be used in a +very flexible fashion while leveraging the common code required to parse, +represent, and generate message-like objects. For example, in addition to the +default :rfc:`5322` email message policy, we also have a policy that manages +HTTP headers in a fashion compliant with :rfc:`2616`. Individual policy +controls, such as the maximum line length produced by the generator, can also +be controlled individually to meet specialized application requirements. + + +The Model +--------- + +The message model is implemented by the :class:`~email.message.Message` class. +The model divides a message into the two fundamental parts discussed by the +RFC: the header section and the body. The `Message` object acts as a +pseudo-dictionary of named headers. Its dictionary interface provides +convenient access to individual headers by name. However, all headers are kept +internally in an ordered list, so that the information about the order of the +headers in the original message is preserved. + +The `Message` object also has a `payload` that holds the body. A `payload` can +be one of two things: data, or a list of `Message` objects. The latter is used +to represent a multipart MIME message. Lists can be nested arbitrarily deeply +in order to represent the message, with all terminal leaves having non-list +data payloads. + + +Message Lifecycle +----------------- + +The general lifecyle of a message is: + + Creation + A `Message` object can be created by a Parser, or it can be + instantiated as an empty message by an application. + + Manipulation + The application may examine one or more headers, and/or the + payload, and it may modify one or more headers and/or + the payload. This may be done on the top level `Message` + object, or on any sub-object. + + Finalization + The Model is converted into a unicode or binary stream, + or the model is discarded. + + + +Header Policy Control During Lifecycle +-------------------------------------- + +One of the major controls exerted by the Policy is the management of headers +during the `Message` lifecycle. Most applications don't need to be aware of +this. + +A header enters the model in one of two ways: via a Parser, or by being set to +a specific value by an application program after the Model already exists. +Similarly, a header exits the model in one of two ways: by being serialized by +a Generator, or by being retrieved from a Model by an application program. The +Policy object provides hooks for all four of these pathways. + +The model storage for headers is a list of (name, value) tuples. + +The Parser identifies headers during parsing, and passes them to the +:meth:`~email.policy.Policy.header_source_parse` method of the Policy. The +result of that method is the (name, value) tuple to be stored in the model. + +When an application program supplies a header value (for example, through the +`Message` object `__setitem__` interface), the name and the value are passed to +the :meth:`~email.policy.Policy.header_store_parse` method of the Policy, which +returns the (name, value) tuple to be stored in the model. + +When an application program retrieves a header (through any of the dict or list +interfaces of `Message`), the name and value are passed to the +:meth:`~email.policy.Policy.header_fetch_parse` method of the Policy to +obtain the value returned to the application. + +When a Generator requests a header during serialization, the name and value are +passed to the :meth:`~email.policy.Policy.fold` method of the Policy, which +returns a string containing line breaks in the appropriate places. The +:meth:`~email.policy.Policy.cte_type` Policy control determines whether or +not Content Transfer Encoding is performed on the data in the header. There is +also a :meth:`~email.policy.Policy.binary_fold` method for use by generators +that produce binary output, which returns the folded header as binary data, +possibly folded at different places than the corresponding string would be. + + +Handling Binary Data +-------------------- + +In an ideal world all message data would conform to the RFCs, meaning that the +parser could decode the message into the idealized unicode message that the +sender originally wrote. In the real world, the email package must also be +able to deal with badly formatted messages, including messages containing +non-ASCII characters that either have no indicated character set or are not +valid characters in the indicated character set. + +Since email messages are *primarily* text data, and operations on message data +are primarily text operations (except for binary payloads of course), the model +stores all text data as unicode strings. Un-decodable binary inside text +data is handled by using the `surrogateescape` error handler of the ASCII +codec. As with the binary filenames the error handler was introduced to +handle, this allows the email package to "carry" the binary data received +during parsing along until the output stage, at which time it is regenerated +in its original form. + +This carried binary data is almost entirely an implementation detail. The one +place where it is visible in the API is in the "internal" API. A Parser must +do the `surrogateescape` encoding of binary input data, and pass that data to +the appropriate Policy method. The "internal" interface used by the Generator +to access header values preserves the `surrogateescaped` bytes. All other +interfaces convert the binary data either back into bytes or into a safe form +(losing information in some cases). + + +Backward Compatibility +---------------------- + +The :class:`~email.policy.Policy.Compat32` Policy provides backward +compatibility with version 5.1 of the email package. It does this via the +following implementation of the four+1 Policy methods described above: + +header_source_parse + Splits the first line on the colon to obtain the name, discards any spaces + after the colon, and joins the remainder of the line with all of the + remaining lines, preserving the linesep characters to obtain the value. + Trailing carriage return and/or linefeed characters are stripped from the + resulting value string. + +header_store_parse + Returns the name and value exactly as received from the application. + +header_fetch_parse + If the value contains any `surrogateescaped` binary data, return the value + as a :class:`~email.header.Header` object, using the character set + `unknown-8bit`. Otherwise just returns the value. + +fold + Uses :class:`~email.header.Header`'s folding to fold headers in the + same way the email5.1 generator did. + +binary_fold + Same as fold, but encodes to 'ascii'. + + +New Algorithm +------------- + +header_source_parse + Same as legacy behavior. + +header_store_parse + Same as legacy behavior. + +header_fetch_parse + If the value is already a header object, returns it. Otherwise, parses the + value using the new parser, and returns the resulting object as the value. + `surrogateescaped` bytes get turned into unicode unknown character code + points. + +fold + Uses the new header folding algorithm, respecting the policy settings. + surrogateescaped bytes are encoded using the ``unknown-8bit`` charset for + ``cte_type=7bit`` or ``8bit``. Returns a string. + + At some point there will also be a ``cte_type=unicode``, and for that + policy fold will serialize the idealized unicode message with RFC-like + folding, converting any surrogateescaped bytes into the unicode + unknown character glyph. + +binary_fold + Uses the new header folding algorithm, respecting the policy settings. + surrogateescaped bytes are encoded using the `unknown-8bit` charset for + ``cte_type=7bit``, and get turned back into bytes for ``cte_type=8bit``. + Returns bytes. + + At some point there will also be a ``cte_type=unicode``, and for that + policy binary_fold will serialize the message according to :rfc:``5335``. diff --git a/Lib/email/errors.py b/Lib/email/errors.py index d52a624..791239f 100644 --- a/Lib/email/errors.py +++ b/Lib/email/errors.py @@ -5,7 +5,6 @@ """email package exception classes.""" - class MessageError(Exception): """Base class for errors in the email package.""" @@ -30,12 +29,13 @@ class CharsetError(MessageError): """An illegal charset was given.""" - # These are parsing defects which the parser was able to work around. -class MessageDefect: +class MessageDefect(ValueError): """Base class for a message defect.""" def __init__(self, line=None): + if line is not None: + super().__init__(line) self.line = line class NoBoundaryInMultipartDefect(MessageDefect): @@ -44,14 +44,64 @@ class NoBoundaryInMultipartDefect(MessageDefect): class StartBoundaryNotFoundDefect(MessageDefect): """The claimed start boundary was never found.""" +class CloseBoundaryNotFoundDefect(MessageDefect): + """A start boundary was found, but not the corresponding close boundary.""" + class FirstHeaderLineIsContinuationDefect(MessageDefect): """A message had a continuation line as its first header line.""" class MisplacedEnvelopeHeaderDefect(MessageDefect): """A 'Unix-from' header was found in the middle of a header block.""" -class MalformedHeaderDefect(MessageDefect): - """Found a header that was missing a colon, or was otherwise malformed.""" +class MissingHeaderBodySeparatorDefect(MessageDefect): + """Found line with no leading whitespace and no colon before blank line.""" +# XXX: backward compatibility, just in case (it was never emitted). +MalformedHeaderDefect = MissingHeaderBodySeparatorDefect class MultipartInvariantViolationDefect(MessageDefect): """A message claimed to be a multipart but no subparts were found.""" + +class InvalidMultipartContentTransferEncodingDefect(MessageDefect): + """An invalid content transfer encoding was set on the multipart itself.""" + +class UndecodableBytesDefect(MessageDefect): + """Header contained bytes that could not be decoded""" + +class InvalidBase64PaddingDefect(MessageDefect): + """base64 encoded sequence had an incorrect length""" + +class InvalidBase64CharactersDefect(MessageDefect): + """base64 encoded sequence had characters not in base64 alphabet""" + +# These errors are specific to header parsing. + +class HeaderDefect(MessageDefect): + """Base class for a header defect.""" + + def __init__(self, *args, **kw): + super().__init__(*args, **kw) + +class InvalidHeaderDefect(HeaderDefect): + """Header is not valid, message gives details.""" + +class HeaderMissingRequiredValue(HeaderDefect): + """A header that must have a value had none""" + +class NonPrintableDefect(HeaderDefect): + """ASCII characters outside the ascii-printable range found""" + + def __init__(self, non_printables): + super().__init__(non_printables) + self.non_printables = non_printables + + def __str__(self): + return ("the following ASCII non-printables found in header: " + "{}".format(self.non_printables)) + +class ObsoleteHeaderDefect(HeaderDefect): + """Header uses syntax declared obsolete by RFC 5322""" + +class NonASCIILocalPartDefect(HeaderDefect): + """local_part contains non-ASCII characters""" + # This defect only occurs during unicode parsing, not when + # parsing messages decoded from binary. diff --git a/Lib/email/feedparser.py b/Lib/email/feedparser.py index aa8a2ff..56f50df 100644 --- a/Lib/email/feedparser.py +++ b/Lib/email/feedparser.py @@ -25,6 +25,7 @@ import re from email import errors from email import message +from email._policybase import compat32 NLCRE = re.compile('\r\n|\r|\n') NLCRE_bol = re.compile('(\r\n|\r|\n)') @@ -120,9 +121,6 @@ class BufferedSubFile(object): # Reverse and insert at the front of the lines. self._lines[:0] = lines[::-1] - def is_closed(self): - return self._closed - def __iter__(self): return self @@ -137,9 +135,22 @@ class BufferedSubFile(object): class FeedParser: """A feed-style parser of email.""" - def __init__(self, _factory=message.Message): - """_factory is called with no arguments to create a new message obj""" + def __init__(self, _factory=message.Message, *, policy=compat32): + """_factory is called with no arguments to create a new message obj + + The policy keyword specifies a policy object that controls a number of + aspects of the parser's operation. The default policy maintains + backward compatibility. + + """ self._factory = _factory + self.policy = policy + try: + _factory(policy=self.policy) + self._factory_kwds = lambda: {'policy': self.policy} + except TypeError: + # Assume this is an old-style factory + self._factory_kwds = lambda: {} self._input = BufferedSubFile() self._msgstack = [] self._parse = self._parsegen().__next__ @@ -171,11 +182,12 @@ class FeedParser: # Look for final set of defects if root.get_content_maintype() == 'multipart' \ and not root.is_multipart(): - root.defects.append(errors.MultipartInvariantViolationDefect()) + defect = errors.MultipartInvariantViolationDefect() + self.policy.handle_defect(root, defect) return root def _new_message(self): - msg = self._factory() + msg = self._factory(**self._factory_kwds()) if self._cur and self._cur.get_content_type() == 'multipart/digest': msg.set_default_type('message/rfc822') if self._msgstack: @@ -207,6 +219,8 @@ class FeedParser: # (i.e. newline), just throw it away. Otherwise the line is # part of the body so push it back. if not NLCRE.match(line): + defect = errors.MissingHeaderBodySeparatorDefect() + self.policy.handle_defect(self._cur, defect) self._input.unreadline(line) break headers.append(line) @@ -284,7 +298,8 @@ class FeedParser: # defined a boundary. That's a problem which we'll handle by # reading everything until the EOF and marking the message as # defective. - self._cur.defects.append(errors.NoBoundaryInMultipartDefect()) + defect = errors.NoBoundaryInMultipartDefect() + self.policy.handle_defect(self._cur, defect) lines = [] for line in self._input: if line is NeedMoreData: @@ -293,6 +308,11 @@ class FeedParser: lines.append(line) self._cur.set_payload(EMPTYSTRING.join(lines)) return + # Make sure a valid content type was specified per RFC 2045:6.4. + if (self._cur.get('content-transfer-encoding', '8bit').lower() + not in ('7bit', '8bit', 'binary')): + defect = errors.InvalidMultipartContentTransferEncodingDefect() + self.policy.handle_defect(self._cur, defect) # Create a line match predicate which matches the inter-part # boundary as well as the end-of-multipart boundary. Don't push # this onto the input stream until we've scanned past the @@ -304,6 +324,7 @@ class FeedParser: capturing_preamble = True preamble = [] linesep = False + close_boundary_seen = False while True: line = self._input.readline() if line is NeedMoreData: @@ -318,6 +339,7 @@ class FeedParser: # the closing boundary, then we need to initialize the # epilogue with the empty string (see below). if mo.group('end'): + close_boundary_seen = True linesep = mo.group('linesep') break # We saw an inter-part boundary. Were we in the preamble? @@ -386,9 +408,9 @@ class FeedParser: # We've seen either the EOF or the end boundary. If we're still # capturing the preamble, we never saw the start boundary. Note # that as a defect and store the captured text as the payload. - # Everything from here to the EOF is epilogue. if capturing_preamble: - self._cur.defects.append(errors.StartBoundaryNotFoundDefect()) + defect = errors.StartBoundaryNotFoundDefect() + self.policy.handle_defect(self._cur, defect) self._cur.set_payload(EMPTYSTRING.join(preamble)) epilogue = [] for line in self._input: @@ -397,8 +419,15 @@ class FeedParser: continue self._cur.epilogue = EMPTYSTRING.join(epilogue) return - # If the end boundary ended in a newline, we'll need to make sure - # the epilogue isn't None + # If we're not processing the preamble, then we might have seen + # EOF without seeing that end boundary...that is also a defect. + if not close_boundary_seen: + defect = errors.CloseBoundaryNotFoundDefect() + self.policy.handle_defect(self._cur, defect) + return + # Everything from here to the EOF is epilogue. If the end boundary + # ended in a newline, we'll need to make sure the epilogue isn't + # None if linesep: epilogue = [''] else: @@ -440,14 +469,12 @@ class FeedParser: # is illegal, so let's note the defect, store the illegal # line, and ignore it for purposes of headers. defect = errors.FirstHeaderLineIsContinuationDefect(line) - self._cur.defects.append(defect) + self.policy.handle_defect(self._cur, defect) continue lastvalue.append(line) continue if lastheader: - # XXX reconsider the joining of folded lines - lhdr = EMPTYSTRING.join(lastvalue)[:-1].rstrip('\r\n') - self._cur[lastheader] = lhdr + self._cur.set_raw(*self.policy.header_source_parse(lastvalue)) lastheader, lastvalue = '', [] # Check for envelope header, i.e. unix-from if line.startswith('From '): @@ -471,19 +498,17 @@ class FeedParser: self._cur.defects.append(defect) continue # Split the line on the colon separating field name from value. + # There will always be a colon, because if there wasn't the part of + # the parser that calls us would have started parsing the body. i = line.find(':') - if i < 0: - defect = errors.MalformedHeaderDefect(line) - self._cur.defects.append(defect) - continue + assert i>0, "_parse_headers fed line with no : and no leading WS" lastheader = line[:i] - lastvalue = [line[i+1:].lstrip()] + lastvalue = [line] # Done with all the lines, so handle the last header. if lastheader: - # XXX reconsider the joining of folded lines - self._cur[lastheader] = EMPTYSTRING.join(lastvalue).rstrip('\r\n') + self._cur.set_raw(*self.policy.header_source_parse(lastvalue)) + - class BytesFeedParser(FeedParser): """Like FeedParser, but feed accepts bytes.""" diff --git a/Lib/email/generator.py b/Lib/email/generator.py index c6bfb70..899adbc 100644 --- a/Lib/email/generator.py +++ b/Lib/email/generator.py @@ -13,8 +13,10 @@ import random import warnings from io import StringIO, BytesIO +from email._policybase import compat32 from email.header import Header -from email.message import _has_surrogates +from email.utils import _has_surrogates +import email.charset as _charset UNDERSCORE = '_' NL = '\n' # XXX: no longer used by the code below. @@ -33,7 +35,8 @@ class Generator: # Public interface # - def __init__(self, outfp, mangle_from_=True, maxheaderlen=78): + def __init__(self, outfp, mangle_from_=True, maxheaderlen=None, *, + policy=None): """Create the generator for message flattening. outfp is the output file-like object for writing the message to. It @@ -49,16 +52,22 @@ class Generator: defined in the Header class. Set maxheaderlen to zero to disable header wrapping. The default is 78, as recommended (but not required) by RFC 2822. + + The policy keyword specifies a policy object that controls a number of + aspects of the generator's operation. The default policy maintains + backward compatibility. + """ self._fp = outfp self._mangle_from_ = mangle_from_ - self._maxheaderlen = maxheaderlen + self.maxheaderlen = maxheaderlen + self.policy = policy def write(self, s): # Just delegate to the file object self._fp.write(s) - def flatten(self, msg, unixfrom=False, linesep='\n'): + def flatten(self, msg, unixfrom=False, linesep=None): r"""Print the message object tree rooted at msg to the output file specified when the Generator instance was created. @@ -70,29 +79,47 @@ class Generator: Note that for subobjects, no From_ line is printed. linesep specifies the characters used to indicate a new line in - the output. The default value is the most useful for typical - Python applications, but it can be set to \r\n to produce RFC-compliant - line separators when needed. + the output. The default value is determined by the policy. """ # We use the _XXX constants for operating on data that comes directly # from the msg, and _encoded_XXX constants for operating on data that # has already been converted (to bytes in the BytesGenerator) and # inserted into a temporary buffer. - self._NL = linesep - self._encoded_NL = self._encode(linesep) + policy = msg.policy if self.policy is None else self.policy + if linesep is not None: + policy = policy.clone(linesep=linesep) + if self.maxheaderlen is not None: + policy = policy.clone(max_line_length=self.maxheaderlen) + self._NL = policy.linesep + self._encoded_NL = self._encode(self._NL) self._EMPTY = '' self._encoded_EMTPY = self._encode('') - if unixfrom: - ufrom = msg.get_unixfrom() - if not ufrom: - ufrom = 'From nobody ' + time.ctime(time.time()) - self.write(ufrom + self._NL) - self._write(msg) + # Because we use clone (below) when we recursively process message + # subparts, and because clone uses the computed policy (not None), + # submessages will automatically get set to the computed policy when + # they are processed by this code. + old_gen_policy = self.policy + old_msg_policy = msg.policy + try: + self.policy = policy + msg.policy = policy + if unixfrom: + ufrom = msg.get_unixfrom() + if not ufrom: + ufrom = 'From nobody ' + time.ctime(time.time()) + self.write(ufrom + self._NL) + self._write(msg) + finally: + self.policy = old_gen_policy + msg.policy = old_msg_policy def clone(self, fp): """Clone this generator with the exact same options.""" - return self.__class__(fp, self._mangle_from_, self._maxheaderlen) + return self.__class__(fp, + self._mangle_from_, + None, # Use policy setting, which we've adjusted + policy=self.policy) # # Protected interface - undocumented ;/ @@ -167,16 +194,8 @@ class Generator: # def _write_headers(self, msg): - for h, v in msg.items(): - self.write('%s: ' % h) - if isinstance(v, Header): - self.write(v.encode( - maxlinelen=self._maxheaderlen, linesep=self._NL)+self._NL) - else: - # Header's got lots of smarts, so use it. - header = Header(v, maxlinelen=self._maxheaderlen, - header_name=h) - self.write(header.encode(linesep=self._NL)+self._NL) + for h, v in msg.raw_items(): + self.write(self.policy.fold(h, v)) # A blank line always separates headers from body self.write(self._NL) @@ -265,12 +284,12 @@ class Generator: # The contents of signed parts has to stay unmodified in order to keep # the signature intact per RFC1847 2.1, so we disable header wrapping. # RDM: This isn't enough to completely preserve the part, but it helps. - old_maxheaderlen = self._maxheaderlen + p = self.policy + self.policy = p.clone(max_line_length=0) try: - self._maxheaderlen = 0 self._handle_multipart(msg) finally: - self._maxheaderlen = old_maxheaderlen + self.policy = p def _handle_message_delivery_status(self, msg): # We can't just write the headers directly to self's file object @@ -305,10 +324,12 @@ class Generator: # message/rfc822. Such messages are generated by, for example, # Groupwise when forwarding unadorned messages. (Issue 7970.) So # in that case we just emit the string body. - payload = msg.get_payload() + payload = msg._payload if isinstance(payload, list): g.flatten(msg.get_payload(0), unixfrom=False, linesep=self._NL) payload = s.getvalue() + else: + payload = self._encode(payload) self._fp.write(payload) # This used to be a module level function; we use a classmethod for this @@ -344,7 +365,10 @@ class BytesGenerator(Generator): Functionally identical to the base Generator except that the output is bytes and not string. When surrogates were used in the input to encode - bytes, these are decoded back to bytes for output. + bytes, these are decoded back to bytes for output. If the policy has + cte_type set to 7bit, then the message is transformed such that the + non-ASCII bytes are properly content transfer encoded, using the charset + unknown-8bit. The outfp object must accept bytes in its write method. """ @@ -365,23 +389,8 @@ class BytesGenerator(Generator): def _write_headers(self, msg): # This is almost the same as the string version, except for handling # strings with 8bit bytes. - for h, v in msg._headers: - self.write('%s: ' % h) - if isinstance(v, Header): - self.write(v.encode(maxlinelen=self._maxheaderlen)+self._NL) - elif _has_surrogates(v): - # If we have raw 8bit data in a byte string, we have no idea - # what the encoding is. There is no safe way to split this - # string. If it's ascii-subset, then we could do a normal - # ascii split, but if it's multibyte then we could break the - # string. There's no way to know so the least harm seems to - # be to not split the string and risk it being too long. - self.write(v+NL) - else: - # Header's got lots of smarts and this string is safe... - header = Header(v, maxlinelen=self._maxheaderlen, - header_name=h) - self.write(header.encode(linesep=self._NL)+self._NL) + for h, v in msg.raw_items(): + self._fp.write(self.policy.fold_binary(h, v)) # A blank line always separates headers from body self.write(self._NL) @@ -390,7 +399,7 @@ class BytesGenerator(Generator): # just write it back out. if msg._payload is None: return - if _has_surrogates(msg._payload): + if _has_surrogates(msg._payload) and not self.policy.cte_type=='7bit': if self._mangle_from_: msg._payload = fcre.sub(">From ", msg._payload) self.write(msg._payload) diff --git a/Lib/email/header.py b/Lib/email/header.py index 3250d36..a89219d 100644 --- a/Lib/email/header.py +++ b/Lib/email/header.py @@ -40,7 +40,6 @@ ecre = re.compile(r''' \? # literal ? (?P<encoded>.*?) # non-greedy up to the next ?= is the encoded string \?= # literal ?= - (?=[ \t]|$) # whitespace or the end of the string ''', re.VERBOSE | re.IGNORECASE | re.MULTILINE) # Field name regexp, including trailing colon, but not separating whitespace, @@ -86,8 +85,12 @@ def decode_header(header): words = [] for line in header.splitlines(): parts = ecre.split(line) + first = True while parts: - unencoded = parts.pop(0).strip() + unencoded = parts.pop(0) + if first: + unencoded = unencoded.lstrip() + first = False if unencoded: words.append((unencoded, None, None)) if parts: @@ -95,6 +98,16 @@ def decode_header(header): encoding = parts.pop(0).lower() encoded = parts.pop(0) words.append((encoded, encoding, charset)) + # Now loop over words and remove words that consist of whitespace + # between two encoded strings. + import sys + droplist = [] + for n, w in enumerate(words): + if n>1 and w[1] and words[n-2][1] and words[n-1][0].isspace(): + droplist.append(n-1) + for d in reversed(droplist): + del words[d] + # The next step is to decode each encoded word by applying the reverse # base64 or quopri transformation. decoded_words is now a list of the # form (decoded_word, charset). @@ -217,22 +230,27 @@ class Header: self._normalize() uchunks = [] lastcs = None + lastspace = None for string, charset in self._chunks: # We must preserve spaces between encoded and non-encoded word # boundaries, which means for us we need to add a space when we go # from a charset to None/us-ascii, or from None/us-ascii to a # charset. Only do this for the second and subsequent chunks. + # Don't add a space if the None/us-ascii string already has + # a space (trailing or leading depending on transition) nextcs = charset if nextcs == _charset.UNKNOWN8BIT: original_bytes = string.encode('ascii', 'surrogateescape') string = original_bytes.decode('ascii', 'replace') if uchunks: + hasspace = string and self._nonctext(string[0]) if lastcs not in (None, 'us-ascii'): - if nextcs in (None, 'us-ascii'): + if nextcs in (None, 'us-ascii') and not hasspace: uchunks.append(SPACE) nextcs = None - elif nextcs not in (None, 'us-ascii'): + elif nextcs not in (None, 'us-ascii') and not lastspace: uchunks.append(SPACE) + lastspace = string and self._nonctext(string[-1]) lastcs = nextcs uchunks.append(string) return EMPTYSTRING.join(uchunks) @@ -291,6 +309,11 @@ class Header: charset = UTF8 self._chunks.append((s, charset)) + def _nonctext(self, s): + """True if string s is not a ctext character of RFC822. + """ + return s.isspace() or s in ('(', ')', '\\') + def encode(self, splitchars=';, \t', maxlinelen=None, linesep='\n'): r"""Encode a message header into an RFC-compliant format. @@ -334,7 +357,20 @@ class Header: maxlinelen = 1000000 formatter = _ValueFormatter(self._headerlen, maxlinelen, self._continuation_ws, splitchars) + lastcs = None + hasspace = lastspace = None for string, charset in self._chunks: + if hasspace is not None: + hasspace = string and self._nonctext(string[0]) + import sys + if lastcs not in (None, 'us-ascii'): + if not hasspace or charset not in (None, 'us-ascii'): + formatter.add_transition() + elif charset not in (None, 'us-ascii') and not lastspace: + formatter.add_transition() + lastspace = string and self._nonctext(string[-1]) + lastcs = charset + hasspace = False lines = string.splitlines() if lines: formatter.feed('', lines[0], charset) @@ -351,6 +387,7 @@ class Header: formatter.feed(fws, sline, charset) if len(lines) > 1: formatter.newline() + if self._chunks: formatter.add_transition() value = formatter._str(linesep) if _embeded_header.search(value): diff --git a/Lib/email/headerregistry.py b/Lib/email/headerregistry.py new file mode 100644 index 0000000..1fae950 --- /dev/null +++ b/Lib/email/headerregistry.py @@ -0,0 +1,583 @@ +"""Representing and manipulating email headers via custom objects. + +This module provides an implementation of the HeaderRegistry API. +The implementation is designed to flexibly follow RFC5322 rules. + +Eventually HeaderRegistry will be a public API, but it isn't yet, +and will probably change some before that happens. + +""" + +from email import utils +from email import errors +from email import _header_value_parser as parser + +class Address: + + def __init__(self, display_name='', username='', domain='', addr_spec=None): + """Create an object represeting a full email address. + + An address can have a 'display_name', a 'username', and a 'domain'. In + addition to specifying the username and domain separately, they may be + specified together by using the addr_spec keyword *instead of* the + username and domain keywords. If an addr_spec string is specified it + must be properly quoted according to RFC 5322 rules; an error will be + raised if it is not. + + An Address object has display_name, username, domain, and addr_spec + attributes, all of which are read-only. The addr_spec and the string + value of the object are both quoted according to RFC5322 rules, but + without any Content Transfer Encoding. + + """ + # This clause with its potential 'raise' may only happen when an + # application program creates an Address object using an addr_spec + # keyword. The email library code itself must always supply username + # and domain. + if addr_spec is not None: + if username or domain: + raise TypeError("addrspec specified when username and/or " + "domain also specified") + a_s, rest = parser.get_addr_spec(addr_spec) + if rest: + raise ValueError("Invalid addr_spec; only '{}' " + "could be parsed from '{}'".format( + a_s, addr_spec)) + if a_s.all_defects: + raise a_s.all_defects[0] + username = a_s.local_part + domain = a_s.domain + self._display_name = display_name + self._username = username + self._domain = domain + + @property + def display_name(self): + return self._display_name + + @property + def username(self): + return self._username + + @property + def domain(self): + return self._domain + + @property + def addr_spec(self): + """The addr_spec (username@domain) portion of the address, quoted + according to RFC 5322 rules, but with no Content Transfer Encoding. + """ + nameset = set(self.username) + if len(nameset) > len(nameset-parser.DOT_ATOM_ENDS): + lp = parser.quote_string(self.username) + else: + lp = self.username + if self.domain: + return lp + '@' + self.domain + if not lp: + return '<>' + return lp + + def __repr__(self): + return "Address(display_name={!r}, username={!r}, domain={!r})".format( + self.display_name, self.username, self.domain) + + def __str__(self): + nameset = set(self.display_name) + if len(nameset) > len(nameset-parser.SPECIALS): + disp = parser.quote_string(self.display_name) + else: + disp = self.display_name + if disp: + addr_spec = '' if self.addr_spec=='<>' else self.addr_spec + return "{} <{}>".format(disp, addr_spec) + return self.addr_spec + + def __eq__(self, other): + if type(other) != type(self): + return False + return (self.display_name == other.display_name and + self.username == other.username and + self.domain == other.domain) + + +class Group: + + def __init__(self, display_name=None, addresses=None): + """Create an object representing an address group. + + An address group consists of a display_name followed by colon and an + list of addresses (see Address) terminated by a semi-colon. The Group + is created by specifying a display_name and a possibly empty list of + Address objects. A Group can also be used to represent a single + address that is not in a group, which is convenient when manipulating + lists that are a combination of Groups and individual Addresses. In + this case the display_name should be set to None. In particular, the + string representation of a Group whose display_name is None is the same + as the Address object, if there is one and only one Address object in + the addresses list. + + """ + self._display_name = display_name + self._addresses = tuple(addresses) if addresses else tuple() + + @property + def display_name(self): + return self._display_name + + @property + def addresses(self): + return self._addresses + + def __repr__(self): + return "Group(display_name={!r}, addresses={!r}".format( + self.display_name, self.addresses) + + def __str__(self): + if self.display_name is None and len(self.addresses)==1: + return str(self.addresses[0]) + disp = self.display_name + if disp is not None: + nameset = set(disp) + if len(nameset) > len(nameset-parser.SPECIALS): + disp = parser.quote_string(disp) + adrstr = ", ".join(str(x) for x in self.addresses) + adrstr = ' ' + adrstr if adrstr else adrstr + return "{}:{};".format(disp, adrstr) + + def __eq__(self, other): + if type(other) != type(self): + return False + return (self.display_name == other.display_name and + self.addresses == other.addresses) + + +# Header Classes # + +class BaseHeader(str): + + """Base class for message headers. + + Implements generic behavior and provides tools for subclasses. + + A subclass must define a classmethod named 'parse' that takes an unfolded + value string and a dictionary as its arguments. The dictionary will + contain one key, 'defects', initialized to an empty list. After the call + the dictionary must contain two additional keys: parse_tree, set to the + parse tree obtained from parsing the header, and 'decoded', set to the + string value of the idealized representation of the data from the value. + (That is, encoded words are decoded, and values that have canonical + representations are so represented.) + + The defects key is intended to collect parsing defects, which the message + parser will subsequently dispose of as appropriate. The parser should not, + insofar as practical, raise any errors. Defects should be added to the + list instead. The standard header parsers register defects for RFC + compliance issues, for obsolete RFC syntax, and for unrecoverable parsing + errors. + + The parse method may add additional keys to the dictionary. In this case + the subclass must define an 'init' method, which will be passed the + dictionary as its keyword arguments. The method should use (usually by + setting them as the value of similarly named attributes) and remove all the + extra keys added by its parse method, and then use super to call its parent + class with the remaining arguments and keywords. + + The subclass should also make sure that a 'max_count' attribute is defined + that is either None or 1. XXX: need to better define this API. + + """ + + def __new__(cls, name, value): + kwds = {'defects': []} + cls.parse(value, kwds) + if utils._has_surrogates(kwds['decoded']): + kwds['decoded'] = utils._sanitize(kwds['decoded']) + self = str.__new__(cls, kwds['decoded']) + del kwds['decoded'] + self.init(name, **kwds) + return self + + def init(self, name, *, parse_tree, defects): + self._name = name + self._parse_tree = parse_tree + self._defects = defects + + @property + def name(self): + return self._name + + @property + def defects(self): + return tuple(self._defects) + + def __reduce__(self): + return ( + _reconstruct_header, + ( + self.__class__.__name__, + self.__class__.__bases__, + str(self), + ), + self.__dict__) + + @classmethod + def _reconstruct(cls, value): + return str.__new__(cls, value) + + def fold(self, *, policy): + """Fold header according to policy. + + The parsed representation of the header is folded according to + RFC5322 rules, as modified by the policy. If the parse tree + contains surrogateescaped bytes, the bytes are CTE encoded using + the charset 'unknown-8bit". + + Any non-ASCII characters in the parse tree are CTE encoded using + charset utf-8. XXX: make this a policy setting. + + The returned value is an ASCII-only string possibly containing linesep + characters, and ending with a linesep character. The string includes + the header name and the ': ' separator. + + """ + # At some point we need to only put fws here if it was in the source. + header = parser.Header([ + parser.HeaderLabel([ + parser.ValueTerminal(self.name, 'header-name'), + parser.ValueTerminal(':', 'header-sep')]), + parser.CFWSList([parser.WhiteSpaceTerminal(' ', 'fws')]), + self._parse_tree]) + return header.fold(policy=policy) + + +def _reconstruct_header(cls_name, bases, value): + return type(cls_name, bases, {})._reconstruct(value) + + +class UnstructuredHeader: + + max_count = None + value_parser = staticmethod(parser.get_unstructured) + + @classmethod + def parse(cls, value, kwds): + kwds['parse_tree'] = cls.value_parser(value) + kwds['decoded'] = str(kwds['parse_tree']) + + +class UniqueUnstructuredHeader(UnstructuredHeader): + + max_count = 1 + + +class DateHeader: + + """Header whose value consists of a single timestamp. + + Provides an additional attribute, datetime, which is either an aware + datetime using a timezone, or a naive datetime if the timezone + in the input string is -0000. Also accepts a datetime as input. + The 'value' attribute is the normalized form of the timestamp, + which means it is the output of format_datetime on the datetime. + """ + + max_count = None + + # This is used only for folding, not for creating 'decoded'. + value_parser = staticmethod(parser.get_unstructured) + + @classmethod + def parse(cls, value, kwds): + if not value: + kwds['defects'].append(errors.HeaderMissingRequiredValue()) + kwds['datetime'] = None + kwds['decoded'] = '' + kwds['parse_tree'] = parser.TokenList() + return + if isinstance(value, str): + value = utils.parsedate_to_datetime(value) + kwds['datetime'] = value + kwds['decoded'] = utils.format_datetime(kwds['datetime']) + kwds['parse_tree'] = cls.value_parser(kwds['decoded']) + + def init(self, *args, **kw): + self._datetime = kw.pop('datetime') + super().init(*args, **kw) + + @property + def datetime(self): + return self._datetime + + +class UniqueDateHeader(DateHeader): + + max_count = 1 + + +class AddressHeader: + + max_count = None + + @staticmethod + def value_parser(value): + address_list, value = parser.get_address_list(value) + assert not value, 'this should not happen' + return address_list + + @classmethod + def parse(cls, value, kwds): + if isinstance(value, str): + # We are translating here from the RFC language (address/mailbox) + # to our API language (group/address). + kwds['parse_tree'] = address_list = cls.value_parser(value) + groups = [] + for addr in address_list.addresses: + groups.append(Group(addr.display_name, + [Address(mb.display_name or '', + mb.local_part or '', + mb.domain or '') + for mb in addr.all_mailboxes])) + defects = list(address_list.all_defects) + else: + # Assume it is Address/Group stuff + if not hasattr(value, '__iter__'): + value = [value] + groups = [Group(None, [item]) if not hasattr(item, 'addresses') + else item + for item in value] + defects = [] + kwds['groups'] = groups + kwds['defects'] = defects + kwds['decoded'] = ', '.join([str(item) for item in groups]) + if 'parse_tree' not in kwds: + kwds['parse_tree'] = cls.value_parser(kwds['decoded']) + + def init(self, *args, **kw): + self._groups = tuple(kw.pop('groups')) + self._addresses = None + super().init(*args, **kw) + + @property + def groups(self): + return self._groups + + @property + def addresses(self): + if self._addresses is None: + self._addresses = tuple([address for group in self._groups + for address in group.addresses]) + return self._addresses + + +class UniqueAddressHeader(AddressHeader): + + max_count = 1 + + +class SingleAddressHeader(AddressHeader): + + @property + def address(self): + if len(self.addresses)!=1: + raise ValueError(("value of single address header {} is not " + "a single address").format(self.name)) + return self.addresses[0] + + +class UniqueSingleAddressHeader(SingleAddressHeader): + + max_count = 1 + + +class MIMEVersionHeader: + + max_count = 1 + + value_parser = staticmethod(parser.parse_mime_version) + + @classmethod + def parse(cls, value, kwds): + kwds['parse_tree'] = parse_tree = cls.value_parser(value) + kwds['decoded'] = str(parse_tree) + kwds['defects'].extend(parse_tree.all_defects) + kwds['major'] = None if parse_tree.minor is None else parse_tree.major + kwds['minor'] = parse_tree.minor + if parse_tree.minor is not None: + kwds['version'] = '{}.{}'.format(kwds['major'], kwds['minor']) + else: + kwds['version'] = None + + def init(self, *args, **kw): + self._version = kw.pop('version') + self._major = kw.pop('major') + self._minor = kw.pop('minor') + super().init(*args, **kw) + + @property + def major(self): + return self._major + + @property + def minor(self): + return self._minor + + @property + def version(self): + return self._version + + +class ParameterizedMIMEHeader: + + # Mixin that handles the params dict. Must be subclassed and + # a property value_parser for the specific header provided. + + max_count = 1 + + @classmethod + def parse(cls, value, kwds): + kwds['parse_tree'] = parse_tree = cls.value_parser(value) + kwds['decoded'] = str(parse_tree) + kwds['defects'].extend(parse_tree.all_defects) + if parse_tree.params is None: + kwds['params'] = {} + else: + # The MIME RFCs specify that parameter ordering is arbitrary. + kwds['params'] = {utils._sanitize(name).lower(): + utils._sanitize(value) + for name, value in parse_tree.params} + + def init(self, *args, **kw): + self._params = kw.pop('params') + super().init(*args, **kw) + + @property + def params(self): + return self._params.copy() + + +class ContentTypeHeader(ParameterizedMIMEHeader): + + value_parser = staticmethod(parser.parse_content_type_header) + + def init(self, *args, **kw): + super().init(*args, **kw) + self._maintype = utils._sanitize(self._parse_tree.maintype) + self._subtype = utils._sanitize(self._parse_tree.subtype) + + @property + def maintype(self): + return self._maintype + + @property + def subtype(self): + return self._subtype + + @property + def content_type(self): + return self.maintype + '/' + self.subtype + + +class ContentDispositionHeader(ParameterizedMIMEHeader): + + value_parser = staticmethod(parser.parse_content_disposition_header) + + def init(self, *args, **kw): + super().init(*args, **kw) + cd = self._parse_tree.content_disposition + self._content_disposition = cd if cd is None else utils._sanitize(cd) + + @property + def content_disposition(self): + return self._content_disposition + + +class ContentTransferEncodingHeader: + + max_count = 1 + + value_parser = staticmethod(parser.parse_content_transfer_encoding_header) + + @classmethod + def parse(cls, value, kwds): + kwds['parse_tree'] = parse_tree = cls.value_parser(value) + kwds['decoded'] = str(parse_tree) + kwds['defects'].extend(parse_tree.all_defects) + + def init(self, *args, **kw): + super().init(*args, **kw) + self._cte = utils._sanitize(self._parse_tree.cte) + + @property + def cte(self): + return self._cte + + +# The header factory # + +_default_header_map = { + 'subject': UniqueUnstructuredHeader, + 'date': UniqueDateHeader, + 'resent-date': DateHeader, + 'orig-date': UniqueDateHeader, + 'sender': UniqueSingleAddressHeader, + 'resent-sender': SingleAddressHeader, + 'to': UniqueAddressHeader, + 'resent-to': AddressHeader, + 'cc': UniqueAddressHeader, + 'resent-cc': AddressHeader, + 'bcc': UniqueAddressHeader, + 'resent-bcc': AddressHeader, + 'from': UniqueAddressHeader, + 'resent-from': AddressHeader, + 'reply-to': UniqueAddressHeader, + 'mime-version': MIMEVersionHeader, + 'content-type': ContentTypeHeader, + 'content-disposition': ContentDispositionHeader, + 'content-transfer-encoding': ContentTransferEncodingHeader, + } + +class HeaderRegistry: + + """A header_factory and header registry.""" + + def __init__(self, base_class=BaseHeader, default_class=UnstructuredHeader, + use_default_map=True): + """Create a header_factory that works with the Policy API. + + base_class is the class that will be the last class in the created + header class's __bases__ list. default_class is the class that will be + used if "name" (see __call__) does not appear in the registry. + use_default_map controls whether or not the default mapping of names to + specialized classes is copied in to the registry when the factory is + created. The default is True. + + """ + self.registry = {} + self.base_class = base_class + self.default_class = default_class + if use_default_map: + self.registry.update(_default_header_map) + + def map_to_type(self, name, cls): + """Register cls as the specialized class for handling "name" headers. + + """ + self.registry[name.lower()] = cls + + def __getitem__(self, name): + cls = self.registry.get(name.lower(), self.default_class) + return type('_'+cls.__name__, (cls, self.base_class), {}) + + def __call__(self, name, value): + """Create a header instance for header 'name' from 'value'. + + Creates a header instance by creating a specialized class for parsing + and representing the specified header by combining the factory + base_class with a specialized class from the registry or the + default_class, and passing the name and value to the constructed + class's constructor. + + """ + return self[name](name, value) diff --git a/Lib/email/message.py b/Lib/email/message.py index f1ffcdb..3feab52 100644 --- a/Lib/email/message.py +++ b/Lib/email/message.py @@ -10,14 +10,14 @@ import re import uu import base64 import binascii -import warnings from io import BytesIO, StringIO # Intrapackage imports from email import utils from email import errors -from email import header +from email._policybase import compat32 from email import charset as _charset +from email._encoded_words import decode_b Charset = _charset.Charset SEMISPACE = '; ' @@ -26,24 +26,6 @@ SEMISPACE = '; ' # existence of which force quoting of the parameter value. tspecials = re.compile(r'[ \(\)<>@,;:\\"/\[\]\?=]') -# How to figure out if we are processing strings that come from a byte -# source with undecodable characters. -_has_surrogates = re.compile( - '([^\ud800-\udbff]|\A)[\udc00-\udfff]([^\udc00-\udfff]|\Z)').search - - -# Helper functions -def _sanitize_header(name, value): - # If the header value contains surrogates, return a Header using - # the unknown-8bit charset to encode the bytes as encoded words. - if not isinstance(value, str): - # Assume it is already a header object - return value - if _has_surrogates(value): - return header.Header(value, charset=_charset.UNKNOWN8BIT, - header_name=name) - else: - return value def _splitparam(param): # Split header parameters. BAW: this may be too simple. It isn't @@ -136,7 +118,8 @@ class Message: you must use the explicit API to set or get all the headers. Not all of the mapping methods are implemented. """ - def __init__(self): + def __init__(self, policy=compat32): + self.policy = policy self._headers = [] self._unixfrom = None self._payload = None @@ -246,7 +229,7 @@ class Message: cte = str(self.get('content-transfer-encoding', '')).lower() # payload may be bytes here. if isinstance(payload, str): - if _has_surrogates(payload): + if utils._has_surrogates(payload): bpayload = payload.encode('ascii', 'surrogateescape') if not decode: try: @@ -267,11 +250,12 @@ class Message: if cte == 'quoted-printable': return utils._qdecode(bpayload) elif cte == 'base64': - try: - return base64.b64decode(bpayload) - except binascii.Error: - # Incorrect padding - return bpayload + # XXX: this is a bit of a hack; decode_b should probably be factored + # out somewhere, but I haven't figured out where yet. + value, defects = decode_b(b''.join(bpayload.splitlines())) + for defect in defects: + self.policy.handle_defect(self, defect) + return value elif cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'): in_file = BytesIO(bpayload) out_file = BytesIO() @@ -362,7 +346,17 @@ class Message: Note: this does not overwrite an existing header with the same field name. Use __delitem__() first to delete any existing headers. """ - self._headers.append((name, val)) + max_count = self.policy.header_max_count(name) + if max_count: + lname = name.lower() + found = 0 + for k, v in self._headers: + if k.lower() == lname: + found += 1 + if found >= max_count: + raise ValueError("There may be at most {} {} headers " + "in a message".format(max_count, name)) + self._headers.append(self.policy.header_store_parse(name, val)) def __delitem__(self, name): """Delete all occurrences of a header, if present. @@ -401,7 +395,8 @@ class Message: Any fields deleted and re-inserted are always appended to the header list. """ - return [_sanitize_header(k, v) for k, v in self._headers] + return [self.policy.header_fetch_parse(k, v) + for k, v in self._headers] def items(self): """Get all the message's header fields and values. @@ -411,7 +406,8 @@ class Message: Any fields deleted and re-inserted are always appended to the header list. """ - return [(k, _sanitize_header(k, v)) for k, v in self._headers] + return [(k, self.policy.header_fetch_parse(k, v)) + for k, v in self._headers] def get(self, name, failobj=None): """Get a header value. @@ -422,10 +418,29 @@ class Message: name = name.lower() for k, v in self._headers: if k.lower() == name: - return _sanitize_header(k, v) + return self.policy.header_fetch_parse(k, v) return failobj # + # "Internal" methods (public API, but only intended for use by a parser + # or generator, not normal application code. + # + + def set_raw(self, name, value): + """Store name and value in the model without modification. + + This is an "internal" API, intended only for use by a parser. + """ + self._headers.append((name, value)) + + def raw_items(self): + """Return the (name, value) header pairs without modification. + + This is an "internal" API, intended only for use by a generator. + """ + return iter(self._headers.copy()) + + # # Additional useful stuff # @@ -442,7 +457,7 @@ class Message: name = name.lower() for k, v in self._headers: if k.lower() == name: - values.append(_sanitize_header(k, v)) + values.append(self.policy.header_fetch_parse(k, v)) if not values: return failobj return values @@ -475,7 +490,7 @@ class Message: parts.append(_formatparam(k.replace('_', '-'), v)) if _value is not None: parts.insert(0, _value) - self._headers.append((_name, SEMISPACE.join(parts))) + self[_name] = SEMISPACE.join(parts) def replace_header(self, _name, _value): """Replace a header. @@ -487,7 +502,7 @@ class Message: _name = _name.lower() for i, (k, v) in zip(range(len(self._headers)), self._headers): if k.lower() == _name: - self._headers[i] = (k, _value) + self._headers[i] = self.policy.header_store_parse(k, _value) break else: raise KeyError(_name) @@ -803,7 +818,8 @@ class Message: parts.append(k) else: parts.append('%s=%s' % (k, v)) - newheaders.append((h, SEMISPACE.join(parts))) + val = SEMISPACE.join(parts) + newheaders.append(self.policy.header_store_parse(h, val)) else: newheaders.append((h, v)) diff --git a/Lib/email/mime/text.py b/Lib/email/mime/text.py index 5747db5..80ff950 100644 --- a/Lib/email/mime/text.py +++ b/Lib/email/mime/text.py @@ -14,7 +14,7 @@ from email.mime.nonmultipart import MIMENonMultipart class MIMEText(MIMENonMultipart): """Class for generating text/* type MIME documents.""" - def __init__(self, _text, _subtype='plain', _charset='us-ascii'): + def __init__(self, _text, _subtype='plain', _charset=None): """Create a text/* type MIME document. _text is the string for this message object. @@ -25,6 +25,18 @@ class MIMEText(MIMENonMultipart): header. This defaults to "us-ascii". Note that as a side-effect, the Content-Transfer-Encoding header will also be set. """ + + # If no _charset was specified, check to see see if there are non-ascii + # characters present. If not, use 'us-ascii', otherwise use utf-8. + # XXX: This can be removed once #7304 is fixed. + if _charset is None: + try: + _text.encode('us-ascii') + _charset = 'us-ascii' + except UnicodeEncodeError: + _charset = 'utf-8' + MIMENonMultipart.__init__(self, 'text', _subtype, **{'charset': _charset}) + self.set_payload(_text, _charset) diff --git a/Lib/email/parser.py b/Lib/email/parser.py index 1c931ea..1aab012 100644 --- a/Lib/email/parser.py +++ b/Lib/email/parser.py @@ -4,18 +4,19 @@ """A parser of RFC 2822 and MIME email messages.""" -__all__ = ['Parser', 'HeaderParser', 'BytesParser'] +__all__ = ['Parser', 'HeaderParser', 'BytesParser', 'BytesHeaderParser'] import warnings from io import StringIO, TextIOWrapper from email.feedparser import FeedParser from email.message import Message +from email._policybase import compat32 class Parser: - def __init__(self, *args, **kws): + def __init__(self, _class=Message, *, policy=compat32): """Parser of RFC 2822 and MIME email messages. Creates an in-memory object tree representing the email message, which @@ -30,28 +31,14 @@ class Parser: _class is the class to instantiate for new message objects when they must be created. This class must have a constructor that can take zero arguments. Default is Message.Message. + + The policy keyword specifies a policy object that controls a number of + aspects of the parser's operation. The default policy maintains + backward compatibility. + """ - if len(args) >= 1: - if '_class' in kws: - raise TypeError("Multiple values for keyword arg '_class'") - kws['_class'] = args[0] - if len(args) == 2: - if 'strict' in kws: - raise TypeError("Multiple values for keyword arg 'strict'") - kws['strict'] = args[1] - if len(args) > 2: - raise TypeError('Too many arguments') - if '_class' in kws: - self._class = kws['_class'] - del kws['_class'] - else: - self._class = Message - if 'strict' in kws: - warnings.warn("'strict' argument is deprecated (and ignored)", - DeprecationWarning, 2) - del kws['strict'] - if kws: - raise TypeError('Unexpected keyword arguments') + self._class = _class + self.policy = policy def parse(self, fp, headersonly=False): """Create a message structure from the data in a file. @@ -61,7 +48,7 @@ class Parser: parsing after reading the headers or not. The default is False, meaning it parses the entire contents of the file. """ - feedparser = FeedParser(self._class) + feedparser = FeedParser(self._class, policy=self.policy) if headersonly: feedparser._set_headersonly() while True: @@ -134,3 +121,11 @@ class BytesParser: """ text = text.decode('ASCII', errors='surrogateescape') return self.parser.parsestr(text, headersonly) + + +class BytesHeaderParser(BytesParser): + def parse(self, fp, headersonly=True): + return BytesParser.parse(self, fp, headersonly=True) + + def parsebytes(self, text, headersonly=True): + return BytesParser.parsebytes(self, text, headersonly=True) diff --git a/Lib/email/policy.py b/Lib/email/policy.py new file mode 100644 index 0000000..32cad0d --- /dev/null +++ b/Lib/email/policy.py @@ -0,0 +1,188 @@ +"""This will be the home for the policy that hooks in the new +code that adds all the email6 features. +""" + +from email._policybase import Policy, Compat32, compat32, _extend_docstrings +from email.utils import _has_surrogates +from email.headerregistry import HeaderRegistry as HeaderRegistry + +__all__ = [ + 'Compat32', + 'compat32', + 'Policy', + 'EmailPolicy', + 'default', + 'strict', + 'SMTP', + 'HTTP', + ] + +@_extend_docstrings +class EmailPolicy(Policy): + + """+ + PROVISIONAL + + The API extensions enabled by this this policy are currently provisional. + Refer to the documentation for details. + + This policy adds new header parsing and folding algorithms. Instead of + simple strings, headers are custom objects with custom attributes + depending on the type of the field. The folding algorithm fully + implements RFCs 2047 and 5322. + + In addition to the settable attributes listed above that apply to + all Policies, this policy adds the following additional attributes: + + refold_source -- if the value for a header in the Message object + came from the parsing of some source, this attribute + indicates whether or not a generator should refold + that value when transforming the message back into + stream form. The possible values are: + + none -- all source values use original folding + long -- source values that have any line that is + longer than max_line_length will be + refolded + all -- all values are refolded. + + The default is 'long'. + + header_factory -- a callable that takes two arguments, 'name' and + 'value', where 'name' is a header field name and + 'value' is an unfolded header field value, and + returns a string-like object that represents that + header. A default header_factory is provided that + understands some of the RFC5322 header field types. + (Currently address fields and date fields have + special treatment, while all other fields are + treated as unstructured. This list will be + completed before the extension is marked stable.) + """ + + refold_source = 'long' + header_factory = HeaderRegistry() + + def __init__(self, **kw): + # Ensure that each new instance gets a unique header factory + # (as opposed to clones, which share the factory). + if 'header_factory' not in kw: + object.__setattr__(self, 'header_factory', HeaderRegistry()) + super().__init__(**kw) + + def header_max_count(self, name): + """+ + The implementation for this class returns the max_count attribute from + the specialized header class that would be used to construct a header + of type 'name'. + """ + return self.header_factory[name].max_count + + # The logic of the next three methods is chosen such that it is possible to + # switch a Message object between a Compat32 policy and a policy derived + # from this class and have the results stay consistent. This allows a + # Message object constructed with this policy to be passed to a library + # that only handles Compat32 objects, or to receive such an object and + # convert it to use the newer style by just changing its policy. It is + # also chosen because it postpones the relatively expensive full rfc5322 + # parse until as late as possible when parsing from source, since in many + # applications only a few headers will actually be inspected. + + def header_source_parse(self, sourcelines): + """+ + The name is parsed as everything up to the ':' and returned unmodified. + The value is determined by stripping leading whitespace off the + remainder of the first line, joining all subsequent lines together, and + stripping any trailing carriage return or linefeed characters. (This + is the same as Compat32). + + """ + name, value = sourcelines[0].split(':', 1) + value = value.lstrip(' \t') + ''.join(sourcelines[1:]) + return (name, value.rstrip('\r\n')) + + def header_store_parse(self, name, value): + """+ + The name is returned unchanged. If the input value has a 'name' + attribute and it matches the name ignoring case, the value is returned + unchanged. Otherwise the name and value are passed to header_factory + method, and the resulting custom header object is returned as the + value. In this case a ValueError is raised if the input value contains + CR or LF characters. + + """ + if hasattr(value, 'name') and value.name.lower() == name.lower(): + return (name, value) + if isinstance(value, str) and len(value.splitlines())>1: + raise ValueError("Header values may not contain linefeed " + "or carriage return characters") + return (name, self.header_factory(name, value)) + + def header_fetch_parse(self, name, value): + """+ + If the value has a 'name' attribute, it is returned to unmodified. + Otherwise the name and the value with any linesep characters removed + are passed to the header_factory method, and the resulting custom + header object is returned. Any surrogateescaped bytes get turned + into the unicode unknown-character glyph. + + """ + if hasattr(value, 'name'): + return value + return self.header_factory(name, ''.join(value.splitlines())) + + def fold(self, name, value): + """+ + Header folding is controlled by the refold_source policy setting. A + value is considered to be a 'source value' if and only if it does not + have a 'name' attribute (having a 'name' attribute means it is a header + object of some sort). If a source value needs to be refolded according + to the policy, it is converted into a custom header object by passing + the name and the value with any linesep characters removed to the + header_factory method. Folding of a custom header object is done by + calling its fold method with the current policy. + + Source values are split into lines using splitlines. If the value is + not to be refolded, the lines are rejoined using the linesep from the + policy and returned. The exception is lines containing non-ascii + binary data. In that case the value is refolded regardless of the + refold_source setting, which causes the binary data to be CTE encoded + using the unknown-8bit charset. + + """ + return self._fold(name, value, refold_binary=True) + + def fold_binary(self, name, value): + """+ + The same as fold if cte_type is 7bit, except that the returned value is + bytes. + + If cte_type is 8bit, non-ASCII binary data is converted back into + bytes. Headers with binary data are not refolded, regardless of the + refold_header setting, since there is no way to know whether the binary + data consists of single byte characters or multibyte characters. + + """ + folded = self._fold(name, value, refold_binary=self.cte_type=='7bit') + return folded.encode('ascii', 'surrogateescape') + + def _fold(self, name, value, refold_binary=False): + if hasattr(value, 'name'): + return value.fold(policy=self) + maxlen = self.max_line_length if self.max_line_length else float('inf') + lines = value.splitlines() + refold = (self.refold_source == 'all' or + self.refold_source == 'long' and + (len(lines[0])+len(name)+2 > maxlen or + any(len(x) > maxlen for x in lines[1:]))) + if refold or refold_binary and _has_surrogates(value): + return self.header_factory(name, ''.join(lines)).fold(policy=self) + return name + ': ' + self.linesep.join(lines) + self.linesep + + +default = EmailPolicy() +# Make the default policy use the class default header_factory +del default.header_factory +strict = default.clone(raise_on_defect=True) +SMTP = default.clone(linesep='\r\n') +HTTP = default.clone(linesep='\r\n', max_line_length=None) diff --git a/Lib/email/test/__init__.py b/Lib/email/test/__init__.py deleted file mode 100644 index e69de29..0000000 --- a/Lib/email/test/__init__.py +++ /dev/null diff --git a/Lib/email/test/data/PyBanner048.gif b/Lib/email/test/data/PyBanner048.gif Binary files differdeleted file mode 100644 index 1a5c87f..0000000 --- a/Lib/email/test/data/PyBanner048.gif +++ /dev/null diff --git a/Lib/email/test/data/audiotest.au b/Lib/email/test/data/audiotest.au Binary files differdeleted file mode 100644 index f76b050..0000000 --- a/Lib/email/test/data/audiotest.au +++ /dev/null diff --git a/Lib/email/test/data/msg_01.txt b/Lib/email/test/data/msg_01.txt deleted file mode 100644 index 7e33bcf..0000000 --- a/Lib/email/test/data/msg_01.txt +++ /dev/null @@ -1,19 +0,0 @@ -Return-Path: <bbb@zzz.org> -Delivered-To: bbb@zzz.org -Received: by mail.zzz.org (Postfix, from userid 889) - id 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT) -MIME-Version: 1.0 -Content-Type: text/plain; charset=us-ascii -Content-Transfer-Encoding: 7bit -Message-ID: <15090.61304.110929.45684@aaa.zzz.org> -From: bbb@ddd.com (John X. Doe) -To: bbb@zzz.org -Subject: This is a test message -Date: Fri, 4 May 2001 14:05:44 -0400 - - -Hi, - -Do you like this message? - --Me diff --git a/Lib/email/test/data/msg_02.txt b/Lib/email/test/data/msg_02.txt deleted file mode 100644 index 43f2480..0000000 --- a/Lib/email/test/data/msg_02.txt +++ /dev/null @@ -1,135 +0,0 @@ -MIME-version: 1.0 -From: ppp-request@zzz.org -Sender: ppp-admin@zzz.org -To: ppp@zzz.org -Subject: Ppp digest, Vol 1 #2 - 5 msgs -Date: Fri, 20 Apr 2001 20:18:00 -0400 (EDT) -X-Mailer: Mailman v2.0.4 -X-Mailman-Version: 2.0.4 -Content-Type: multipart/mixed; boundary="192.168.1.2.889.32614.987812255.500.21814" - ---192.168.1.2.889.32614.987812255.500.21814 -Content-type: text/plain; charset=us-ascii -Content-description: Masthead (Ppp digest, Vol 1 #2) - -Send Ppp mailing list submissions to - ppp@zzz.org - -To subscribe or unsubscribe via the World Wide Web, visit - http://www.zzz.org/mailman/listinfo/ppp -or, via email, send a message with subject or body 'help' to - ppp-request@zzz.org - -You can reach the person managing the list at - ppp-admin@zzz.org - -When replying, please edit your Subject line so it is more specific -than "Re: Contents of Ppp digest..." - - ---192.168.1.2.889.32614.987812255.500.21814 -Content-type: text/plain; charset=us-ascii -Content-description: Today's Topics (5 msgs) - -Today's Topics: - - 1. testing #1 (Barry A. Warsaw) - 2. testing #2 (Barry A. Warsaw) - 3. testing #3 (Barry A. Warsaw) - 4. testing #4 (Barry A. Warsaw) - 5. testing #5 (Barry A. Warsaw) - ---192.168.1.2.889.32614.987812255.500.21814 -Content-Type: multipart/digest; boundary="__--__--" - ---__--__-- - -Message: 1 -Content-Type: text/plain; charset=us-ascii -Content-Transfer-Encoding: 7bit -Date: Fri, 20 Apr 2001 20:16:13 -0400 -To: ppp@zzz.org -From: barry@digicool.com (Barry A. Warsaw) -Subject: [Ppp] testing #1 -Precedence: bulk - - -hello - - ---__--__-- - -Message: 2 -Date: Fri, 20 Apr 2001 20:16:21 -0400 -Content-Type: text/plain; charset=us-ascii -Content-Transfer-Encoding: 7bit -To: ppp@zzz.org -From: barry@digicool.com (Barry A. Warsaw) -Precedence: bulk - - -hello - - ---__--__-- - -Message: 3 -Date: Fri, 20 Apr 2001 20:16:25 -0400 -Content-Type: text/plain; charset=us-ascii -Content-Transfer-Encoding: 7bit -To: ppp@zzz.org -From: barry@digicool.com (Barry A. Warsaw) -Subject: [Ppp] testing #3 -Precedence: bulk - - -hello - - ---__--__-- - -Message: 4 -Date: Fri, 20 Apr 2001 20:16:28 -0400 -Content-Type: text/plain; charset=us-ascii -Content-Transfer-Encoding: 7bit -To: ppp@zzz.org -From: barry@digicool.com (Barry A. Warsaw) -Subject: [Ppp] testing #4 -Precedence: bulk - - -hello - - ---__--__-- - -Message: 5 -Date: Fri, 20 Apr 2001 20:16:32 -0400 -Content-Type: text/plain; charset=us-ascii -Content-Transfer-Encoding: 7bit -To: ppp@zzz.org -From: barry@digicool.com (Barry A. Warsaw) -Subject: [Ppp] testing #5 -Precedence: bulk - - -hello - - - - ---__--__---- ---192.168.1.2.889.32614.987812255.500.21814 -Content-type: text/plain; charset=us-ascii -Content-description: Digest Footer - -_______________________________________________ -Ppp mailing list -Ppp@zzz.org -http://www.zzz.org/mailman/listinfo/ppp - - ---192.168.1.2.889.32614.987812255.500.21814-- - -End of Ppp Digest - diff --git a/Lib/email/test/data/msg_03.txt b/Lib/email/test/data/msg_03.txt deleted file mode 100644 index c748ebf..0000000 --- a/Lib/email/test/data/msg_03.txt +++ /dev/null @@ -1,16 +0,0 @@ -Return-Path: <bbb@zzz.org> -Delivered-To: bbb@zzz.org -Received: by mail.zzz.org (Postfix, from userid 889) - id 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT) -Message-ID: <15090.61304.110929.45684@aaa.zzz.org> -From: bbb@ddd.com (John X. Doe) -To: bbb@zzz.org -Subject: This is a test message -Date: Fri, 4 May 2001 14:05:44 -0400 - - -Hi, - -Do you like this message? - --Me diff --git a/Lib/email/test/data/msg_04.txt b/Lib/email/test/data/msg_04.txt deleted file mode 100644 index 1f633c4..0000000 --- a/Lib/email/test/data/msg_04.txt +++ /dev/null @@ -1,37 +0,0 @@ -Return-Path: <barry@python.org> -Delivered-To: barry@python.org -Received: by mail.python.org (Postfix, from userid 889) - id C2BF0D37C6; Tue, 11 Sep 2001 00:05:05 -0400 (EDT) -MIME-Version: 1.0 -Content-Type: multipart/mixed; boundary="h90VIIIKmx" -Content-Transfer-Encoding: 7bit -Message-ID: <15261.36209.358846.118674@anthem.python.org> -From: barry@python.org (Barry A. Warsaw) -To: barry@python.org -Subject: a simple multipart -Date: Tue, 11 Sep 2001 00:05:05 -0400 -X-Mailer: VM 6.95 under 21.4 (patch 4) "Artificial Intelligence" XEmacs Lucid -X-Attribution: BAW -X-Oblique-Strategy: Make a door into a window - - ---h90VIIIKmx -Content-Type: text/plain -Content-Disposition: inline; - filename="msg.txt" -Content-Transfer-Encoding: 7bit - -a simple kind of mirror -to reflect upon our own - ---h90VIIIKmx -Content-Type: text/plain -Content-Disposition: inline; - filename="msg.txt" -Content-Transfer-Encoding: 7bit - -a simple kind of mirror -to reflect upon our own - ---h90VIIIKmx-- - diff --git a/Lib/email/test/data/msg_05.txt b/Lib/email/test/data/msg_05.txt deleted file mode 100644 index 87d5e9c..0000000 --- a/Lib/email/test/data/msg_05.txt +++ /dev/null @@ -1,28 +0,0 @@ -From: foo -Subject: bar -To: baz -MIME-Version: 1.0 -Content-Type: multipart/report; report-type=delivery-status; - boundary="D1690A7AC1.996856090/mail.example.com" -Message-Id: <20010803162810.0CA8AA7ACC@mail.example.com> - -This is a MIME-encapsulated message. - ---D1690A7AC1.996856090/mail.example.com -Content-Type: text/plain - -Yadda yadda yadda - ---D1690A7AC1.996856090/mail.example.com - -Yadda yadda yadda - ---D1690A7AC1.996856090/mail.example.com -Content-Type: message/rfc822 - -From: nobody@python.org - -Yadda yadda yadda - ---D1690A7AC1.996856090/mail.example.com-- - diff --git a/Lib/email/test/data/msg_06.txt b/Lib/email/test/data/msg_06.txt deleted file mode 100644 index 69f3a47..0000000 --- a/Lib/email/test/data/msg_06.txt +++ /dev/null @@ -1,33 +0,0 @@ -Return-Path: <barry@python.org> -Delivered-To: barry@python.org -MIME-Version: 1.0 -Content-Type: message/rfc822 -Content-Description: forwarded message -Content-Transfer-Encoding: 7bit -Message-ID: <15265.9482.641338.555352@python.org> -From: barry@zope.com (Barry A. Warsaw) -Sender: barry@python.org -To: barry@python.org -Subject: forwarded message from Barry A. Warsaw -Date: Thu, 13 Sep 2001 17:28:42 -0400 -X-Mailer: VM 6.95 under 21.4 (patch 4) "Artificial Intelligence" XEmacs Lucid -X-Attribution: BAW -X-Oblique-Strategy: Be dirty -X-Url: http://barry.wooz.org - -MIME-Version: 1.0 -Content-Type: text/plain; charset=us-ascii -Return-Path: <barry@python.org> -Delivered-To: barry@python.org -Message-ID: <15265.9468.713530.98441@python.org> -From: barry@zope.com (Barry A. Warsaw) -Sender: barry@python.org -To: barry@python.org -Subject: testing -Date: Thu, 13 Sep 2001 17:28:28 -0400 -X-Mailer: VM 6.95 under 21.4 (patch 4) "Artificial Intelligence" XEmacs Lucid -X-Attribution: BAW -X-Oblique-Strategy: Spectrum analysis -X-Url: http://barry.wooz.org - - diff --git a/Lib/email/test/data/msg_07.txt b/Lib/email/test/data/msg_07.txt deleted file mode 100644 index 721f3a0..0000000 --- a/Lib/email/test/data/msg_07.txt +++ /dev/null @@ -1,83 +0,0 @@ -MIME-Version: 1.0 -From: Barry <barry@digicool.com> -To: Dingus Lovers <cravindogs@cravindogs.com> -Subject: Here is your dingus fish -Date: Fri, 20 Apr 2001 19:35:02 -0400 -Content-Type: multipart/mixed; boundary="BOUNDARY" - ---BOUNDARY -Content-Type: text/plain; charset="us-ascii" - -Hi there, - -This is the dingus fish. - ---BOUNDARY -Content-Type: image/gif; name="dingusfish.gif" -Content-Transfer-Encoding: base64 -content-disposition: attachment; filename="dingusfish.gif" - -R0lGODdhAAEAAfAAAP///wAAACwAAAAAAAEAAQAC/oSPqcvtD6OctNqLs968+w+G4kiW5omm6sq2 -7gvH8kzX9o3n+s73/g8MCofEovGITGICTKbyCV0FDNOo9SqpQqpOrJfXzTQj2vD3TGtqL+NtGQ2f -qTXmxzuOd7WXdcc9DyjU53ewFni4s0fGhdiYaEhGBelICTNoV1j5NUnFcrmUqemjNifJVWpaOqaI -oFq3SspZsSraE7sHq3jr1MZqWvi662vxV4tD+pvKW6aLDOCLyur8PDwbanyDeq0N3DctbQYeLDvR -RY6t95m6UB0d3mwIrV7e2VGNvjjffukeJp4w7F65KecGFsTHQGAygOrgrWs1jt28Rc88KESYcGLA -/obvTkH6p+CinWJiJmIMqXGQwH/y4qk0SYjgQTczT3ajKZGfuI0uJ4kkVI/DT5s3/ejkxI0aT4Y+ -YTYgWbImUaXk9nlLmnSh1qJiJFl0OpUqRK4oOy7NyRQtHWofhoYVxkwWXKUSn0YsS+fUV6lhqfYb -6ayd3Z5qQdG1B7bvQzaJjwUV2lixMUZ7JVsOlfjWVr/3NB/uFvnySBN6Dcb6rGwaRM3wsormw5cC -M9NxWy/bWdufudCvy8bOAjXjVVwta/uO21sE5RHBCzNFXtgq9ORtH4eYjVP4Yryo026nvkFmCeyA -B29efV6ravCMK5JwWd5897Qrx7ll38o6iHDZ/rXPR//feevhF4l7wjUGX3xq1eeRfM4RSJGBIV1D -z1gKPkfWag3mVBVvva1RlX5bAJTPR/2YqNtw/FkIYYEi/pIZiAdpcxpoHtmnYYoZtvhUftzdx5ZX -JSKDW405zkGcZzzGZ6KEv4FI224oDmijlEf+xp6MJK5ojY/ASeVUR+wsKRuJ+XFZ5o7ZeEime8t1 -ouUsU6YjF5ZtUihhkGfCdFQLWQFJ3UXxmElfhQnR+eCdcDbkFZp6vTRmj56ApCihn5QGpaToNZmR -n3NVSpZcQpZ2KEONusaiCsKAug0wkQbJSFO+PTSjneGxOuFjPlUk3ovWvdIerjUg9ZGIOtGq/qeX -eCYrrCX+1UPsgTKGGRSbzd5q156d/gpfbJxe66eD5iQKrXj7RGgruGxs62qebBHUKS32CKluCiqZ -qh+pmehmEb71noAUoe5e9Zm17S7773V10pjrtG4CmuurCV/n6zLK5turWNhqOvFXbjhZrMD0YhKe -wR0zOyuvsh6MWrGoIuzvyWu5y1WIFAqmJselypxXh6dKLNOKEB98L88bS2rkNqqlKzCNJp9c0G0j -Gzh0iRrCbHSXmPR643QS+4rWhgFmnSbSuXCjS0xAOWkU2UdLqyuUNfHSFdUouy3bm5i5GnDM3tG8 -doJ4r5tqu3pPbRSVfvs8uJzeNXhp3n4j/tZ42SwH7eaWUUOjc3qFV9453UHTXZfcLH+OeNs5g36x -lBnHvTm7EbMbLeuaLncao8vWCXimfo1o+843Ak6y4ChNeGntvAYvfLK4ezmoyNIbNCLTCXO9ZV3A -E8/s88RczPzDwI4Ob7XZyl7+9Miban29h+tJZPrE21wgvBphDfrrfPdCTPKJD/y98L1rZwHcV6Jq -Zab0metpuNIX/qAFPoz171WUaUb4HAhBSzHuHfjzHb3kha/2Cctis/ORArVHNYfFyYRH2pYIRzic -isVOfPWD1b6mRTqpCRBozzof6UZVvFXRxWIr3GGrEviGYgyPMfahheiSaLs/9QeFu7oZ/ndSY8DD -ya9x+uPed+7mxN2IzIISBOMLFYWVqC3Pew1T2nFuuCiwZS5/v6II10i4t1OJcUH2U9zxKodHsGGv -Oa+zkvNUYUOa/TCCRutF9MzDwdlUMJADTCGSbDQ5OV4PTamDoPEi6Ecc/RF5RWwkcdSXvSOaDWSn -I9LlvubFTQpuc6JKXLcKeb+xdbKRBnwREemXyjg6ME65aJiOuBgrktzykfPLJBKR9ClMavJ62/Ff -BlNIyod9yX9wcSXexnXFpvkrbXk64xsx5Db7wXKP5fSgsvwIMM/9631VLBfkmtbHRXpqmtei52hG -pUwSlo+BASQoeILDOBgREECxBBh5/iYmNsQ9dIv5+OI++QkqdsJPc3uykz5fkM+OraeekcQF7X4n -B5S67za5U967PmooGQhUXfF7afXyCD7ONdRe17QogYjVx38uLwtrS6nhTnm15LQUnu9E2uK6CNI/ -1HOABj0ESwOjut4FEpFQpdNAm4K2LHnDWHNcmKB2ioKBogysVZtMO2nSxUdZ8Yk2kJc7URioLVI0 -YgmtIwZj4LoeKemgnOnbUdGnzZ4Oa6scqiolBGqS6RgWNLu0RMhcaE6rhhU4hiuqFXPAG8fGwTPW -FKeLMtdVmXLSs5YJGF/YeVm7rREMlY3UYE+yCxbaMXX8y15m5zVHq6GOKDMynzII/jdUHdyVqIy0 -ifX2+r/EgtZcvRzSb72gU9ui87M2VecjKildW/aFqaYhKoryUjfB/g4qtyVuc60xFDGmCxwjW+qu -zjuwl2GkOWn66+3QiiEctvd04OVvcCVzjgT7lrkvjVGKKHmmlDUKowSeikb5kK/mJReuWOxONx+s -ULsl+Lqb0CVn0SrVyJ6wt4t6yTeSCafhPhAf0OXn6L60UMxiLolFAtmN35S2Ob1lZpQ1r/n0Qb5D -oQ1zJiRVDgF8N3Q8TYfbi3DyWCy3lT1nxyBs6FT3S2GOzWRlxwKvlRP0RPJA9SjxEy0UoEnkA+M4 -cnzLMJrBGWLFEaaUb5lvpqbq/loOaU5+DFuHPxo82/OZuM8FXG3oVNZhtWpMpb/0Xu5m/LfLhHZQ -7yuVI0MqZ7NE43imC8jH3IwGZlbPm0xkJYs7+2U48hXTsFSMqgGDvai0kLxyynKNT/waj+q1c1tz -GjOpPBgdCSq3UKZxCSsqFIY+O6JbAWGWcV1pwqLyj5sGqCF1xb1F3varUWqrJv6cN3PrUXzijtfZ -FshpBL3Xwr4GIPvU2N8EjrJgS1zl21rbXQMXeXc5jjFyrhpCzijSv/RQtyPSzHCFMhlME95fHglt -pRsX+dfSQjUeHAlpWzJ5iOo79Ldnaxai6bXTcGO3fp07ri7HLEmXXPlYi8bv/qVxvNcdra6m7Rlb -6JBTb5fd66VhFRjGArh2n7R1rDW4P5NOT9K0I183T2scYkeZ3q/VFyLb09U9ajzXBS8Kgkhc4mBS -kYY9cy3Vy9lUnuNJH8HGIclUilwnBtjUOH0gteGOZ4c/XNrhXLSYDyxfnD8z1pDy7rYRvDolhnbe -UMzxCZUs40s6s7UIvBnLgc0+vKuOkIXeOrDymlp+Zxra4MZLBbVrqD/jTJ597pDmnw5c4+DbyB88 -9Cg9DodYcSuMZT/114pptqc/EuTjRPvH/z5slzI3tluOEBBLqOXLOX+0I5929tO97wkvl/atCz+y -xJrdwteW2FNW/NSmBP+f/maYtVs/bYyBC7Ox3jsYZHL05CIrBa/nS+b3bHfiYm4Ueil1YZZSgAUI -fFZ1dxUmeA2oQRQ3RuGXNGLFV9/XbGFGPV6kfzk1TBBCd+izc7q1H+OHMJwmaBX2IQNYVAKHYepV -SSGCe6CnbYHHETKGNe43EDvFgZr0gB/nVHPHZ80VV1ojOiI3XDvYIkl4ayo4bxQIgrFXWTvBI0nH -VElWMuw2aLUWCRHHf8ymVCHjFlJnOSojfevCYyyyZDH0IcvHhrsnQ5O1OsWzONuVVKIxSxiFZ/tR -fKDAf6xFTnw4O9Qig2VCfW2hJQrmMOuHW0W3dLQmCMO2ccdUd/xyfflH/olTiHZVdGwb8nIwRzSE -J15jFlOJuBZBZ4CiyHyd2IFylFlB+HgHhYabhWOGwYO1ZH/Og1dtQlFMk352CGRSIFTapnWQEUtN -l4zv8S0aaCFDyGCBqDUxZYpxGHX01y/JuH1xhn7TOCnNCI4eKDs5WGX4R425F4vF1o3BJ4vO0otq -I3rimI7jJY1jISqnBxknCIvruF83mF5wN4X7qGLIhR8A2Vg0yFERSIXn9Vv3GHy3Vj/WIkKddlYi -yIMv2I/VMjTLpW7pt05SWIZR0RPyxpB4SIUM9lBPGBl0GC7oSEEwRYLe4pJpZY2P0zbI1n+Oc44w -qY3PUnmF0ixjVpDD/mJ9wpOBGTVgXlaCaZiPcIWK5NiKBIiPdGaQ0TWGvAiG7nMchdZb7Vgf8zNi -MuMyzRdy/lePe9iC4TRx7WhhOQI/QiSVNAmAa2lT/piFbuh7ofJoYSZzrSZ1bvmWw3eN2nKUPVky -uPN5/VRfohRd0VYZoqhKIlU6TXYhJxmPUIloAwc1bPmHEpaZYZORHNlXUJM07hATwHR8MJYqkwWR -WaIezFhxSFlc8/Fq82hEnpeRozg3ULhhr9lAGtVEkCg5ZNRuuVleBPaZadhG0ZgkyPmDOTOKzViM -YgOcpukKqQcbjAWS0IleQ2ROjdh6A+md1qWdBRSX7iSYgFRTtRmBpJioieXJiHfJiMGIR9fJOn8I -MSfXYhspn4ooSa2mSAj4n+8Bmg03fBJZoPOJgsVZRxu1oOMRPXYYjdqjihFaEoZpXBREanuJoRI6 -cibFinq4ngUKh/wQd/H5ofYCZ0HJXR62opZFaAT0iFIZo4DIiUojkjeqKiuoZirKo5Y1a7AWckGa -BkuYoD5lpDK6eUs6CkDqpETwl1EqpfhJpVeKpVl6EgUAADs= - ---BOUNDARY-- diff --git a/Lib/email/test/data/msg_08.txt b/Lib/email/test/data/msg_08.txt deleted file mode 100644 index b563083..0000000 --- a/Lib/email/test/data/msg_08.txt +++ /dev/null @@ -1,24 +0,0 @@ -MIME-Version: 1.0 -From: Barry Warsaw <barry@zope.com> -To: Dingus Lovers <cravindogs@cravindogs.com> -Subject: Lyrics -Date: Fri, 20 Apr 2001 19:35:02 -0400 -Content-Type: multipart/mixed; boundary="BOUNDARY" - ---BOUNDARY -Content-Type: text/plain; charset="us-ascii" - - ---BOUNDARY -Content-Type: text/html; charset="iso-8859-1" - - ---BOUNDARY -Content-Type: text/plain; charset="iso-8859-2" - - ---BOUNDARY -Content-Type: text/plain; charset="koi8-r" - - ---BOUNDARY-- diff --git a/Lib/email/test/data/msg_09.txt b/Lib/email/test/data/msg_09.txt deleted file mode 100644 index 575c4c2..0000000 --- a/Lib/email/test/data/msg_09.txt +++ /dev/null @@ -1,24 +0,0 @@ -MIME-Version: 1.0 -From: Barry Warsaw <barry@zope.com> -To: Dingus Lovers <cravindogs@cravindogs.com> -Subject: Lyrics -Date: Fri, 20 Apr 2001 19:35:02 -0400 -Content-Type: multipart/mixed; boundary="BOUNDARY" - ---BOUNDARY -Content-Type: text/plain; charset="us-ascii" - - ---BOUNDARY -Content-Type: text/html; charset="iso-8859-1" - - ---BOUNDARY -Content-Type: text/plain - - ---BOUNDARY -Content-Type: text/plain; charset="koi8-r" - - ---BOUNDARY-- diff --git a/Lib/email/test/data/msg_10.txt b/Lib/email/test/data/msg_10.txt deleted file mode 100644 index 0790396..0000000 --- a/Lib/email/test/data/msg_10.txt +++ /dev/null @@ -1,39 +0,0 @@ -MIME-Version: 1.0 -From: Barry Warsaw <barry@zope.com> -To: Dingus Lovers <cravindogs@cravindogs.com> -Subject: Lyrics -Date: Fri, 20 Apr 2001 19:35:02 -0400 -Content-Type: multipart/mixed; boundary="BOUNDARY" - ---BOUNDARY -Content-Type: text/plain; charset="us-ascii" -Content-Transfer-Encoding: 7bit - -This is a 7bit encoded message. - ---BOUNDARY -Content-Type: text/html; charset="iso-8859-1" -Content-Transfer-Encoding: Quoted-Printable - -=A1This is a Quoted Printable encoded message! - ---BOUNDARY -Content-Type: text/plain; charset="iso-8859-1" -Content-Transfer-Encoding: Base64 - -VGhpcyBpcyBhIEJhc2U2NCBlbmNvZGVkIG1lc3NhZ2Uu - - ---BOUNDARY -Content-Type: text/plain; charset="iso-8859-1" -Content-Transfer-Encoding: Base64 - -VGhpcyBpcyBhIEJhc2U2NCBlbmNvZGVkIG1lc3NhZ2UuCg== - - ---BOUNDARY -Content-Type: text/plain; charset="iso-8859-1" - -This has no Content-Transfer-Encoding: header. - ---BOUNDARY-- diff --git a/Lib/email/test/data/msg_11.txt b/Lib/email/test/data/msg_11.txt deleted file mode 100644 index 8f7f199..0000000 --- a/Lib/email/test/data/msg_11.txt +++ /dev/null @@ -1,7 +0,0 @@ -Content-Type: message/rfc822 -MIME-Version: 1.0 -Subject: The enclosing message - -Subject: An enclosed message - -Here is the body of the message. diff --git a/Lib/email/test/data/msg_12.txt b/Lib/email/test/data/msg_12.txt deleted file mode 100644 index 4bec8d9..0000000 --- a/Lib/email/test/data/msg_12.txt +++ /dev/null @@ -1,36 +0,0 @@ -MIME-Version: 1.0 -From: Barry Warsaw <barry@zope.com> -To: Dingus Lovers <cravindogs@cravindogs.com> -Subject: Lyrics -Date: Fri, 20 Apr 2001 19:35:02 -0400 -Content-Type: multipart/mixed; boundary="BOUNDARY" - ---BOUNDARY -Content-Type: text/plain; charset="us-ascii" - - ---BOUNDARY -Content-Type: text/html; charset="iso-8859-1" - - ---BOUNDARY -Content-Type: multipart/mixed; boundary="ANOTHER" - ---ANOTHER -Content-Type: text/plain; charset="iso-8859-2" - - ---ANOTHER -Content-Type: text/plain; charset="iso-8859-3" - ---ANOTHER-- - ---BOUNDARY -Content-Type: text/plain; charset="us-ascii" - - ---BOUNDARY -Content-Type: text/plain; charset="koi8-r" - - ---BOUNDARY-- diff --git a/Lib/email/test/data/msg_12a.txt b/Lib/email/test/data/msg_12a.txt deleted file mode 100644 index e94224e..0000000 --- a/Lib/email/test/data/msg_12a.txt +++ /dev/null @@ -1,38 +0,0 @@ -MIME-Version: 1.0 -From: Barry Warsaw <barry@zope.com> -To: Dingus Lovers <cravindogs@cravindogs.com> -Subject: Lyrics -Date: Fri, 20 Apr 2001 19:35:02 -0400 -Content-Type: multipart/mixed; boundary="BOUNDARY" - ---BOUNDARY -Content-Type: text/plain; charset="us-ascii" - - ---BOUNDARY -Content-Type: text/html; charset="iso-8859-1" - - ---BOUNDARY -Content-Type: multipart/mixed; boundary="ANOTHER" - ---ANOTHER -Content-Type: text/plain; charset="iso-8859-2" - - ---ANOTHER -Content-Type: text/plain; charset="iso-8859-3" - - ---ANOTHER-- - - ---BOUNDARY -Content-Type: text/plain; charset="us-ascii" - - ---BOUNDARY -Content-Type: text/plain; charset="koi8-r" - - ---BOUNDARY-- diff --git a/Lib/email/test/data/msg_13.txt b/Lib/email/test/data/msg_13.txt deleted file mode 100644 index 8e6d52d..0000000 --- a/Lib/email/test/data/msg_13.txt +++ /dev/null @@ -1,94 +0,0 @@ -MIME-Version: 1.0 -From: Barry <barry@digicool.com> -To: Dingus Lovers <cravindogs@cravindogs.com> -Subject: Here is your dingus fish -Date: Fri, 20 Apr 2001 19:35:02 -0400 -Content-Type: multipart/mixed; boundary="OUTER" - ---OUTER -Content-Type: text/plain; charset="us-ascii" - -A text/plain part - ---OUTER -Content-Type: multipart/mixed; boundary=BOUNDARY - - ---BOUNDARY -Content-Type: text/plain; charset="us-ascii" - -Hi there, - -This is the dingus fish. - ---BOUNDARY -Content-Type: image/gif; name="dingusfish.gif" -Content-Transfer-Encoding: base64 -content-disposition: attachment; filename="dingusfish.gif" - -R0lGODdhAAEAAfAAAP///wAAACwAAAAAAAEAAQAC/oSPqcvtD6OctNqLs968+w+G4kiW5omm6sq2 -7gvH8kzX9o3n+s73/g8MCofEovGITGICTKbyCV0FDNOo9SqpQqpOrJfXzTQj2vD3TGtqL+NtGQ2f -qTXmxzuOd7WXdcc9DyjU53ewFni4s0fGhdiYaEhGBelICTNoV1j5NUnFcrmUqemjNifJVWpaOqaI -oFq3SspZsSraE7sHq3jr1MZqWvi662vxV4tD+pvKW6aLDOCLyur8PDwbanyDeq0N3DctbQYeLDvR -RY6t95m6UB0d3mwIrV7e2VGNvjjffukeJp4w7F65KecGFsTHQGAygOrgrWs1jt28Rc88KESYcGLA -/obvTkH6p+CinWJiJmIMqXGQwH/y4qk0SYjgQTczT3ajKZGfuI0uJ4kkVI/DT5s3/ejkxI0aT4Y+ -YTYgWbImUaXk9nlLmnSh1qJiJFl0OpUqRK4oOy7NyRQtHWofhoYVxkwWXKUSn0YsS+fUV6lhqfYb -6ayd3Z5qQdG1B7bvQzaJjwUV2lixMUZ7JVsOlfjWVr/3NB/uFvnySBN6Dcb6rGwaRM3wsormw5cC -M9NxWy/bWdufudCvy8bOAjXjVVwta/uO21sE5RHBCzNFXtgq9ORtH4eYjVP4Yryo026nvkFmCeyA -B29efV6ravCMK5JwWd5897Qrx7ll38o6iHDZ/rXPR//feevhF4l7wjUGX3xq1eeRfM4RSJGBIV1D -z1gKPkfWag3mVBVvva1RlX5bAJTPR/2YqNtw/FkIYYEi/pIZiAdpcxpoHtmnYYoZtvhUftzdx5ZX -JSKDW405zkGcZzzGZ6KEv4FI224oDmijlEf+xp6MJK5ojY/ASeVUR+wsKRuJ+XFZ5o7ZeEime8t1 -ouUsU6YjF5ZtUihhkGfCdFQLWQFJ3UXxmElfhQnR+eCdcDbkFZp6vTRmj56ApCihn5QGpaToNZmR -n3NVSpZcQpZ2KEONusaiCsKAug0wkQbJSFO+PTSjneGxOuFjPlUk3ovWvdIerjUg9ZGIOtGq/qeX -eCYrrCX+1UPsgTKGGRSbzd5q156d/gpfbJxe66eD5iQKrXj7RGgruGxs62qebBHUKS32CKluCiqZ -qh+pmehmEb71noAUoe5e9Zm17S7773V10pjrtG4CmuurCV/n6zLK5turWNhqOvFXbjhZrMD0YhKe -wR0zOyuvsh6MWrGoIuzvyWu5y1WIFAqmJselypxXh6dKLNOKEB98L88bS2rkNqqlKzCNJp9c0G0j -Gzh0iRrCbHSXmPR643QS+4rWhgFmnSbSuXCjS0xAOWkU2UdLqyuUNfHSFdUouy3bm5i5GnDM3tG8 -doJ4r5tqu3pPbRSVfvs8uJzeNXhp3n4j/tZ42SwH7eaWUUOjc3qFV9453UHTXZfcLH+OeNs5g36x -lBnHvTm7EbMbLeuaLncao8vWCXimfo1o+843Ak6y4ChNeGntvAYvfLK4ezmoyNIbNCLTCXO9ZV3A -E8/s88RczPzDwI4Ob7XZyl7+9Miban29h+tJZPrE21wgvBphDfrrfPdCTPKJD/y98L1rZwHcV6Jq -Zab0metpuNIX/qAFPoz171WUaUb4HAhBSzHuHfjzHb3kha/2Cctis/ORArVHNYfFyYRH2pYIRzic -isVOfPWD1b6mRTqpCRBozzof6UZVvFXRxWIr3GGrEviGYgyPMfahheiSaLs/9QeFu7oZ/ndSY8DD -ya9x+uPed+7mxN2IzIISBOMLFYWVqC3Pew1T2nFuuCiwZS5/v6II10i4t1OJcUH2U9zxKodHsGGv -Oa+zkvNUYUOa/TCCRutF9MzDwdlUMJADTCGSbDQ5OV4PTamDoPEi6Ecc/RF5RWwkcdSXvSOaDWSn -I9LlvubFTQpuc6JKXLcKeb+xdbKRBnwREemXyjg6ME65aJiOuBgrktzykfPLJBKR9ClMavJ62/Ff -BlNIyod9yX9wcSXexnXFpvkrbXk64xsx5Db7wXKP5fSgsvwIMM/9631VLBfkmtbHRXpqmtei52hG -pUwSlo+BASQoeILDOBgREECxBBh5/iYmNsQ9dIv5+OI++QkqdsJPc3uykz5fkM+OraeekcQF7X4n -B5S67za5U967PmooGQhUXfF7afXyCD7ONdRe17QogYjVx38uLwtrS6nhTnm15LQUnu9E2uK6CNI/ -1HOABj0ESwOjut4FEpFQpdNAm4K2LHnDWHNcmKB2ioKBogysVZtMO2nSxUdZ8Yk2kJc7URioLVI0 -YgmtIwZj4LoeKemgnOnbUdGnzZ4Oa6scqiolBGqS6RgWNLu0RMhcaE6rhhU4hiuqFXPAG8fGwTPW -FKeLMtdVmXLSs5YJGF/YeVm7rREMlY3UYE+yCxbaMXX8y15m5zVHq6GOKDMynzII/jdUHdyVqIy0 -ifX2+r/EgtZcvRzSb72gU9ui87M2VecjKildW/aFqaYhKoryUjfB/g4qtyVuc60xFDGmCxwjW+qu -zjuwl2GkOWn66+3QiiEctvd04OVvcCVzjgT7lrkvjVGKKHmmlDUKowSeikb5kK/mJReuWOxONx+s -ULsl+Lqb0CVn0SrVyJ6wt4t6yTeSCafhPhAf0OXn6L60UMxiLolFAtmN35S2Ob1lZpQ1r/n0Qb5D -oQ1zJiRVDgF8N3Q8TYfbi3DyWCy3lT1nxyBs6FT3S2GOzWRlxwKvlRP0RPJA9SjxEy0UoEnkA+M4 -cnzLMJrBGWLFEaaUb5lvpqbq/loOaU5+DFuHPxo82/OZuM8FXG3oVNZhtWpMpb/0Xu5m/LfLhHZQ -7yuVI0MqZ7NE43imC8jH3IwGZlbPm0xkJYs7+2U48hXTsFSMqgGDvai0kLxyynKNT/waj+q1c1tz -GjOpPBgdCSq3UKZxCSsqFIY+O6JbAWGWcV1pwqLyj5sGqCF1xb1F3varUWqrJv6cN3PrUXzijtfZ -FshpBL3Xwr4GIPvU2N8EjrJgS1zl21rbXQMXeXc5jjFyrhpCzijSv/RQtyPSzHCFMhlME95fHglt -pRsX+dfSQjUeHAlpWzJ5iOo79Ldnaxai6bXTcGO3fp07ri7HLEmXXPlYi8bv/qVxvNcdra6m7Rlb -6JBTb5fd66VhFRjGArh2n7R1rDW4P5NOT9K0I183T2scYkeZ3q/VFyLb09U9ajzXBS8Kgkhc4mBS -kYY9cy3Vy9lUnuNJH8HGIclUilwnBtjUOH0gteGOZ4c/XNrhXLSYDyxfnD8z1pDy7rYRvDolhnbe -UMzxCZUs40s6s7UIvBnLgc0+vKuOkIXeOrDymlp+Zxra4MZLBbVrqD/jTJ597pDmnw5c4+DbyB88 -9Cg9DodYcSuMZT/114pptqc/EuTjRPvH/z5slzI3tluOEBBLqOXLOX+0I5929tO97wkvl/atCz+y -xJrdwteW2FNW/NSmBP+f/maYtVs/bYyBC7Ox3jsYZHL05CIrBa/nS+b3bHfiYm4Ueil1YZZSgAUI -fFZ1dxUmeA2oQRQ3RuGXNGLFV9/XbGFGPV6kfzk1TBBCd+izc7q1H+OHMJwmaBX2IQNYVAKHYepV -SSGCe6CnbYHHETKGNe43EDvFgZr0gB/nVHPHZ80VV1ojOiI3XDvYIkl4ayo4bxQIgrFXWTvBI0nH -VElWMuw2aLUWCRHHf8ymVCHjFlJnOSojfevCYyyyZDH0IcvHhrsnQ5O1OsWzONuVVKIxSxiFZ/tR -fKDAf6xFTnw4O9Qig2VCfW2hJQrmMOuHW0W3dLQmCMO2ccdUd/xyfflH/olTiHZVdGwb8nIwRzSE -J15jFlOJuBZBZ4CiyHyd2IFylFlB+HgHhYabhWOGwYO1ZH/Og1dtQlFMk352CGRSIFTapnWQEUtN -l4zv8S0aaCFDyGCBqDUxZYpxGHX01y/JuH1xhn7TOCnNCI4eKDs5WGX4R425F4vF1o3BJ4vO0otq -I3rimI7jJY1jISqnBxknCIvruF83mF5wN4X7qGLIhR8A2Vg0yFERSIXn9Vv3GHy3Vj/WIkKddlYi -yIMv2I/VMjTLpW7pt05SWIZR0RPyxpB4SIUM9lBPGBl0GC7oSEEwRYLe4pJpZY2P0zbI1n+Oc44w -qY3PUnmF0ixjVpDD/mJ9wpOBGTVgXlaCaZiPcIWK5NiKBIiPdGaQ0TWGvAiG7nMchdZb7Vgf8zNi -MuMyzRdy/lePe9iC4TRx7WhhOQI/QiSVNAmAa2lT/piFbuh7ofJoYSZzrSZ1bvmWw3eN2nKUPVky -uPN5/VRfohRd0VYZoqhKIlU6TXYhJxmPUIloAwc1bPmHEpaZYZORHNlXUJM07hATwHR8MJYqkwWR -WaIezFhxSFlc8/Fq82hEnpeRozg3ULhhr9lAGtVEkCg5ZNRuuVleBPaZadhG0ZgkyPmDOTOKzViM -YgOcpukKqQcbjAWS0IleQ2ROjdh6A+md1qWdBRSX7iSYgFRTtRmBpJioieXJiHfJiMGIR9fJOn8I -MSfXYhspn4ooSa2mSAj4n+8Bmg03fBJZoPOJgsVZRxu1oOMRPXYYjdqjihFaEoZpXBREanuJoRI6 -cibFinq4ngUKh/wQd/H5ofYCZ0HJXR62opZFaAT0iFIZo4DIiUojkjeqKiuoZirKo5Y1a7AWckGa -BkuYoD5lpDK6eUs6CkDqpETwl1EqpfhJpVeKpVl6EgUAADs= - ---BOUNDARY-- - ---OUTER-- diff --git a/Lib/email/test/data/msg_14.txt b/Lib/email/test/data/msg_14.txt deleted file mode 100644 index 5d98d2f..0000000 --- a/Lib/email/test/data/msg_14.txt +++ /dev/null @@ -1,23 +0,0 @@ -Return-Path: <bbb@zzz.org> -Delivered-To: bbb@zzz.org -Received: by mail.zzz.org (Postfix, from userid 889) - id 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT) -MIME-Version: 1.0 -Content-Type: text; charset=us-ascii -Content-Transfer-Encoding: 7bit -Message-ID: <15090.61304.110929.45684@aaa.zzz.org> -From: bbb@ddd.com (John X. Doe) -To: bbb@zzz.org -Subject: This is a test message -Date: Fri, 4 May 2001 14:05:44 -0400 - - -Hi, - -I'm sorry but I'm using a drainbread ISP, which although big and -wealthy can't seem to generate standard compliant email. :( - -This message has a Content-Type: header with no subtype. I hope you -can still read it. - --Me diff --git a/Lib/email/test/data/msg_15.txt b/Lib/email/test/data/msg_15.txt deleted file mode 100644 index 0025624..0000000 --- a/Lib/email/test/data/msg_15.txt +++ /dev/null @@ -1,52 +0,0 @@ -Return-Path: <xx@xx.dk> -Received: from fepD.post.tele.dk (195.41.46.149) by mail.groupcare.dk (LSMTP for Windows NT v1.1b) with SMTP id <0.0014F8A2@mail.groupcare.dk>; Mon, 30 Apr 2001 12:17:50 +0200 -User-Agent: Microsoft-Outlook-Express-Macintosh-Edition/5.02.2106 -Subject: XX -From: xx@xx.dk -To: XX -Message-ID: <xxxx> -Mime-version: 1.0 -Content-type: multipart/mixed; - boundary="MS_Mac_OE_3071477847_720252_MIME_Part" - -> Denne meddelelse er i MIME-format. Da dit postl - ---MS_Mac_OE_3071477847_720252_MIME_Part -Content-type: multipart/alternative; - boundary="MS_Mac_OE_3071477847_720252_MIME_Part" - - ---MS_Mac_OE_3071477847_720252_MIME_Part -Content-type: text/plain; charset="ISO-8859-1" -Content-transfer-encoding: quoted-printable - -Some removed test. - ---MS_Mac_OE_3071477847_720252_MIME_Part -Content-type: text/html; charset="ISO-8859-1" -Content-transfer-encoding: quoted-printable - -<HTML> -<HEAD> -<TITLE>Some removed HTML</TITLE> -</HEAD> -<BODY> -Some removed text. -</BODY> -</HTML> - - ---MS_Mac_OE_3071477847_720252_MIME_Part-- - - ---MS_Mac_OE_3071477847_720252_MIME_Part -Content-type: image/gif; name="xx.gif"; - x-mac-creator="6F676C65"; - x-mac-type="47494666" -Content-disposition: attachment -Content-transfer-encoding: base64 - -Some removed base64 encoded chars. - ---MS_Mac_OE_3071477847_720252_MIME_Part-- - diff --git a/Lib/email/test/data/msg_16.txt b/Lib/email/test/data/msg_16.txt deleted file mode 100644 index 56167e9..0000000 --- a/Lib/email/test/data/msg_16.txt +++ /dev/null @@ -1,123 +0,0 @@ -Return-Path: <> -Delivered-To: scr-admin@socal-raves.org -Received: from cougar.noc.ucla.edu (cougar.noc.ucla.edu [169.232.10.18]) - by babylon.socal-raves.org (Postfix) with ESMTP id CCC2C51B84 - for <scr-admin@socal-raves.org>; Sun, 23 Sep 2001 20:13:54 -0700 (PDT) -Received: from sims-ms-daemon by cougar.noc.ucla.edu - (Sun Internet Mail Server sims.3.5.2000.03.23.18.03.p10) - id <0GK500B01D0B8Y@cougar.noc.ucla.edu> for scr-admin@socal-raves.org; Sun, - 23 Sep 2001 20:14:35 -0700 (PDT) -Received: from cougar.noc.ucla.edu - (Sun Internet Mail Server sims.3.5.2000.03.23.18.03.p10) - id <0GK500B01D0B8X@cougar.noc.ucla.edu>; Sun, 23 Sep 2001 20:14:35 -0700 (PDT) -Date: Sun, 23 Sep 2001 20:14:35 -0700 (PDT) -From: Internet Mail Delivery <postmaster@ucla.edu> -Subject: Delivery Notification: Delivery has failed -To: scr-admin@socal-raves.org -Message-id: <0GK500B04D0B8X@cougar.noc.ucla.edu> -MIME-version: 1.0 -Sender: scr-owner@socal-raves.org -Errors-To: scr-owner@socal-raves.org -X-BeenThere: scr@socal-raves.org -X-Mailman-Version: 2.1a3 -Precedence: bulk -List-Help: <mailto:scr-request@socal-raves.org?subject=help> -List-Post: <mailto:scr@socal-raves.org> -List-Subscribe: <http://socal-raves.org/mailman/listinfo/scr>, - <mailto:scr-request@socal-raves.org?subject=subscribe> -List-Id: SoCal-Raves <scr.socal-raves.org> -List-Unsubscribe: <http://socal-raves.org/mailman/listinfo/scr>, - <mailto:scr-request@socal-raves.org?subject=unsubscribe> -List-Archive: <http://socal-raves.org/mailman/private/scr/> -Content-Type: multipart/report; boundary="Boundary_(ID_PGS2F2a+z+/jL7hupKgRhA)" - - ---Boundary_(ID_PGS2F2a+z+/jL7hupKgRhA) -Content-type: text/plain; charset=ISO-8859-1 - -This report relates to a message you sent with the following header fields: - - Message-id: <002001c144a6$8752e060$56104586@oxy.edu> - Date: Sun, 23 Sep 2001 20:10:55 -0700 - From: "Ian T. Henry" <henryi@oxy.edu> - To: SoCal Raves <scr@socal-raves.org> - Subject: [scr] yeah for Ians!! - -Your message cannot be delivered to the following recipients: - - Recipient address: jangel1@cougar.noc.ucla.edu - Reason: recipient reached disk quota - - ---Boundary_(ID_PGS2F2a+z+/jL7hupKgRhA) -Content-type: message/DELIVERY-STATUS - -Original-envelope-id: 0GK500B4HD0888@cougar.noc.ucla.edu -Reporting-MTA: dns; cougar.noc.ucla.edu - -Action: failed -Status: 5.0.0 (recipient reached disk quota) -Original-recipient: rfc822;jangel1@cougar.noc.ucla.edu -Final-recipient: rfc822;jangel1@cougar.noc.ucla.edu - ---Boundary_(ID_PGS2F2a+z+/jL7hupKgRhA) -Content-type: MESSAGE/RFC822 - -Return-path: scr-admin@socal-raves.org -Received: from sims-ms-daemon by cougar.noc.ucla.edu - (Sun Internet Mail Server sims.3.5.2000.03.23.18.03.p10) - id <0GK500B01D0B8X@cougar.noc.ucla.edu>; Sun, 23 Sep 2001 20:14:35 -0700 (PDT) -Received: from panther.noc.ucla.edu by cougar.noc.ucla.edu - (Sun Internet Mail Server sims.3.5.2000.03.23.18.03.p10) - with ESMTP id <0GK500B4GD0888@cougar.noc.ucla.edu> for jangel1@sims-ms-daemon; - Sun, 23 Sep 2001 20:14:33 -0700 (PDT) -Received: from babylon.socal-raves.org - (ip-209-85-222-117.dreamhost.com [209.85.222.117]) - by panther.noc.ucla.edu (8.9.1a/8.9.1) with ESMTP id UAA09793 for - <jangel1@ucla.edu>; Sun, 23 Sep 2001 20:14:32 -0700 (PDT) -Received: from babylon (localhost [127.0.0.1]) by babylon.socal-raves.org - (Postfix) with ESMTP id D3B2951B70; Sun, 23 Sep 2001 20:13:47 -0700 (PDT) -Received: by babylon.socal-raves.org (Postfix, from userid 60001) - id A611F51B82; Sun, 23 Sep 2001 20:13:46 -0700 (PDT) -Received: from tiger.cc.oxy.edu (tiger.cc.oxy.edu [134.69.3.112]) - by babylon.socal-raves.org (Postfix) with ESMTP id ADA7351B70 for - <scr@socal-raves.org>; Sun, 23 Sep 2001 20:13:44 -0700 (PDT) -Received: from ent (n16h86.dhcp.oxy.edu [134.69.16.86]) - by tiger.cc.oxy.edu (8.8.8/8.8.8) with SMTP id UAA08100 for - <scr@socal-raves.org>; Sun, 23 Sep 2001 20:14:24 -0700 (PDT) -Date: Sun, 23 Sep 2001 20:10:55 -0700 -From: "Ian T. Henry" <henryi@oxy.edu> -Subject: [scr] yeah for Ians!! -Sender: scr-admin@socal-raves.org -To: SoCal Raves <scr@socal-raves.org> -Errors-to: scr-admin@socal-raves.org -Message-id: <002001c144a6$8752e060$56104586@oxy.edu> -MIME-version: 1.0 -X-Mailer: Microsoft Outlook Express 5.50.4522.1200 -Content-type: text/plain; charset=us-ascii -Precedence: bulk -Delivered-to: scr-post@babylon.socal-raves.org -Delivered-to: scr@socal-raves.org -X-Converted-To-Plain-Text: from multipart/alternative by demime 0.98e -X-Converted-To-Plain-Text: Alternative section used was text/plain -X-BeenThere: scr@socal-raves.org -X-Mailman-Version: 2.1a3 -List-Help: <mailto:scr-request@socal-raves.org?subject=help> -List-Post: <mailto:scr@socal-raves.org> -List-Subscribe: <http://socal-raves.org/mailman/listinfo/scr>, - <mailto:scr-request@socal-raves.org?subject=subscribe> -List-Id: SoCal-Raves <scr.socal-raves.org> -List-Unsubscribe: <http://socal-raves.org/mailman/listinfo/scr>, - <mailto:scr-request@socal-raves.org?subject=unsubscribe> -List-Archive: <http://socal-raves.org/mailman/private/scr/> - -I always love to find more Ian's that are over 3 years old!! - -Ian -_______________________________________________ -For event info, list questions, or to unsubscribe, see http://www.socal-raves.org/ - - - ---Boundary_(ID_PGS2F2a+z+/jL7hupKgRhA)-- - diff --git a/Lib/email/test/data/msg_17.txt b/Lib/email/test/data/msg_17.txt deleted file mode 100644 index 8d86e41..0000000 --- a/Lib/email/test/data/msg_17.txt +++ /dev/null @@ -1,12 +0,0 @@ -MIME-Version: 1.0 -From: Barry <barry@digicool.com> -To: Dingus Lovers <cravindogs@cravindogs.com> -Subject: Here is your dingus fish -Date: Fri, 20 Apr 2001 19:35:02 -0400 -Content-Type: multipart/mixed; boundary="BOUNDARY" - -Hi there, - -This is the dingus fish. - -[Non-text (image/gif) part of message omitted, filename dingusfish.gif] diff --git a/Lib/email/test/data/msg_18.txt b/Lib/email/test/data/msg_18.txt deleted file mode 100644 index f9f4904..0000000 --- a/Lib/email/test/data/msg_18.txt +++ /dev/null @@ -1,6 +0,0 @@ -Content-Type: text/plain; charset="us-ascii" -MIME-Version: 1.0 -Content-Transfer-Encoding: 7bit -X-Foobar-Spoink-Defrobnit: wasnipoop; giraffes="very-long-necked-animals"; - spooge="yummy"; hippos="gargantuan"; marshmallows="gooey" - diff --git a/Lib/email/test/data/msg_19.txt b/Lib/email/test/data/msg_19.txt deleted file mode 100644 index 49bf7fc..0000000 --- a/Lib/email/test/data/msg_19.txt +++ /dev/null @@ -1,43 +0,0 @@ -Send Ppp mailing list submissions to - ppp@zzz.org - -To subscribe or unsubscribe via the World Wide Web, visit - http://www.zzz.org/mailman/listinfo/ppp -or, via email, send a message with subject or body 'help' to - ppp-request@zzz.org - -You can reach the person managing the list at - ppp-admin@zzz.org - -When replying, please edit your Subject line so it is more specific -than "Re: Contents of Ppp digest..." - -Today's Topics: - - 1. testing #1 (Barry A. Warsaw) - 2. testing #2 (Barry A. Warsaw) - 3. testing #3 (Barry A. Warsaw) - 4. testing #4 (Barry A. Warsaw) - 5. testing #5 (Barry A. Warsaw) - -hello - - -hello - - -hello - - -hello - - -hello - - - -_______________________________________________ -Ppp mailing list -Ppp@zzz.org -http://www.zzz.org/mailman/listinfo/ppp - diff --git a/Lib/email/test/data/msg_20.txt b/Lib/email/test/data/msg_20.txt deleted file mode 100644 index 1a6a887..0000000 --- a/Lib/email/test/data/msg_20.txt +++ /dev/null @@ -1,22 +0,0 @@ -Return-Path: <bbb@zzz.org> -Delivered-To: bbb@zzz.org -Received: by mail.zzz.org (Postfix, from userid 889) - id 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT) -MIME-Version: 1.0 -Content-Type: text/plain; charset=us-ascii -Content-Transfer-Encoding: 7bit -Message-ID: <15090.61304.110929.45684@aaa.zzz.org> -From: bbb@ddd.com (John X. Doe) -To: bbb@zzz.org -Cc: ccc@zzz.org -CC: ddd@zzz.org -cc: eee@zzz.org -Subject: This is a test message -Date: Fri, 4 May 2001 14:05:44 -0400 - - -Hi, - -Do you like this message? - --Me diff --git a/Lib/email/test/data/msg_21.txt b/Lib/email/test/data/msg_21.txt deleted file mode 100644 index 23590b2..0000000 --- a/Lib/email/test/data/msg_21.txt +++ /dev/null @@ -1,20 +0,0 @@ -From: aperson@dom.ain -To: bperson@dom.ain -Subject: Test -Content-Type: multipart/mixed; boundary="BOUNDARY" - -MIME message ---BOUNDARY -Content-Type: text/plain; charset="us-ascii" -MIME-Version: 1.0 -Content-Transfer-Encoding: 7bit - -One ---BOUNDARY -Content-Type: text/plain; charset="us-ascii" -MIME-Version: 1.0 -Content-Transfer-Encoding: 7bit - -Two ---BOUNDARY-- -End of MIME message diff --git a/Lib/email/test/data/msg_22.txt b/Lib/email/test/data/msg_22.txt deleted file mode 100644 index af9de5f..0000000 --- a/Lib/email/test/data/msg_22.txt +++ /dev/null @@ -1,46 +0,0 @@ -Mime-Version: 1.0 -Message-Id: <a05001902b7f1c33773e9@[134.84.183.138]> -Date: Tue, 16 Oct 2001 13:59:25 +0300 -To: a@example.com -From: b@example.com -Content-Type: multipart/mixed; boundary="============_-1208892523==_============" - ---============_-1208892523==_============ -Content-Type: text/plain; charset="us-ascii" ; format="flowed" - -Text text text. ---============_-1208892523==_============ -Content-Id: <a05001902b7f1c33773e9@[134.84.183.138].0.0> -Content-Type: image/jpeg; name="wibble.JPG" - ; x-mac-type="4A504547" - ; x-mac-creator="474B4F4E" -Content-Disposition: attachment; filename="wibble.JPG" -Content-Transfer-Encoding: base64 - -/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEB -AQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQH/wAALCAXABIEBAREA -g6bCjjw/pIZSjO6FWFpldjySOmCNrO7DBZibUXhTwtCixw+GtAijVdqxxaPp0aKvmGXa -qrbBQvms0mAMeYS/3iTV1dG0hHaRNK01XblnWxtVdjkHLMIgTyqnk9VB7CrP2KzIINpa -4O7I+zxYO9WV8jZg71Zlb+8rMDkEirAVQFAUAKAFAAAUAYAUDgADgY6DjpRtXj5RxjHA -4wQRj0wQCMdCAewpaKKK/9k= ---============_-1208892523==_============ -Content-Id: <a05001902b7f1c33773e9@[134.84.183.138].0.1> -Content-Type: image/jpeg; name="wibble2.JPG" - ; x-mac-type="4A504547" - ; x-mac-creator="474B4F4E" -Content-Disposition: attachment; filename="wibble2.JPG" -Content-Transfer-Encoding: base64 - -/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEB -AQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQH/wAALCAXABJ0BAREA -/8QAHwAAAQUBAQEBAQEAAAAAAAAAAAECAwQFBgcICQoL/8QAtRAAAgEDAwIEAwUFBAQA -W6NFJJBEkU10kKGTcWMDwxuU+0JHvk8qAtOpNwqSR0n8c3BlDyXHlqsUltHEiTvdXLxR -7vMiGDNJAJWkAMk8ZkCFp5G2oo5W++INrbQtNfTQxJAuXlupz9oS4d5Y1W+E2XlWZJJE -Y7LWYQxTLE1zuMbfBPxw8X2fibVdIbSbI6nLZxX635t9TjtYreWR7WGKJTLJFFKSlozO -0ShxIXM43uC3/9k= ---============_-1208892523==_============ -Content-Type: text/plain; charset="us-ascii" ; format="flowed" - -Text text text. ---============_-1208892523==_============-- - diff --git a/Lib/email/test/data/msg_23.txt b/Lib/email/test/data/msg_23.txt deleted file mode 100644 index bb2e8ec..0000000 --- a/Lib/email/test/data/msg_23.txt +++ /dev/null @@ -1,8 +0,0 @@ -From: aperson@dom.ain -Content-Type: multipart/mixed; boundary="BOUNDARY" - ---BOUNDARY -Content-Type: text/plain - -A message part ---BOUNDARY-- diff --git a/Lib/email/test/data/msg_24.txt b/Lib/email/test/data/msg_24.txt deleted file mode 100644 index 4e52339..0000000 --- a/Lib/email/test/data/msg_24.txt +++ /dev/null @@ -1,10 +0,0 @@ -Content-Type: multipart/mixed; boundary="BOUNDARY" -MIME-Version: 1.0 -Subject: A subject -To: aperson@dom.ain -From: bperson@dom.ain - ---BOUNDARY - - ---BOUNDARY-- diff --git a/Lib/email/test/data/msg_25.txt b/Lib/email/test/data/msg_25.txt deleted file mode 100644 index 9e35275..0000000 --- a/Lib/email/test/data/msg_25.txt +++ /dev/null @@ -1,117 +0,0 @@ -From MAILER-DAEMON Fri Apr 06 16:46:09 2001 -Received: from [204.245.199.98] (helo=zinfandel.lacita.com) - by www.linux.org.uk with esmtp (Exim 3.13 #1) - id 14lYR6-0008Iv-00 - for linuxuser-admin@www.linux.org.uk; Fri, 06 Apr 2001 16:46:09 +0100 -Received: from localhost (localhost) by zinfandel.lacita.com (8.7.3/8.6.10-MT4.00) with internal id JAB03225; Fri, 6 Apr 2001 09:23:06 -0800 (GMT-0800) -Date: Fri, 6 Apr 2001 09:23:06 -0800 (GMT-0800) -From: Mail Delivery Subsystem <MAILER-DAEMON@zinfandel.lacita.com> -Subject: Returned mail: Too many hops 19 (17 max): from <linuxuser-admin@www.linux.org.uk> via [199.164.235.226], to <scoffman@wellpartner.com> -Message-Id: <200104061723.JAB03225@zinfandel.lacita.com> -To: <linuxuser-admin@www.linux.org.uk> -To: postmaster@zinfandel.lacita.com -MIME-Version: 1.0 -Content-Type: multipart/report; report-type=delivery-status; - bo -Auto-Submitted: auto-generated (failure) - -This is a MIME-encapsulated message - ---JAB03225.986577786/zinfandel.lacita.com - -The original message was received at Fri, 6 Apr 2001 09:23:03 -0800 (GMT-0800) -from [199.164.235.226] - - ----- The following addresses have delivery notifications ----- -<scoffman@wellpartner.com> (unrecoverable error) - - ----- Transcript of session follows ----- -554 Too many hops 19 (17 max): from <linuxuser-admin@www.linux.org.uk> via [199.164.235.226], to <scoffman@wellpartner.com> - ---JAB03225.986577786/zinfandel.lacita.com -Content-Type: message/delivery-status - -Reporting-MTA: dns; zinfandel.lacita.com -Received-From-MTA: dns; [199.164.235.226] -Arrival-Date: Fri, 6 Apr 2001 09:23:03 -0800 (GMT-0800) - -Final-Recipient: rfc822; scoffman@wellpartner.com -Action: failed -Status: 5.4.6 -Last-Attempt-Date: Fri, 6 Apr 2001 09:23:06 -0800 (GMT-0800) - ---JAB03225.986577786/zinfandel.lacita.com -Content-Type: text/rfc822-headers - -Return-Path: linuxuser-admin@www.linux.org.uk -Received: from ns1.wellpartner.net ([199.164.235.226]) by zinfandel.lacita.com (8.7.3/8.6.10-MT4.00) with ESMTP id JAA03225 for <scoffman@wellpartner.com>; Fri, 6 Apr 2001 09:23:03 -0800 (GMT-0800) -Received: from zinfandel.lacita.com ([204.245.199.98]) - by - fo -Received: from ns1.wellpartner.net ([199.164.235.226]) by zinfandel.lacita.com (8.7.3/8.6.10-MT4.00) with ESMTP id JAA03221 for <scoffman@wellpartner.com>; Fri, 6 Apr 2001 09:22:18 -0800 (GMT-0800) -Received: from zinfandel.lacita.com ([204.245.199.98]) - by - fo -Received: from ns1.wellpartner.net ([199.164.235.226]) by zinfandel.lacita.com (8.7.3/8.6.10-MT4.00) with ESMTP id JAA03217 for <scoffman@wellpartner.com>; Fri, 6 Apr 2001 09:21:37 -0800 (GMT-0800) -Received: from zinfandel.lacita.com ([204.245.199.98]) - by - fo -Received: from ns1.wellpartner.net ([199.164.235.226]) by zinfandel.lacita.com (8.7.3/8.6.10-MT4.00) with ESMTP id JAA03213 for <scoffman@wellpartner.com>; Fri, 6 Apr 2001 09:20:56 -0800 (GMT-0800) -Received: from zinfandel.lacita.com ([204.245.199.98]) - by - fo -Received: from ns1.wellpartner.net ([199.164.235.226]) by zinfandel.lacita.com (8.7.3/8.6.10-MT4.00) with ESMTP id JAA03209 for <scoffman@wellpartner.com>; Fri, 6 Apr 2001 09:20:15 -0800 (GMT-0800) -Received: from zinfandel.lacita.com ([204.245.199.98]) - by - fo -Received: from ns1.wellpartner.net ([199.164.235.226]) by zinfandel.lacita.com (8.7.3/8.6.10-MT4.00) with ESMTP id JAA03205 for <scoffman@wellpartner.com>; Fri, 6 Apr 2001 09:19:33 -0800 (GMT-0800) -Received: from zinfandel.lacita.com ([204.245.199.98]) - by - fo -Received: from ns1.wellpartner.net ([199.164.235.226]) by zinfandel.lacita.com (8.7.3/8.6.10-MT4.00) with ESMTP id JAA03201 for <scoffman@wellpartner.com>; Fri, 6 Apr 2001 09:18:52 -0800 (GMT-0800) -Received: from zinfandel.lacita.com ([204.245.199.98]) - by - fo -Received: from ns1.wellpartner.net ([199.164.235.226]) by zinfandel.lacita.com (8.7.3/8.6.10-MT4.00) with ESMTP id JAA03197 for <scoffman@wellpartner.com>; Fri, 6 Apr 2001 09:17:54 -0800 (GMT-0800) -Received: from www.linux.org.uk (parcelfarce.linux.theplanet.co.uk [195.92.249.252]) - by - fo -Received: from localhost.localdomain - ([ - by - id -Received: from [212.1.130.11] (helo=s1.uklinux.net ident=root) - by - id - fo -Received: from server (ppp-2-22.cvx4.telinco.net [212.1.149.22]) - by - fo -From: Daniel James <daniel@linuxuser.co.uk> -Organization: LinuxUser -To: linuxuser@www.linux.org.uk -X-Mailer: KMail [version 1.1.99] -Content-Type: text/plain; - c -MIME-Version: 1.0 -Message-Id: <01040616033903.00962@server> -Content-Transfer-Encoding: 8bit -Subject: [LinuxUser] bulletin no. 45 -Sender: linuxuser-admin@www.linux.org.uk -Errors-To: linuxuser-admin@www.linux.org.uk -X-BeenThere: linuxuser@www.linux.org.uk -X-Mailman-Version: 2.0.3 -Precedence: bulk -List-Help: <mailto:linuxuser-request@www.linux.org.uk?subject=help> -List-Post: <mailto:linuxuser@www.linux.org.uk> -List-Subscribe: <http://www.linux.org.uk/mailman/listinfo/linuxuser>, - <m -List-Id: bulletins from LinuxUser magazine <linuxuser.www.linux.org.uk> -List-Unsubscribe: <http://www.linux.org.uk/mailman/listinfo/linuxuser>, - <m -List-Archive: <http://www.linux.org.uk/pipermail/linuxuser/> -Date: Fri, 6 Apr 2001 16:03:39 +0100 - ---JAB03225.986577786/zinfandel.lacita.com-- - - diff --git a/Lib/email/test/data/msg_26.txt b/Lib/email/test/data/msg_26.txt deleted file mode 100644 index 58efaa9..0000000 --- a/Lib/email/test/data/msg_26.txt +++ /dev/null @@ -1,46 +0,0 @@ -Received: from xcar [192.168.0.2] by jeeves.wooster.local
- (SMTPD32-7.07 EVAL) id AFF92F0214; Sun, 12 May 2002 08:55:37 +0100
-Date: Sun, 12 May 2002 08:56:15 +0100
-From: Father Time <father.time@xcar.wooster.local>
-To: timbo@jeeves.wooster.local
-Subject: IMAP file test
-Message-ID: <6df65d354b.father.time@rpc.wooster.local>
-X-Organization: Home
-User-Agent: Messenger-Pro/2.50a (MsgServe/1.50) (RISC-OS/4.02) POPstar/2.03
-MIME-Version: 1.0
-Content-Type: multipart/mixed; boundary="1618492860--2051301190--113853680"
-Status: R
-X-UIDL: 319998302
-
-This message is in MIME format which your mailer apparently does not support.
-You either require a newer version of your software which supports MIME, or
-a separate MIME decoding utility. Alternatively, ask the sender of this
-message to resend it in a different format.
-
---1618492860--2051301190--113853680
-Content-Type: text/plain; charset=us-ascii
-
-Simple email with attachment.
-
-
---1618492860--2051301190--113853680
-Content-Type: application/riscos; name="clock.bmp,69c"; type=BMP;
- load=&fff69c4b; exec=&355dd4d1; access=&03
-Content-Disposition: attachment; filename="clock.bmp"
-Content-Transfer-Encoding: base64
-
-Qk12AgAAAAAAAHYAAAAoAAAAIAAAACAAAAABAAQAAAAAAAAAAADXDQAA1w0AAAAAAAAA
-AAAAAAAAAAAAiAAAiAAAAIiIAIgAAACIAIgAiIgAALu7uwCIiIgAERHdACLuIgAz//8A
-zAAAAN0R3QDu7iIA////AAAAAAAAAAAAAAAAAAAAAAAAAAi3AAAAAAAAADeAAAAAAAAA
-C3ADMzMzMANwAAAAAAAAAAAHMAAAAANwAAAAAAAAAACAMAd3zPfwAwgAAAAAAAAIAwd/
-f8x/f3AwgAAAAAAAgDB0x/f3//zPAwgAAAAAAAcHfM9////8z/AwAAAAAAiwd/f3////
-////A4AAAAAAcEx/f///////zAMAAAAAiwfM9////3///8zwOAAAAAcHf3////B/////
-8DAAAAALB/f3///wd3d3//AwAAAABwTPf//wCQAAD/zAMAAAAAsEx/f///B////8wDAA
-AAAHB39////wf/////AwAAAACwf39///8H/////wMAAAAIcHfM9///B////M8DgAAAAA
-sHTH///wf///xAMAAAAACHB3f3//8H////cDgAAAAAALB3zH//D//M9wMAAAAAAAgLB0
-z39///xHAwgAAAAAAAgLB3d3RHd3cDCAAAAAAAAAgLAHd0R3cAMIAAAAAAAAgAgLcAAA
-AAMwgAgAAAAACDAAAAu7t7cwAAgDgAAAAABzcIAAAAAAAAgDMwAAAAAAN7uwgAAAAAgH
-MzMAAAAACH97tzAAAAALu3c3gAAAAAAL+7tzDABAu7f7cAAAAAAACA+3MA7EQAv/sIAA
-AAAAAAAIAAAAAAAAAIAAAAAA
-
---1618492860--2051301190--113853680--
diff --git a/Lib/email/test/data/msg_27.txt b/Lib/email/test/data/msg_27.txt deleted file mode 100644 index d019176..0000000 --- a/Lib/email/test/data/msg_27.txt +++ /dev/null @@ -1,15 +0,0 @@ -Return-Path: <aperson@dom.ain> -Received: by mail.dom.ain (Postfix, from userid 889) - id B9D0AD35DB; Tue, 4 Jun 2002 21:46:59 -0400 (EDT) -Message-ID: <15613.28051.707126.569693@dom.ain> -Date: Tue, 4 Jun 2002 21:46:59 -0400 -MIME-Version: 1.0 -Content-Type: text/plain; charset=us-ascii -Content-Transfer-Encoding: 7bit -Subject: bug demonstration - 12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789 - more text -From: aperson@dom.ain (Anne P. Erson) -To: bperson@dom.ain (Barney P. Erson) - -test diff --git a/Lib/email/test/data/msg_28.txt b/Lib/email/test/data/msg_28.txt deleted file mode 100644 index 1e4824c..0000000 --- a/Lib/email/test/data/msg_28.txt +++ /dev/null @@ -1,25 +0,0 @@ -From: aperson@dom.ain -MIME-Version: 1.0 -Content-Type: multipart/digest; boundary=BOUNDARY - ---BOUNDARY -Content-Type: message/rfc822 - -Content-Type: text/plain; charset=us-ascii -To: aa@bb.org -From: cc@dd.org -Subject: ee - -message 1 - ---BOUNDARY -Content-Type: message/rfc822 - -Content-Type: text/plain; charset=us-ascii -To: aa@bb.org -From: cc@dd.org -Subject: ee - -message 2 - ---BOUNDARY-- diff --git a/Lib/email/test/data/msg_29.txt b/Lib/email/test/data/msg_29.txt deleted file mode 100644 index 1fab561..0000000 --- a/Lib/email/test/data/msg_29.txt +++ /dev/null @@ -1,22 +0,0 @@ -Return-Path: <bbb@zzz.org> -Delivered-To: bbb@zzz.org -Received: by mail.zzz.org (Postfix, from userid 889) - id 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT) -MIME-Version: 1.0 -Content-Type: text/plain; charset=us-ascii; - title*0*="us-ascii'en'This%20is%20even%20more%20"; - title*1*="%2A%2A%2Afun%2A%2A%2A%20"; - title*2="isn't it!" -Content-Transfer-Encoding: 7bit -Message-ID: <15090.61304.110929.45684@aaa.zzz.org> -From: bbb@ddd.com (John X. Doe) -To: bbb@zzz.org -Subject: This is a test message -Date: Fri, 4 May 2001 14:05:44 -0400 - - -Hi, - -Do you like this message? - --Me diff --git a/Lib/email/test/data/msg_30.txt b/Lib/email/test/data/msg_30.txt deleted file mode 100644 index 4334bb6..0000000 --- a/Lib/email/test/data/msg_30.txt +++ /dev/null @@ -1,23 +0,0 @@ -From: aperson@dom.ain -MIME-Version: 1.0 -Content-Type: multipart/digest; boundary=BOUNDARY - ---BOUNDARY - -Content-Type: text/plain; charset=us-ascii -To: aa@bb.org -From: cc@dd.org -Subject: ee - -message 1 - ---BOUNDARY - -Content-Type: text/plain; charset=us-ascii -To: aa@bb.org -From: cc@dd.org -Subject: ee - -message 2 - ---BOUNDARY-- diff --git a/Lib/email/test/data/msg_31.txt b/Lib/email/test/data/msg_31.txt deleted file mode 100644 index 1e58e56..0000000 --- a/Lib/email/test/data/msg_31.txt +++ /dev/null @@ -1,15 +0,0 @@ -From: aperson@dom.ain -MIME-Version: 1.0 -Content-Type: multipart/mixed; boundary=BOUNDARY_ - ---BOUNDARY -Content-Type: text/plain - -message 1 - ---BOUNDARY -Content-Type: text/plain - -message 2 - ---BOUNDARY-- diff --git a/Lib/email/test/data/msg_32.txt b/Lib/email/test/data/msg_32.txt deleted file mode 100644 index 07ec5af..0000000 --- a/Lib/email/test/data/msg_32.txt +++ /dev/null @@ -1,14 +0,0 @@ -Delivered-To: freebsd-isp@freebsd.org -Date: Tue, 26 Sep 2000 12:23:03 -0500 -From: Anne Person <aperson@example.com> -To: Barney Dude <bdude@example.com> -Subject: Re: Limiting Perl CPU Utilization... -Mime-Version: 1.0 -Content-Type: text/plain; charset*=ansi-x3.4-1968''us-ascii -Content-Disposition: inline -User-Agent: Mutt/1.3.8i -Sender: owner-freebsd-isp@FreeBSD.ORG -Precedence: bulk -X-Loop: FreeBSD.org - -Some message. diff --git a/Lib/email/test/data/msg_33.txt b/Lib/email/test/data/msg_33.txt deleted file mode 100644 index 042787a..0000000 --- a/Lib/email/test/data/msg_33.txt +++ /dev/null @@ -1,29 +0,0 @@ -Delivered-To: freebsd-isp@freebsd.org -Date: Wed, 27 Sep 2000 11:11:09 -0500 -From: Anne Person <aperson@example.com> -To: Barney Dude <bdude@example.com> -Subject: Re: Limiting Perl CPU Utilization... -Mime-Version: 1.0 -Content-Type: multipart/signed; micalg*=ansi-x3.4-1968''pgp-md5; - protocol*=ansi-x3.4-1968''application%2Fpgp-signature; - boundary*="ansi-x3.4-1968''EeQfGwPcQSOJBaQU" -Content-Disposition: inline -Sender: owner-freebsd-isp@FreeBSD.ORG -Precedence: bulk -X-Loop: FreeBSD.org - - ---EeQfGwPcQSOJBaQU -Content-Type: text/plain; charset*=ansi-x3.4-1968''us-ascii -Content-Disposition: inline -Content-Transfer-Encoding: quoted-printable - -part 1 - ---EeQfGwPcQSOJBaQU -Content-Type: text/plain -Content-Disposition: inline - -part 2 - ---EeQfGwPcQSOJBaQU-- diff --git a/Lib/email/test/data/msg_34.txt b/Lib/email/test/data/msg_34.txt deleted file mode 100644 index 055dfea..0000000 --- a/Lib/email/test/data/msg_34.txt +++ /dev/null @@ -1,19 +0,0 @@ -From: aperson@dom.ain -To: bperson@dom.ain -Content-Type: multipart/digest; boundary=XYZ - ---XYZ -Content-Type: text/plain - - -This is a text plain part that is counter to recommended practice in -RFC 2046, $5.1.5, but is not illegal - ---XYZ - -From: cperson@dom.ain -To: dperson@dom.ain - -A submessage - ---XYZ-- diff --git a/Lib/email/test/data/msg_35.txt b/Lib/email/test/data/msg_35.txt deleted file mode 100644 index be7d5a2..0000000 --- a/Lib/email/test/data/msg_35.txt +++ /dev/null @@ -1,4 +0,0 @@ -From: aperson@dom.ain -To: bperson@dom.ain -Subject: here's something interesting -counter to RFC 2822, there's no separating newline here diff --git a/Lib/email/test/data/msg_36.txt b/Lib/email/test/data/msg_36.txt deleted file mode 100644 index 5632c30..0000000 --- a/Lib/email/test/data/msg_36.txt +++ /dev/null @@ -1,40 +0,0 @@ -Mime-Version: 1.0 -Content-Type: Multipart/Mixed; Boundary="NextPart" -To: IETF-Announce:; -From: Internet-Drafts@ietf.org -Subject: I-D ACTION:draft-ietf-mboned-mix-00.txt -Date: Tue, 22 Dec 1998 16:55:06 -0500 - ---NextPart - -Blah blah blah - ---NextPart -Content-Type: Multipart/Alternative; Boundary="OtherAccess" - ---OtherAccess -Content-Type: Message/External-body; - access-type="mail-server"; - server="mailserv@ietf.org" - -Content-Type: text/plain -Content-ID: <19981222151406.I-D@ietf.org> - -ENCODING mime -FILE /internet-drafts/draft-ietf-mboned-mix-00.txt - ---OtherAccess -Content-Type: Message/External-body; - name="draft-ietf-mboned-mix-00.txt"; - site="ftp.ietf.org"; - access-type="anon-ftp"; - directory="internet-drafts" - -Content-Type: text/plain -Content-ID: <19981222151406.I-D@ietf.org> - - ---OtherAccess-- - ---NextPart-- - diff --git a/Lib/email/test/data/msg_37.txt b/Lib/email/test/data/msg_37.txt deleted file mode 100644 index 038d34a..0000000 --- a/Lib/email/test/data/msg_37.txt +++ /dev/null @@ -1,22 +0,0 @@ -Content-Type: multipart/mixed; boundary=ABCDE - ---ABCDE -Content-Type: text/x-one - -Blah - ---ABCDE ---ABCDE -Content-Type: text/x-two - -Blah - ---ABCDE ---ABCDE ---ABCDE ---ABCDE -Content-Type: text/x-two - -Blah - ---ABCDE-- diff --git a/Lib/email/test/data/msg_38.txt b/Lib/email/test/data/msg_38.txt deleted file mode 100644 index 006df81..0000000 --- a/Lib/email/test/data/msg_38.txt +++ /dev/null @@ -1,101 +0,0 @@ -MIME-Version: 1.0 -Content-Type: multipart/mixed; boundary="----- =_aaaaaaaaaa0" - -------- =_aaaaaaaaaa0 -Content-Type: multipart/mixed; boundary="----- =_aaaaaaaaaa1" -Content-ID: <20592.1022586929.1@example.com> - -------- =_aaaaaaaaaa1 -Content-Type: multipart/alternative; boundary="----- =_aaaaaaaaaa2" -Content-ID: <20592.1022586929.2@example.com> - -------- =_aaaaaaaaaa2 -Content-Type: text/plain -Content-ID: <20592.1022586929.3@example.com> -Content-Description: very tricky -Content-Transfer-Encoding: 7bit - - -Unlike the test test_nested-multiples-with-internal-boundary, this -piece of text not only contains the outer boundary tags -------- =_aaaaaaaaaa1 -and -------- =_aaaaaaaaaa0 -but puts them at the start of a line! And, to be even nastier, it -even includes a couple of end tags, such as this one: - -------- =_aaaaaaaaaa1-- - -and this one, which is from a multipart we haven't even seen yet! - -------- =_aaaaaaaaaa4-- - -This will, I'm sure, cause much breakage of MIME parsers. But, as -far as I can tell, it's perfectly legal. I have not yet ever seen -a case of this in the wild, but I've seen *similar* things. - - -------- =_aaaaaaaaaa2 -Content-Type: application/octet-stream -Content-ID: <20592.1022586929.4@example.com> -Content-Description: patch2 -Content-Transfer-Encoding: base64 - -XXX - -------- =_aaaaaaaaaa2-- - -------- =_aaaaaaaaaa1 -Content-Type: multipart/alternative; boundary="----- =_aaaaaaaaaa3" -Content-ID: <20592.1022586929.6@example.com> - -------- =_aaaaaaaaaa3 -Content-Type: application/octet-stream -Content-ID: <20592.1022586929.7@example.com> -Content-Description: patch3 -Content-Transfer-Encoding: base64 - -XXX - -------- =_aaaaaaaaaa3 -Content-Type: application/octet-stream -Content-ID: <20592.1022586929.8@example.com> -Content-Description: patch4 -Content-Transfer-Encoding: base64 - -XXX - -------- =_aaaaaaaaaa3-- - -------- =_aaaaaaaaaa1 -Content-Type: multipart/alternative; boundary="----- =_aaaaaaaaaa4" -Content-ID: <20592.1022586929.10@example.com> - -------- =_aaaaaaaaaa4 -Content-Type: application/octet-stream -Content-ID: <20592.1022586929.11@example.com> -Content-Description: patch5 -Content-Transfer-Encoding: base64 - -XXX - -------- =_aaaaaaaaaa4 -Content-Type: application/octet-stream -Content-ID: <20592.1022586929.12@example.com> -Content-Description: patch6 -Content-Transfer-Encoding: base64 - -XXX - -------- =_aaaaaaaaaa4-- - -------- =_aaaaaaaaaa1-- - -------- =_aaaaaaaaaa0 -Content-Type: text/plain; charset="us-ascii" -Content-ID: <20592.1022586929.15@example.com> - --- -It's never too late to have a happy childhood. - -------- =_aaaaaaaaaa0-- diff --git a/Lib/email/test/data/msg_39.txt b/Lib/email/test/data/msg_39.txt deleted file mode 100644 index 124b269..0000000 --- a/Lib/email/test/data/msg_39.txt +++ /dev/null @@ -1,83 +0,0 @@ -MIME-Version: 1.0 -Content-Type: multipart/mixed; boundary="----- =_aaaaaaaaaa0" - -------- =_aaaaaaaaaa0 -Content-Type: multipart/mixed; boundary="----- =_aaaaaaaaaa1" -Content-ID: <20592.1022586929.1@example.com> - -------- =_aaaaaaaaaa1 -Content-Type: multipart/alternative; boundary="----- =_aaaaaaaaaa1" -Content-ID: <20592.1022586929.2@example.com> - -------- =_aaaaaaaaaa1 -Content-Type: application/octet-stream -Content-ID: <20592.1022586929.3@example.com> -Content-Description: patch1 -Content-Transfer-Encoding: base64 - -XXX - -------- =_aaaaaaaaaa1 -Content-Type: application/octet-stream -Content-ID: <20592.1022586929.4@example.com> -Content-Description: patch2 -Content-Transfer-Encoding: base64 - -XXX - -------- =_aaaaaaaaaa1-- - -------- =_aaaaaaaaaa1 -Content-Type: multipart/alternative; boundary="----- =_aaaaaaaaaa1" -Content-ID: <20592.1022586929.6@example.com> - -------- =_aaaaaaaaaa1 -Content-Type: application/octet-stream -Content-ID: <20592.1022586929.7@example.com> -Content-Description: patch3 -Content-Transfer-Encoding: base64 - -XXX - -------- =_aaaaaaaaaa1 -Content-Type: application/octet-stream -Content-ID: <20592.1022586929.8@example.com> -Content-Description: patch4 -Content-Transfer-Encoding: base64 - -XXX - -------- =_aaaaaaaaaa1-- - -------- =_aaaaaaaaaa1 -Content-Type: multipart/alternative; boundary="----- =_aaaaaaaaaa1" -Content-ID: <20592.1022586929.10@example.com> - -------- =_aaaaaaaaaa1 -Content-Type: application/octet-stream -Content-ID: <20592.1022586929.11@example.com> -Content-Description: patch5 -Content-Transfer-Encoding: base64 - -XXX - -------- =_aaaaaaaaaa1 -Content-Type: application/octet-stream -Content-ID: <20592.1022586929.12@example.com> -Content-Description: patch6 -Content-Transfer-Encoding: base64 - -XXX - -------- =_aaaaaaaaaa1-- - -------- =_aaaaaaaaaa1-- - -------- =_aaaaaaaaaa0 -Content-Type: text/plain; charset="us-ascii" -Content-ID: <20592.1022586929.15@example.com> - --- -It's never too late to have a happy childhood. - -------- =_aaaaaaaaaa0-- diff --git a/Lib/email/test/data/msg_40.txt b/Lib/email/test/data/msg_40.txt deleted file mode 100644 index 1435fa1..0000000 --- a/Lib/email/test/data/msg_40.txt +++ /dev/null @@ -1,10 +0,0 @@ -MIME-Version: 1.0 -Content-Type: text/html; boundary="--961284236552522269" - -----961284236552522269 -Content-Type: text/html; -Content-Transfer-Encoding: 7Bit - -<html></html> - -----961284236552522269-- diff --git a/Lib/email/test/data/msg_41.txt b/Lib/email/test/data/msg_41.txt deleted file mode 100644 index 76cdd1c..0000000 --- a/Lib/email/test/data/msg_41.txt +++ /dev/null @@ -1,8 +0,0 @@ -From: "Allison Dunlap" <xxx@example.com> -To: yyy@example.com -Subject: 64423 -Date: Sun, 11 Jul 2004 16:09:27 -0300 -MIME-Version: 1.0 -Content-Type: multipart/alternative; - -Blah blah blah diff --git a/Lib/email/test/data/msg_42.txt b/Lib/email/test/data/msg_42.txt deleted file mode 100644 index a75f8f4..0000000 --- a/Lib/email/test/data/msg_42.txt +++ /dev/null @@ -1,20 +0,0 @@ -Content-Type: multipart/mixed; boundary="AAA" -From: Mail Delivery Subsystem <xxx@example.com> -To: yyy@example.com - -This is a MIME-encapsulated message - ---AAA - -Stuff - ---AAA -Content-Type: message/rfc822 - -From: webmaster@python.org -To: zzz@example.com -Content-Type: multipart/mixed; boundary="BBB" - ---BBB-- - ---AAA-- diff --git a/Lib/email/test/data/msg_43.txt b/Lib/email/test/data/msg_43.txt deleted file mode 100644 index 797d12c..0000000 --- a/Lib/email/test/data/msg_43.txt +++ /dev/null @@ -1,217 +0,0 @@ -From SRS0=aO/p=ON=bag.python.org=None@bounce2.pobox.com Fri Nov 26 21:40:36 2004 -X-VM-v5-Data: ([nil nil nil nil nil nil nil nil nil] - [nil nil nil nil nil nil nil "MAILER DAEMON <>" "MAILER DAEMON <>" nil nil "Banned file: auto__mail.python.bat in mail from you" "^From:" nil nil nil nil "Banned file: auto__mail.python.bat in mail from you" nil nil nil nil nil nil nil] - nil) -MIME-Version: 1.0 -Message-Id: <edab.7804f5cb8070@python.org> -Content-Type: multipart/report; report-type=delivery-status; - charset=utf-8; - boundary="----------=_1101526904-1956-5" -X-Virus-Scanned: by XS4ALL Virus Scanner -X-UIDL: 4\G!!!<c"!UV["!M7C!! -From: MAILER DAEMON <> -To: <webmaster@python.org> -Subject: Banned file: auto__mail.python.bat in mail from you -Date: Fri, 26 Nov 2004 19:41:44 -0800 (PST) - -This is a multi-part message in MIME format... - -------------=_1101526904-1956-5 -Content-Type: text/plain; charset="utf-8" -Content-Disposition: inline -Content-Transfer-Encoding: 7bit - -BANNED FILENAME ALERT - -Your message to: xxxxxxx@dot.ca.gov, xxxxxxxxxxxxx@dot.ca.gov, xxxxxxxxxx@dot.ca.gov, xxxxxxxx@dot.ca.gov, xxxxxxxxxx@dot.ca.gov, xxxxxx@dot.ca.gov, xxxxxxxxxx@dot.ca.gov, xxxxxx@dot.ca.gov, xxxxxx@dot.ca.gov, xxxxxxxxxxxxxxxx@dot.ca.gov, xxxxxxxxxxx@dot.ca.gov, xxxxxxxxxx@dot.ca.gov, xxxxxxxxxx@dot.ca.gov, xxxxxxxxxxxx@dot.ca.gov, xxxxxxxxxxxx@dot.ca.gov, xxxxxxx@dot.ca.gov, xxxxxxxxx@dot.ca.gov, xxxxxxxxxx@dot.ca.gov, xxxxxx@dot.ca.gov, xxx@dot.ca.gov, xxxxxxx@dot.ca.gov, xxxxxxx@dot.ca.gov, xxxxxxxxxxxxxxx@dot.ca.gov, xxxxxxxxxx@dot.ca.gov, xxxxxxx@dot.ca.gov, xxx@dot.ca.gov, xxxxxxxx@dot.ca.gov, xxxxxxxxxxxxx@dot.ca.gov, xxxxxxxxxxxxx@dot.ca.gov, xxxxxxxxxxx@dot.ca.gov, xxxxxxxxx@dot.ca.gov, xxxxxxxxxx@dot.ca.gov, xxxxxxxxxxxx@dot.ca.gov, xxxxxxx@dot.ca.gov, xxxxxxxxxxxxxxx@dot.ca.gov, xxxxxxxxxxxxx@dot.ca.gov, xxxx@dot.ca.gov, xxxxxxxx@dot.ca.gov, xxxxxxxxxx@dot.ca.gov, xxxxxxxxxxxxxxxxxx@dot.ca.gov -was blocked by our Spam Firewall. The email you sent with the following subject has NOT BEEN DELIVERED: - -Subject: Delivery_failure_notice - -An attachment in that mail was of a file type that the Spam Firewall is set to block. - - - -------------=_1101526904-1956-5 -Content-Type: message/delivery-status -Content-Disposition: inline -Content-Transfer-Encoding: 7bit -Content-Description: Delivery error report - -Reporting-MTA: dns; sacspam01.dot.ca.gov -Received-From-MTA: smtp; sacspam01.dot.ca.gov ([127.0.0.1]) -Arrival-Date: Fri, 26 Nov 2004 19:41:44 -0800 (PST) - -Final-Recipient: rfc822; xxxxxxx@dot.ca.gov -Action: failed -Status: 5.7.1 -Diagnostic-Code: smtp; 550 5.7.1 Message content rejected, id=01956-02-2 - BANNED: auto__mail.python.bat -Last-Attempt-Date: Fri, 26 Nov 2004 19:41:44 -0800 (PST) - -Final-Recipient: rfc822; xxxxxxxxxxxxx@dot.ca.gov -Action: failed -Status: 5.7.1 -Diagnostic-Code: smtp; 550 5.7.1 Message content rejected, id=01956-02-2 - BANNED: auto__mail.python.bat -Last-Attempt-Date: Fri, 26 Nov 2004 19:41:44 -0800 (PST) - -Final-Recipient: rfc822; xxxxxxxxxx@dot.ca.gov -Action: failed -Status: 5.7.1 -Diagnostic-Code: smtp; 550 5.7.1 Message content rejected, id=01956-02-2 - BANNED: auto__mail.python.bat -Last-Attempt-Date: Fri, 26 Nov 2004 19:41:44 -0800 (PST) - -Final-Recipient: rfc822; xxxxxxxx@dot.ca.gov -Action: failed -Status: 5.7.1 -Diagnostic-Code: smtp; 550 5.7.1 Message content rejected, id=01956-02-2 - BANNED: auto__mail.python.bat -Last-Attempt-Date: Fri, 26 Nov 2004 19:41:44 -0800 (PST) - -Final-Recipient: rfc822; xxxxxxxxxx@dot.ca.gov -Action: failed -Status: 5.7.1 -Diagnostic-Code: smtp; 550 5.7.1 Message content rejected, id=01956-02-2 - BANNED: auto__mail.python.bat -Last-Attempt-Date: Fri, 26 Nov 2004 19:41:44 -0800 (PST) - -Final-Recipient: rfc822; xxxxxx@dot.ca.gov -Action: failed -Status: 5.7.1 -Diagnostic-Code: smtp; 550 5.7.1 Message content rejected, id=01956-02-2 - BANNED: auto__mail.python.bat -Last-Attempt-Date: Fri, 26 Nov 2004 19:41:44 -0800 (PST) - -Final-Recipient: rfc822; xxxxxxxxxx@dot.ca.gov -Action: failed -Status: 5.7.1 -Diagnostic-Code: smtp; 550 5.7.1 Message content rejected, id=01956-02-2 - BANNED: auto__mail.python.bat -Last-Attempt-Date: Fri, 26 Nov 2004 19:41:44 -0800 (PST) - -Final-Recipient: rfc822; xxxxxx@dot.ca.gov -Action: failed -Status: 5.7.1 -Diagnostic-Code: smtp; 550 5.7.1 Message content rejected, id=01956-02-2 - BANNED: auto__mail.python.bat -Last-Attempt-Date: Fri, 26 Nov 2004 19:41:44 -0800 (PST) - -Final-Recipient: rfc822; xxxxxx@dot.ca.gov -Action: failed -Status: 5.7.1 -Diagnostic-Code: smtp; 550 5.7.1 Message content rejected, id=01956-02-2 - BANNED: auto__mail.python.bat -Last-Attempt-Date: Fri, 26 Nov 2004 19:41:44 -0800 (PST) - -Final-Recipient: rfc822; xxxxxxxxxxxxxxxx@dot.ca.gov -Action: failed -Status: 5.7.1 -Diagnostic-Code: smtp; 550 5.7.1 Message content rejected, id=01956-02-2 - BANNED: auto__mail.python.bat -Last-Attempt-Date: Fri, 26 Nov 2004 19:41:44 -0800 (PST) - -Final-Recipient: rfc822; xxxxxxxxxxx@dot.ca.gov -Action: failed -Status: 5.7.1 -Diagnostic-Code: smtp; 550 5.7.1 Message content rejected, id=01956-02-2 - BANNED: auto__mail.python.bat -Last-Attempt-Date: Fri, 26 Nov 2004 19:41:44 -0800 (PST) - -Final-Recipient: rfc822; xxxxxxxxxx@dot.ca.gov -Action: failed -Status: 5.7.1 -Diagnostic-Code: smtp; 550 5.7.1 Message content rejected, id=01956-02-2 - BANNED: auto__mail.python.bat -Last-Attempt-Date: Fri, 26 Nov 2004 19:41:44 -0800 (PST) - -Final-Recipient: rfc822; xxxxxxxxxx@dot.ca.gov -Action: failed -Status: 5.7.1 -Diagnostic-Code: smtp; 550 5.7.1 Message content rejected, id=01956-02-2 - BANNED: auto__mail.python.bat -Last-Attempt-Date: Fri, 26 Nov 2004 19:41:44 -0800 (PST) - -Final-Recipient: rfc822; xxxxxxxxxxxx@dot.ca.gov -Action: failed -Status: 5.7.1 -Diagnostic-Code: smtp; 550 5.7.1 Message content rejected, id=01956-02-2 - BANNED: auto__mail.python.bat -Last-Attempt-Date: Fri, 26 Nov 2004 19:41:44 -0800 (PST) - -Final-Recipient: rfc822; xxxxxxxxxxxx@dot.ca.gov -Action: failed -Status: 5.7.1 -Diagnostic-Code: smtp; 550 5.7.1 Message content rejected, id=01956-02-2 - BANNED: auto__mail.python.bat -Last-Attempt-Date: Fri, 26 Nov 2004 19:41:44 -0800 (PST) - -Final-Recipient: rfc822; xxxxxxx@dot.ca.gov -Action: failed -Status: 5.7.1 -Diagnostic-Code: smtp; 550 5.7.1 Message content rejected, id=01956-02-2 - BANNED: auto__mail.python.bat -Last-Attempt-Date: Fri, 26 Nov 2004 19:41:44 -0800 (PST) - -Final-Recipient: rfc822; xxxxxxxxx@dot.ca.gov -Action: failed -Status: 5.7.1 -Diagnostic-Code: smtp; 550 5.7.1 Message content rejected, id=01956-02-2 - BANNED: auto__mail.python.bat -Last-Attempt-Date: Fri, 26 Nov 2004 19:41:44 -0800 (PST) - -Final-Recipient: rfc822; xxxxxxxxxx@dot.ca.gov -Action: failed -Status: 5.7.1 -Diagnostic-Code: smtp; 550 5.7.1 Message content rejected, id=01956-02-2 - BANNED: auto__mail.python.bat -Last-Attempt-Date: Fri, 26 Nov 2004 19:41:44 -0800 (PST) - -Final-Recipient: rfc822; xxxxxx@dot.ca.gov -Action: failed -Status: 5.7.1 -Diagnostic-Code: smtp; 550 5.7.1 Message content rejected, id=01956-02-2 - BANNED: auto__mail.python.bat -Last-Attempt-Date: Fri, 26 Nov 2004 19:41:44 -0800 (PST) - -Final-Recipient: rfc822; xxx@dot.ca.gov -Action: failed -Status: 5.7.1 -Diagnostic-Code: smtp; 550 5.7.1 Message content rejected, id=01956-02-2 - BANNED: auto__mail.python.bat -Last-Attempt-Date: Fri, 26 Nov 2004 19:41:44 -0800 (PST) - -Final-Recipient: rfc822; xxxxxxx@dot.ca.gov -Action: failed -Status: 5.7.1 -Diagnostic-Code: smtp; 550 5.7.1 Message content rejected, id=01956-02-2 - BANNED: auto__mail.python.bat -Last-Attempt-Date: Fri, 26 Nov 2004 19:41:44 -0800 (PST) - -Final-Recipient: rfc822; xxxxxxx@dot.ca.gov -Action: failed -Status: 5.7.1 -Diagnostic-Code: smtp; 550 5.7.1 Message content rejected, id=01956-02-2 - BANNED: auto__mail.python.bat -Last-Attempt-Date: Fri, 26 Nov 2004 19:41:44 -0800 (PST) - -Final-Recipient: rfc822; xxxxxxxxxxxxxxx@dot.ca.gov -Action: failed -Status: 5.7.1 -Diagnostic-Code: smtp; 550 5.7.1 Message content rejected, id=01956-02-2 - BANNED: auto__mail.python.bat -Last-Attempt-Date: Fri, 26 Nov 2004 19:41:44 -0800 (PST) - -Final-Recipient: rfc822; xxxxxxxxxx@dot.ca.gov -Action: failed -Status: 5.7.1 -Diagnostic-Code: smtp; 550 5.7.1 Message content rejected, id=01956-02-2 - BANNED: auto__mail.python.bat -Last-Attempt-Date: Fri, 26 Nov 2004 19:41:44 -0800 (PST) - -Final-Recipient: rfc822; xxxxxxx@dot.ca.gov -Action: failed -Status: 5.7.1 -Diagnostic-Code: smtp; 550 5.7.1 Message content rejected, id=01956-02-2 - BANNED: auto__mail.python.bat -Last-Attempt-Date: Fri, 26 Nov 2004 19:41:44 -0800 (PST) - -------------=_1101526904-1956-5 -Content-Type: text/rfc822-headers -Content-Disposition: inline -Content-Transfer-Encoding: 7bit -Content-Description: Undelivered-message headers - -Received: from kgsav.org (ppp-70-242-162-63.dsl.spfdmo.swbell.net [70.242.162.63]) - by sacspam01.dot.ca.gov (Spam Firewall) with SMTP - id A232AD03DE3A; Fri, 26 Nov 2004 19:41:35 -0800 (PST) -From: webmaster@python.org -To: xxxxx@dot.ca.gov -Date: Sat, 27 Nov 2004 03:35:30 UTC -Subject: Delivery_failure_notice -Importance: Normal -X-Priority: 3 (Normal) -X-MSMail-Priority: Normal -Message-ID: <edab.7804f5cb8070@python.org> -MIME-Version: 1.0 -Content-Type: multipart/mixed; boundary="====67bd2b7a5.f99f7" -Content-Transfer-Encoding: 7bit - -------------=_1101526904-1956-5-- - diff --git a/Lib/email/test/data/msg_44.txt b/Lib/email/test/data/msg_44.txt deleted file mode 100644 index 15a2252..0000000 --- a/Lib/email/test/data/msg_44.txt +++ /dev/null @@ -1,33 +0,0 @@ -Return-Path: <barry@python.org> -Delivered-To: barry@python.org -Received: by mail.python.org (Postfix, from userid 889) - id C2BF0D37C6; Tue, 11 Sep 2001 00:05:05 -0400 (EDT) -MIME-Version: 1.0 -Content-Type: multipart/mixed; boundary="h90VIIIKmx" -Content-Transfer-Encoding: 7bit -Message-ID: <15261.36209.358846.118674@anthem.python.org> -From: barry@python.org (Barry A. Warsaw) -To: barry@python.org -Subject: a simple multipart -Date: Tue, 11 Sep 2001 00:05:05 -0400 -X-Mailer: VM 6.95 under 21.4 (patch 4) "Artificial Intelligence" XEmacs Lucid -X-Attribution: BAW -X-Oblique-Strategy: Make a door into a window - - ---h90VIIIKmx -Content-Type: text/plain; name="msg.txt" -Content-Transfer-Encoding: 7bit - -a simple kind of mirror -to reflect upon our own - ---h90VIIIKmx -Content-Type: text/plain; name="msg.txt" -Content-Transfer-Encoding: 7bit - -a simple kind of mirror -to reflect upon our own - ---h90VIIIKmx-- - diff --git a/Lib/email/test/data/msg_45.txt b/Lib/email/test/data/msg_45.txt deleted file mode 100644 index 58fde95..0000000 --- a/Lib/email/test/data/msg_45.txt +++ /dev/null @@ -1,33 +0,0 @@ -From: <foo@bar.baz> -To: <baz@bar.foo> -Subject: test -X-Long-Line: Some really long line contains a lot of text and thus has to be rewrapped because it is some - really long - line -MIME-Version: 1.0 -Content-Type: multipart/signed; boundary="borderline"; - protocol="application/pgp-signature"; micalg=pgp-sha1 - -This is an OpenPGP/MIME signed message (RFC 2440 and 3156) ---borderline -Content-Type: text/plain -X-Long-Line: Another really long line contains a lot of text and thus has to be rewrapped because it is another - really long - line - -This is the signed contents. - ---borderline -Content-Type: application/pgp-signature; name="signature.asc" -Content-Description: OpenPGP digital signature -Content-Disposition: attachment; filename="signature.asc" - ------BEGIN PGP SIGNATURE----- -Version: GnuPG v2.0.6 (GNU/Linux) - -iD8DBQFG03voRhp6o4m9dFsRApSZAKCCAN3IkJlVRg6NvAiMHlvvIuMGPQCeLZtj -FGwfnRHFBFO/S4/DKysm0lI= -=t7+s ------END PGP SIGNATURE----- - ---borderline-- diff --git a/Lib/email/test/data/msg_46.txt b/Lib/email/test/data/msg_46.txt deleted file mode 100644 index 1e22c4f..0000000 --- a/Lib/email/test/data/msg_46.txt +++ /dev/null @@ -1,23 +0,0 @@ -Return-Path: <sender@example.net> -Delivery-Date: Mon, 08 Feb 2010 14:05:16 +0100 -Received: from example.org (example.org [64.5.53.58]) - by example.net (node=mxbap2) with ESMTP (Nemesis) - id UNIQUE for someone@example.com; Mon, 08 Feb 2010 14:05:16 +0100 -Date: Mon, 01 Feb 2010 12:21:16 +0100 -From: "Sender" <sender@example.net> -To: <someone@example.com> -Subject: GroupwiseForwardingTest -Mime-Version: 1.0 -Content-Type: message/rfc822 - -Return-path: <sender@example.net> -Message-ID: <4B66B890.4070408@teconcept.de> -Date: Mon, 01 Feb 2010 12:18:40 +0100 -From: "Dr. Sender" <sender@example.net> -MIME-Version: 1.0 -To: "Recipient" <recipient@example.com> -Subject: GroupwiseForwardingTest -Content-Type: text/plain; charset=ISO-8859-15 -Content-Transfer-Encoding: 7bit - -Testing email forwarding with Groupwise 1.2.2010 diff --git a/Lib/email/test/test_email.py b/Lib/email/test/test_email.py deleted file mode 100644 index 352b9b1..0000000 --- a/Lib/email/test/test_email.py +++ /dev/null @@ -1,4732 +0,0 @@ -# Copyright (C) 2001-2010 Python Software Foundation -# Contact: email-sig@python.org -# email package unit tests - -import os -import re -import sys -import time -import base64 -import difflib -import unittest -import warnings -import textwrap - -from io import StringIO, BytesIO -from itertools import chain - -import email - -from email.charset import Charset -from email.header import Header, decode_header, make_header -from email.parser import Parser, HeaderParser -from email.generator import Generator, DecodedGenerator, BytesGenerator -from email.message import Message -from email.mime.application import MIMEApplication -from email.mime.audio import MIMEAudio -from email.mime.text import MIMEText -from email.mime.image import MIMEImage -from email.mime.base import MIMEBase -from email.mime.message import MIMEMessage -from email.mime.multipart import MIMEMultipart -from email import utils -from email import errors -from email import encoders -from email import iterators -from email import base64mime -from email import quoprimime - -from test.support import findfile, run_unittest, unlink -from email.test import __file__ as landmark - - -NL = '\n' -EMPTYSTRING = '' -SPACE = ' ' - - - -def openfile(filename, *args, **kws): - path = os.path.join(os.path.dirname(landmark), 'data', filename) - return open(path, *args, **kws) - - - -# Base test class -class TestEmailBase(unittest.TestCase): - def ndiffAssertEqual(self, first, second): - """Like assertEqual except use ndiff for readable output.""" - if first != second: - sfirst = str(first) - ssecond = str(second) - rfirst = [repr(line) for line in sfirst.splitlines()] - rsecond = [repr(line) for line in ssecond.splitlines()] - diff = difflib.ndiff(rfirst, rsecond) - raise self.failureException(NL + NL.join(diff)) - - def _msgobj(self, filename): - with openfile(findfile(filename)) as fp: - return email.message_from_file(fp) - - - -# Test various aspects of the Message class's API -class TestMessageAPI(TestEmailBase): - def test_get_all(self): - eq = self.assertEqual - msg = self._msgobj('msg_20.txt') - eq(msg.get_all('cc'), ['ccc@zzz.org', 'ddd@zzz.org', 'eee@zzz.org']) - eq(msg.get_all('xx', 'n/a'), 'n/a') - - def test_getset_charset(self): - eq = self.assertEqual - msg = Message() - eq(msg.get_charset(), None) - charset = Charset('iso-8859-1') - msg.set_charset(charset) - eq(msg['mime-version'], '1.0') - eq(msg.get_content_type(), 'text/plain') - eq(msg['content-type'], 'text/plain; charset="iso-8859-1"') - eq(msg.get_param('charset'), 'iso-8859-1') - eq(msg['content-transfer-encoding'], 'quoted-printable') - eq(msg.get_charset().input_charset, 'iso-8859-1') - # Remove the charset - msg.set_charset(None) - eq(msg.get_charset(), None) - eq(msg['content-type'], 'text/plain') - # Try adding a charset when there's already MIME headers present - msg = Message() - msg['MIME-Version'] = '2.0' - msg['Content-Type'] = 'text/x-weird' - msg['Content-Transfer-Encoding'] = 'quinted-puntable' - msg.set_charset(charset) - eq(msg['mime-version'], '2.0') - eq(msg['content-type'], 'text/x-weird; charset="iso-8859-1"') - eq(msg['content-transfer-encoding'], 'quinted-puntable') - - def test_set_charset_from_string(self): - eq = self.assertEqual - msg = Message() - msg.set_charset('us-ascii') - eq(msg.get_charset().input_charset, 'us-ascii') - eq(msg['content-type'], 'text/plain; charset="us-ascii"') - - def test_set_payload_with_charset(self): - msg = Message() - charset = Charset('iso-8859-1') - msg.set_payload('This is a string payload', charset) - self.assertEqual(msg.get_charset().input_charset, 'iso-8859-1') - - def test_get_charsets(self): - eq = self.assertEqual - - msg = self._msgobj('msg_08.txt') - charsets = msg.get_charsets() - eq(charsets, [None, 'us-ascii', 'iso-8859-1', 'iso-8859-2', 'koi8-r']) - - msg = self._msgobj('msg_09.txt') - charsets = msg.get_charsets('dingbat') - eq(charsets, ['dingbat', 'us-ascii', 'iso-8859-1', 'dingbat', - 'koi8-r']) - - msg = self._msgobj('msg_12.txt') - charsets = msg.get_charsets() - eq(charsets, [None, 'us-ascii', 'iso-8859-1', None, 'iso-8859-2', - 'iso-8859-3', 'us-ascii', 'koi8-r']) - - def test_get_filename(self): - eq = self.assertEqual - - msg = self._msgobj('msg_04.txt') - filenames = [p.get_filename() for p in msg.get_payload()] - eq(filenames, ['msg.txt', 'msg.txt']) - - msg = self._msgobj('msg_07.txt') - subpart = msg.get_payload(1) - eq(subpart.get_filename(), 'dingusfish.gif') - - def test_get_filename_with_name_parameter(self): - eq = self.assertEqual - - msg = self._msgobj('msg_44.txt') - filenames = [p.get_filename() for p in msg.get_payload()] - eq(filenames, ['msg.txt', 'msg.txt']) - - def test_get_boundary(self): - eq = self.assertEqual - msg = self._msgobj('msg_07.txt') - # No quotes! - eq(msg.get_boundary(), 'BOUNDARY') - - def test_set_boundary(self): - eq = self.assertEqual - # This one has no existing boundary parameter, but the Content-Type: - # header appears fifth. - msg = self._msgobj('msg_01.txt') - msg.set_boundary('BOUNDARY') - header, value = msg.items()[4] - eq(header.lower(), 'content-type') - eq(value, 'text/plain; charset="us-ascii"; boundary="BOUNDARY"') - # This one has a Content-Type: header, with a boundary, stuck in the - # middle of its headers. Make sure the order is preserved; it should - # be fifth. - msg = self._msgobj('msg_04.txt') - msg.set_boundary('BOUNDARY') - header, value = msg.items()[4] - eq(header.lower(), 'content-type') - eq(value, 'multipart/mixed; boundary="BOUNDARY"') - # And this one has no Content-Type: header at all. - msg = self._msgobj('msg_03.txt') - self.assertRaises(errors.HeaderParseError, - msg.set_boundary, 'BOUNDARY') - - def test_make_boundary(self): - msg = MIMEMultipart('form-data') - # Note that when the boundary gets created is an implementation - # detail and might change. - self.assertEqual(msg.items()[0][1], 'multipart/form-data') - # Trigger creation of boundary - msg.as_string() - self.assertEqual(msg.items()[0][1][:33], - 'multipart/form-data; boundary="==') - # XXX: there ought to be tests of the uniqueness of the boundary, too. - - def test_message_rfc822_only(self): - # Issue 7970: message/rfc822 not in multipart parsed by - # HeaderParser caused an exception when flattened. - with openfile(findfile('msg_46.txt')) as fp: - msgdata = fp.read() - parser = HeaderParser() - msg = parser.parsestr(msgdata) - out = StringIO() - gen = Generator(out, True, 0) - gen.flatten(msg, False) - self.assertEqual(out.getvalue(), msgdata) - - def test_get_decoded_payload(self): - eq = self.assertEqual - msg = self._msgobj('msg_10.txt') - # The outer message is a multipart - eq(msg.get_payload(decode=True), None) - # Subpart 1 is 7bit encoded - eq(msg.get_payload(0).get_payload(decode=True), - b'This is a 7bit encoded message.\n') - # Subpart 2 is quopri - eq(msg.get_payload(1).get_payload(decode=True), - b'\xa1This is a Quoted Printable encoded message!\n') - # Subpart 3 is base64 - eq(msg.get_payload(2).get_payload(decode=True), - b'This is a Base64 encoded message.') - # Subpart 4 is base64 with a trailing newline, which - # used to be stripped (issue 7143). - eq(msg.get_payload(3).get_payload(decode=True), - b'This is a Base64 encoded message.\n') - # Subpart 5 has no Content-Transfer-Encoding: header. - eq(msg.get_payload(4).get_payload(decode=True), - b'This has no Content-Transfer-Encoding: header.\n') - - def test_get_decoded_uu_payload(self): - eq = self.assertEqual - msg = Message() - msg.set_payload('begin 666 -\n+:&5L;&\\@=V]R;&0 \n \nend\n') - for cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'): - msg['content-transfer-encoding'] = cte - eq(msg.get_payload(decode=True), b'hello world') - # Now try some bogus data - msg.set_payload('foo') - eq(msg.get_payload(decode=True), b'foo') - - def test_get_payload_n_raises_on_non_multipart(self): - msg = Message() - self.assertRaises(TypeError, msg.get_payload, 1) - - def test_decoded_generator(self): - eq = self.assertEqual - msg = self._msgobj('msg_07.txt') - with openfile('msg_17.txt') as fp: - text = fp.read() - s = StringIO() - g = DecodedGenerator(s) - g.flatten(msg) - eq(s.getvalue(), text) - - def test__contains__(self): - msg = Message() - msg['From'] = 'Me' - msg['to'] = 'You' - # Check for case insensitivity - self.assertTrue('from' in msg) - self.assertTrue('From' in msg) - self.assertTrue('FROM' in msg) - self.assertTrue('to' in msg) - self.assertTrue('To' in msg) - self.assertTrue('TO' in msg) - - def test_as_string(self): - eq = self.ndiffAssertEqual - msg = self._msgobj('msg_01.txt') - with openfile('msg_01.txt') as fp: - text = fp.read() - eq(text, str(msg)) - fullrepr = msg.as_string(unixfrom=True) - lines = fullrepr.split('\n') - self.assertTrue(lines[0].startswith('From ')) - eq(text, NL.join(lines[1:])) - - def test_bad_param(self): - msg = email.message_from_string("Content-Type: blarg; baz; boo\n") - self.assertEqual(msg.get_param('baz'), '') - - def test_missing_filename(self): - msg = email.message_from_string("From: foo\n") - self.assertEqual(msg.get_filename(), None) - - def test_bogus_filename(self): - msg = email.message_from_string( - "Content-Disposition: blarg; filename\n") - self.assertEqual(msg.get_filename(), '') - - def test_missing_boundary(self): - msg = email.message_from_string("From: foo\n") - self.assertEqual(msg.get_boundary(), None) - - def test_get_params(self): - eq = self.assertEqual - msg = email.message_from_string( - 'X-Header: foo=one; bar=two; baz=three\n') - eq(msg.get_params(header='x-header'), - [('foo', 'one'), ('bar', 'two'), ('baz', 'three')]) - msg = email.message_from_string( - 'X-Header: foo; bar=one; baz=two\n') - eq(msg.get_params(header='x-header'), - [('foo', ''), ('bar', 'one'), ('baz', 'two')]) - eq(msg.get_params(), None) - msg = email.message_from_string( - 'X-Header: foo; bar="one"; baz=two\n') - eq(msg.get_params(header='x-header'), - [('foo', ''), ('bar', 'one'), ('baz', 'two')]) - - def test_get_param_liberal(self): - msg = Message() - msg['Content-Type'] = 'Content-Type: Multipart/mixed; boundary = "CPIMSSMTPC06p5f3tG"' - self.assertEqual(msg.get_param('boundary'), 'CPIMSSMTPC06p5f3tG') - - def test_get_param(self): - eq = self.assertEqual - msg = email.message_from_string( - "X-Header: foo=one; bar=two; baz=three\n") - eq(msg.get_param('bar', header='x-header'), 'two') - eq(msg.get_param('quuz', header='x-header'), None) - eq(msg.get_param('quuz'), None) - msg = email.message_from_string( - 'X-Header: foo; bar="one"; baz=two\n') - eq(msg.get_param('foo', header='x-header'), '') - eq(msg.get_param('bar', header='x-header'), 'one') - eq(msg.get_param('baz', header='x-header'), 'two') - # XXX: We are not RFC-2045 compliant! We cannot parse: - # msg["Content-Type"] = 'text/plain; weird="hey; dolly? [you] @ <\\"home\\">?"' - # msg.get_param("weird") - # yet. - - def test_get_param_funky_continuation_lines(self): - msg = self._msgobj('msg_22.txt') - self.assertEqual(msg.get_payload(1).get_param('name'), 'wibble.JPG') - - def test_get_param_with_semis_in_quotes(self): - msg = email.message_from_string( - 'Content-Type: image/pjpeg; name="Jim&&Jill"\n') - self.assertEqual(msg.get_param('name'), 'Jim&&Jill') - self.assertEqual(msg.get_param('name', unquote=False), - '"Jim&&Jill"') - - def test_get_param_with_quotes(self): - msg = email.message_from_string( - 'Content-Type: foo; bar*0="baz\\"foobar"; bar*1="\\"baz"') - self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz') - msg = email.message_from_string( - "Content-Type: foo; bar*0=\"baz\\\"foobar\"; bar*1=\"\\\"baz\"") - self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz') - - def test_field_containment(self): - unless = self.assertTrue - msg = email.message_from_string('Header: exists') - unless('header' in msg) - unless('Header' in msg) - unless('HEADER' in msg) - self.assertFalse('headerx' in msg) - - def test_set_param(self): - eq = self.assertEqual - msg = Message() - msg.set_param('charset', 'iso-2022-jp') - eq(msg.get_param('charset'), 'iso-2022-jp') - msg.set_param('importance', 'high value') - eq(msg.get_param('importance'), 'high value') - eq(msg.get_param('importance', unquote=False), '"high value"') - eq(msg.get_params(), [('text/plain', ''), - ('charset', 'iso-2022-jp'), - ('importance', 'high value')]) - eq(msg.get_params(unquote=False), [('text/plain', ''), - ('charset', '"iso-2022-jp"'), - ('importance', '"high value"')]) - msg.set_param('charset', 'iso-9999-xx', header='X-Jimmy') - eq(msg.get_param('charset', header='X-Jimmy'), 'iso-9999-xx') - - def test_del_param(self): - eq = self.assertEqual - msg = self._msgobj('msg_05.txt') - eq(msg.get_params(), - [('multipart/report', ''), ('report-type', 'delivery-status'), - ('boundary', 'D1690A7AC1.996856090/mail.example.com')]) - old_val = msg.get_param("report-type") - msg.del_param("report-type") - eq(msg.get_params(), - [('multipart/report', ''), - ('boundary', 'D1690A7AC1.996856090/mail.example.com')]) - msg.set_param("report-type", old_val) - eq(msg.get_params(), - [('multipart/report', ''), - ('boundary', 'D1690A7AC1.996856090/mail.example.com'), - ('report-type', old_val)]) - - def test_del_param_on_other_header(self): - msg = Message() - msg.add_header('Content-Disposition', 'attachment', filename='bud.gif') - msg.del_param('filename', 'content-disposition') - self.assertEqual(msg['content-disposition'], 'attachment') - - def test_del_param_on_nonexistent_header(self): - msg = Message() - msg.del_param('filename', 'content-disposition') - - def test_del_nonexistent_param(self): - msg = Message() - msg.add_header('Content-Type', 'text/plain', charset='utf-8') - existing_header = msg['Content-Type'] - msg.del_param('foobar', header='Content-Type') - self.assertEqual(msg['Content-Type'], 'text/plain; charset="utf-8"') - - def test_set_type(self): - eq = self.assertEqual - msg = Message() - self.assertRaises(ValueError, msg.set_type, 'text') - msg.set_type('text/plain') - eq(msg['content-type'], 'text/plain') - msg.set_param('charset', 'us-ascii') - eq(msg['content-type'], 'text/plain; charset="us-ascii"') - msg.set_type('text/html') - eq(msg['content-type'], 'text/html; charset="us-ascii"') - - def test_set_type_on_other_header(self): - msg = Message() - msg['X-Content-Type'] = 'text/plain' - msg.set_type('application/octet-stream', 'X-Content-Type') - self.assertEqual(msg['x-content-type'], 'application/octet-stream') - - def test_get_content_type_missing(self): - msg = Message() - self.assertEqual(msg.get_content_type(), 'text/plain') - - def test_get_content_type_missing_with_default_type(self): - msg = Message() - msg.set_default_type('message/rfc822') - self.assertEqual(msg.get_content_type(), 'message/rfc822') - - def test_get_content_type_from_message_implicit(self): - msg = self._msgobj('msg_30.txt') - self.assertEqual(msg.get_payload(0).get_content_type(), - 'message/rfc822') - - def test_get_content_type_from_message_explicit(self): - msg = self._msgobj('msg_28.txt') - self.assertEqual(msg.get_payload(0).get_content_type(), - 'message/rfc822') - - def test_get_content_type_from_message_text_plain_implicit(self): - msg = self._msgobj('msg_03.txt') - self.assertEqual(msg.get_content_type(), 'text/plain') - - def test_get_content_type_from_message_text_plain_explicit(self): - msg = self._msgobj('msg_01.txt') - self.assertEqual(msg.get_content_type(), 'text/plain') - - def test_get_content_maintype_missing(self): - msg = Message() - self.assertEqual(msg.get_content_maintype(), 'text') - - def test_get_content_maintype_missing_with_default_type(self): - msg = Message() - msg.set_default_type('message/rfc822') - self.assertEqual(msg.get_content_maintype(), 'message') - - def test_get_content_maintype_from_message_implicit(self): - msg = self._msgobj('msg_30.txt') - self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message') - - def test_get_content_maintype_from_message_explicit(self): - msg = self._msgobj('msg_28.txt') - self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message') - - def test_get_content_maintype_from_message_text_plain_implicit(self): - msg = self._msgobj('msg_03.txt') - self.assertEqual(msg.get_content_maintype(), 'text') - - def test_get_content_maintype_from_message_text_plain_explicit(self): - msg = self._msgobj('msg_01.txt') - self.assertEqual(msg.get_content_maintype(), 'text') - - def test_get_content_subtype_missing(self): - msg = Message() - self.assertEqual(msg.get_content_subtype(), 'plain') - - def test_get_content_subtype_missing_with_default_type(self): - msg = Message() - msg.set_default_type('message/rfc822') - self.assertEqual(msg.get_content_subtype(), 'rfc822') - - def test_get_content_subtype_from_message_implicit(self): - msg = self._msgobj('msg_30.txt') - self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822') - - def test_get_content_subtype_from_message_explicit(self): - msg = self._msgobj('msg_28.txt') - self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822') - - def test_get_content_subtype_from_message_text_plain_implicit(self): - msg = self._msgobj('msg_03.txt') - self.assertEqual(msg.get_content_subtype(), 'plain') - - def test_get_content_subtype_from_message_text_plain_explicit(self): - msg = self._msgobj('msg_01.txt') - self.assertEqual(msg.get_content_subtype(), 'plain') - - def test_get_content_maintype_error(self): - msg = Message() - msg['Content-Type'] = 'no-slash-in-this-string' - self.assertEqual(msg.get_content_maintype(), 'text') - - def test_get_content_subtype_error(self): - msg = Message() - msg['Content-Type'] = 'no-slash-in-this-string' - self.assertEqual(msg.get_content_subtype(), 'plain') - - def test_replace_header(self): - eq = self.assertEqual - msg = Message() - msg.add_header('First', 'One') - msg.add_header('Second', 'Two') - msg.add_header('Third', 'Three') - eq(msg.keys(), ['First', 'Second', 'Third']) - eq(msg.values(), ['One', 'Two', 'Three']) - msg.replace_header('Second', 'Twenty') - eq(msg.keys(), ['First', 'Second', 'Third']) - eq(msg.values(), ['One', 'Twenty', 'Three']) - msg.add_header('First', 'Eleven') - msg.replace_header('First', 'One Hundred') - eq(msg.keys(), ['First', 'Second', 'Third', 'First']) - eq(msg.values(), ['One Hundred', 'Twenty', 'Three', 'Eleven']) - self.assertRaises(KeyError, msg.replace_header, 'Fourth', 'Missing') - - def test_broken_base64_payload(self): - x = 'AwDp0P7//y6LwKEAcPa/6Q=9' - msg = Message() - msg['content-type'] = 'audio/x-midi' - msg['content-transfer-encoding'] = 'base64' - msg.set_payload(x) - self.assertEqual(msg.get_payload(decode=True), - bytes(x, 'raw-unicode-escape')) - - def test_broken_unicode_payload(self): - # This test improves coverage but is not a compliance test. - # The behavior in this situation is currently undefined by the API. - x = 'this is a br\xf6ken thing to do' - msg = Message() - msg['content-type'] = 'text/plain' - msg['content-transfer-encoding'] = '8bit' - msg.set_payload(x) - self.assertEqual(msg.get_payload(decode=True), - bytes(x, 'raw-unicode-escape')) - - def test_questionable_bytes_payload(self): - # This test improves coverage but is not a compliance test, - # since it involves poking inside the black box. - x = 'this is a quéstionable thing to do'.encode('utf-8') - msg = Message() - msg['content-type'] = 'text/plain; charset="utf-8"' - msg['content-transfer-encoding'] = '8bit' - msg._payload = x - self.assertEqual(msg.get_payload(decode=True), x) - - # Issue 1078919 - def test_ascii_add_header(self): - msg = Message() - msg.add_header('Content-Disposition', 'attachment', - filename='bud.gif') - self.assertEqual('attachment; filename="bud.gif"', - msg['Content-Disposition']) - - def test_noascii_add_header(self): - msg = Message() - msg.add_header('Content-Disposition', 'attachment', - filename="Fußballer.ppt") - self.assertEqual( - 'attachment; filename*=utf-8\'\'Fu%C3%9Fballer.ppt', - msg['Content-Disposition']) - - def test_nonascii_add_header_via_triple(self): - msg = Message() - msg.add_header('Content-Disposition', 'attachment', - filename=('iso-8859-1', '', 'Fußballer.ppt')) - self.assertEqual( - 'attachment; filename*=iso-8859-1\'\'Fu%DFballer.ppt', - msg['Content-Disposition']) - - def test_ascii_add_header_with_tspecial(self): - msg = Message() - msg.add_header('Content-Disposition', 'attachment', - filename="windows [filename].ppt") - self.assertEqual( - 'attachment; filename="windows [filename].ppt"', - msg['Content-Disposition']) - - def test_nonascii_add_header_with_tspecial(self): - msg = Message() - msg.add_header('Content-Disposition', 'attachment', - filename="Fußballer [filename].ppt") - self.assertEqual( - "attachment; filename*=utf-8''Fu%C3%9Fballer%20%5Bfilename%5D.ppt", - msg['Content-Disposition']) - - def test_add_header_with_name_only_param(self): - msg = Message() - msg.add_header('Content-Disposition', 'inline', foo_bar=None) - self.assertEqual("inline; foo-bar", msg['Content-Disposition']) - - def test_add_header_with_no_value(self): - msg = Message() - msg.add_header('X-Status', None) - self.assertEqual('', msg['X-Status']) - - # Issue 5871: reject an attempt to embed a header inside a header value - # (header injection attack). - def test_embeded_header_via_Header_rejected(self): - msg = Message() - msg['Dummy'] = Header('dummy\nX-Injected-Header: test') - self.assertRaises(errors.HeaderParseError, msg.as_string) - - def test_embeded_header_via_string_rejected(self): - msg = Message() - msg['Dummy'] = 'dummy\nX-Injected-Header: test' - self.assertRaises(errors.HeaderParseError, msg.as_string) - - def test_unicode_header_defaults_to_utf8_encoding(self): - # Issue 14291 - m = MIMEText('abc\n') - m['Subject'] = 'É test' - self.assertEqual(str(m),textwrap.dedent("""\ - Content-Type: text/plain; charset="us-ascii" - MIME-Version: 1.0 - Content-Transfer-Encoding: 7bit - Subject: =?utf-8?q?=C3=89_test?= - - abc - """)) - -# Test the email.encoders module -class TestEncoders(unittest.TestCase): - - def test_EncodersEncode_base64(self): - with openfile('PyBanner048.gif', 'rb') as fp: - bindata = fp.read() - mimed = email.mime.image.MIMEImage(bindata) - base64ed = mimed.get_payload() - # the transfer-encoded body lines should all be <=76 characters - lines = base64ed.split('\n') - self.assertLessEqual(max([ len(x) for x in lines ]), 76) - - def test_encode_empty_payload(self): - eq = self.assertEqual - msg = Message() - msg.set_charset('us-ascii') - eq(msg['content-transfer-encoding'], '7bit') - - def test_default_cte(self): - eq = self.assertEqual - # 7bit data and the default us-ascii _charset - msg = MIMEText('hello world') - eq(msg['content-transfer-encoding'], '7bit') - # Similar, but with 8bit data - msg = MIMEText('hello \xf8 world') - eq(msg['content-transfer-encoding'], '8bit') - # And now with a different charset - msg = MIMEText('hello \xf8 world', _charset='iso-8859-1') - eq(msg['content-transfer-encoding'], 'quoted-printable') - - def test_encode7or8bit(self): - # Make sure a charset whose input character set is 8bit but - # whose output character set is 7bit gets a transfer-encoding - # of 7bit. - eq = self.assertEqual - msg = MIMEText('文', _charset='euc-jp') - eq(msg['content-transfer-encoding'], '7bit') - - -# Test long header wrapping -class TestLongHeaders(TestEmailBase): - - maxDiff = None - - def test_split_long_continuation(self): - eq = self.ndiffAssertEqual - msg = email.message_from_string("""\ -Subject: bug demonstration -\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789 -\tmore text - -test -""") - sfp = StringIO() - g = Generator(sfp) - g.flatten(msg) - eq(sfp.getvalue(), """\ -Subject: bug demonstration -\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789 -\tmore text - -test -""") - - def test_another_long_almost_unsplittable_header(self): - eq = self.ndiffAssertEqual - hstr = """\ -bug demonstration -\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789 -\tmore text""" - h = Header(hstr, continuation_ws='\t') - eq(h.encode(), """\ -bug demonstration -\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789 -\tmore text""") - h = Header(hstr.replace('\t', ' ')) - eq(h.encode(), """\ -bug demonstration - 12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789 - more text""") - - def test_long_nonstring(self): - eq = self.ndiffAssertEqual - g = Charset("iso-8859-1") - cz = Charset("iso-8859-2") - utf8 = Charset("utf-8") - g_head = (b'Die Mieter treten hier ein werden mit einem Foerderband ' - b'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen ' - b'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen ' - b'bef\xf6rdert. ') - cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich ' - b'd\xf9vtipu.. ') - utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f' - '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00' - '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c' - '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067' - '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das ' - 'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder ' - 'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066' - '\u3044\u307e\u3059\u3002') - h = Header(g_head, g, header_name='Subject') - h.append(cz_head, cz) - h.append(utf8_head, utf8) - msg = Message() - msg['Subject'] = h - sfp = StringIO() - g = Generator(sfp) - g.flatten(msg) - eq(sfp.getvalue(), """\ -Subject: =?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderb?= - =?iso-8859-1?q?and_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen?= - =?iso-8859-1?q?_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef?= - =?iso-8859-1?q?=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hrouti?= - =?iso-8859-2?q?ly_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?= - =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC5LiA?= - =?utf-8?b?6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn44Gf44KJ?= - =?utf-8?b?44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFzIE51bnN0dWNr?= - =?utf-8?b?IGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5kIGRhcyBPZGVyIGRp?= - =?utf-8?b?ZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIDjgaPjgabjgYTjgb7jgZk=?= - =?utf-8?b?44CC?= - -""") - eq(h.encode(maxlinelen=76), """\ -=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerde?= - =?iso-8859-1?q?rband_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndis?= - =?iso-8859-1?q?chen_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klinge?= - =?iso-8859-1?q?n_bef=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se?= - =?iso-8859-2?q?_hroutily_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= - =?utf-8?b?5q2j56K644Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb?= - =?utf-8?b?44KT44CC5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go?= - =?utf-8?b?44Gv44Gn44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBp?= - =?utf-8?b?c3QgZGFzIE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWlo?= - =?utf-8?b?ZXJodW5kIGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI0=?= - =?utf-8?b?44Go6KiA44Gj44Gm44GE44G+44GZ44CC?=""") - - def test_long_header_encode(self): - eq = self.ndiffAssertEqual - h = Header('wasnipoop; giraffes="very-long-necked-animals"; ' - 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"', - header_name='X-Foobar-Spoink-Defrobnit') - eq(h.encode(), '''\ -wasnipoop; giraffes="very-long-necked-animals"; - spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''') - - def test_long_header_encode_with_tab_continuation_is_just_a_hint(self): - eq = self.ndiffAssertEqual - h = Header('wasnipoop; giraffes="very-long-necked-animals"; ' - 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"', - header_name='X-Foobar-Spoink-Defrobnit', - continuation_ws='\t') - eq(h.encode(), '''\ -wasnipoop; giraffes="very-long-necked-animals"; - spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''') - - def test_long_header_encode_with_tab_continuation(self): - eq = self.ndiffAssertEqual - h = Header('wasnipoop; giraffes="very-long-necked-animals";\t' - 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"', - header_name='X-Foobar-Spoink-Defrobnit', - continuation_ws='\t') - eq(h.encode(), '''\ -wasnipoop; giraffes="very-long-necked-animals"; -\tspooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''') - - def test_header_encode_with_different_output_charset(self): - h = Header('文', 'euc-jp') - self.assertEqual(h.encode(), "=?iso-2022-jp?b?GyRCSjgbKEI=?=") - - def test_long_header_encode_with_different_output_charset(self): - h = Header(b'test-ja \xa4\xd8\xc5\xea\xb9\xc6\xa4\xb5\xa4\xec\xa4' - b'\xbf\xa5\xe1\xa1\xbc\xa5\xeb\xa4\xcf\xbb\xca\xb2\xf1\xbc\xd4' - b'\xa4\xce\xbe\xb5\xc7\xa7\xa4\xf2\xc2\xd4\xa4\xc3\xa4\xc6\xa4' - b'\xa4\xa4\xde\xa4\xb9'.decode('euc-jp'), 'euc-jp') - res = """\ -=?iso-2022-jp?b?dGVzdC1qYSAbJEIkWEVqOUYkNSRsJD8lYSE8JWskTztKMnE8VCROPjUbKEI=?= - =?iso-2022-jp?b?GyRCRyckckJUJEMkRiQkJF4kORsoQg==?=""" - self.assertEqual(h.encode(), res) - - def test_header_splitter(self): - eq = self.ndiffAssertEqual - msg = MIMEText('') - # It'd be great if we could use add_header() here, but that doesn't - # guarantee an order of the parameters. - msg['X-Foobar-Spoink-Defrobnit'] = ( - 'wasnipoop; giraffes="very-long-necked-animals"; ' - 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"') - sfp = StringIO() - g = Generator(sfp) - g.flatten(msg) - eq(sfp.getvalue(), '''\ -Content-Type: text/plain; charset="us-ascii" -MIME-Version: 1.0 -Content-Transfer-Encoding: 7bit -X-Foobar-Spoink-Defrobnit: wasnipoop; giraffes="very-long-necked-animals"; - spooge="yummy"; hippos="gargantuan"; marshmallows="gooey" - -''') - - def test_no_semis_header_splitter(self): - eq = self.ndiffAssertEqual - msg = Message() - msg['From'] = 'test@dom.ain' - msg['References'] = SPACE.join('<%d@dom.ain>' % i for i in range(10)) - msg.set_payload('Test') - sfp = StringIO() - g = Generator(sfp) - g.flatten(msg) - eq(sfp.getvalue(), """\ -From: test@dom.ain -References: <0@dom.ain> <1@dom.ain> <2@dom.ain> <3@dom.ain> <4@dom.ain> - <5@dom.ain> <6@dom.ain> <7@dom.ain> <8@dom.ain> <9@dom.ain> - -Test""") - - def test_last_split_chunk_does_not_fit(self): - eq = self.ndiffAssertEqual - h = Header('Subject: the first part of this is short, but_the_second' - '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line' - '_all_by_itself') - eq(h.encode(), """\ -Subject: the first part of this is short, - but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""") - - def test_splittable_leading_char_followed_by_overlong_unsplitable(self): - eq = self.ndiffAssertEqual - h = Header(', but_the_second' - '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line' - '_all_by_itself') - eq(h.encode(), """\ -, - but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""") - - def test_multiple_splittable_leading_char_followed_by_overlong_unsplitable(self): - eq = self.ndiffAssertEqual - h = Header(', , but_the_second' - '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line' - '_all_by_itself') - eq(h.encode(), """\ -, , - but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""") - - def test_trailing_splitable_on_overlong_unsplitable(self): - eq = self.ndiffAssertEqual - h = Header('this_part_does_not_fit_within_maxlinelen_and_thus_should_' - 'be_on_a_line_all_by_itself;') - eq(h.encode(), "this_part_does_not_fit_within_maxlinelen_and_thus_should_" - "be_on_a_line_all_by_itself;") - - def test_trailing_splitable_on_overlong_unsplitable_with_leading_splitable(self): - eq = self.ndiffAssertEqual - h = Header('; ' - 'this_part_does_not_fit_within_maxlinelen_and_thus_should_' - 'be_on_a_line_all_by_itself; ') - eq(h.encode(), """\ -; - this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """) - - def test_long_header_with_multiple_sequential_split_chars(self): - eq = self.ndiffAssertEqual - h = Header('This is a long line that has two whitespaces in a row. ' - 'This used to cause truncation of the header when folded') - eq(h.encode(), """\ -This is a long line that has two whitespaces in a row. This used to cause - truncation of the header when folded""") - - def test_splitter_split_on_punctuation_only_if_fws(self): - eq = self.ndiffAssertEqual - h = Header('thisverylongheaderhas;semicolons;and,commas,but' - 'they;arenotlegal;fold,points') - eq(h.encode(), "thisverylongheaderhas;semicolons;and,commas,butthey;" - "arenotlegal;fold,points") - - def test_leading_splittable_in_the_middle_just_before_overlong_last_part(self): - eq = self.ndiffAssertEqual - h = Header('this is a test where we need to have more than one line ' - 'before; our final line that is just too big to fit;; ' - 'this_part_does_not_fit_within_maxlinelen_and_thus_should_' - 'be_on_a_line_all_by_itself;') - eq(h.encode(), """\ -this is a test where we need to have more than one line before; - our final line that is just too big to fit;; - this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself;""") - - def test_overlong_last_part_followed_by_split_point(self): - eq = self.ndiffAssertEqual - h = Header('this_part_does_not_fit_within_maxlinelen_and_thus_should_' - 'be_on_a_line_all_by_itself ') - eq(h.encode(), "this_part_does_not_fit_within_maxlinelen_and_thus_" - "should_be_on_a_line_all_by_itself ") - - def test_multiline_with_overlong_parts_separated_by_two_split_points(self): - eq = self.ndiffAssertEqual - h = Header('this_is_a__test_where_we_need_to_have_more_than_one_line_' - 'before_our_final_line_; ; ' - 'this_part_does_not_fit_within_maxlinelen_and_thus_should_' - 'be_on_a_line_all_by_itself; ') - eq(h.encode(), """\ -this_is_a__test_where_we_need_to_have_more_than_one_line_before_our_final_line_; - ; - this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """) - - def test_multiline_with_overlong_last_part_followed_by_split_point(self): - eq = self.ndiffAssertEqual - h = Header('this is a test where we need to have more than one line ' - 'before our final line; ; ' - 'this_part_does_not_fit_within_maxlinelen_and_thus_should_' - 'be_on_a_line_all_by_itself; ') - eq(h.encode(), """\ -this is a test where we need to have more than one line before our final line; - ; - this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """) - - def test_long_header_with_whitespace_runs(self): - eq = self.ndiffAssertEqual - msg = Message() - msg['From'] = 'test@dom.ain' - msg['References'] = SPACE.join(['<foo@dom.ain> '] * 10) - msg.set_payload('Test') - sfp = StringIO() - g = Generator(sfp) - g.flatten(msg) - eq(sfp.getvalue(), """\ -From: test@dom.ain -References: <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> - <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> - <foo@dom.ain> <foo@dom.ain>\x20\x20 - -Test""") - - def test_long_run_with_semi_header_splitter(self): - eq = self.ndiffAssertEqual - msg = Message() - msg['From'] = 'test@dom.ain' - msg['References'] = SPACE.join(['<foo@dom.ain>'] * 10) + '; abc' - msg.set_payload('Test') - sfp = StringIO() - g = Generator(sfp) - g.flatten(msg) - eq(sfp.getvalue(), """\ -From: test@dom.ain -References: <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> - <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> - <foo@dom.ain>; abc - -Test""") - - def test_splitter_split_on_punctuation_only_if_fws(self): - eq = self.ndiffAssertEqual - msg = Message() - msg['From'] = 'test@dom.ain' - msg['References'] = ('thisverylongheaderhas;semicolons;and,commas,but' - 'they;arenotlegal;fold,points') - msg.set_payload('Test') - sfp = StringIO() - g = Generator(sfp) - g.flatten(msg) - # XXX the space after the header should not be there. - eq(sfp.getvalue(), """\ -From: test@dom.ain -References:\x20 - thisverylongheaderhas;semicolons;and,commas,butthey;arenotlegal;fold,points - -Test""") - - def test_no_split_long_header(self): - eq = self.ndiffAssertEqual - hstr = 'References: ' + 'x' * 80 - h = Header(hstr) - # These come on two lines because Headers are really field value - # classes and don't really know about their field names. - eq(h.encode(), """\ -References: - xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx""") - h = Header('x' * 80) - eq(h.encode(), 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx') - - def test_splitting_multiple_long_lines(self): - eq = self.ndiffAssertEqual - hstr = """\ -from babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST) -\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST) -\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST) -""" - h = Header(hstr, continuation_ws='\t') - eq(h.encode(), """\ -from babylon.socal-raves.org (localhost [127.0.0.1]); - by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; - for <mailman-admin@babylon.socal-raves.org>; - Sat, 2 Feb 2002 17:00:06 -0800 (PST) -\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); - by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; - for <mailman-admin@babylon.socal-raves.org>; - Sat, 2 Feb 2002 17:00:06 -0800 (PST) -\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); - by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; - for <mailman-admin@babylon.socal-raves.org>; - Sat, 2 Feb 2002 17:00:06 -0800 (PST)""") - - def test_splitting_first_line_only_is_long(self): - eq = self.ndiffAssertEqual - hstr = """\ -from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93] helo=cthulhu.gerg.ca) -\tby kronos.mems-exchange.org with esmtp (Exim 4.05) -\tid 17k4h5-00034i-00 -\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400""" - h = Header(hstr, maxlinelen=78, header_name='Received', - continuation_ws='\t') - eq(h.encode(), """\ -from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93] - helo=cthulhu.gerg.ca) -\tby kronos.mems-exchange.org with esmtp (Exim 4.05) -\tid 17k4h5-00034i-00 -\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400""") - - def test_long_8bit_header(self): - eq = self.ndiffAssertEqual - msg = Message() - h = Header('Britische Regierung gibt', 'iso-8859-1', - header_name='Subject') - h.append('gr\xfcnes Licht f\xfcr Offshore-Windkraftprojekte') - eq(h.encode(maxlinelen=76), """\ -=?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?= - =?iso-8859-1?q?hore-Windkraftprojekte?=""") - msg['Subject'] = h - eq(msg.as_string(maxheaderlen=76), """\ -Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?= - =?iso-8859-1?q?hore-Windkraftprojekte?= - -""") - eq(msg.as_string(maxheaderlen=0), """\ -Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offshore-Windkraftprojekte?= - -""") - - def test_long_8bit_header_no_charset(self): - eq = self.ndiffAssertEqual - msg = Message() - header_string = ('Britische Regierung gibt gr\xfcnes Licht ' - 'f\xfcr Offshore-Windkraftprojekte ' - '<a-very-long-address@example.com>') - msg['Reply-To'] = header_string - eq(msg.as_string(maxheaderlen=78), """\ -Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?= - =?utf-8?q?hore-Windkraftprojekte_=3Ca-very-long-address=40example=2Ecom=3E?= - -""") - msg = Message() - msg['Reply-To'] = Header(header_string, - header_name='Reply-To') - eq(msg.as_string(maxheaderlen=78), """\ -Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?= - =?utf-8?q?hore-Windkraftprojekte_=3Ca-very-long-address=40example=2Ecom=3E?= - -""") - - def test_long_to_header(self): - eq = self.ndiffAssertEqual - to = ('"Someone Test #A" <someone@eecs.umich.edu>,' - '<someone@eecs.umich.edu>, ' - '"Someone Test #B" <someone@umich.edu>, ' - '"Someone Test #C" <someone@eecs.umich.edu>, ' - '"Someone Test #D" <someone@eecs.umich.edu>') - msg = Message() - msg['To'] = to - eq(msg.as_string(maxheaderlen=78), '''\ -To: "Someone Test #A" <someone@eecs.umich.edu>,<someone@eecs.umich.edu>, - "Someone Test #B" <someone@umich.edu>, - "Someone Test #C" <someone@eecs.umich.edu>, - "Someone Test #D" <someone@eecs.umich.edu> - -''') - - def test_long_line_after_append(self): - eq = self.ndiffAssertEqual - s = 'This is an example of string which has almost the limit of header length.' - h = Header(s) - h.append('Add another line.') - eq(h.encode(maxlinelen=76), """\ -This is an example of string which has almost the limit of header length. - Add another line.""") - - def test_shorter_line_with_append(self): - eq = self.ndiffAssertEqual - s = 'This is a shorter line.' - h = Header(s) - h.append('Add another sentence. (Surprise?)') - eq(h.encode(), - 'This is a shorter line. Add another sentence. (Surprise?)') - - def test_long_field_name(self): - eq = self.ndiffAssertEqual - fn = 'X-Very-Very-Very-Long-Header-Name' - gs = ('Die Mieter treten hier ein werden mit einem Foerderband ' - 'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen ' - 'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen ' - 'bef\xf6rdert. ') - h = Header(gs, 'iso-8859-1', header_name=fn) - # BAW: this seems broken because the first line is too long - eq(h.encode(maxlinelen=76), """\ -=?iso-8859-1?q?Die_Mieter_treten_hier_e?= - =?iso-8859-1?q?in_werden_mit_einem_Foerderband_komfortabel_den_Korridor_e?= - =?iso-8859-1?q?ntlang=2C_an_s=FCdl=FCndischen_Wandgem=E4lden_vorbei=2C_ge?= - =?iso-8859-1?q?gen_die_rotierenden_Klingen_bef=F6rdert=2E_?=""") - - def test_long_received_header(self): - h = ('from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) ' - 'by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP; ' - 'Wed, 05 Mar 2003 18:10:18 -0700') - msg = Message() - msg['Received-1'] = Header(h, continuation_ws='\t') - msg['Received-2'] = h - # This should be splitting on spaces not semicolons. - self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\ -Received-1: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by - hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP; - Wed, 05 Mar 2003 18:10:18 -0700 -Received-2: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by - hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP; - Wed, 05 Mar 2003 18:10:18 -0700 - -""") - - def test_string_headerinst_eq(self): - h = ('<15975.17901.207240.414604@sgigritzmann1.mathematik.' - 'tu-muenchen.de> (David Bremner\'s message of ' - '"Thu, 6 Mar 2003 13:58:21 +0100")') - msg = Message() - msg['Received-1'] = Header(h, header_name='Received-1', - continuation_ws='\t') - msg['Received-2'] = h - # XXX The space after the ':' should not be there. - self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\ -Received-1:\x20 - <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David - Bremner's message of \"Thu, 6 Mar 2003 13:58:21 +0100\") -Received-2:\x20 - <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David - Bremner's message of \"Thu, 6 Mar 2003 13:58:21 +0100\") - -""") - - def test_long_unbreakable_lines_with_continuation(self): - eq = self.ndiffAssertEqual - msg = Message() - t = """\ -iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9 - locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp""" - msg['Face-1'] = t - msg['Face-2'] = Header(t, header_name='Face-2') - msg['Face-3'] = ' ' + t - # XXX This splitting is all wrong. It the first value line should be - # snug against the field name or the space after the header not there. - eq(msg.as_string(maxheaderlen=78), """\ -Face-1:\x20 - iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9 - locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp -Face-2:\x20 - iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9 - locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp -Face-3:\x20 - iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9 - locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp - -""") - - def test_another_long_multiline_header(self): - eq = self.ndiffAssertEqual - m = ('Received: from siimage.com ' - '([172.25.1.3]) by zima.siliconimage.com with ' - 'Microsoft SMTPSVC(5.0.2195.4905); ' - 'Wed, 16 Oct 2002 07:41:11 -0700') - msg = email.message_from_string(m) - eq(msg.as_string(maxheaderlen=78), '''\ -Received: from siimage.com ([172.25.1.3]) by zima.siliconimage.com with - Microsoft SMTPSVC(5.0.2195.4905); Wed, 16 Oct 2002 07:41:11 -0700 - -''') - - def test_long_lines_with_different_header(self): - eq = self.ndiffAssertEqual - h = ('List-Unsubscribe: ' - '<http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,' - ' <mailto:spamassassin-talk-request@lists.sourceforge.net' - '?subject=unsubscribe>') - msg = Message() - msg['List'] = h - msg['List'] = Header(h, header_name='List') - eq(msg.as_string(maxheaderlen=78), """\ -List: List-Unsubscribe: - <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>, - <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe> -List: List-Unsubscribe: - <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>, - <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe> - -""") - - def test_long_rfc2047_header_with_embedded_fws(self): - h = Header(textwrap.dedent("""\ - We're going to pretend this header is in a non-ascii character set - \tto see if line wrapping with encoded words and embedded - folding white space works"""), - charset='utf-8', - header_name='Test') - self.assertEqual(h.encode()+'\n', textwrap.dedent("""\ - =?utf-8?q?We=27re_going_to_pretend_this_header_is_in_a_non-ascii_chara?= - =?utf-8?q?cter_set?= - =?utf-8?q?_to_see_if_line_wrapping_with_encoded_words_and_embedded?= - =?utf-8?q?_folding_white_space_works?=""")+'\n') - - -# Test mangling of "From " lines in the body of a message -class TestFromMangling(unittest.TestCase): - def setUp(self): - self.msg = Message() - self.msg['From'] = 'aaa@bbb.org' - self.msg.set_payload("""\ -From the desk of A.A.A.: -Blah blah blah -""") - - def test_mangled_from(self): - s = StringIO() - g = Generator(s, mangle_from_=True) - g.flatten(self.msg) - self.assertEqual(s.getvalue(), """\ -From: aaa@bbb.org - ->From the desk of A.A.A.: -Blah blah blah -""") - - def test_dont_mangle_from(self): - s = StringIO() - g = Generator(s, mangle_from_=False) - g.flatten(self.msg) - self.assertEqual(s.getvalue(), """\ -From: aaa@bbb.org - -From the desk of A.A.A.: -Blah blah blah -""") - - def test_mangle_from_in_preamble_and_epilog(self): - s = StringIO() - g = Generator(s, mangle_from_=True) - msg = email.message_from_string(textwrap.dedent("""\ - From: foo@bar.com - Mime-Version: 1.0 - Content-Type: multipart/mixed; boundary=XXX - - From somewhere unknown - - --XXX - Content-Type: text/plain - - foo - - --XXX-- - - From somewhere unknowable - """)) - g.flatten(msg) - self.assertEqual(len([1 for x in s.getvalue().split('\n') - if x.startswith('>From ')]), 2) - - def test_mangled_from_with_bad_bytes(self): - source = textwrap.dedent("""\ - Content-Type: text/plain; charset="utf-8" - MIME-Version: 1.0 - Content-Transfer-Encoding: 8bit - From: aaa@bbb.org - - """).encode('utf-8') - msg = email.message_from_bytes(source + b'From R\xc3\xb6lli\n') - b = BytesIO() - g = BytesGenerator(b, mangle_from_=True) - g.flatten(msg) - self.assertEqual(b.getvalue(), source + b'>From R\xc3\xb6lli\n') - - -# Test the basic MIMEAudio class -class TestMIMEAudio(unittest.TestCase): - def setUp(self): - # Make sure we pick up the audiotest.au that lives in email/test/data. - # In Python, there's an audiotest.au living in Lib/test but that isn't - # included in some binary distros that don't include the test - # package. The trailing empty string on the .join() is significant - # since findfile() will do a dirname(). - datadir = os.path.join(os.path.dirname(landmark), 'data', '') - with open(findfile('audiotest.au', datadir), 'rb') as fp: - self._audiodata = fp.read() - self._au = MIMEAudio(self._audiodata) - - def test_guess_minor_type(self): - self.assertEqual(self._au.get_content_type(), 'audio/basic') - - def test_encoding(self): - payload = self._au.get_payload() - self.assertEqual(base64.decodebytes(bytes(payload, 'ascii')), - self._audiodata) - - def test_checkSetMinor(self): - au = MIMEAudio(self._audiodata, 'fish') - self.assertEqual(au.get_content_type(), 'audio/fish') - - def test_add_header(self): - eq = self.assertEqual - unless = self.assertTrue - self._au.add_header('Content-Disposition', 'attachment', - filename='audiotest.au') - eq(self._au['content-disposition'], - 'attachment; filename="audiotest.au"') - eq(self._au.get_params(header='content-disposition'), - [('attachment', ''), ('filename', 'audiotest.au')]) - eq(self._au.get_param('filename', header='content-disposition'), - 'audiotest.au') - missing = [] - eq(self._au.get_param('attachment', header='content-disposition'), '') - unless(self._au.get_param('foo', failobj=missing, - header='content-disposition') is missing) - # Try some missing stuff - unless(self._au.get_param('foobar', missing) is missing) - unless(self._au.get_param('attachment', missing, - header='foobar') is missing) - - - -# Test the basic MIMEImage class -class TestMIMEImage(unittest.TestCase): - def setUp(self): - with openfile('PyBanner048.gif', 'rb') as fp: - self._imgdata = fp.read() - self._im = MIMEImage(self._imgdata) - - def test_guess_minor_type(self): - self.assertEqual(self._im.get_content_type(), 'image/gif') - - def test_encoding(self): - payload = self._im.get_payload() - self.assertEqual(base64.decodebytes(bytes(payload, 'ascii')), - self._imgdata) - - def test_checkSetMinor(self): - im = MIMEImage(self._imgdata, 'fish') - self.assertEqual(im.get_content_type(), 'image/fish') - - def test_add_header(self): - eq = self.assertEqual - unless = self.assertTrue - self._im.add_header('Content-Disposition', 'attachment', - filename='dingusfish.gif') - eq(self._im['content-disposition'], - 'attachment; filename="dingusfish.gif"') - eq(self._im.get_params(header='content-disposition'), - [('attachment', ''), ('filename', 'dingusfish.gif')]) - eq(self._im.get_param('filename', header='content-disposition'), - 'dingusfish.gif') - missing = [] - eq(self._im.get_param('attachment', header='content-disposition'), '') - unless(self._im.get_param('foo', failobj=missing, - header='content-disposition') is missing) - # Try some missing stuff - unless(self._im.get_param('foobar', missing) is missing) - unless(self._im.get_param('attachment', missing, - header='foobar') is missing) - - - -# Test the basic MIMEApplication class -class TestMIMEApplication(unittest.TestCase): - def test_headers(self): - eq = self.assertEqual - msg = MIMEApplication(b'\xfa\xfb\xfc\xfd\xfe\xff') - eq(msg.get_content_type(), 'application/octet-stream') - eq(msg['content-transfer-encoding'], 'base64') - - def test_body(self): - eq = self.assertEqual - bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff' - msg = MIMEApplication(bytesdata) - # whitespace in the cte encoded block is RFC-irrelevant. - eq(msg.get_payload().strip(), '+vv8/f7/') - eq(msg.get_payload(decode=True), bytesdata) - - - -# Test the basic MIMEText class -class TestMIMEText(unittest.TestCase): - def setUp(self): - self._msg = MIMEText('hello there') - - def test_types(self): - eq = self.assertEqual - unless = self.assertTrue - eq(self._msg.get_content_type(), 'text/plain') - eq(self._msg.get_param('charset'), 'us-ascii') - missing = [] - unless(self._msg.get_param('foobar', missing) is missing) - unless(self._msg.get_param('charset', missing, header='foobar') - is missing) - - def test_payload(self): - self.assertEqual(self._msg.get_payload(), 'hello there') - self.assertTrue(not self._msg.is_multipart()) - - def test_charset(self): - eq = self.assertEqual - msg = MIMEText('hello there', _charset='us-ascii') - eq(msg.get_charset().input_charset, 'us-ascii') - eq(msg['content-type'], 'text/plain; charset="us-ascii"') - - def test_7bit_input(self): - eq = self.assertEqual - msg = MIMEText('hello there', _charset='us-ascii') - eq(msg.get_charset().input_charset, 'us-ascii') - eq(msg['content-type'], 'text/plain; charset="us-ascii"') - - def test_7bit_input_no_charset(self): - eq = self.assertEqual - msg = MIMEText('hello there') - eq(msg.get_charset(), 'us-ascii') - eq(msg['content-type'], 'text/plain; charset="us-ascii"') - self.assertTrue('hello there' in msg.as_string()) - - def test_utf8_input(self): - teststr = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430' - eq = self.assertEqual - msg = MIMEText(teststr, _charset='utf-8') - eq(msg.get_charset().output_charset, 'utf-8') - eq(msg['content-type'], 'text/plain; charset="utf-8"') - eq(msg.get_payload(decode=True), teststr.encode('utf-8')) - - @unittest.skip("can't fix because of backward compat in email5, " - "will fix in email6") - def test_utf8_input_no_charset(self): - teststr = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430' - self.assertRaises(UnicodeEncodeError, MIMEText, teststr) - - - -# Test complicated multipart/* messages -class TestMultipart(TestEmailBase): - def setUp(self): - with openfile('PyBanner048.gif', 'rb') as fp: - data = fp.read() - container = MIMEBase('multipart', 'mixed', boundary='BOUNDARY') - image = MIMEImage(data, name='dingusfish.gif') - image.add_header('content-disposition', 'attachment', - filename='dingusfish.gif') - intro = MIMEText('''\ -Hi there, - -This is the dingus fish. -''') - container.attach(intro) - container.attach(image) - container['From'] = 'Barry <barry@digicool.com>' - container['To'] = 'Dingus Lovers <cravindogs@cravindogs.com>' - container['Subject'] = 'Here is your dingus fish' - - now = 987809702.54848599 - timetuple = time.localtime(now) - if timetuple[-1] == 0: - tzsecs = time.timezone - else: - tzsecs = time.altzone - if tzsecs > 0: - sign = '-' - else: - sign = '+' - tzoffset = ' %s%04d' % (sign, tzsecs / 36) - container['Date'] = time.strftime( - '%a, %d %b %Y %H:%M:%S', - time.localtime(now)) + tzoffset - self._msg = container - self._im = image - self._txt = intro - - def test_hierarchy(self): - # convenience - eq = self.assertEqual - unless = self.assertTrue - raises = self.assertRaises - # tests - m = self._msg - unless(m.is_multipart()) - eq(m.get_content_type(), 'multipart/mixed') - eq(len(m.get_payload()), 2) - raises(IndexError, m.get_payload, 2) - m0 = m.get_payload(0) - m1 = m.get_payload(1) - unless(m0 is self._txt) - unless(m1 is self._im) - eq(m.get_payload(), [m0, m1]) - unless(not m0.is_multipart()) - unless(not m1.is_multipart()) - - def test_empty_multipart_idempotent(self): - text = """\ -Content-Type: multipart/mixed; boundary="BOUNDARY" -MIME-Version: 1.0 -Subject: A subject -To: aperson@dom.ain -From: bperson@dom.ain - - ---BOUNDARY - - ---BOUNDARY-- -""" - msg = Parser().parsestr(text) - self.ndiffAssertEqual(text, msg.as_string()) - - def test_no_parts_in_a_multipart_with_none_epilogue(self): - outer = MIMEBase('multipart', 'mixed') - outer['Subject'] = 'A subject' - outer['To'] = 'aperson@dom.ain' - outer['From'] = 'bperson@dom.ain' - outer.set_boundary('BOUNDARY') - self.ndiffAssertEqual(outer.as_string(), '''\ -Content-Type: multipart/mixed; boundary="BOUNDARY" -MIME-Version: 1.0 -Subject: A subject -To: aperson@dom.ain -From: bperson@dom.ain - ---BOUNDARY - ---BOUNDARY--''') - - def test_no_parts_in_a_multipart_with_empty_epilogue(self): - outer = MIMEBase('multipart', 'mixed') - outer['Subject'] = 'A subject' - outer['To'] = 'aperson@dom.ain' - outer['From'] = 'bperson@dom.ain' - outer.preamble = '' - outer.epilogue = '' - outer.set_boundary('BOUNDARY') - self.ndiffAssertEqual(outer.as_string(), '''\ -Content-Type: multipart/mixed; boundary="BOUNDARY" -MIME-Version: 1.0 -Subject: A subject -To: aperson@dom.ain -From: bperson@dom.ain - - ---BOUNDARY - ---BOUNDARY-- -''') - - def test_one_part_in_a_multipart(self): - eq = self.ndiffAssertEqual - outer = MIMEBase('multipart', 'mixed') - outer['Subject'] = 'A subject' - outer['To'] = 'aperson@dom.ain' - outer['From'] = 'bperson@dom.ain' - outer.set_boundary('BOUNDARY') - msg = MIMEText('hello world') - outer.attach(msg) - eq(outer.as_string(), '''\ -Content-Type: multipart/mixed; boundary="BOUNDARY" -MIME-Version: 1.0 -Subject: A subject -To: aperson@dom.ain -From: bperson@dom.ain - ---BOUNDARY -Content-Type: text/plain; charset="us-ascii" -MIME-Version: 1.0 -Content-Transfer-Encoding: 7bit - -hello world ---BOUNDARY--''') - - def test_seq_parts_in_a_multipart_with_empty_preamble(self): - eq = self.ndiffAssertEqual - outer = MIMEBase('multipart', 'mixed') - outer['Subject'] = 'A subject' - outer['To'] = 'aperson@dom.ain' - outer['From'] = 'bperson@dom.ain' - outer.preamble = '' - msg = MIMEText('hello world') - outer.attach(msg) - outer.set_boundary('BOUNDARY') - eq(outer.as_string(), '''\ -Content-Type: multipart/mixed; boundary="BOUNDARY" -MIME-Version: 1.0 -Subject: A subject -To: aperson@dom.ain -From: bperson@dom.ain - - ---BOUNDARY -Content-Type: text/plain; charset="us-ascii" -MIME-Version: 1.0 -Content-Transfer-Encoding: 7bit - -hello world ---BOUNDARY--''') - - - def test_seq_parts_in_a_multipart_with_none_preamble(self): - eq = self.ndiffAssertEqual - outer = MIMEBase('multipart', 'mixed') - outer['Subject'] = 'A subject' - outer['To'] = 'aperson@dom.ain' - outer['From'] = 'bperson@dom.ain' - outer.preamble = None - msg = MIMEText('hello world') - outer.attach(msg) - outer.set_boundary('BOUNDARY') - eq(outer.as_string(), '''\ -Content-Type: multipart/mixed; boundary="BOUNDARY" -MIME-Version: 1.0 -Subject: A subject -To: aperson@dom.ain -From: bperson@dom.ain - ---BOUNDARY -Content-Type: text/plain; charset="us-ascii" -MIME-Version: 1.0 -Content-Transfer-Encoding: 7bit - -hello world ---BOUNDARY--''') - - - def test_seq_parts_in_a_multipart_with_none_epilogue(self): - eq = self.ndiffAssertEqual - outer = MIMEBase('multipart', 'mixed') - outer['Subject'] = 'A subject' - outer['To'] = 'aperson@dom.ain' - outer['From'] = 'bperson@dom.ain' - outer.epilogue = None - msg = MIMEText('hello world') - outer.attach(msg) - outer.set_boundary('BOUNDARY') - eq(outer.as_string(), '''\ -Content-Type: multipart/mixed; boundary="BOUNDARY" -MIME-Version: 1.0 -Subject: A subject -To: aperson@dom.ain -From: bperson@dom.ain - ---BOUNDARY -Content-Type: text/plain; charset="us-ascii" -MIME-Version: 1.0 -Content-Transfer-Encoding: 7bit - -hello world ---BOUNDARY--''') - - - def test_seq_parts_in_a_multipart_with_empty_epilogue(self): - eq = self.ndiffAssertEqual - outer = MIMEBase('multipart', 'mixed') - outer['Subject'] = 'A subject' - outer['To'] = 'aperson@dom.ain' - outer['From'] = 'bperson@dom.ain' - outer.epilogue = '' - msg = MIMEText('hello world') - outer.attach(msg) - outer.set_boundary('BOUNDARY') - eq(outer.as_string(), '''\ -Content-Type: multipart/mixed; boundary="BOUNDARY" -MIME-Version: 1.0 -Subject: A subject -To: aperson@dom.ain -From: bperson@dom.ain - ---BOUNDARY -Content-Type: text/plain; charset="us-ascii" -MIME-Version: 1.0 -Content-Transfer-Encoding: 7bit - -hello world ---BOUNDARY-- -''') - - - def test_seq_parts_in_a_multipart_with_nl_epilogue(self): - eq = self.ndiffAssertEqual - outer = MIMEBase('multipart', 'mixed') - outer['Subject'] = 'A subject' - outer['To'] = 'aperson@dom.ain' - outer['From'] = 'bperson@dom.ain' - outer.epilogue = '\n' - msg = MIMEText('hello world') - outer.attach(msg) - outer.set_boundary('BOUNDARY') - eq(outer.as_string(), '''\ -Content-Type: multipart/mixed; boundary="BOUNDARY" -MIME-Version: 1.0 -Subject: A subject -To: aperson@dom.ain -From: bperson@dom.ain - ---BOUNDARY -Content-Type: text/plain; charset="us-ascii" -MIME-Version: 1.0 -Content-Transfer-Encoding: 7bit - -hello world ---BOUNDARY-- - -''') - - def test_message_external_body(self): - eq = self.assertEqual - msg = self._msgobj('msg_36.txt') - eq(len(msg.get_payload()), 2) - msg1 = msg.get_payload(1) - eq(msg1.get_content_type(), 'multipart/alternative') - eq(len(msg1.get_payload()), 2) - for subpart in msg1.get_payload(): - eq(subpart.get_content_type(), 'message/external-body') - eq(len(subpart.get_payload()), 1) - subsubpart = subpart.get_payload(0) - eq(subsubpart.get_content_type(), 'text/plain') - - def test_double_boundary(self): - # msg_37.txt is a multipart that contains two dash-boundary's in a - # row. Our interpretation of RFC 2046 calls for ignoring the second - # and subsequent boundaries. - msg = self._msgobj('msg_37.txt') - self.assertEqual(len(msg.get_payload()), 3) - - def test_nested_inner_contains_outer_boundary(self): - eq = self.ndiffAssertEqual - # msg_38.txt has an inner part that contains outer boundaries. My - # interpretation of RFC 2046 (based on sections 5.1 and 5.1.2) say - # these are illegal and should be interpreted as unterminated inner - # parts. - msg = self._msgobj('msg_38.txt') - sfp = StringIO() - iterators._structure(msg, sfp) - eq(sfp.getvalue(), """\ -multipart/mixed - multipart/mixed - multipart/alternative - text/plain - text/plain - text/plain - text/plain -""") - - def test_nested_with_same_boundary(self): - eq = self.ndiffAssertEqual - # msg 39.txt is similarly evil in that it's got inner parts that use - # the same boundary as outer parts. Again, I believe the way this is - # parsed is closest to the spirit of RFC 2046 - msg = self._msgobj('msg_39.txt') - sfp = StringIO() - iterators._structure(msg, sfp) - eq(sfp.getvalue(), """\ -multipart/mixed - multipart/mixed - multipart/alternative - application/octet-stream - application/octet-stream - text/plain -""") - - def test_boundary_in_non_multipart(self): - msg = self._msgobj('msg_40.txt') - self.assertEqual(msg.as_string(), '''\ -MIME-Version: 1.0 -Content-Type: text/html; boundary="--961284236552522269" - -----961284236552522269 -Content-Type: text/html; -Content-Transfer-Encoding: 7Bit - -<html></html> - -----961284236552522269-- -''') - - def test_boundary_with_leading_space(self): - eq = self.assertEqual - msg = email.message_from_string('''\ -MIME-Version: 1.0 -Content-Type: multipart/mixed; boundary=" XXXX" - --- XXXX -Content-Type: text/plain - - --- XXXX -Content-Type: text/plain - --- XXXX-- -''') - self.assertTrue(msg.is_multipart()) - eq(msg.get_boundary(), ' XXXX') - eq(len(msg.get_payload()), 2) - - def test_boundary_without_trailing_newline(self): - m = Parser().parsestr("""\ -Content-Type: multipart/mixed; boundary="===============0012394164==" -MIME-Version: 1.0 - ---===============0012394164== -Content-Type: image/file1.jpg -MIME-Version: 1.0 -Content-Transfer-Encoding: base64 - -YXNkZg== ---===============0012394164==--""") - self.assertEqual(m.get_payload(0).get_payload(), 'YXNkZg==') - - - -# Test some badly formatted messages -class TestNonConformant(TestEmailBase): - def test_parse_missing_minor_type(self): - eq = self.assertEqual - msg = self._msgobj('msg_14.txt') - eq(msg.get_content_type(), 'text/plain') - eq(msg.get_content_maintype(), 'text') - eq(msg.get_content_subtype(), 'plain') - - def test_same_boundary_inner_outer(self): - unless = self.assertTrue - msg = self._msgobj('msg_15.txt') - # XXX We can probably eventually do better - inner = msg.get_payload(0) - unless(hasattr(inner, 'defects')) - self.assertEqual(len(inner.defects), 1) - unless(isinstance(inner.defects[0], - errors.StartBoundaryNotFoundDefect)) - - def test_multipart_no_boundary(self): - unless = self.assertTrue - msg = self._msgobj('msg_25.txt') - unless(isinstance(msg.get_payload(), str)) - self.assertEqual(len(msg.defects), 2) - unless(isinstance(msg.defects[0], errors.NoBoundaryInMultipartDefect)) - unless(isinstance(msg.defects[1], - errors.MultipartInvariantViolationDefect)) - - def test_invalid_content_type(self): - eq = self.assertEqual - neq = self.ndiffAssertEqual - msg = Message() - # RFC 2045, $5.2 says invalid yields text/plain - msg['Content-Type'] = 'text' - eq(msg.get_content_maintype(), 'text') - eq(msg.get_content_subtype(), 'plain') - eq(msg.get_content_type(), 'text/plain') - # Clear the old value and try something /really/ invalid - del msg['content-type'] - msg['Content-Type'] = 'foo' - eq(msg.get_content_maintype(), 'text') - eq(msg.get_content_subtype(), 'plain') - eq(msg.get_content_type(), 'text/plain') - # Still, make sure that the message is idempotently generated - s = StringIO() - g = Generator(s) - g.flatten(msg) - neq(s.getvalue(), 'Content-Type: foo\n\n') - - def test_no_start_boundary(self): - eq = self.ndiffAssertEqual - msg = self._msgobj('msg_31.txt') - eq(msg.get_payload(), """\ ---BOUNDARY -Content-Type: text/plain - -message 1 - ---BOUNDARY -Content-Type: text/plain - -message 2 - ---BOUNDARY-- -""") - - def test_no_separating_blank_line(self): - eq = self.ndiffAssertEqual - msg = self._msgobj('msg_35.txt') - eq(msg.as_string(), """\ -From: aperson@dom.ain -To: bperson@dom.ain -Subject: here's something interesting - -counter to RFC 2822, there's no separating newline here -""") - - def test_lying_multipart(self): - unless = self.assertTrue - msg = self._msgobj('msg_41.txt') - unless(hasattr(msg, 'defects')) - self.assertEqual(len(msg.defects), 2) - unless(isinstance(msg.defects[0], errors.NoBoundaryInMultipartDefect)) - unless(isinstance(msg.defects[1], - errors.MultipartInvariantViolationDefect)) - - def test_missing_start_boundary(self): - outer = self._msgobj('msg_42.txt') - # The message structure is: - # - # multipart/mixed - # text/plain - # message/rfc822 - # multipart/mixed [*] - # - # [*] This message is missing its start boundary - bad = outer.get_payload(1).get_payload(0) - self.assertEqual(len(bad.defects), 1) - self.assertTrue(isinstance(bad.defects[0], - errors.StartBoundaryNotFoundDefect)) - - def test_first_line_is_continuation_header(self): - eq = self.assertEqual - m = ' Line 1\nLine 2\nLine 3' - msg = email.message_from_string(m) - eq(msg.keys(), []) - eq(msg.get_payload(), 'Line 2\nLine 3') - eq(len(msg.defects), 1) - self.assertTrue(isinstance(msg.defects[0], - errors.FirstHeaderLineIsContinuationDefect)) - eq(msg.defects[0].line, ' Line 1\n') - - - -# Test RFC 2047 header encoding and decoding -class TestRFC2047(TestEmailBase): - def test_rfc2047_multiline(self): - eq = self.assertEqual - s = """Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz - foo bar =?mac-iceland?q?r=8Aksm=9Arg=8Cs?=""" - dh = decode_header(s) - eq(dh, [ - (b'Re:', None), - (b'r\x8aksm\x9arg\x8cs', 'mac-iceland'), - (b'baz foo bar', None), - (b'r\x8aksm\x9arg\x8cs', 'mac-iceland')]) - header = make_header(dh) - eq(str(header), - 'Re: r\xe4ksm\xf6rg\xe5s baz foo bar r\xe4ksm\xf6rg\xe5s') - self.ndiffAssertEqual(header.encode(maxlinelen=76), """\ -Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz foo bar =?mac-iceland?q?r=8Aksm?= - =?mac-iceland?q?=9Arg=8Cs?=""") - - def test_whitespace_eater_unicode(self): - eq = self.assertEqual - s = '=?ISO-8859-1?Q?Andr=E9?= Pirard <pirard@dom.ain>' - dh = decode_header(s) - eq(dh, [(b'Andr\xe9', 'iso-8859-1'), - (b'Pirard <pirard@dom.ain>', None)]) - header = str(make_header(dh)) - eq(header, 'Andr\xe9 Pirard <pirard@dom.ain>') - - def test_whitespace_eater_unicode_2(self): - eq = self.assertEqual - s = 'The =?iso-8859-1?b?cXVpY2sgYnJvd24gZm94?= jumped over the =?iso-8859-1?b?bGF6eSBkb2c=?=' - dh = decode_header(s) - eq(dh, [(b'The', None), (b'quick brown fox', 'iso-8859-1'), - (b'jumped over the', None), (b'lazy dog', 'iso-8859-1')]) - hu = str(make_header(dh)) - eq(hu, 'The quick brown fox jumped over the lazy dog') - - def test_rfc2047_missing_whitespace(self): - s = 'Sm=?ISO-8859-1?B?9g==?=rg=?ISO-8859-1?B?5Q==?=sbord' - dh = decode_header(s) - self.assertEqual(dh, [(s, None)]) - - def test_rfc2047_with_whitespace(self): - s = 'Sm =?ISO-8859-1?B?9g==?= rg =?ISO-8859-1?B?5Q==?= sbord' - dh = decode_header(s) - self.assertEqual(dh, [(b'Sm', None), (b'\xf6', 'iso-8859-1'), - (b'rg', None), (b'\xe5', 'iso-8859-1'), - (b'sbord', None)]) - - def test_rfc2047_B_bad_padding(self): - s = '=?iso-8859-1?B?%s?=' - data = [ # only test complete bytes - ('dm==', b'v'), ('dm=', b'v'), ('dm', b'v'), - ('dmk=', b'vi'), ('dmk', b'vi') - ] - for q, a in data: - dh = decode_header(s % q) - self.assertEqual(dh, [(a, 'iso-8859-1')]) - - def test_rfc2047_Q_invalid_digits(self): - # issue 10004. - s = '=?iso-8659-1?Q?andr=e9=zz?=' - self.assertEqual(decode_header(s), - [(b'andr\xe9=zz', 'iso-8659-1')]) - - -# Test the MIMEMessage class -class TestMIMEMessage(TestEmailBase): - def setUp(self): - with openfile('msg_11.txt') as fp: - self._text = fp.read() - - def test_type_error(self): - self.assertRaises(TypeError, MIMEMessage, 'a plain string') - - def test_valid_argument(self): - eq = self.assertEqual - unless = self.assertTrue - subject = 'A sub-message' - m = Message() - m['Subject'] = subject - r = MIMEMessage(m) - eq(r.get_content_type(), 'message/rfc822') - payload = r.get_payload() - unless(isinstance(payload, list)) - eq(len(payload), 1) - subpart = payload[0] - unless(subpart is m) - eq(subpart['subject'], subject) - - def test_bad_multipart(self): - eq = self.assertEqual - msg1 = Message() - msg1['Subject'] = 'subpart 1' - msg2 = Message() - msg2['Subject'] = 'subpart 2' - r = MIMEMessage(msg1) - self.assertRaises(errors.MultipartConversionError, r.attach, msg2) - - def test_generate(self): - # First craft the message to be encapsulated - m = Message() - m['Subject'] = 'An enclosed message' - m.set_payload('Here is the body of the message.\n') - r = MIMEMessage(m) - r['Subject'] = 'The enclosing message' - s = StringIO() - g = Generator(s) - g.flatten(r) - self.assertEqual(s.getvalue(), """\ -Content-Type: message/rfc822 -MIME-Version: 1.0 -Subject: The enclosing message - -Subject: An enclosed message - -Here is the body of the message. -""") - - def test_parse_message_rfc822(self): - eq = self.assertEqual - unless = self.assertTrue - msg = self._msgobj('msg_11.txt') - eq(msg.get_content_type(), 'message/rfc822') - payload = msg.get_payload() - unless(isinstance(payload, list)) - eq(len(payload), 1) - submsg = payload[0] - self.assertTrue(isinstance(submsg, Message)) - eq(submsg['subject'], 'An enclosed message') - eq(submsg.get_payload(), 'Here is the body of the message.\n') - - def test_dsn(self): - eq = self.assertEqual - unless = self.assertTrue - # msg 16 is a Delivery Status Notification, see RFC 1894 - msg = self._msgobj('msg_16.txt') - eq(msg.get_content_type(), 'multipart/report') - unless(msg.is_multipart()) - eq(len(msg.get_payload()), 3) - # Subpart 1 is a text/plain, human readable section - subpart = msg.get_payload(0) - eq(subpart.get_content_type(), 'text/plain') - eq(subpart.get_payload(), """\ -This report relates to a message you sent with the following header fields: - - Message-id: <002001c144a6$8752e060$56104586@oxy.edu> - Date: Sun, 23 Sep 2001 20:10:55 -0700 - From: "Ian T. Henry" <henryi@oxy.edu> - To: SoCal Raves <scr@socal-raves.org> - Subject: [scr] yeah for Ians!! - -Your message cannot be delivered to the following recipients: - - Recipient address: jangel1@cougar.noc.ucla.edu - Reason: recipient reached disk quota - -""") - # Subpart 2 contains the machine parsable DSN information. It - # consists of two blocks of headers, represented by two nested Message - # objects. - subpart = msg.get_payload(1) - eq(subpart.get_content_type(), 'message/delivery-status') - eq(len(subpart.get_payload()), 2) - # message/delivery-status should treat each block as a bunch of - # headers, i.e. a bunch of Message objects. - dsn1 = subpart.get_payload(0) - unless(isinstance(dsn1, Message)) - eq(dsn1['original-envelope-id'], '0GK500B4HD0888@cougar.noc.ucla.edu') - eq(dsn1.get_param('dns', header='reporting-mta'), '') - # Try a missing one <wink> - eq(dsn1.get_param('nsd', header='reporting-mta'), None) - dsn2 = subpart.get_payload(1) - unless(isinstance(dsn2, Message)) - eq(dsn2['action'], 'failed') - eq(dsn2.get_params(header='original-recipient'), - [('rfc822', ''), ('jangel1@cougar.noc.ucla.edu', '')]) - eq(dsn2.get_param('rfc822', header='final-recipient'), '') - # Subpart 3 is the original message - subpart = msg.get_payload(2) - eq(subpart.get_content_type(), 'message/rfc822') - payload = subpart.get_payload() - unless(isinstance(payload, list)) - eq(len(payload), 1) - subsubpart = payload[0] - unless(isinstance(subsubpart, Message)) - eq(subsubpart.get_content_type(), 'text/plain') - eq(subsubpart['message-id'], - '<002001c144a6$8752e060$56104586@oxy.edu>') - - def test_epilogue(self): - eq = self.ndiffAssertEqual - with openfile('msg_21.txt') as fp: - text = fp.read() - msg = Message() - msg['From'] = 'aperson@dom.ain' - msg['To'] = 'bperson@dom.ain' - msg['Subject'] = 'Test' - msg.preamble = 'MIME message' - msg.epilogue = 'End of MIME message\n' - msg1 = MIMEText('One') - msg2 = MIMEText('Two') - msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY') - msg.attach(msg1) - msg.attach(msg2) - sfp = StringIO() - g = Generator(sfp) - g.flatten(msg) - eq(sfp.getvalue(), text) - - def test_no_nl_preamble(self): - eq = self.ndiffAssertEqual - msg = Message() - msg['From'] = 'aperson@dom.ain' - msg['To'] = 'bperson@dom.ain' - msg['Subject'] = 'Test' - msg.preamble = 'MIME message' - msg.epilogue = '' - msg1 = MIMEText('One') - msg2 = MIMEText('Two') - msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY') - msg.attach(msg1) - msg.attach(msg2) - eq(msg.as_string(), """\ -From: aperson@dom.ain -To: bperson@dom.ain -Subject: Test -Content-Type: multipart/mixed; boundary="BOUNDARY" - -MIME message ---BOUNDARY -Content-Type: text/plain; charset="us-ascii" -MIME-Version: 1.0 -Content-Transfer-Encoding: 7bit - -One ---BOUNDARY -Content-Type: text/plain; charset="us-ascii" -MIME-Version: 1.0 -Content-Transfer-Encoding: 7bit - -Two ---BOUNDARY-- -""") - - def test_default_type(self): - eq = self.assertEqual - with openfile('msg_30.txt') as fp: - msg = email.message_from_file(fp) - container1 = msg.get_payload(0) - eq(container1.get_default_type(), 'message/rfc822') - eq(container1.get_content_type(), 'message/rfc822') - container2 = msg.get_payload(1) - eq(container2.get_default_type(), 'message/rfc822') - eq(container2.get_content_type(), 'message/rfc822') - container1a = container1.get_payload(0) - eq(container1a.get_default_type(), 'text/plain') - eq(container1a.get_content_type(), 'text/plain') - container2a = container2.get_payload(0) - eq(container2a.get_default_type(), 'text/plain') - eq(container2a.get_content_type(), 'text/plain') - - def test_default_type_with_explicit_container_type(self): - eq = self.assertEqual - with openfile('msg_28.txt') as fp: - msg = email.message_from_file(fp) - container1 = msg.get_payload(0) - eq(container1.get_default_type(), 'message/rfc822') - eq(container1.get_content_type(), 'message/rfc822') - container2 = msg.get_payload(1) - eq(container2.get_default_type(), 'message/rfc822') - eq(container2.get_content_type(), 'message/rfc822') - container1a = container1.get_payload(0) - eq(container1a.get_default_type(), 'text/plain') - eq(container1a.get_content_type(), 'text/plain') - container2a = container2.get_payload(0) - eq(container2a.get_default_type(), 'text/plain') - eq(container2a.get_content_type(), 'text/plain') - - def test_default_type_non_parsed(self): - eq = self.assertEqual - neq = self.ndiffAssertEqual - # Set up container - container = MIMEMultipart('digest', 'BOUNDARY') - container.epilogue = '' - # Set up subparts - subpart1a = MIMEText('message 1\n') - subpart2a = MIMEText('message 2\n') - subpart1 = MIMEMessage(subpart1a) - subpart2 = MIMEMessage(subpart2a) - container.attach(subpart1) - container.attach(subpart2) - eq(subpart1.get_content_type(), 'message/rfc822') - eq(subpart1.get_default_type(), 'message/rfc822') - eq(subpart2.get_content_type(), 'message/rfc822') - eq(subpart2.get_default_type(), 'message/rfc822') - neq(container.as_string(0), '''\ -Content-Type: multipart/digest; boundary="BOUNDARY" -MIME-Version: 1.0 - ---BOUNDARY -Content-Type: message/rfc822 -MIME-Version: 1.0 - -Content-Type: text/plain; charset="us-ascii" -MIME-Version: 1.0 -Content-Transfer-Encoding: 7bit - -message 1 - ---BOUNDARY -Content-Type: message/rfc822 -MIME-Version: 1.0 - -Content-Type: text/plain; charset="us-ascii" -MIME-Version: 1.0 -Content-Transfer-Encoding: 7bit - -message 2 - ---BOUNDARY-- -''') - del subpart1['content-type'] - del subpart1['mime-version'] - del subpart2['content-type'] - del subpart2['mime-version'] - eq(subpart1.get_content_type(), 'message/rfc822') - eq(subpart1.get_default_type(), 'message/rfc822') - eq(subpart2.get_content_type(), 'message/rfc822') - eq(subpart2.get_default_type(), 'message/rfc822') - neq(container.as_string(0), '''\ -Content-Type: multipart/digest; boundary="BOUNDARY" -MIME-Version: 1.0 - ---BOUNDARY - -Content-Type: text/plain; charset="us-ascii" -MIME-Version: 1.0 -Content-Transfer-Encoding: 7bit - -message 1 - ---BOUNDARY - -Content-Type: text/plain; charset="us-ascii" -MIME-Version: 1.0 -Content-Transfer-Encoding: 7bit - -message 2 - ---BOUNDARY-- -''') - - def test_mime_attachments_in_constructor(self): - eq = self.assertEqual - text1 = MIMEText('') - text2 = MIMEText('') - msg = MIMEMultipart(_subparts=(text1, text2)) - eq(len(msg.get_payload()), 2) - eq(msg.get_payload(0), text1) - eq(msg.get_payload(1), text2) - - def test_default_multipart_constructor(self): - msg = MIMEMultipart() - self.assertTrue(msg.is_multipart()) - - -# A general test of parser->model->generator idempotency. IOW, read a message -# in, parse it into a message object tree, then without touching the tree, -# regenerate the plain text. The original text and the transformed text -# should be identical. Note: that we ignore the Unix-From since that may -# contain a changed date. -class TestIdempotent(TestEmailBase): - - linesep = '\n' - - def _msgobj(self, filename): - with openfile(filename) as fp: - data = fp.read() - msg = email.message_from_string(data) - return msg, data - - def _idempotent(self, msg, text, unixfrom=False): - eq = self.ndiffAssertEqual - s = StringIO() - g = Generator(s, maxheaderlen=0) - g.flatten(msg, unixfrom=unixfrom) - eq(text, s.getvalue()) - - def test_parse_text_message(self): - eq = self.assertEqual - msg, text = self._msgobj('msg_01.txt') - eq(msg.get_content_type(), 'text/plain') - eq(msg.get_content_maintype(), 'text') - eq(msg.get_content_subtype(), 'plain') - eq(msg.get_params()[1], ('charset', 'us-ascii')) - eq(msg.get_param('charset'), 'us-ascii') - eq(msg.preamble, None) - eq(msg.epilogue, None) - self._idempotent(msg, text) - - def test_parse_untyped_message(self): - eq = self.assertEqual - msg, text = self._msgobj('msg_03.txt') - eq(msg.get_content_type(), 'text/plain') - eq(msg.get_params(), None) - eq(msg.get_param('charset'), None) - self._idempotent(msg, text) - - def test_simple_multipart(self): - msg, text = self._msgobj('msg_04.txt') - self._idempotent(msg, text) - - def test_MIME_digest(self): - msg, text = self._msgobj('msg_02.txt') - self._idempotent(msg, text) - - def test_long_header(self): - msg, text = self._msgobj('msg_27.txt') - self._idempotent(msg, text) - - def test_MIME_digest_with_part_headers(self): - msg, text = self._msgobj('msg_28.txt') - self._idempotent(msg, text) - - def test_mixed_with_image(self): - msg, text = self._msgobj('msg_06.txt') - self._idempotent(msg, text) - - def test_multipart_report(self): - msg, text = self._msgobj('msg_05.txt') - self._idempotent(msg, text) - - def test_dsn(self): - msg, text = self._msgobj('msg_16.txt') - self._idempotent(msg, text) - - def test_preamble_epilogue(self): - msg, text = self._msgobj('msg_21.txt') - self._idempotent(msg, text) - - def test_multipart_one_part(self): - msg, text = self._msgobj('msg_23.txt') - self._idempotent(msg, text) - - def test_multipart_no_parts(self): - msg, text = self._msgobj('msg_24.txt') - self._idempotent(msg, text) - - def test_no_start_boundary(self): - msg, text = self._msgobj('msg_31.txt') - self._idempotent(msg, text) - - def test_rfc2231_charset(self): - msg, text = self._msgobj('msg_32.txt') - self._idempotent(msg, text) - - def test_more_rfc2231_parameters(self): - msg, text = self._msgobj('msg_33.txt') - self._idempotent(msg, text) - - def test_text_plain_in_a_multipart_digest(self): - msg, text = self._msgobj('msg_34.txt') - self._idempotent(msg, text) - - def test_nested_multipart_mixeds(self): - msg, text = self._msgobj('msg_12a.txt') - self._idempotent(msg, text) - - def test_message_external_body_idempotent(self): - msg, text = self._msgobj('msg_36.txt') - self._idempotent(msg, text) - - def test_message_delivery_status(self): - msg, text = self._msgobj('msg_43.txt') - self._idempotent(msg, text, unixfrom=True) - - def test_message_signed_idempotent(self): - msg, text = self._msgobj('msg_45.txt') - self._idempotent(msg, text) - - def test_content_type(self): - eq = self.assertEqual - unless = self.assertTrue - # Get a message object and reset the seek pointer for other tests - msg, text = self._msgobj('msg_05.txt') - eq(msg.get_content_type(), 'multipart/report') - # Test the Content-Type: parameters - params = {} - for pk, pv in msg.get_params(): - params[pk] = pv - eq(params['report-type'], 'delivery-status') - eq(params['boundary'], 'D1690A7AC1.996856090/mail.example.com') - eq(msg.preamble, 'This is a MIME-encapsulated message.' + self.linesep) - eq(msg.epilogue, self.linesep) - eq(len(msg.get_payload()), 3) - # Make sure the subparts are what we expect - msg1 = msg.get_payload(0) - eq(msg1.get_content_type(), 'text/plain') - eq(msg1.get_payload(), 'Yadda yadda yadda' + self.linesep) - msg2 = msg.get_payload(1) - eq(msg2.get_content_type(), 'text/plain') - eq(msg2.get_payload(), 'Yadda yadda yadda' + self.linesep) - msg3 = msg.get_payload(2) - eq(msg3.get_content_type(), 'message/rfc822') - self.assertTrue(isinstance(msg3, Message)) - payload = msg3.get_payload() - unless(isinstance(payload, list)) - eq(len(payload), 1) - msg4 = payload[0] - unless(isinstance(msg4, Message)) - eq(msg4.get_payload(), 'Yadda yadda yadda' + self.linesep) - - def test_parser(self): - eq = self.assertEqual - unless = self.assertTrue - msg, text = self._msgobj('msg_06.txt') - # Check some of the outer headers - eq(msg.get_content_type(), 'message/rfc822') - # Make sure the payload is a list of exactly one sub-Message, and that - # that submessage has a type of text/plain - payload = msg.get_payload() - unless(isinstance(payload, list)) - eq(len(payload), 1) - msg1 = payload[0] - self.assertTrue(isinstance(msg1, Message)) - eq(msg1.get_content_type(), 'text/plain') - self.assertTrue(isinstance(msg1.get_payload(), str)) - eq(msg1.get_payload(), self.linesep) - - - -# Test various other bits of the package's functionality -class TestMiscellaneous(TestEmailBase): - def test_message_from_string(self): - with openfile('msg_01.txt') as fp: - text = fp.read() - msg = email.message_from_string(text) - s = StringIO() - # Don't wrap/continue long headers since we're trying to test - # idempotency. - g = Generator(s, maxheaderlen=0) - g.flatten(msg) - self.assertEqual(text, s.getvalue()) - - def test_message_from_file(self): - with openfile('msg_01.txt') as fp: - text = fp.read() - fp.seek(0) - msg = email.message_from_file(fp) - s = StringIO() - # Don't wrap/continue long headers since we're trying to test - # idempotency. - g = Generator(s, maxheaderlen=0) - g.flatten(msg) - self.assertEqual(text, s.getvalue()) - - def test_message_from_string_with_class(self): - unless = self.assertTrue - with openfile('msg_01.txt') as fp: - text = fp.read() - - # Create a subclass - class MyMessage(Message): - pass - - msg = email.message_from_string(text, MyMessage) - unless(isinstance(msg, MyMessage)) - # Try something more complicated - with openfile('msg_02.txt') as fp: - text = fp.read() - msg = email.message_from_string(text, MyMessage) - for subpart in msg.walk(): - unless(isinstance(subpart, MyMessage)) - - def test_message_from_file_with_class(self): - unless = self.assertTrue - # Create a subclass - class MyMessage(Message): - pass - - with openfile('msg_01.txt') as fp: - msg = email.message_from_file(fp, MyMessage) - unless(isinstance(msg, MyMessage)) - # Try something more complicated - with openfile('msg_02.txt') as fp: - msg = email.message_from_file(fp, MyMessage) - for subpart in msg.walk(): - unless(isinstance(subpart, MyMessage)) - - def test__all__(self): - module = __import__('email') - self.assertEqual(sorted(module.__all__), [ - 'base64mime', 'charset', 'encoders', 'errors', 'feedparser', - 'generator', 'header', 'iterators', 'message', - 'message_from_binary_file', 'message_from_bytes', - 'message_from_file', 'message_from_string', 'mime', 'parser', - 'quoprimime', 'utils', - ]) - - def test_formatdate(self): - now = time.time() - self.assertEqual(utils.parsedate(utils.formatdate(now))[:6], - time.gmtime(now)[:6]) - - def test_formatdate_localtime(self): - now = time.time() - self.assertEqual( - utils.parsedate(utils.formatdate(now, localtime=True))[:6], - time.localtime(now)[:6]) - - def test_formatdate_usegmt(self): - now = time.time() - self.assertEqual( - utils.formatdate(now, localtime=False), - time.strftime('%a, %d %b %Y %H:%M:%S -0000', time.gmtime(now))) - self.assertEqual( - utils.formatdate(now, localtime=False, usegmt=True), - time.strftime('%a, %d %b %Y %H:%M:%S GMT', time.gmtime(now))) - - def test_parsedate_none(self): - self.assertEqual(utils.parsedate(''), None) - - def test_parsedate_compact(self): - # The FWS after the comma is optional - self.assertEqual(utils.parsedate('Wed,3 Apr 2002 14:58:26 +0800'), - utils.parsedate('Wed, 3 Apr 2002 14:58:26 +0800')) - - def test_parsedate_no_dayofweek(self): - eq = self.assertEqual - eq(utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'), - (2003, 2, 25, 13, 47, 26, 0, 1, -1, -28800)) - - def test_parsedate_compact_no_dayofweek(self): - eq = self.assertEqual - eq(utils.parsedate_tz('5 Feb 2003 13:47:26 -0800'), - (2003, 2, 5, 13, 47, 26, 0, 1, -1, -28800)) - - def test_parsedate_no_space_before_positive_offset(self): - self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26+0800'), - (2002, 4, 3, 14, 58, 26, 0, 1, -1, 28800)) - - def test_parsedate_no_space_before_negative_offset(self): - # Issue 1155362: we already handled '+' for this case. - self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26-0800'), - (2002, 4, 3, 14, 58, 26, 0, 1, -1, -28800)) - - - def test_parsedate_acceptable_to_time_functions(self): - eq = self.assertEqual - timetup = utils.parsedate('5 Feb 2003 13:47:26 -0800') - t = int(time.mktime(timetup)) - eq(time.localtime(t)[:6], timetup[:6]) - eq(int(time.strftime('%Y', timetup)), 2003) - timetup = utils.parsedate_tz('5 Feb 2003 13:47:26 -0800') - t = int(time.mktime(timetup[:9])) - eq(time.localtime(t)[:6], timetup[:6]) - eq(int(time.strftime('%Y', timetup[:9])), 2003) - - def test_mktime_tz(self): - self.assertEqual(utils.mktime_tz((1970, 1, 1, 0, 0, 0, - -1, -1, -1, 0)), 0) - self.assertEqual(utils.mktime_tz((1970, 1, 1, 0, 0, 0, - -1, -1, -1, 1234)), -1234) - - def test_parsedate_y2k(self): - """Test for parsing a date with a two-digit year. - - Parsing a date with a two-digit year should return the correct - four-digit year. RFC822 allows two-digit years, but RFC2822 (which - obsoletes RFC822) requires four-digit years. - - """ - self.assertEqual(utils.parsedate_tz('25 Feb 03 13:47:26 -0800'), - utils.parsedate_tz('25 Feb 2003 13:47:26 -0800')) - self.assertEqual(utils.parsedate_tz('25 Feb 71 13:47:26 -0800'), - utils.parsedate_tz('25 Feb 1971 13:47:26 -0800')) - - def test_parseaddr_empty(self): - self.assertEqual(utils.parseaddr('<>'), ('', '')) - self.assertEqual(utils.formataddr(utils.parseaddr('<>')), '') - - def test_noquote_dump(self): - self.assertEqual( - utils.formataddr(('A Silly Person', 'person@dom.ain')), - 'A Silly Person <person@dom.ain>') - - def test_escape_dump(self): - self.assertEqual( - utils.formataddr(('A (Very) Silly Person', 'person@dom.ain')), - r'"A \(Very\) Silly Person" <person@dom.ain>') - a = r'A \(Special\) Person' - b = 'person@dom.ain' - self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b)) - - def test_escape_backslashes(self): - self.assertEqual( - utils.formataddr(('Arthur \Backslash\ Foobar', 'person@dom.ain')), - r'"Arthur \\Backslash\\ Foobar" <person@dom.ain>') - a = r'Arthur \Backslash\ Foobar' - b = 'person@dom.ain' - self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b)) - - def test_name_with_dot(self): - x = 'John X. Doe <jxd@example.com>' - y = '"John X. Doe" <jxd@example.com>' - a, b = ('John X. Doe', 'jxd@example.com') - self.assertEqual(utils.parseaddr(x), (a, b)) - self.assertEqual(utils.parseaddr(y), (a, b)) - # formataddr() quotes the name if there's a dot in it - self.assertEqual(utils.formataddr((a, b)), y) - - def test_parseaddr_preserves_quoted_pairs_in_addresses(self): - # issue 10005. Note that in the third test the second pair of - # backslashes is not actually a quoted pair because it is not inside a - # comment or quoted string: the address being parsed has a quoted - # string containing a quoted backslash, followed by 'example' and two - # backslashes, followed by another quoted string containing a space and - # the word 'example'. parseaddr copies those two backslashes - # literally. Per rfc5322 this is not technically correct since a \ may - # not appear in an address outside of a quoted string. It is probably - # a sensible Postel interpretation, though. - eq = self.assertEqual - eq(utils.parseaddr('""example" example"@example.com'), - ('', '""example" example"@example.com')) - eq(utils.parseaddr('"\\"example\\" example"@example.com'), - ('', '"\\"example\\" example"@example.com')) - eq(utils.parseaddr('"\\\\"example\\\\" example"@example.com'), - ('', '"\\\\"example\\\\" example"@example.com')) - - def test_parseaddr_preserves_spaces_in_local_part(self): - # issue 9286. A normal RFC5322 local part should not contain any - # folding white space, but legacy local parts can (they are a sequence - # of atoms, not dotatoms). On the other hand we strip whitespace from - # before the @ and around dots, on the assumption that the whitespace - # around the punctuation is a mistake in what would otherwise be - # an RFC5322 local part. Leading whitespace is, usual, stripped as well. - self.assertEqual(('', "merwok wok@xample.com"), - utils.parseaddr("merwok wok@xample.com")) - self.assertEqual(('', "merwok wok@xample.com"), - utils.parseaddr("merwok wok@xample.com")) - self.assertEqual(('', "merwok wok@xample.com"), - utils.parseaddr(" merwok wok @xample.com")) - self.assertEqual(('', 'merwok"wok" wok@xample.com'), - utils.parseaddr('merwok"wok" wok@xample.com')) - self.assertEqual(('', 'merwok.wok.wok@xample.com'), - utils.parseaddr('merwok. wok . wok@xample.com')) - - def test_multiline_from_comment(self): - x = """\ -Foo -\tBar <foo@example.com>""" - self.assertEqual(utils.parseaddr(x), ('Foo Bar', 'foo@example.com')) - - def test_quote_dump(self): - self.assertEqual( - utils.formataddr(('A Silly; Person', 'person@dom.ain')), - r'"A Silly; Person" <person@dom.ain>') - - def test_charset_richcomparisons(self): - eq = self.assertEqual - ne = self.assertNotEqual - cset1 = Charset() - cset2 = Charset() - eq(cset1, 'us-ascii') - eq(cset1, 'US-ASCII') - eq(cset1, 'Us-AsCiI') - eq('us-ascii', cset1) - eq('US-ASCII', cset1) - eq('Us-AsCiI', cset1) - ne(cset1, 'usascii') - ne(cset1, 'USASCII') - ne(cset1, 'UsAsCiI') - ne('usascii', cset1) - ne('USASCII', cset1) - ne('UsAsCiI', cset1) - eq(cset1, cset2) - eq(cset2, cset1) - - def test_getaddresses(self): - eq = self.assertEqual - eq(utils.getaddresses(['aperson@dom.ain (Al Person)', - 'Bud Person <bperson@dom.ain>']), - [('Al Person', 'aperson@dom.ain'), - ('Bud Person', 'bperson@dom.ain')]) - - def test_getaddresses_nasty(self): - eq = self.assertEqual - eq(utils.getaddresses(['foo: ;']), [('', '')]) - eq(utils.getaddresses( - ['[]*-- =~$']), - [('', ''), ('', ''), ('', '*--')]) - eq(utils.getaddresses( - ['foo: ;', '"Jason R. Mastaler" <jason@dom.ain>']), - [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')]) - - def test_getaddresses_embedded_comment(self): - """Test proper handling of a nested comment""" - eq = self.assertEqual - addrs = utils.getaddresses(['User ((nested comment)) <foo@bar.com>']) - eq(addrs[0][1], 'foo@bar.com') - - def test_utils_quote_unquote(self): - eq = self.assertEqual - msg = Message() - msg.add_header('content-disposition', 'attachment', - filename='foo\\wacky"name') - eq(msg.get_filename(), 'foo\\wacky"name') - - def test_get_body_encoding_with_bogus_charset(self): - charset = Charset('not a charset') - self.assertEqual(charset.get_body_encoding(), 'base64') - - def test_get_body_encoding_with_uppercase_charset(self): - eq = self.assertEqual - msg = Message() - msg['Content-Type'] = 'text/plain; charset=UTF-8' - eq(msg['content-type'], 'text/plain; charset=UTF-8') - charsets = msg.get_charsets() - eq(len(charsets), 1) - eq(charsets[0], 'utf-8') - charset = Charset(charsets[0]) - eq(charset.get_body_encoding(), 'base64') - msg.set_payload(b'hello world', charset=charset) - eq(msg.get_payload(), 'aGVsbG8gd29ybGQ=\n') - eq(msg.get_payload(decode=True), b'hello world') - eq(msg['content-transfer-encoding'], 'base64') - # Try another one - msg = Message() - msg['Content-Type'] = 'text/plain; charset="US-ASCII"' - charsets = msg.get_charsets() - eq(len(charsets), 1) - eq(charsets[0], 'us-ascii') - charset = Charset(charsets[0]) - eq(charset.get_body_encoding(), encoders.encode_7or8bit) - msg.set_payload('hello world', charset=charset) - eq(msg.get_payload(), 'hello world') - eq(msg['content-transfer-encoding'], '7bit') - - def test_charsets_case_insensitive(self): - lc = Charset('us-ascii') - uc = Charset('US-ASCII') - self.assertEqual(lc.get_body_encoding(), uc.get_body_encoding()) - - def test_partial_falls_inside_message_delivery_status(self): - eq = self.ndiffAssertEqual - # The Parser interface provides chunks of data to FeedParser in 8192 - # byte gulps. SF bug #1076485 found one of those chunks inside - # message/delivery-status header block, which triggered an - # unreadline() of NeedMoreData. - msg = self._msgobj('msg_43.txt') - sfp = StringIO() - iterators._structure(msg, sfp) - eq(sfp.getvalue(), """\ -multipart/report - text/plain - message/delivery-status - text/plain - text/plain - text/plain - text/plain - text/plain - text/plain - text/plain - text/plain - text/plain - text/plain - text/plain - text/plain - text/plain - text/plain - text/plain - text/plain - text/plain - text/plain - text/plain - text/plain - text/plain - text/plain - text/plain - text/plain - text/plain - text/plain - text/rfc822-headers -""") - - def test_make_msgid_domain(self): - self.assertEqual( - email.utils.make_msgid(domain='testdomain-string')[-19:], - '@testdomain-string>') - - -# Test the iterator/generators -class TestIterators(TestEmailBase): - def test_body_line_iterator(self): - eq = self.assertEqual - neq = self.ndiffAssertEqual - # First a simple non-multipart message - msg = self._msgobj('msg_01.txt') - it = iterators.body_line_iterator(msg) - lines = list(it) - eq(len(lines), 6) - neq(EMPTYSTRING.join(lines), msg.get_payload()) - # Now a more complicated multipart - msg = self._msgobj('msg_02.txt') - it = iterators.body_line_iterator(msg) - lines = list(it) - eq(len(lines), 43) - with openfile('msg_19.txt') as fp: - neq(EMPTYSTRING.join(lines), fp.read()) - - def test_typed_subpart_iterator(self): - eq = self.assertEqual - msg = self._msgobj('msg_04.txt') - it = iterators.typed_subpart_iterator(msg, 'text') - lines = [] - subparts = 0 - for subpart in it: - subparts += 1 - lines.append(subpart.get_payload()) - eq(subparts, 2) - eq(EMPTYSTRING.join(lines), """\ -a simple kind of mirror -to reflect upon our own -a simple kind of mirror -to reflect upon our own -""") - - def test_typed_subpart_iterator_default_type(self): - eq = self.assertEqual - msg = self._msgobj('msg_03.txt') - it = iterators.typed_subpart_iterator(msg, 'text', 'plain') - lines = [] - subparts = 0 - for subpart in it: - subparts += 1 - lines.append(subpart.get_payload()) - eq(subparts, 1) - eq(EMPTYSTRING.join(lines), """\ - -Hi, - -Do you like this message? - --Me -""") - - def test_pushCR_LF(self): - '''FeedParser BufferedSubFile.push() assumed it received complete - line endings. A CR ending one push() followed by a LF starting - the next push() added an empty line. - ''' - imt = [ - ("a\r \n", 2), - ("b", 0), - ("c\n", 1), - ("", 0), - ("d\r\n", 1), - ("e\r", 0), - ("\nf", 1), - ("\r\n", 1), - ] - from email.feedparser import BufferedSubFile, NeedMoreData - bsf = BufferedSubFile() - om = [] - nt = 0 - for il, n in imt: - bsf.push(il) - nt += n - n1 = 0 - while True: - ol = bsf.readline() - if ol == NeedMoreData: - break - om.append(ol) - n1 += 1 - self.assertTrue(n == n1) - self.assertTrue(len(om) == nt) - self.assertTrue(''.join([il for il, n in imt]) == ''.join(om)) - - - -class TestParsers(TestEmailBase): - def test_header_parser(self): - eq = self.assertEqual - # Parse only the headers of a complex multipart MIME document - with openfile('msg_02.txt') as fp: - msg = HeaderParser().parse(fp) - eq(msg['from'], 'ppp-request@zzz.org') - eq(msg['to'], 'ppp@zzz.org') - eq(msg.get_content_type(), 'multipart/mixed') - self.assertFalse(msg.is_multipart()) - self.assertTrue(isinstance(msg.get_payload(), str)) - - def test_whitespace_continuation(self): - eq = self.assertEqual - # This message contains a line after the Subject: header that has only - # whitespace, but it is not empty! - msg = email.message_from_string("""\ -From: aperson@dom.ain -To: bperson@dom.ain -Subject: the next line has a space on it -\x20 -Date: Mon, 8 Apr 2002 15:09:19 -0400 -Message-ID: spam - -Here's the message body -""") - eq(msg['subject'], 'the next line has a space on it\n ') - eq(msg['message-id'], 'spam') - eq(msg.get_payload(), "Here's the message body\n") - - def test_whitespace_continuation_last_header(self): - eq = self.assertEqual - # Like the previous test, but the subject line is the last - # header. - msg = email.message_from_string("""\ -From: aperson@dom.ain -To: bperson@dom.ain -Date: Mon, 8 Apr 2002 15:09:19 -0400 -Message-ID: spam -Subject: the next line has a space on it -\x20 - -Here's the message body -""") - eq(msg['subject'], 'the next line has a space on it\n ') - eq(msg['message-id'], 'spam') - eq(msg.get_payload(), "Here's the message body\n") - - def test_crlf_separation(self): - eq = self.assertEqual - with openfile('msg_26.txt', newline='\n') as fp: - msg = Parser().parse(fp) - eq(len(msg.get_payload()), 2) - part1 = msg.get_payload(0) - eq(part1.get_content_type(), 'text/plain') - eq(part1.get_payload(), 'Simple email with attachment.\r\n\r\n') - part2 = msg.get_payload(1) - eq(part2.get_content_type(), 'application/riscos') - - def test_crlf_flatten(self): - # Using newline='\n' preserves the crlfs in this input file. - with openfile('msg_26.txt', newline='\n') as fp: - text = fp.read() - msg = email.message_from_string(text) - s = StringIO() - g = Generator(s) - g.flatten(msg, linesep='\r\n') - self.assertEqual(s.getvalue(), text) - - maxDiff = None - - def test_multipart_digest_with_extra_mime_headers(self): - eq = self.assertEqual - neq = self.ndiffAssertEqual - with openfile('msg_28.txt') as fp: - msg = email.message_from_file(fp) - # Structure is: - # multipart/digest - # message/rfc822 - # text/plain - # message/rfc822 - # text/plain - eq(msg.is_multipart(), 1) - eq(len(msg.get_payload()), 2) - part1 = msg.get_payload(0) - eq(part1.get_content_type(), 'message/rfc822') - eq(part1.is_multipart(), 1) - eq(len(part1.get_payload()), 1) - part1a = part1.get_payload(0) - eq(part1a.is_multipart(), 0) - eq(part1a.get_content_type(), 'text/plain') - neq(part1a.get_payload(), 'message 1\n') - # next message/rfc822 - part2 = msg.get_payload(1) - eq(part2.get_content_type(), 'message/rfc822') - eq(part2.is_multipart(), 1) - eq(len(part2.get_payload()), 1) - part2a = part2.get_payload(0) - eq(part2a.is_multipart(), 0) - eq(part2a.get_content_type(), 'text/plain') - neq(part2a.get_payload(), 'message 2\n') - - def test_three_lines(self): - # A bug report by Andrew McNamara - lines = ['From: Andrew Person <aperson@dom.ain', - 'Subject: Test', - 'Date: Tue, 20 Aug 2002 16:43:45 +1000'] - msg = email.message_from_string(NL.join(lines)) - self.assertEqual(msg['date'], 'Tue, 20 Aug 2002 16:43:45 +1000') - - def test_strip_line_feed_and_carriage_return_in_headers(self): - eq = self.assertEqual - # For [ 1002475 ] email message parser doesn't handle \r\n correctly - value1 = 'text' - value2 = 'more text' - m = 'Header: %s\r\nNext-Header: %s\r\n\r\nBody\r\n\r\n' % ( - value1, value2) - msg = email.message_from_string(m) - eq(msg.get('Header'), value1) - eq(msg.get('Next-Header'), value2) - - def test_rfc2822_header_syntax(self): - eq = self.assertEqual - m = '>From: foo\nFrom: bar\n!"#QUX;~: zoo\n\nbody' - msg = email.message_from_string(m) - eq(len(msg), 3) - eq(sorted(field for field in msg), ['!"#QUX;~', '>From', 'From']) - eq(msg.get_payload(), 'body') - - def test_rfc2822_space_not_allowed_in_header(self): - eq = self.assertEqual - m = '>From foo@example.com 11:25:53\nFrom: bar\n!"#QUX;~: zoo\n\nbody' - msg = email.message_from_string(m) - eq(len(msg.keys()), 0) - - def test_rfc2822_one_character_header(self): - eq = self.assertEqual - m = 'A: first header\nB: second header\nCC: third header\n\nbody' - msg = email.message_from_string(m) - headers = msg.keys() - headers.sort() - eq(headers, ['A', 'B', 'CC']) - eq(msg.get_payload(), 'body') - - def test_CRLFLF_at_end_of_part(self): - # issue 5610: feedparser should not eat two chars from body part ending - # with "\r\n\n". - m = ( - "From: foo@bar.com\n" - "To: baz\n" - "Mime-Version: 1.0\n" - "Content-Type: multipart/mixed; boundary=BOUNDARY\n" - "\n" - "--BOUNDARY\n" - "Content-Type: text/plain\n" - "\n" - "body ending with CRLF newline\r\n" - "\n" - "--BOUNDARY--\n" - ) - msg = email.message_from_string(m) - self.assertTrue(msg.get_payload(0).get_payload().endswith('\r\n')) - - -class Test8BitBytesHandling(unittest.TestCase): - # In Python3 all input is string, but that doesn't work if the actual input - # uses an 8bit transfer encoding. To hack around that, in email 5.1 we - # decode byte streams using the surrogateescape error handler, and - # reconvert to binary at appropriate places if we detect surrogates. This - # doesn't allow us to transform headers with 8bit bytes (they get munged), - # but it does allow us to parse and preserve them, and to decode body - # parts that use an 8bit CTE. - - bodytest_msg = textwrap.dedent("""\ - From: foo@bar.com - To: baz - Mime-Version: 1.0 - Content-Type: text/plain; charset={charset} - Content-Transfer-Encoding: {cte} - - {bodyline} - """) - - def test_known_8bit_CTE(self): - m = self.bodytest_msg.format(charset='utf-8', - cte='8bit', - bodyline='pöstal').encode('utf-8') - msg = email.message_from_bytes(m) - self.assertEqual(msg.get_payload(), "pöstal\n") - self.assertEqual(msg.get_payload(decode=True), - "pöstal\n".encode('utf-8')) - - def test_unknown_8bit_CTE(self): - m = self.bodytest_msg.format(charset='notavalidcharset', - cte='8bit', - bodyline='pöstal').encode('utf-8') - msg = email.message_from_bytes(m) - self.assertEqual(msg.get_payload(), "p\uFFFD\uFFFDstal\n") - self.assertEqual(msg.get_payload(decode=True), - "pöstal\n".encode('utf-8')) - - def test_8bit_in_quopri_body(self): - # This is non-RFC compliant data...without 'decode' the library code - # decodes the body using the charset from the headers, and because the - # source byte really is utf-8 this works. This is likely to fail - # against real dirty data (ie: produce mojibake), but the data is - # invalid anyway so it is as good a guess as any. But this means that - # this test just confirms the current behavior; that behavior is not - # necessarily the best possible behavior. With 'decode' it is - # returning the raw bytes, so that test should be of correct behavior, - # or at least produce the same result that email4 did. - m = self.bodytest_msg.format(charset='utf-8', - cte='quoted-printable', - bodyline='p=C3=B6stál').encode('utf-8') - msg = email.message_from_bytes(m) - self.assertEqual(msg.get_payload(), 'p=C3=B6stál\n') - self.assertEqual(msg.get_payload(decode=True), - 'pöstál\n'.encode('utf-8')) - - def test_invalid_8bit_in_non_8bit_cte_uses_replace(self): - # This is similar to the previous test, but proves that if the 8bit - # byte is undecodeable in the specified charset, it gets replaced - # by the unicode 'unknown' character. Again, this may or may not - # be the ideal behavior. Note that if decode=False none of the - # decoders will get involved, so this is the only test we need - # for this behavior. - m = self.bodytest_msg.format(charset='ascii', - cte='quoted-printable', - bodyline='p=C3=B6stál').encode('utf-8') - msg = email.message_from_bytes(m) - self.assertEqual(msg.get_payload(), 'p=C3=B6st\uFFFD\uFFFDl\n') - self.assertEqual(msg.get_payload(decode=True), - 'pöstál\n'.encode('utf-8')) - - def test_8bit_in_base64_body(self): - # Sticking an 8bit byte in a base64 block makes it undecodable by - # normal means, so the block is returned undecoded, but as bytes. - m = self.bodytest_msg.format(charset='utf-8', - cte='base64', - bodyline='cMO2c3RhbAá=').encode('utf-8') - msg = email.message_from_bytes(m) - self.assertEqual(msg.get_payload(decode=True), - 'cMO2c3RhbAá=\n'.encode('utf-8')) - - def test_8bit_in_uuencode_body(self): - # Sticking an 8bit byte in a uuencode block makes it undecodable by - # normal means, so the block is returned undecoded, but as bytes. - m = self.bodytest_msg.format(charset='utf-8', - cte='uuencode', - bodyline='<,.V<W1A; á ').encode('utf-8') - msg = email.message_from_bytes(m) - self.assertEqual(msg.get_payload(decode=True), - '<,.V<W1A; á \n'.encode('utf-8')) - - - headertest_headers = ( - ('From: foo@bar.com', ('From', 'foo@bar.com')), - ('To: báz', ('To', '=?unknown-8bit?q?b=C3=A1z?=')), - ('Subject: Maintenant je vous présente mon collègue, le pouf célèbre\n' - '\tJean de Baddie', - ('Subject', '=?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_' - 'coll=C3=A8gue=2C_le_pouf_c=C3=A9l=C3=A8bre?=\n' - ' =?unknown-8bit?q?_Jean_de_Baddie?=')), - ('From: göst', ('From', '=?unknown-8bit?b?Z8O2c3Q=?=')), - ) - headertest_msg = ('\n'.join([src for (src, _) in headertest_headers]) + - '\nYes, they are flying.\n').encode('utf-8') - - def test_get_8bit_header(self): - msg = email.message_from_bytes(self.headertest_msg) - self.assertEqual(str(msg.get('to')), 'b\uFFFD\uFFFDz') - self.assertEqual(str(msg['to']), 'b\uFFFD\uFFFDz') - - def test_print_8bit_headers(self): - msg = email.message_from_bytes(self.headertest_msg) - self.assertEqual(str(msg), - textwrap.dedent("""\ - From: {} - To: {} - Subject: {} - From: {} - - Yes, they are flying. - """).format(*[expected[1] for (_, expected) in - self.headertest_headers])) - - def test_values_with_8bit_headers(self): - msg = email.message_from_bytes(self.headertest_msg) - self.assertListEqual([str(x) for x in msg.values()], - ['foo@bar.com', - 'b\uFFFD\uFFFDz', - 'Maintenant je vous pr\uFFFD\uFFFDsente mon ' - 'coll\uFFFD\uFFFDgue, le pouf ' - 'c\uFFFD\uFFFDl\uFFFD\uFFFDbre\n' - '\tJean de Baddie', - "g\uFFFD\uFFFDst"]) - - def test_items_with_8bit_headers(self): - msg = email.message_from_bytes(self.headertest_msg) - self.assertListEqual([(str(x), str(y)) for (x, y) in msg.items()], - [('From', 'foo@bar.com'), - ('To', 'b\uFFFD\uFFFDz'), - ('Subject', 'Maintenant je vous ' - 'pr\uFFFD\uFFFDsente ' - 'mon coll\uFFFD\uFFFDgue, le pouf ' - 'c\uFFFD\uFFFDl\uFFFD\uFFFDbre\n' - '\tJean de Baddie'), - ('From', 'g\uFFFD\uFFFDst')]) - - def test_get_all_with_8bit_headers(self): - msg = email.message_from_bytes(self.headertest_msg) - self.assertListEqual([str(x) for x in msg.get_all('from')], - ['foo@bar.com', - 'g\uFFFD\uFFFDst']) - - def test_get_content_type_with_8bit(self): - msg = email.message_from_bytes(textwrap.dedent("""\ - Content-Type: text/pl\xA7in; charset=utf-8 - """).encode('latin-1')) - self.assertEqual(msg.get_content_type(), "text/pl\uFFFDin") - self.assertEqual(msg.get_content_maintype(), "text") - self.assertEqual(msg.get_content_subtype(), "pl\uFFFDin") - - def test_get_params_with_8bit(self): - msg = email.message_from_bytes( - 'X-Header: foo=\xa7ne; b\xa7r=two; baz=three\n'.encode('latin-1')) - self.assertEqual(msg.get_params(header='x-header'), - [('foo', '\uFFFDne'), ('b\uFFFDr', 'two'), ('baz', 'three')]) - self.assertEqual(msg.get_param('Foo', header='x-header'), '\uFFFdne') - # XXX: someday you might be able to get 'b\xa7r', for now you can't. - self.assertEqual(msg.get_param('b\xa7r', header='x-header'), None) - - def test_get_rfc2231_params_with_8bit(self): - msg = email.message_from_bytes(textwrap.dedent("""\ - Content-Type: text/plain; charset=us-ascii; - title*=us-ascii'en'This%20is%20not%20f\xa7n""" - ).encode('latin-1')) - self.assertEqual(msg.get_param('title'), - ('us-ascii', 'en', 'This is not f\uFFFDn')) - - def test_set_rfc2231_params_with_8bit(self): - msg = email.message_from_bytes(textwrap.dedent("""\ - Content-Type: text/plain; charset=us-ascii; - title*=us-ascii'en'This%20is%20not%20f\xa7n""" - ).encode('latin-1')) - msg.set_param('title', 'test') - self.assertEqual(msg.get_param('title'), 'test') - - def test_del_rfc2231_params_with_8bit(self): - msg = email.message_from_bytes(textwrap.dedent("""\ - Content-Type: text/plain; charset=us-ascii; - title*=us-ascii'en'This%20is%20not%20f\xa7n""" - ).encode('latin-1')) - msg.del_param('title') - self.assertEqual(msg.get_param('title'), None) - self.assertEqual(msg.get_content_maintype(), 'text') - - def test_get_payload_with_8bit_cte_header(self): - msg = email.message_from_bytes(textwrap.dedent("""\ - Content-Transfer-Encoding: b\xa7se64 - Content-Type: text/plain; charset=latin-1 - - payload - """).encode('latin-1')) - self.assertEqual(msg.get_payload(), 'payload\n') - self.assertEqual(msg.get_payload(decode=True), b'payload\n') - - non_latin_bin_msg = textwrap.dedent("""\ - From: foo@bar.com - To: báz - Subject: Maintenant je vous présente mon collègue, le pouf célèbre - \tJean de Baddie - Mime-Version: 1.0 - Content-Type: text/plain; charset="utf-8" - Content-Transfer-Encoding: 8bit - - Да, они летят. - """).encode('utf-8') - - def test_bytes_generator(self): - msg = email.message_from_bytes(self.non_latin_bin_msg) - out = BytesIO() - email.generator.BytesGenerator(out).flatten(msg) - self.assertEqual(out.getvalue(), self.non_latin_bin_msg) - - def test_bytes_generator_handles_None_body(self): - #Issue 11019 - msg = email.message.Message() - out = BytesIO() - email.generator.BytesGenerator(out).flatten(msg) - self.assertEqual(out.getvalue(), b"\n") - - non_latin_bin_msg_as7bit_wrapped = textwrap.dedent("""\ - From: foo@bar.com - To: =?unknown-8bit?q?b=C3=A1z?= - Subject: =?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_coll=C3=A8gue?= - =?unknown-8bit?q?=2C_le_pouf_c=C3=A9l=C3=A8bre?= - =?unknown-8bit?q?_Jean_de_Baddie?= - Mime-Version: 1.0 - Content-Type: text/plain; charset="utf-8" - Content-Transfer-Encoding: base64 - - 0JTQsCwg0L7QvdC4INC70LXRgtGP0YIuCg== - """) - - def test_generator_handles_8bit(self): - msg = email.message_from_bytes(self.non_latin_bin_msg) - out = StringIO() - email.generator.Generator(out).flatten(msg) - self.assertEqual(out.getvalue(), self.non_latin_bin_msg_as7bit_wrapped) - - def test_bytes_generator_with_unix_from(self): - # The unixfrom contains a current date, so we can't check it - # literally. Just make sure the first word is 'From' and the - # rest of the message matches the input. - msg = email.message_from_bytes(self.non_latin_bin_msg) - out = BytesIO() - email.generator.BytesGenerator(out).flatten(msg, unixfrom=True) - lines = out.getvalue().split(b'\n') - self.assertEqual(lines[0].split()[0], b'From') - self.assertEqual(b'\n'.join(lines[1:]), self.non_latin_bin_msg) - - non_latin_bin_msg_as7bit = non_latin_bin_msg_as7bit_wrapped.split('\n') - non_latin_bin_msg_as7bit[2:4] = [ - 'Subject: =?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_' - 'coll=C3=A8gue=2C_le_pouf_c=C3=A9l=C3=A8bre?='] - non_latin_bin_msg_as7bit = '\n'.join(non_latin_bin_msg_as7bit) - - def test_message_from_binary_file(self): - fn = 'test.msg' - self.addCleanup(unlink, fn) - with open(fn, 'wb') as testfile: - testfile.write(self.non_latin_bin_msg) - with open(fn, 'rb') as testfile: - m = email.parser.BytesParser().parse(testfile) - self.assertEqual(str(m), self.non_latin_bin_msg_as7bit) - - latin_bin_msg = textwrap.dedent("""\ - From: foo@bar.com - To: Dinsdale - Subject: Nudge nudge, wink, wink - Mime-Version: 1.0 - Content-Type: text/plain; charset="latin-1" - Content-Transfer-Encoding: 8bit - - oh là là, know what I mean, know what I mean? - """).encode('latin-1') - - latin_bin_msg_as7bit = textwrap.dedent("""\ - From: foo@bar.com - To: Dinsdale - Subject: Nudge nudge, wink, wink - Mime-Version: 1.0 - Content-Type: text/plain; charset="iso-8859-1" - Content-Transfer-Encoding: quoted-printable - - oh l=E0 l=E0, know what I mean, know what I mean? - """) - - def test_string_generator_reencodes_to_quopri_when_appropriate(self): - m = email.message_from_bytes(self.latin_bin_msg) - self.assertEqual(str(m), self.latin_bin_msg_as7bit) - - def test_decoded_generator_emits_unicode_body(self): - m = email.message_from_bytes(self.latin_bin_msg) - out = StringIO() - email.generator.DecodedGenerator(out).flatten(m) - #DecodedHeader output contains an extra blank line compared - #to the input message. RDM: not sure if this is a bug or not, - #but it is not specific to the 8bit->7bit conversion. - self.assertEqual(out.getvalue(), - self.latin_bin_msg.decode('latin-1')+'\n') - - def test_bytes_feedparser(self): - bfp = email.feedparser.BytesFeedParser() - for i in range(0, len(self.latin_bin_msg), 10): - bfp.feed(self.latin_bin_msg[i:i+10]) - m = bfp.close() - self.assertEqual(str(m), self.latin_bin_msg_as7bit) - - def test_crlf_flatten(self): - with openfile('msg_26.txt', 'rb') as fp: - text = fp.read() - msg = email.message_from_bytes(text) - s = BytesIO() - g = email.generator.BytesGenerator(s) - g.flatten(msg, linesep='\r\n') - self.assertEqual(s.getvalue(), text) - - def test_8bit_multipart(self): - # Issue 11605 - source = textwrap.dedent("""\ - Date: Fri, 18 Mar 2011 17:15:43 +0100 - To: foo@example.com - From: foodwatch-Newsletter <bar@example.com> - Subject: Aktuelles zu Japan, Klonfleisch und Smiley-System - Message-ID: <76a486bee62b0d200f33dc2ca08220ad@localhost.localdomain> - MIME-Version: 1.0 - Content-Type: multipart/alternative; - boundary="b1_76a486bee62b0d200f33dc2ca08220ad" - - --b1_76a486bee62b0d200f33dc2ca08220ad - Content-Type: text/plain; charset="utf-8" - Content-Transfer-Encoding: 8bit - - Guten Tag, , - - mit großer Betroffenheit verfolgen auch wir im foodwatch-Team die - Nachrichten aus Japan. - - - --b1_76a486bee62b0d200f33dc2ca08220ad - Content-Type: text/html; charset="utf-8" - Content-Transfer-Encoding: 8bit - - <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" - "http://www.w3.org/TR/html4/loose.dtd"> - <html lang="de"> - <head> - <title>foodwatch - Newsletter</title> - </head> - <body> - <p>mit großer Betroffenheit verfolgen auch wir im foodwatch-Team - die Nachrichten aus Japan.</p> - </body> - </html> - --b1_76a486bee62b0d200f33dc2ca08220ad-- - - """).encode('utf-8') - msg = email.message_from_bytes(source) - s = BytesIO() - g = email.generator.BytesGenerator(s) - g.flatten(msg) - self.assertEqual(s.getvalue(), source) - - def test_bytes_generator_b_encoding_linesep(self): - # Issue 14062: b encoding was tacking on an extra \n. - m = Message() - # This has enough non-ascii that it should always end up b encoded. - m['Subject'] = Header('žluťoučký kůň') - s = BytesIO() - g = email.generator.BytesGenerator(s) - g.flatten(m, linesep='\r\n') - self.assertEqual( - s.getvalue(), - b'Subject: =?utf-8?b?xb5sdcWlb3XEjWvDvSBrxa/FiA==?=\r\n\r\n') - - def test_generator_b_encoding_linesep(self): - # Since this broke in ByteGenerator, test Generator for completeness. - m = Message() - # This has enough non-ascii that it should always end up b encoded. - m['Subject'] = Header('žluťoučký kůň') - s = StringIO() - g = email.generator.Generator(s) - g.flatten(m, linesep='\r\n') - self.assertEqual( - s.getvalue(), - 'Subject: =?utf-8?b?xb5sdcWlb3XEjWvDvSBrxa/FiA==?=\r\n\r\n') - - maxDiff = None - - -class BaseTestBytesGeneratorIdempotent: - - maxDiff = None - - def _msgobj(self, filename): - with openfile(filename, 'rb') as fp: - data = fp.read() - data = self.normalize_linesep_regex.sub(self.blinesep, data) - msg = email.message_from_bytes(data) - return msg, data - - def _idempotent(self, msg, data, unixfrom=False): - b = BytesIO() - g = email.generator.BytesGenerator(b, maxheaderlen=0) - g.flatten(msg, unixfrom=unixfrom, linesep=self.linesep) - self.assertByteStringsEqual(data, b.getvalue()) - - def assertByteStringsEqual(self, str1, str2): - # Not using self.blinesep here is intentional. This way the output - # is more useful when the failure results in mixed line endings. - self.assertListEqual(str1.split(b'\n'), str2.split(b'\n')) - - -class TestBytesGeneratorIdempotentNL(BaseTestBytesGeneratorIdempotent, - TestIdempotent): - linesep = '\n' - blinesep = b'\n' - normalize_linesep_regex = re.compile(br'\r\n') - - -class TestBytesGeneratorIdempotentCRLF(BaseTestBytesGeneratorIdempotent, - TestIdempotent): - linesep = '\r\n' - blinesep = b'\r\n' - normalize_linesep_regex = re.compile(br'(?<!\r)\n') - - -class TestBase64(unittest.TestCase): - def test_len(self): - eq = self.assertEqual - eq(base64mime.header_length('hello'), - len(base64mime.body_encode(b'hello', eol=''))) - for size in range(15): - if size == 0 : bsize = 0 - elif size <= 3 : bsize = 4 - elif size <= 6 : bsize = 8 - elif size <= 9 : bsize = 12 - elif size <= 12: bsize = 16 - else : bsize = 20 - eq(base64mime.header_length('x' * size), bsize) - - def test_decode(self): - eq = self.assertEqual - eq(base64mime.decode(''), b'') - eq(base64mime.decode('aGVsbG8='), b'hello') - - def test_encode(self): - eq = self.assertEqual - eq(base64mime.body_encode(b''), b'') - eq(base64mime.body_encode(b'hello'), 'aGVsbG8=\n') - # Test the binary flag - eq(base64mime.body_encode(b'hello\n'), 'aGVsbG8K\n') - # Test the maxlinelen arg - eq(base64mime.body_encode(b'xxxx ' * 20, maxlinelen=40), """\ -eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg -eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg -eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg -eHh4eCB4eHh4IA== -""") - # Test the eol argument - eq(base64mime.body_encode(b'xxxx ' * 20, maxlinelen=40, eol='\r\n'), - """\ -eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r -eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r -eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r -eHh4eCB4eHh4IA==\r -""") - - def test_header_encode(self): - eq = self.assertEqual - he = base64mime.header_encode - eq(he('hello'), '=?iso-8859-1?b?aGVsbG8=?=') - eq(he('hello\r\nworld'), '=?iso-8859-1?b?aGVsbG8NCndvcmxk?=') - eq(he('hello\nworld'), '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?=') - # Test the charset option - eq(he('hello', charset='iso-8859-2'), '=?iso-8859-2?b?aGVsbG8=?=') - eq(he('hello\nworld'), '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?=') - - - -class TestQuopri(unittest.TestCase): - def setUp(self): - # Set of characters (as byte integers) that don't need to be encoded - # in headers. - self.hlit = list(chain( - range(ord('a'), ord('z') + 1), - range(ord('A'), ord('Z') + 1), - range(ord('0'), ord('9') + 1), - (c for c in b'!*+-/'))) - # Set of characters (as byte integers) that do need to be encoded in - # headers. - self.hnon = [c for c in range(256) if c not in self.hlit] - assert len(self.hlit) + len(self.hnon) == 256 - # Set of characters (as byte integers) that don't need to be encoded - # in bodies. - self.blit = list(range(ord(' '), ord('~') + 1)) - self.blit.append(ord('\t')) - self.blit.remove(ord('=')) - # Set of characters (as byte integers) that do need to be encoded in - # bodies. - self.bnon = [c for c in range(256) if c not in self.blit] - assert len(self.blit) + len(self.bnon) == 256 - - def test_quopri_header_check(self): - for c in self.hlit: - self.assertFalse(quoprimime.header_check(c), - 'Should not be header quopri encoded: %s' % chr(c)) - for c in self.hnon: - self.assertTrue(quoprimime.header_check(c), - 'Should be header quopri encoded: %s' % chr(c)) - - def test_quopri_body_check(self): - for c in self.blit: - self.assertFalse(quoprimime.body_check(c), - 'Should not be body quopri encoded: %s' % chr(c)) - for c in self.bnon: - self.assertTrue(quoprimime.body_check(c), - 'Should be body quopri encoded: %s' % chr(c)) - - def test_header_quopri_len(self): - eq = self.assertEqual - eq(quoprimime.header_length(b'hello'), 5) - # RFC 2047 chrome is not included in header_length(). - eq(len(quoprimime.header_encode(b'hello', charset='xxx')), - quoprimime.header_length(b'hello') + - # =?xxx?q?...?= means 10 extra characters - 10) - eq(quoprimime.header_length(b'h@e@l@l@o@'), 20) - # RFC 2047 chrome is not included in header_length(). - eq(len(quoprimime.header_encode(b'h@e@l@l@o@', charset='xxx')), - quoprimime.header_length(b'h@e@l@l@o@') + - # =?xxx?q?...?= means 10 extra characters - 10) - for c in self.hlit: - eq(quoprimime.header_length(bytes([c])), 1, - 'expected length 1 for %r' % chr(c)) - for c in self.hnon: - # Space is special; it's encoded to _ - if c == ord(' '): - continue - eq(quoprimime.header_length(bytes([c])), 3, - 'expected length 3 for %r' % chr(c)) - eq(quoprimime.header_length(b' '), 1) - - def test_body_quopri_len(self): - eq = self.assertEqual - for c in self.blit: - eq(quoprimime.body_length(bytes([c])), 1) - for c in self.bnon: - eq(quoprimime.body_length(bytes([c])), 3) - - def test_quote_unquote_idempotent(self): - for x in range(256): - c = chr(x) - self.assertEqual(quoprimime.unquote(quoprimime.quote(c)), c) - - def _test_header_encode(self, header, expected_encoded_header, charset=None): - if charset is None: - encoded_header = quoprimime.header_encode(header) - else: - encoded_header = quoprimime.header_encode(header, charset) - self.assertEqual(encoded_header, expected_encoded_header) - - def test_header_encode_null(self): - self._test_header_encode(b'', '') - - def test_header_encode_one_word(self): - self._test_header_encode(b'hello', '=?iso-8859-1?q?hello?=') - - def test_header_encode_two_lines(self): - self._test_header_encode(b'hello\nworld', - '=?iso-8859-1?q?hello=0Aworld?=') - - def test_header_encode_non_ascii(self): - self._test_header_encode(b'hello\xc7there', - '=?iso-8859-1?q?hello=C7there?=') - - def test_header_encode_alt_charset(self): - self._test_header_encode(b'hello', '=?iso-8859-2?q?hello?=', - charset='iso-8859-2') - - def _test_header_decode(self, encoded_header, expected_decoded_header): - decoded_header = quoprimime.header_decode(encoded_header) - self.assertEqual(decoded_header, expected_decoded_header) - - def test_header_decode_null(self): - self._test_header_decode('', '') - - def test_header_decode_one_word(self): - self._test_header_decode('hello', 'hello') - - def test_header_decode_two_lines(self): - self._test_header_decode('hello=0Aworld', 'hello\nworld') - - def test_header_decode_non_ascii(self): - self._test_header_decode('hello=C7there', 'hello\xc7there') - - def _test_decode(self, encoded, expected_decoded, eol=None): - if eol is None: - decoded = quoprimime.decode(encoded) - else: - decoded = quoprimime.decode(encoded, eol=eol) - self.assertEqual(decoded, expected_decoded) - - def test_decode_null_word(self): - self._test_decode('', '') - - def test_decode_null_line_null_word(self): - self._test_decode('\r\n', '\n') - - def test_decode_one_word(self): - self._test_decode('hello', 'hello') - - def test_decode_one_word_eol(self): - self._test_decode('hello', 'hello', eol='X') - - def test_decode_one_line(self): - self._test_decode('hello\r\n', 'hello\n') - - def test_decode_one_line_lf(self): - self._test_decode('hello\n', 'hello\n') - - def test_decode_one_line_cr(self): - self._test_decode('hello\r', 'hello\n') - - def test_decode_one_line_nl(self): - self._test_decode('hello\n', 'helloX', eol='X') - - def test_decode_one_line_crnl(self): - self._test_decode('hello\r\n', 'helloX', eol='X') - - def test_decode_one_line_one_word(self): - self._test_decode('hello\r\nworld', 'hello\nworld') - - def test_decode_one_line_one_word_eol(self): - self._test_decode('hello\r\nworld', 'helloXworld', eol='X') - - def test_decode_two_lines(self): - self._test_decode('hello\r\nworld\r\n', 'hello\nworld\n') - - def test_decode_two_lines_eol(self): - self._test_decode('hello\r\nworld\r\n', 'helloXworldX', eol='X') - - def test_decode_one_long_line(self): - self._test_decode('Spam' * 250, 'Spam' * 250) - - def test_decode_one_space(self): - self._test_decode(' ', '') - - def test_decode_multiple_spaces(self): - self._test_decode(' ' * 5, '') - - def test_decode_one_line_trailing_spaces(self): - self._test_decode('hello \r\n', 'hello\n') - - def test_decode_two_lines_trailing_spaces(self): - self._test_decode('hello \r\nworld \r\n', 'hello\nworld\n') - - def test_decode_quoted_word(self): - self._test_decode('=22quoted=20words=22', '"quoted words"') - - def test_decode_uppercase_quoting(self): - self._test_decode('ab=CD=EF', 'ab\xcd\xef') - - def test_decode_lowercase_quoting(self): - self._test_decode('ab=cd=ef', 'ab\xcd\xef') - - def test_decode_soft_line_break(self): - self._test_decode('soft line=\r\nbreak', 'soft linebreak') - - def test_decode_false_quoting(self): - self._test_decode('A=1,B=A ==> A+B==2', 'A=1,B=A ==> A+B==2') - - def _test_encode(self, body, expected_encoded_body, maxlinelen=None, eol=None): - kwargs = {} - if maxlinelen is None: - # Use body_encode's default. - maxlinelen = 76 - else: - kwargs['maxlinelen'] = maxlinelen - if eol is None: - # Use body_encode's default. - eol = '\n' - else: - kwargs['eol'] = eol - encoded_body = quoprimime.body_encode(body, **kwargs) - self.assertEqual(encoded_body, expected_encoded_body) - if eol == '\n' or eol == '\r\n': - # We know how to split the result back into lines, so maxlinelen - # can be checked. - for line in encoded_body.splitlines(): - self.assertLessEqual(len(line), maxlinelen) - - def test_encode_null(self): - self._test_encode('', '') - - def test_encode_null_lines(self): - self._test_encode('\n\n', '\n\n') - - def test_encode_one_line(self): - self._test_encode('hello\n', 'hello\n') - - def test_encode_one_line_crlf(self): - self._test_encode('hello\r\n', 'hello\n') - - def test_encode_one_line_eol(self): - self._test_encode('hello\n', 'hello\r\n', eol='\r\n') - - def test_encode_one_space(self): - self._test_encode(' ', '=20') - - def test_encode_one_line_one_space(self): - self._test_encode(' \n', '=20\n') - -# XXX: body_encode() expect strings, but uses ord(char) from these strings -# to index into a 256-entry list. For code points above 255, this will fail. -# Should there be a check for 8-bit only ord() values in body, or at least -# a comment about the expected input? - - def test_encode_two_lines_one_space(self): - self._test_encode(' \n \n', '=20\n=20\n') - - def test_encode_one_word_trailing_spaces(self): - self._test_encode('hello ', 'hello =20') - - def test_encode_one_line_trailing_spaces(self): - self._test_encode('hello \n', 'hello =20\n') - - def test_encode_one_word_trailing_tab(self): - self._test_encode('hello \t', 'hello =09') - - def test_encode_one_line_trailing_tab(self): - self._test_encode('hello \t\n', 'hello =09\n') - - def test_encode_trailing_space_before_maxlinelen(self): - self._test_encode('abcd \n1234', 'abcd =\n\n1234', maxlinelen=6) - - def test_encode_trailing_space_at_maxlinelen(self): - self._test_encode('abcd \n1234', 'abcd=\n=20\n1234', maxlinelen=5) - - def test_encode_trailing_space_beyond_maxlinelen(self): - self._test_encode('abcd \n1234', 'abc=\nd=20\n1234', maxlinelen=4) - - def test_encode_whitespace_lines(self): - self._test_encode(' \n' * 5, '=20\n' * 5) - - def test_encode_quoted_equals(self): - self._test_encode('a = b', 'a =3D b') - - def test_encode_one_long_string(self): - self._test_encode('x' * 100, 'x' * 75 + '=\n' + 'x' * 25) - - def test_encode_one_long_line(self): - self._test_encode('x' * 100 + '\n', 'x' * 75 + '=\n' + 'x' * 25 + '\n') - - def test_encode_one_very_long_line(self): - self._test_encode('x' * 200 + '\n', - 2 * ('x' * 75 + '=\n') + 'x' * 50 + '\n') - - def test_encode_one_long_line(self): - self._test_encode('x' * 100 + '\n', 'x' * 75 + '=\n' + 'x' * 25 + '\n') - - def test_encode_shortest_maxlinelen(self): - self._test_encode('=' * 5, '=3D=\n' * 4 + '=3D', maxlinelen=4) - - def test_encode_maxlinelen_too_small(self): - self.assertRaises(ValueError, self._test_encode, '', '', maxlinelen=3) - - def test_encode(self): - eq = self.assertEqual - eq(quoprimime.body_encode(''), '') - eq(quoprimime.body_encode('hello'), 'hello') - # Test the binary flag - eq(quoprimime.body_encode('hello\r\nworld'), 'hello\nworld') - # Test the maxlinelen arg - eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40), """\ -xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx= - xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx= -x xxxx xxxx xxxx xxxx=20""") - # Test the eol argument - eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40, eol='\r\n'), - """\ -xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=\r - xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=\r -x xxxx xxxx xxxx xxxx=20""") - eq(quoprimime.body_encode("""\ -one line - -two line"""), """\ -one line - -two line""") - - - -# Test the Charset class -class TestCharset(unittest.TestCase): - def tearDown(self): - from email import charset as CharsetModule - try: - del CharsetModule.CHARSETS['fake'] - except KeyError: - pass - - def test_codec_encodeable(self): - eq = self.assertEqual - # Make sure us-ascii = no Unicode conversion - c = Charset('us-ascii') - eq(c.header_encode('Hello World!'), 'Hello World!') - # Test 8-bit idempotency with us-ascii - s = '\xa4\xa2\xa4\xa4\xa4\xa6\xa4\xa8\xa4\xaa' - self.assertRaises(UnicodeError, c.header_encode, s) - c = Charset('utf-8') - eq(c.header_encode(s), '=?utf-8?b?wqTCosKkwqTCpMKmwqTCqMKkwqo=?=') - - def test_body_encode(self): - eq = self.assertEqual - # Try a charset with QP body encoding - c = Charset('iso-8859-1') - eq('hello w=F6rld', c.body_encode('hello w\xf6rld')) - # Try a charset with Base64 body encoding - c = Charset('utf-8') - eq('aGVsbG8gd29ybGQ=\n', c.body_encode(b'hello world')) - # Try a charset with None body encoding - c = Charset('us-ascii') - eq('hello world', c.body_encode('hello world')) - # Try the convert argument, where input codec != output codec - c = Charset('euc-jp') - # With apologies to Tokio Kikuchi ;) - # XXX FIXME -## try: -## eq('\x1b$B5FCO;~IW\x1b(B', -## c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7')) -## eq('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7', -## c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7', False)) -## except LookupError: -## # We probably don't have the Japanese codecs installed -## pass - # Testing SF bug #625509, which we have to fake, since there are no - # built-in encodings where the header encoding is QP but the body - # encoding is not. - from email import charset as CharsetModule - CharsetModule.add_charset('fake', CharsetModule.QP, None, 'utf-8') - c = Charset('fake') - eq('hello world', c.body_encode('hello world')) - - def test_unicode_charset_name(self): - charset = Charset('us-ascii') - self.assertEqual(str(charset), 'us-ascii') - self.assertRaises(errors.CharsetError, Charset, 'asc\xffii') - - - -# Test multilingual MIME headers. -class TestHeader(TestEmailBase): - def test_simple(self): - eq = self.ndiffAssertEqual - h = Header('Hello World!') - eq(h.encode(), 'Hello World!') - h.append(' Goodbye World!') - eq(h.encode(), 'Hello World! Goodbye World!') - - def test_simple_surprise(self): - eq = self.ndiffAssertEqual - h = Header('Hello World!') - eq(h.encode(), 'Hello World!') - h.append('Goodbye World!') - eq(h.encode(), 'Hello World! Goodbye World!') - - def test_header_needs_no_decoding(self): - h = 'no decoding needed' - self.assertEqual(decode_header(h), [(h, None)]) - - def test_long(self): - h = Header("I am the very model of a modern Major-General; I've information vegetable, animal, and mineral; I know the kings of England, and I quote the fights historical from Marathon to Waterloo, in order categorical; I'm very well acquainted, too, with matters mathematical; I understand equations, both the simple and quadratical; about binomial theorem I'm teeming with a lot o' news, with many cheerful facts about the square of the hypotenuse.", - maxlinelen=76) - for l in h.encode(splitchars=' ').split('\n '): - self.assertTrue(len(l) <= 76) - - def test_multilingual(self): - eq = self.ndiffAssertEqual - g = Charset("iso-8859-1") - cz = Charset("iso-8859-2") - utf8 = Charset("utf-8") - g_head = (b'Die Mieter treten hier ein werden mit einem ' - b'Foerderband komfortabel den Korridor entlang, ' - b'an s\xfcdl\xfcndischen Wandgem\xe4lden vorbei, ' - b'gegen die rotierenden Klingen bef\xf6rdert. ') - cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich ' - b'd\xf9vtipu.. ') - utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f' - '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00' - '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c' - '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067' - '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das ' - 'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder ' - 'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066' - '\u3044\u307e\u3059\u3002') - h = Header(g_head, g) - h.append(cz_head, cz) - h.append(utf8_head, utf8) - enc = h.encode(maxlinelen=76) - eq(enc, """\ -=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderband_kom?= - =?iso-8859-1?q?fortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen_Wand?= - =?iso-8859-1?q?gem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef=F6r?= - =?iso-8859-1?q?dert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hroutily?= - =?iso-8859-2?q?_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?= - =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC?= - =?utf-8?b?5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn?= - =?utf-8?b?44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFz?= - =?utf-8?b?IE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5k?= - =?utf-8?b?IGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIA=?= - =?utf-8?b?44Gj44Gm44GE44G+44GZ44CC?=""") - decoded = decode_header(enc) - eq(len(decoded), 3) - eq(decoded[0], (g_head, 'iso-8859-1')) - eq(decoded[1], (cz_head, 'iso-8859-2')) - eq(decoded[2], (utf8_head.encode('utf-8'), 'utf-8')) - ustr = str(h) - eq(ustr, - (b'Die Mieter treten hier ein werden mit einem Foerderband ' - b'komfortabel den Korridor entlang, an s\xc3\xbcdl\xc3\xbcndischen ' - b'Wandgem\xc3\xa4lden vorbei, gegen die rotierenden Klingen ' - b'bef\xc3\xb6rdert. Finan\xc4\x8dni metropole se hroutily pod ' - b'tlakem jejich d\xc5\xafvtipu.. \xe6\xad\xa3\xe7\xa2\xba\xe3\x81' - b'\xab\xe8\xa8\x80\xe3\x81\x86\xe3\x81\xa8\xe7\xbf\xbb\xe8\xa8\xb3' - b'\xe3\x81\xaf\xe3\x81\x95\xe3\x82\x8c\xe3\x81\xa6\xe3\x81\x84\xe3' - b'\x81\xbe\xe3\x81\x9b\xe3\x82\x93\xe3\x80\x82\xe4\xb8\x80\xe9\x83' - b'\xa8\xe3\x81\xaf\xe3\x83\x89\xe3\x82\xa4\xe3\x83\x84\xe8\xaa\x9e' - b'\xe3\x81\xa7\xe3\x81\x99\xe3\x81\x8c\xe3\x80\x81\xe3\x81\x82\xe3' - b'\x81\xa8\xe3\x81\xaf\xe3\x81\xa7\xe3\x81\x9f\xe3\x82\x89\xe3\x82' - b'\x81\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\xe5\xae\x9f\xe9\x9a\x9b' - b'\xe3\x81\xab\xe3\x81\xaf\xe3\x80\x8cWenn ist das Nunstuck git ' - b'und Slotermeyer? Ja! Beiherhund das Oder die Flipperwaldt ' - b'gersput.\xe3\x80\x8d\xe3\x81\xa8\xe8\xa8\x80\xe3\x81\xa3\xe3\x81' - b'\xa6\xe3\x81\x84\xe3\x81\xbe\xe3\x81\x99\xe3\x80\x82' - ).decode('utf-8')) - # Test make_header() - newh = make_header(decode_header(enc)) - eq(newh, h) - - def test_empty_header_encode(self): - h = Header() - self.assertEqual(h.encode(), '') - - def test_header_ctor_default_args(self): - eq = self.ndiffAssertEqual - h = Header() - eq(h, '') - h.append('foo', Charset('iso-8859-1')) - eq(h, 'foo') - - def test_explicit_maxlinelen(self): - eq = self.ndiffAssertEqual - hstr = ('A very long line that must get split to something other ' - 'than at the 76th character boundary to test the non-default ' - 'behavior') - h = Header(hstr) - eq(h.encode(), '''\ -A very long line that must get split to something other than at the 76th - character boundary to test the non-default behavior''') - eq(str(h), hstr) - h = Header(hstr, header_name='Subject') - eq(h.encode(), '''\ -A very long line that must get split to something other than at the - 76th character boundary to test the non-default behavior''') - eq(str(h), hstr) - h = Header(hstr, maxlinelen=1024, header_name='Subject') - eq(h.encode(), hstr) - eq(str(h), hstr) - - def test_quopri_splittable(self): - eq = self.ndiffAssertEqual - h = Header(charset='iso-8859-1', maxlinelen=20) - x = 'xxxx ' * 20 - h.append(x) - s = h.encode() - eq(s, """\ -=?iso-8859-1?q?xxx?= - =?iso-8859-1?q?x_?= - =?iso-8859-1?q?xx?= - =?iso-8859-1?q?xx?= - =?iso-8859-1?q?_x?= - =?iso-8859-1?q?xx?= - =?iso-8859-1?q?x_?= - =?iso-8859-1?q?xx?= - =?iso-8859-1?q?xx?= - =?iso-8859-1?q?_x?= - =?iso-8859-1?q?xx?= - =?iso-8859-1?q?x_?= - =?iso-8859-1?q?xx?= - =?iso-8859-1?q?xx?= - =?iso-8859-1?q?_x?= - =?iso-8859-1?q?xx?= - =?iso-8859-1?q?x_?= - =?iso-8859-1?q?xx?= - =?iso-8859-1?q?xx?= - =?iso-8859-1?q?_x?= - =?iso-8859-1?q?xx?= - =?iso-8859-1?q?x_?= - =?iso-8859-1?q?xx?= - =?iso-8859-1?q?xx?= - =?iso-8859-1?q?_x?= - =?iso-8859-1?q?xx?= - =?iso-8859-1?q?x_?= - =?iso-8859-1?q?xx?= - =?iso-8859-1?q?xx?= - =?iso-8859-1?q?_x?= - =?iso-8859-1?q?xx?= - =?iso-8859-1?q?x_?= - =?iso-8859-1?q?xx?= - =?iso-8859-1?q?xx?= - =?iso-8859-1?q?_x?= - =?iso-8859-1?q?xx?= - =?iso-8859-1?q?x_?= - =?iso-8859-1?q?xx?= - =?iso-8859-1?q?xx?= - =?iso-8859-1?q?_x?= - =?iso-8859-1?q?xx?= - =?iso-8859-1?q?x_?= - =?iso-8859-1?q?xx?= - =?iso-8859-1?q?xx?= - =?iso-8859-1?q?_x?= - =?iso-8859-1?q?xx?= - =?iso-8859-1?q?x_?= - =?iso-8859-1?q?xx?= - =?iso-8859-1?q?xx?= - =?iso-8859-1?q?_?=""") - eq(x, str(make_header(decode_header(s)))) - h = Header(charset='iso-8859-1', maxlinelen=40) - h.append('xxxx ' * 20) - s = h.encode() - eq(s, """\ -=?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xxx?= - =?iso-8859-1?q?x_xxxx_xxxx_xxxx_xxxx_?= - =?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xx?= - =?iso-8859-1?q?xx_xxxx_xxxx_xxxx_xxxx?= - =?iso-8859-1?q?_xxxx_xxxx_?=""") - eq(x, str(make_header(decode_header(s)))) - - def test_base64_splittable(self): - eq = self.ndiffAssertEqual - h = Header(charset='koi8-r', maxlinelen=20) - x = 'xxxx ' * 20 - h.append(x) - s = h.encode() - eq(s, """\ -=?koi8-r?b?eHh4?= - =?koi8-r?b?eCB4?= - =?koi8-r?b?eHh4?= - =?koi8-r?b?IHh4?= - =?koi8-r?b?eHgg?= - =?koi8-r?b?eHh4?= - =?koi8-r?b?eCB4?= - =?koi8-r?b?eHh4?= - =?koi8-r?b?IHh4?= - =?koi8-r?b?eHgg?= - =?koi8-r?b?eHh4?= - =?koi8-r?b?eCB4?= - =?koi8-r?b?eHh4?= - =?koi8-r?b?IHh4?= - =?koi8-r?b?eHgg?= - =?koi8-r?b?eHh4?= - =?koi8-r?b?eCB4?= - =?koi8-r?b?eHh4?= - =?koi8-r?b?IHh4?= - =?koi8-r?b?eHgg?= - =?koi8-r?b?eHh4?= - =?koi8-r?b?eCB4?= - =?koi8-r?b?eHh4?= - =?koi8-r?b?IHh4?= - =?koi8-r?b?eHgg?= - =?koi8-r?b?eHh4?= - =?koi8-r?b?eCB4?= - =?koi8-r?b?eHh4?= - =?koi8-r?b?IHh4?= - =?koi8-r?b?eHgg?= - =?koi8-r?b?eHh4?= - =?koi8-r?b?eCB4?= - =?koi8-r?b?eHh4?= - =?koi8-r?b?IA==?=""") - eq(x, str(make_header(decode_header(s)))) - h = Header(charset='koi8-r', maxlinelen=40) - h.append(x) - s = h.encode() - eq(s, """\ -=?koi8-r?b?eHh4eCB4eHh4IHh4eHggeHh4?= - =?koi8-r?b?eCB4eHh4IHh4eHggeHh4eCB4?= - =?koi8-r?b?eHh4IHh4eHggeHh4eCB4eHh4?= - =?koi8-r?b?IHh4eHggeHh4eCB4eHh4IHh4?= - =?koi8-r?b?eHggeHh4eCB4eHh4IHh4eHgg?= - =?koi8-r?b?eHh4eCB4eHh4IA==?=""") - eq(x, str(make_header(decode_header(s)))) - - def test_us_ascii_header(self): - eq = self.assertEqual - s = 'hello' - x = decode_header(s) - eq(x, [('hello', None)]) - h = make_header(x) - eq(s, h.encode()) - - def test_string_charset(self): - eq = self.assertEqual - h = Header() - h.append('hello', 'iso-8859-1') - eq(h, 'hello') - -## def test_unicode_error(self): -## raises = self.assertRaises -## raises(UnicodeError, Header, u'[P\xf6stal]', 'us-ascii') -## raises(UnicodeError, Header, '[P\xf6stal]', 'us-ascii') -## h = Header() -## raises(UnicodeError, h.append, u'[P\xf6stal]', 'us-ascii') -## raises(UnicodeError, h.append, '[P\xf6stal]', 'us-ascii') -## raises(UnicodeError, Header, u'\u83ca\u5730\u6642\u592b', 'iso-8859-1') - - def test_utf8_shortest(self): - eq = self.assertEqual - h = Header('p\xf6stal', 'utf-8') - eq(h.encode(), '=?utf-8?q?p=C3=B6stal?=') - h = Header('\u83ca\u5730\u6642\u592b', 'utf-8') - eq(h.encode(), '=?utf-8?b?6I+K5Zyw5pmC5aSr?=') - - def test_bad_8bit_header(self): - raises = self.assertRaises - eq = self.assertEqual - x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big' - raises(UnicodeError, Header, x) - h = Header() - raises(UnicodeError, h.append, x) - e = x.decode('utf-8', 'replace') - eq(str(Header(x, errors='replace')), e) - h.append(x, errors='replace') - eq(str(h), e) - - def test_escaped_8bit_header(self): - x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big' - e = x.decode('ascii', 'surrogateescape') - h = Header(e, charset=email.charset.UNKNOWN8BIT) - self.assertEqual(str(h), - 'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big') - self.assertEqual(email.header.decode_header(h), [(x, 'unknown-8bit')]) - - def test_header_handles_binary_unknown8bit(self): - x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big' - h = Header(x, charset=email.charset.UNKNOWN8BIT) - self.assertEqual(str(h), - 'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big') - self.assertEqual(email.header.decode_header(h), [(x, 'unknown-8bit')]) - - def test_make_header_handles_binary_unknown8bit(self): - x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big' - h = Header(x, charset=email.charset.UNKNOWN8BIT) - h2 = email.header.make_header(email.header.decode_header(h)) - self.assertEqual(str(h2), - 'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big') - self.assertEqual(email.header.decode_header(h2), [(x, 'unknown-8bit')]) - - def test_modify_returned_list_does_not_change_header(self): - h = Header('test') - chunks = email.header.decode_header(h) - chunks.append(('ascii', 'test2')) - self.assertEqual(str(h), 'test') - - def test_encoded_adjacent_nonencoded(self): - eq = self.assertEqual - h = Header() - h.append('hello', 'iso-8859-1') - h.append('world') - s = h.encode() - eq(s, '=?iso-8859-1?q?hello?= world') - h = make_header(decode_header(s)) - eq(h.encode(), s) - - def test_whitespace_eater(self): - eq = self.assertEqual - s = 'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztk=?= =?koi8-r?q?=CA?= zz.' - parts = decode_header(s) - eq(parts, [(b'Subject:', None), (b'\xf0\xd2\xcf\xd7\xc5\xd2\xcb\xc1 \xce\xc1 \xc6\xc9\xce\xc1\xcc\xd8\xce\xd9\xca', 'koi8-r'), (b'zz.', None)]) - hdr = make_header(parts) - eq(hdr.encode(), - 'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztnK?= zz.') - - def test_broken_base64_header(self): - raises = self.assertRaises - s = 'Subject: =?EUC-KR?B?CSixpLDtKSC/7Liuvsax4iC6uLmwMcijIKHaILzSwd/H0SC8+LCjwLsgv7W/+Mj3I ?=' - raises(errors.HeaderParseError, decode_header, s) - - def test_shift_jis_charset(self): - h = Header('文', charset='shift_jis') - self.assertEqual(h.encode(), '=?iso-2022-jp?b?GyRCSjgbKEI=?=') - - def test_flatten_header_with_no_value(self): - # Issue 11401 (regression from email 4.x) Note that the space after - # the header doesn't reflect the input, but this is also the way - # email 4.x behaved. At some point it would be nice to fix that. - msg = email.message_from_string("EmptyHeader:") - self.assertEqual(str(msg), "EmptyHeader: \n\n") - - def test_encode_preserves_leading_ws_on_value(self): - msg = Message() - msg['SomeHeader'] = ' value with leading ws' - self.assertEqual(str(msg), "SomeHeader: value with leading ws\n\n") - - - -# Test RFC 2231 header parameters (en/de)coding -class TestRFC2231(TestEmailBase): - def test_get_param(self): - eq = self.assertEqual - msg = self._msgobj('msg_29.txt') - eq(msg.get_param('title'), - ('us-ascii', 'en', 'This is even more ***fun*** isn\'t it!')) - eq(msg.get_param('title', unquote=False), - ('us-ascii', 'en', '"This is even more ***fun*** isn\'t it!"')) - - def test_set_param(self): - eq = self.ndiffAssertEqual - msg = Message() - msg.set_param('title', 'This is even more ***fun*** isn\'t it!', - charset='us-ascii') - eq(msg.get_param('title'), - ('us-ascii', '', 'This is even more ***fun*** isn\'t it!')) - msg.set_param('title', 'This is even more ***fun*** isn\'t it!', - charset='us-ascii', language='en') - eq(msg.get_param('title'), - ('us-ascii', 'en', 'This is even more ***fun*** isn\'t it!')) - msg = self._msgobj('msg_01.txt') - msg.set_param('title', 'This is even more ***fun*** isn\'t it!', - charset='us-ascii', language='en') - eq(msg.as_string(maxheaderlen=78), """\ -Return-Path: <bbb@zzz.org> -Delivered-To: bbb@zzz.org -Received: by mail.zzz.org (Postfix, from userid 889) -\tid 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT) -MIME-Version: 1.0 -Content-Transfer-Encoding: 7bit -Message-ID: <15090.61304.110929.45684@aaa.zzz.org> -From: bbb@ddd.com (John X. Doe) -To: bbb@zzz.org -Subject: This is a test message -Date: Fri, 4 May 2001 14:05:44 -0400 -Content-Type: text/plain; charset=us-ascii; - title*=us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21 - - -Hi, - -Do you like this message? - --Me -""") - - def test_set_param_requote(self): - msg = Message() - msg.set_param('title', 'foo') - self.assertEqual(msg['content-type'], 'text/plain; title="foo"') - msg.set_param('title', 'bar', requote=False) - self.assertEqual(msg['content-type'], 'text/plain; title=bar') - # tspecial is still quoted. - msg.set_param('title', "(bar)bell", requote=False) - self.assertEqual(msg['content-type'], 'text/plain; title="(bar)bell"') - - def test_del_param(self): - eq = self.ndiffAssertEqual - msg = self._msgobj('msg_01.txt') - msg.set_param('foo', 'bar', charset='us-ascii', language='en') - msg.set_param('title', 'This is even more ***fun*** isn\'t it!', - charset='us-ascii', language='en') - msg.del_param('foo', header='Content-Type') - eq(msg.as_string(maxheaderlen=78), """\ -Return-Path: <bbb@zzz.org> -Delivered-To: bbb@zzz.org -Received: by mail.zzz.org (Postfix, from userid 889) -\tid 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT) -MIME-Version: 1.0 -Content-Transfer-Encoding: 7bit -Message-ID: <15090.61304.110929.45684@aaa.zzz.org> -From: bbb@ddd.com (John X. Doe) -To: bbb@zzz.org -Subject: This is a test message -Date: Fri, 4 May 2001 14:05:44 -0400 -Content-Type: text/plain; charset="us-ascii"; - title*=us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21 - - -Hi, - -Do you like this message? - --Me -""") - - def test_rfc2231_get_content_charset(self): - eq = self.assertEqual - msg = self._msgobj('msg_32.txt') - eq(msg.get_content_charset(), 'us-ascii') - - def test_rfc2231_parse_rfc_quoting(self): - m = textwrap.dedent('''\ - Content-Disposition: inline; - \tfilename*0*=''This%20is%20even%20more%20; - \tfilename*1*=%2A%2A%2Afun%2A%2A%2A%20; - \tfilename*2="is it not.pdf" - - ''') - msg = email.message_from_string(m) - self.assertEqual(msg.get_filename(), - 'This is even more ***fun*** is it not.pdf') - self.assertEqual(m, msg.as_string()) - - def test_rfc2231_parse_extra_quoting(self): - m = textwrap.dedent('''\ - Content-Disposition: inline; - \tfilename*0*="''This%20is%20even%20more%20"; - \tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20"; - \tfilename*2="is it not.pdf" - - ''') - msg = email.message_from_string(m) - self.assertEqual(msg.get_filename(), - 'This is even more ***fun*** is it not.pdf') - self.assertEqual(m, msg.as_string()) - - def test_rfc2231_no_language_or_charset(self): - m = '''\ -Content-Transfer-Encoding: 8bit -Content-Disposition: inline; filename="file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm" -Content-Type: text/html; NAME*0=file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEM; NAME*1=P_nsmail.htm - -''' - msg = email.message_from_string(m) - param = msg.get_param('NAME') - self.assertFalse(isinstance(param, tuple)) - self.assertEqual( - param, - 'file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm') - - def test_rfc2231_no_language_or_charset_in_filename(self): - m = '''\ -Content-Disposition: inline; -\tfilename*0*="''This%20is%20even%20more%20"; -\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20"; -\tfilename*2="is it not.pdf" - -''' - msg = email.message_from_string(m) - self.assertEqual(msg.get_filename(), - 'This is even more ***fun*** is it not.pdf') - - def test_rfc2231_no_language_or_charset_in_filename_encoded(self): - m = '''\ -Content-Disposition: inline; -\tfilename*0*="''This%20is%20even%20more%20"; -\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20"; -\tfilename*2="is it not.pdf" - -''' - msg = email.message_from_string(m) - self.assertEqual(msg.get_filename(), - 'This is even more ***fun*** is it not.pdf') - - def test_rfc2231_partly_encoded(self): - m = '''\ -Content-Disposition: inline; -\tfilename*0="''This%20is%20even%20more%20"; -\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20"; -\tfilename*2="is it not.pdf" - -''' - msg = email.message_from_string(m) - self.assertEqual( - msg.get_filename(), - 'This%20is%20even%20more%20***fun*** is it not.pdf') - - def test_rfc2231_partly_nonencoded(self): - m = '''\ -Content-Disposition: inline; -\tfilename*0="This%20is%20even%20more%20"; -\tfilename*1="%2A%2A%2Afun%2A%2A%2A%20"; -\tfilename*2="is it not.pdf" - -''' - msg = email.message_from_string(m) - self.assertEqual( - msg.get_filename(), - 'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20is it not.pdf') - - def test_rfc2231_no_language_or_charset_in_boundary(self): - m = '''\ -Content-Type: multipart/alternative; -\tboundary*0*="''This%20is%20even%20more%20"; -\tboundary*1*="%2A%2A%2Afun%2A%2A%2A%20"; -\tboundary*2="is it not.pdf" - -''' - msg = email.message_from_string(m) - self.assertEqual(msg.get_boundary(), - 'This is even more ***fun*** is it not.pdf') - - def test_rfc2231_no_language_or_charset_in_charset(self): - # This is a nonsensical charset value, but tests the code anyway - m = '''\ -Content-Type: text/plain; -\tcharset*0*="This%20is%20even%20more%20"; -\tcharset*1*="%2A%2A%2Afun%2A%2A%2A%20"; -\tcharset*2="is it not.pdf" - -''' - msg = email.message_from_string(m) - self.assertEqual(msg.get_content_charset(), - 'this is even more ***fun*** is it not.pdf') - - def test_rfc2231_bad_encoding_in_filename(self): - m = '''\ -Content-Disposition: inline; -\tfilename*0*="bogus'xx'This%20is%20even%20more%20"; -\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20"; -\tfilename*2="is it not.pdf" - -''' - msg = email.message_from_string(m) - self.assertEqual(msg.get_filename(), - 'This is even more ***fun*** is it not.pdf') - - def test_rfc2231_bad_encoding_in_charset(self): - m = """\ -Content-Type: text/plain; charset*=bogus''utf-8%E2%80%9D - -""" - msg = email.message_from_string(m) - # This should return None because non-ascii characters in the charset - # are not allowed. - self.assertEqual(msg.get_content_charset(), None) - - def test_rfc2231_bad_character_in_charset(self): - m = """\ -Content-Type: text/plain; charset*=ascii''utf-8%E2%80%9D - -""" - msg = email.message_from_string(m) - # This should return None because non-ascii characters in the charset - # are not allowed. - self.assertEqual(msg.get_content_charset(), None) - - def test_rfc2231_bad_character_in_filename(self): - m = '''\ -Content-Disposition: inline; -\tfilename*0*="ascii'xx'This%20is%20even%20more%20"; -\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20"; -\tfilename*2*="is it not.pdf%E2" - -''' - msg = email.message_from_string(m) - self.assertEqual(msg.get_filename(), - 'This is even more ***fun*** is it not.pdf\ufffd') - - def test_rfc2231_unknown_encoding(self): - m = """\ -Content-Transfer-Encoding: 8bit -Content-Disposition: inline; filename*=X-UNKNOWN''myfile.txt - -""" - msg = email.message_from_string(m) - self.assertEqual(msg.get_filename(), 'myfile.txt') - - def test_rfc2231_single_tick_in_filename_extended(self): - eq = self.assertEqual - m = """\ -Content-Type: application/x-foo; -\tname*0*=\"Frank's\"; name*1*=\" Document\" - -""" - msg = email.message_from_string(m) - charset, language, s = msg.get_param('name') - eq(charset, None) - eq(language, None) - eq(s, "Frank's Document") - - def test_rfc2231_single_tick_in_filename(self): - m = """\ -Content-Type: application/x-foo; name*0=\"Frank's\"; name*1=\" Document\" - -""" - msg = email.message_from_string(m) - param = msg.get_param('name') - self.assertFalse(isinstance(param, tuple)) - self.assertEqual(param, "Frank's Document") - - def test_rfc2231_tick_attack_extended(self): - eq = self.assertEqual - m = """\ -Content-Type: application/x-foo; -\tname*0*=\"us-ascii'en-us'Frank's\"; name*1*=\" Document\" - -""" - msg = email.message_from_string(m) - charset, language, s = msg.get_param('name') - eq(charset, 'us-ascii') - eq(language, 'en-us') - eq(s, "Frank's Document") - - def test_rfc2231_tick_attack(self): - m = """\ -Content-Type: application/x-foo; -\tname*0=\"us-ascii'en-us'Frank's\"; name*1=\" Document\" - -""" - msg = email.message_from_string(m) - param = msg.get_param('name') - self.assertFalse(isinstance(param, tuple)) - self.assertEqual(param, "us-ascii'en-us'Frank's Document") - - def test_rfc2231_no_extended_values(self): - eq = self.assertEqual - m = """\ -Content-Type: application/x-foo; name=\"Frank's Document\" - -""" - msg = email.message_from_string(m) - eq(msg.get_param('name'), "Frank's Document") - - def test_rfc2231_encoded_then_unencoded_segments(self): - eq = self.assertEqual - m = """\ -Content-Type: application/x-foo; -\tname*0*=\"us-ascii'en-us'My\"; -\tname*1=\" Document\"; -\tname*2*=\" For You\" - -""" - msg = email.message_from_string(m) - charset, language, s = msg.get_param('name') - eq(charset, 'us-ascii') - eq(language, 'en-us') - eq(s, 'My Document For You') - - def test_rfc2231_unencoded_then_encoded_segments(self): - eq = self.assertEqual - m = """\ -Content-Type: application/x-foo; -\tname*0=\"us-ascii'en-us'My\"; -\tname*1*=\" Document\"; -\tname*2*=\" For You\" - -""" - msg = email.message_from_string(m) - charset, language, s = msg.get_param('name') - eq(charset, 'us-ascii') - eq(language, 'en-us') - eq(s, 'My Document For You') - - - -# Tests to ensure that signed parts of an email are completely preserved, as -# required by RFC1847 section 2.1. Note that these are incomplete, because the -# email package does not currently always preserve the body. See issue 1670765. -class TestSigned(TestEmailBase): - - def _msg_and_obj(self, filename): - with openfile(findfile(filename)) as fp: - original = fp.read() - msg = email.message_from_string(original) - return original, msg - - def _signed_parts_eq(self, original, result): - # Extract the first mime part of each message - import re - repart = re.compile(r'^--([^\n]+)\n(.*?)\n--\1$', re.S | re.M) - inpart = repart.search(original).group(2) - outpart = repart.search(result).group(2) - self.assertEqual(outpart, inpart) - - def test_long_headers_as_string(self): - original, msg = self._msg_and_obj('msg_45.txt') - result = msg.as_string() - self._signed_parts_eq(original, result) - - def test_long_headers_as_string_maxheaderlen(self): - original, msg = self._msg_and_obj('msg_45.txt') - result = msg.as_string(maxheaderlen=60) - self._signed_parts_eq(original, result) - - def test_long_headers_flatten(self): - original, msg = self._msg_and_obj('msg_45.txt') - fp = StringIO() - Generator(fp).flatten(msg) - result = fp.getvalue() - self._signed_parts_eq(original, result) - - - -def _testclasses(): - mod = sys.modules[__name__] - return [getattr(mod, name) for name in dir(mod) if name.startswith('Test')] - - -def suite(): - suite = unittest.TestSuite() - for testclass in _testclasses(): - suite.addTest(unittest.makeSuite(testclass)) - return suite - - -def test_main(): - for testclass in _testclasses(): - run_unittest(testclass) - - - -if __name__ == '__main__': - unittest.main(defaultTest='suite') diff --git a/Lib/email/test/test_email_codecs.py b/Lib/email/test/test_email_codecs.py deleted file mode 100644 index ca85f57..0000000 --- a/Lib/email/test/test_email_codecs.py +++ /dev/null @@ -1,93 +0,0 @@ -# Copyright (C) 2002-2006 Python Software Foundation -# Contact: email-sig@python.org -# email package unit tests for (optional) Asian codecs - -import unittest -from test.support import run_unittest - -from email.test.test_email import TestEmailBase -from email.charset import Charset -from email.header import Header, decode_header -from email.message import Message - -# We're compatible with Python 2.3, but it doesn't have the built-in Asian -# codecs, so we have to skip all these tests. -try: - str(b'foo', 'euc-jp') -except LookupError: - raise unittest.SkipTest - - - -class TestEmailAsianCodecs(TestEmailBase): - def test_japanese_codecs(self): - eq = self.ndiffAssertEqual - jcode = "euc-jp" - gcode = "iso-8859-1" - j = Charset(jcode) - g = Charset(gcode) - h = Header("Hello World!") - jhello = str(b'\xa5\xcf\xa5\xed\xa1\xbc\xa5\xef\xa1\xbc' - b'\xa5\xeb\xa5\xc9\xa1\xaa', jcode) - ghello = str(b'Gr\xfc\xdf Gott!', gcode) - h.append(jhello, j) - h.append(ghello, g) - # BAW: This used to -- and maybe should -- fold the two iso-8859-1 - # chunks into a single encoded word. However it doesn't violate the - # standard to have them as two encoded chunks and maybe it's - # reasonable <wink> for each .append() call to result in a separate - # encoded word. - eq(h.encode(), """\ -Hello World! =?iso-2022-jp?b?GyRCJU8lbSE8JW8hPCVrJUkhKhsoQg==?= - =?iso-8859-1?q?Gr=FC=DF_Gott!?=""") - eq(decode_header(h.encode()), - [(b'Hello World!', None), - (b'\x1b$B%O%m!<%o!<%k%I!*\x1b(B', 'iso-2022-jp'), - (b'Gr\xfc\xdf Gott!', gcode)]) - subject_bytes = (b'test-ja \xa4\xd8\xc5\xea\xb9\xc6\xa4\xb5' - b'\xa4\xec\xa4\xbf\xa5\xe1\xa1\xbc\xa5\xeb\xa4\xcf\xbb\xca\xb2' - b'\xf1\xbc\xd4\xa4\xce\xbe\xb5\xc7\xa7\xa4\xf2\xc2\xd4\xa4\xc3' - b'\xa4\xc6\xa4\xa4\xa4\xde\xa4\xb9') - subject = str(subject_bytes, jcode) - h = Header(subject, j, header_name="Subject") - # test a very long header - enc = h.encode() - # TK: splitting point may differ by codec design and/or Header encoding - eq(enc , """\ -=?iso-2022-jp?b?dGVzdC1qYSAbJEIkWEVqOUYkNSRsJD8lYSE8JWskTztKGyhC?= - =?iso-2022-jp?b?GyRCMnE8VCROPjVHJyRyQlQkQyRGJCQkXiQ5GyhC?=""") - # TK: full decode comparison - eq(str(h).encode(jcode), subject_bytes) - - def test_payload_encoding_utf8(self): - jhello = str(b'\xa5\xcf\xa5\xed\xa1\xbc\xa5\xef\xa1\xbc' - b'\xa5\xeb\xa5\xc9\xa1\xaa', 'euc-jp') - msg = Message() - msg.set_payload(jhello, 'utf-8') - ustr = msg.get_payload(decode=True).decode(msg.get_content_charset()) - self.assertEqual(jhello, ustr) - - def test_payload_encoding(self): - jcode = 'euc-jp' - jhello = str(b'\xa5\xcf\xa5\xed\xa1\xbc\xa5\xef\xa1\xbc' - b'\xa5\xeb\xa5\xc9\xa1\xaa', jcode) - msg = Message() - msg.set_payload(jhello, jcode) - ustr = msg.get_payload(decode=True).decode(msg.get_content_charset()) - self.assertEqual(jhello, ustr) - - - -def suite(): - suite = unittest.TestSuite() - suite.addTest(unittest.makeSuite(TestEmailAsianCodecs)) - return suite - - -def test_main(): - run_unittest(TestEmailAsianCodecs) - - - -if __name__ == '__main__': - unittest.main(defaultTest='suite') diff --git a/Lib/email/test/test_email_torture.py b/Lib/email/test/test_email_torture.py deleted file mode 100644 index 544b1bb..0000000 --- a/Lib/email/test/test_email_torture.py +++ /dev/null @@ -1,136 +0,0 @@ -# Copyright (C) 2002-2004 Python Software Foundation -# -# A torture test of the email package. This should not be run as part of the -# standard Python test suite since it requires several meg of email messages -# collected in the wild. These source messages are not checked into the -# Python distro, but are available as part of the standalone email package at -# http://sf.net/projects/mimelib - -import sys -import os -import unittest -from io import StringIO -from types import ListType - -from email.test.test_email import TestEmailBase -from test.support import TestSkipped, run_unittest - -import email -from email import __file__ as testfile -from email.iterators import _structure - -def openfile(filename): - from os.path import join, dirname, abspath - path = abspath(join(dirname(testfile), os.pardir, 'moredata', filename)) - return open(path, 'r') - -# Prevent this test from running in the Python distro -try: - openfile('crispin-torture.txt') -except IOError: - raise TestSkipped - - - -class TortureBase(TestEmailBase): - def _msgobj(self, filename): - fp = openfile(filename) - try: - msg = email.message_from_file(fp) - finally: - fp.close() - return msg - - - -class TestCrispinTorture(TortureBase): - # Mark Crispin's torture test from the SquirrelMail project - def test_mondo_message(self): - eq = self.assertEqual - neq = self.ndiffAssertEqual - msg = self._msgobj('crispin-torture.txt') - payload = msg.get_payload() - eq(type(payload), ListType) - eq(len(payload), 12) - eq(msg.preamble, None) - eq(msg.epilogue, '\n') - # Probably the best way to verify the message is parsed correctly is to - # dump its structure and compare it against the known structure. - fp = StringIO() - _structure(msg, fp=fp) - neq(fp.getvalue(), """\ -multipart/mixed - text/plain - message/rfc822 - multipart/alternative - text/plain - multipart/mixed - text/richtext - application/andrew-inset - message/rfc822 - audio/basic - audio/basic - image/pbm - message/rfc822 - multipart/mixed - multipart/mixed - text/plain - audio/x-sun - multipart/mixed - image/gif - image/gif - application/x-be2 - application/atomicmail - audio/x-sun - message/rfc822 - multipart/mixed - text/plain - image/pgm - text/plain - message/rfc822 - multipart/mixed - text/plain - image/pbm - message/rfc822 - application/postscript - image/gif - message/rfc822 - multipart/mixed - audio/basic - audio/basic - message/rfc822 - multipart/mixed - application/postscript - text/plain - message/rfc822 - multipart/mixed - text/plain - multipart/parallel - image/gif - audio/basic - application/atomicmail - message/rfc822 - audio/x-sun -""") - - -def _testclasses(): - mod = sys.modules[__name__] - return [getattr(mod, name) for name in dir(mod) if name.startswith('Test')] - - -def suite(): - suite = unittest.TestSuite() - for testclass in _testclasses(): - suite.addTest(unittest.makeSuite(testclass)) - return suite - - -def test_main(): - for testclass in _testclasses(): - run_unittest(testclass) - - - -if __name__ == '__main__': - unittest.main(defaultTest='suite') diff --git a/Lib/email/utils.py b/Lib/email/utils.py index ac4da37..6b6d7f4 100644 --- a/Lib/email/utils.py +++ b/Lib/email/utils.py @@ -11,12 +11,14 @@ __all__ = [ 'encode_rfc2231', 'formataddr', 'formatdate', + 'format_datetime', 'getaddresses', 'make_msgid', 'mktime_tz', 'parseaddr', 'parsedate', 'parsedate_tz', + 'parsedate_to_datetime', 'unquote', ] @@ -26,6 +28,7 @@ import time import base64 import random import socket +import datetime import urllib.parse import warnings from io import StringIO @@ -34,14 +37,13 @@ from email._parseaddr import quote from email._parseaddr import AddressList as _AddressList from email._parseaddr import mktime_tz -# We need wormarounds for bugs in these methods in older Pythons (see below) -from email._parseaddr import parsedate as _parsedate -from email._parseaddr import parsedate_tz as _parsedate_tz +from email._parseaddr import parsedate, parsedate_tz, _parsedate_tz from quopri import decodestring as _qdecode # Intrapackage imports from email.encoders import _bencode, _qencode +from email.charset import Charset COMMASPACE = ', ' EMPTYSTRING = '' @@ -50,27 +52,53 @@ CRLF = '\r\n' TICK = "'" specialsre = re.compile(r'[][\\()<>@,:;".]') -escapesre = re.compile(r'[][\\()"]') +escapesre = re.compile(r'[\\"]') +# How to figure out if we are processing strings that come from a byte +# source with undecodable characters. +_has_surrogates = re.compile( + '([^\ud800-\udbff]|\A)[\udc00-\udfff]([^\udc00-\udfff]|\Z)').search + +# How to deal with a string containing bytes before handing it to the +# application through the 'normal' interface. +def _sanitize(string): + # Turn any escaped bytes into unicode 'unknown' char. + original_bytes = string.encode('ascii', 'surrogateescape') + return original_bytes.decode('ascii', 'replace') # Helpers -def formataddr(pair): +def formataddr(pair, charset='utf-8'): """The inverse of parseaddr(), this takes a 2-tuple of the form (realname, email_address) and returns the string value suitable for an RFC 2822 From, To or Cc header. If the first element of pair is false, then the second element is returned unmodified. + + Optional charset if given is the character set that is used to encode + realname in case realname is not ASCII safe. Can be an instance of str or + a Charset-like object which has a header_encode method. Default is + 'utf-8'. """ name, address = pair + # The address MUST (per RFC) be ascii, so throw a UnicodeError if it isn't. + address.encode('ascii') if name: - quotes = '' - if specialsre.search(name): - quotes = '"' - name = escapesre.sub(r'\\\g<0>', name) - return '%s%s%s <%s>' % (quotes, name, quotes, address) + try: + name.encode('ascii') + except UnicodeEncodeError: + if isinstance(charset, str): + charset = Charset(charset) + encoded_name = charset.header_encode(name) + return "%s <%s>" % (encoded_name, address) + else: + quotes = '' + if specialsre.search(name): + quotes = '"' + name = escapesre.sub(r'\\\g<0>', name) + return '%s%s%s <%s>' % (quotes, name, quotes, address) return address @@ -94,6 +122,14 @@ ecre = re.compile(r''' ''', re.VERBOSE | re.IGNORECASE) +def _format_timetuple_and_zone(timetuple, zone): + return '%s, %02d %s %04d %02d:%02d:%02d %s' % ( + ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'][timetuple[6]], + timetuple[2], + ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', + 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'][timetuple[1] - 1], + timetuple[0], timetuple[3], timetuple[4], timetuple[5], + zone) def formatdate(timeval=None, localtime=False, usegmt=False): """Returns a date string as specified by RFC 2822, e.g.: @@ -138,14 +174,25 @@ def formatdate(timeval=None, localtime=False, usegmt=False): zone = 'GMT' else: zone = '-0000' - return '%s, %02d %s %04d %02d:%02d:%02d %s' % ( - ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'][now[6]], - now[2], - ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', - 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'][now[1] - 1], - now[0], now[3], now[4], now[5], - zone) + return _format_timetuple_and_zone(now, zone) +def format_datetime(dt, usegmt=False): + """Turn a datetime into a date string as specified in RFC 2822. + + If usegmt is True, dt must be an aware datetime with an offset of zero. In + this case 'GMT' will be rendered instead of the normal +0000 required by + RFC2822. This is to support HTTP headers involving date stamps. + """ + now = dt.timetuple() + if usegmt: + if dt.tzinfo is None or dt.tzinfo != datetime.timezone.utc: + raise ValueError("usegmt option requires a UTC datetime") + zone = 'GMT' + elif dt.tzinfo is None: + zone = '-0000' + else: + zone = dt.strftime("%z") + return _format_timetuple_and_zone(now, zone) def make_msgid(idstring=None, domain=None): @@ -172,20 +219,12 @@ def make_msgid(idstring=None, domain=None): return msgid - -# These functions are in the standalone mimelib version only because they've -# subsequently been fixed in the latest Python versions. We use this to worm -# around broken older Pythons. -def parsedate(data): - if not data: - return None - return _parsedate(data) - - -def parsedate_tz(data): - if not data: - return None - return _parsedate_tz(data) +def parsedate_to_datetime(data): + *dtuple, tz = _parsedate_tz(data) + if tz is None: + return datetime.datetime(*dtuple[:6]) + return datetime.datetime(*dtuple[:6], + tzinfo=datetime.timezone(datetime.timedelta(seconds=tz))) def parseaddr(addr): @@ -304,3 +343,49 @@ def collapse_rfc2231_value(value, errors='replace', except LookupError: # charset is not a known codec. return unquote(text) + + +# +# datetime doesn't provide a localtime function yet, so provide one. Code +# adapted from the patch in issue 9527. This may not be perfect, but it is +# better than not having it. +# + +def localtime(dt=None, isdst=-1): + """Return local time as an aware datetime object. + + If called without arguments, return current time. Otherwise *dt* + argument should be a datetime instance, and it is converted to the + local time zone according to the system time zone database. If *dt* is + naive (that is, dt.tzinfo is None), it is assumed to be in local time. + In this case, a positive or zero value for *isdst* causes localtime to + presume initially that summer time (for example, Daylight Saving Time) + is or is not (respectively) in effect for the specified time. A + negative value for *isdst* causes the localtime() function to attempt + to divine whether summer time is in effect for the specified time. + + """ + if dt is None: + return datetime.datetime.now(datetime.timezone.utc).astimezone() + if dt.tzinfo is not None: + return dt.astimezone() + # We have a naive datetime. Convert to a (localtime) timetuple and pass to + # system mktime together with the isdst hint. System mktime will return + # seconds since epoch. + tm = dt.timetuple()[:-1] + (isdst,) + seconds = time.mktime(tm) + localtm = time.localtime(seconds) + try: + delta = datetime.timedelta(seconds=localtm.tm_gmtoff) + tz = datetime.timezone(delta, localtm.tm_zone) + except AttributeError: + # Compute UTC offset and compare with the value implied by tm_isdst. + # If the values match, use the zone name implied by tm_isdst. + delta = dt - datetime.datetime(*time.gmtime(seconds)[:6]) + dst = time.daylight and localtm.tm_isdst > 0 + gmtoff = -(time.altzone if dst else time.timezone) + if delta == datetime.timedelta(seconds=gmtoff): + tz = datetime.timezone(delta, time.tzname[dst]) + else: + tz = datetime.timezone(delta) + return dt.replace(tzinfo=tz) |