summary refs log tree commit diff stats
path: root/Lib/email
diff options
context:
space:
mode:
Diffstat (limited to 'Lib/email')
-rw-r--r-- Lib/email/_encoded_words.py 2
-rw-r--r-- Lib/email/_header_value_parser.py 79
-rw-r--r-- Lib/email/contentmanager.py 249
-rw-r--r-- Lib/email/encoders.py 17
-rw-r--r-- Lib/email/feedparser.py 85
-rw-r--r-- Lib/email/generator.py 13
-rw-r--r-- Lib/email/header.py 2
-rw-r--r-- Lib/email/headerregistry.py 3
-rw-r--r-- Lib/email/iterators.py 6
-rw-r--r-- Lib/email/message.py 283
-rw-r--r-- Lib/email/mime/nonmultipart.py 2
-rw-r--r-- Lib/email/mime/text.py 1
-rw-r--r-- Lib/email/parser.py 11
-rw-r--r-- Lib/email/policy.py 13
-rw-r--r-- Lib/email/quoprimime.py 1
-rw-r--r-- Lib/email/utils.py 69
16 files changed, 679 insertions, 157 deletions
diff --git a/Lib/email/_encoded_words.py b/Lib/email/_encoded_words.py
index 9e0cc75..5eaab36 100644
--- a/Lib/email/_encoded_words.py
+++ b/Lib/email/_encoded_words.py
@@ -152,7 +152,7 @@ def decode(ew):
then from the resulting bytes into unicode using the specified charset. If
the cte-decoded string does not successfully decode using the specified
character set, a defect is added to the defects list and the unknown octets
- are replaced by the unicode 'unknown' character \uFDFF.
+ are replaced by the unicode 'unknown' character \\uFFFD.
The specified charset and language are returned. The default for language,
which is rarely if ever encountered, is the empty string.
diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py
index 0369e01..a9bdf44 100644
--- a/Lib/email/_header_value_parser.py
+++ b/Lib/email/_header_value_parser.py
@@ -70,7 +70,8 @@ XXX: provide complete list of token types.
import re
import urllib # For urllib.parse.unquote
from string import hexdigits
-from collections import namedtuple, OrderedDict
+from collections import OrderedDict
+from operator import itemgetter
from email import _encoded_words as _ew
from email import errors
from email import utils
@@ -368,8 +369,7 @@ class TokenList(list):
yield (indent + ' !! invalid element in token '
'list: {!r}'.format(token))
else:
- for line in token._pp(indent+' '):
- yield line
+ yield from token._pp(indent+' ')
if self.defects:
extra = ' Defects: {}'.format(self.defects)
else:
@@ -1099,15 +1099,34 @@ class MimeParameters(TokenList):
params[name] = []
params[name].append((token.section_number, token))
for name, parts in params.items():
- parts = sorted(parts)
- # XXX: there might be more recovery we could do here if, for
- # example, this is really a case of a duplicate attribute name.
+ parts = sorted(parts, key=itemgetter(0))
+ first_param = parts[0][1]
+ charset = first_param.charset
+ # Our arbitrary error recovery is to ignore duplicate parameters,
+ # to use appearance order if there are duplicate rfc 2231 parts,
+ # and to ignore gaps. This mimics the error recovery of get_param.
+ if not first_param.extended and len(parts) > 1:
+ if parts[1][0] == 0:
+ parts[1][1].defects.append(errors.InvalidHeaderDefect(
+ 'duplicate parameter name; duplicate(s) ignored'))
+ parts = parts[:1]
+ # Else assume the *0* was missing...note that this is different
+ # from get_param, but we registered a defect for this earlier.
value_parts = []
- charset = parts[0][1].charset
- for i, (section_number, param) in enumerate(parts):
+ i = 0
+ for section_number, param in parts:
if section_number != i:
- param.defects.append(errors.InvalidHeaderDefect(
- "inconsistent multipart parameter numbering"))
+ # We could get fancier here and look for a complete
+ # duplicate extended parameter and ignore the second one
+ # seen. But we're not doing that. The old code didn't.
+ if not param.extended:
+ param.defects.append(errors.InvalidHeaderDefect(
+ 'duplicate parameter name; duplicate ignored'))
+ continue
+ else:
+ param.defects.append(errors.InvalidHeaderDefect(
+ "inconsistent RFC2231 parameter numbering"))
+ i += 1
value = param.param_value
if param.extended:
try:
@@ -1315,24 +1334,22 @@ RouteComponentMarker = ValueTerminal('@', 'route-component-marker')
# Parser
#
-"""Parse strings according to RFC822/2047/2822/5322 rules.
-
-This is a stateless parser. Each get_XXX function accepts a string and
-returns either a Terminal or a TokenList representing the RFC object named
-by the method and a string containing the remaining unparsed characters
-from the input. Thus a parser method consumes the next syntactic construct
-of a given type and returns a token representing the construct plus the
-unparsed remainder of the input string.
-
-For example, if the first element of a structured header is a 'phrase',
-then:
-
- phrase, value = get_phrase(value)
-
-returns the complete phrase from the start of the string value, plus any
-characters left in the string after the phrase is removed.
-
-"""
+# Parse strings according to RFC822/2047/2822/5322 rules.
+#
+# This is a stateless parser. Each get_XXX function accepts a string and
+# returns either a Terminal or a TokenList representing the RFC object named
+# by the method and a string containing the remaining unparsed characters
+# from the input. Thus a parser method consumes the next syntactic construct
+# of a given type and returns a token representing the construct plus the
+# unparsed remainder of the input string.
+#
+# For example, if the first element of a structured header is a 'phrase',
+# then:
+#
+# phrase, value = get_phrase(value)
+#
+# returns the complete phrase from the start of the string value, plus any
+# characters left in the string after the phrase is removed.
_wsp_splitter = re.compile(r'([{}]+)'.format(''.join(WSP))).split
_non_atom_end_matcher = re.compile(r"[^{}]+".format(
@@ -2900,7 +2917,7 @@ def parse_content_disposition_header(value):
try:
token, value = get_token(value)
except errors.HeaderParseError:
- ctype.defects.append(errors.InvalidHeaderDefect(
+ disp_header.defects.append(errors.InvalidHeaderDefect(
"Expected content disposition but found {!r}".format(value)))
_find_mime_parameters(disp_header, value)
return disp_header
@@ -2931,8 +2948,8 @@ def parse_content_transfer_encoding_header(value):
try:
token, value = get_token(value)
except errors.HeaderParseError:
- ctype.defects.append(errors.InvalidHeaderDefect(
- "Expected content trnasfer encoding but found {!r}".format(value)))
+ cte_header.defects.append(errors.InvalidHeaderDefect(
+ "Expected content transfer encoding but found {!r}".format(value)))
else:
cte_header.append(token)
cte_header.cte = token.value.strip().lower()
diff --git a/Lib/email/contentmanager.py b/Lib/email/contentmanager.py
new file mode 100644
index 0000000..d363652
--- /dev/null
+++ b/Lib/email/contentmanager.py
@@ -0,0 +1,249 @@
+import binascii
+import email.charset
+import email.message
+import email.errors
+from email import quoprimime
+
+class ContentManager:
+
+ def __init__(self):
+ self.get_handlers = {}
+ self.set_handlers = {}
+
+ def add_get_handler(self, key, handler):
+ self.get_handlers[key] = handler
+
+ def get_content(self, msg, *args, **kw):
+ content_type = msg.get_content_type()
+ if content_type in self.get_handlers:
+ return self.get_handlers[content_type](msg, *args, **kw)
+ maintype = msg.get_content_maintype()
+ if maintype in self.get_handlers:
+ return self.get_handlers[maintype](msg, *args, **kw)
+ if '' in self.get_handlers:
+ return self.get_handlers[''](msg, *args, **kw)
+ raise KeyError(content_type)
+
+ def add_set_handler(self, typekey, handler):
+ self.set_handlers[typekey] = handler
+
+ def set_content(self, msg, obj, *args, **kw):
+ if msg.get_content_maintype() == 'multipart':
+ # XXX: is this error a good idea or not? We can remove it later,
+ # but we can't add it later, so do it for now.
+ raise TypeError("set_content not valid on multipart")
+ handler = self._find_set_handler(msg, obj)
+ msg.clear_content()
+ handler(msg, obj, *args, **kw)
+
+ def _find_set_handler(self, msg, obj):
+ full_path_for_error = None
+ for typ in type(obj).__mro__:
+ if typ in self.set_handlers:
+ return self.set_handlers[typ]
+ qname = typ.__qualname__
+ modname = getattr(typ, '__module__', '')
+ full_path = '.'.join((modname, qname)) if modname else qname
+ if full_path_for_error is None:
+ full_path_for_error = full_path
+ if full_path in self.set_handlers:
+ return self.set_handlers[full_path]
+ if qname in self.set_handlers:
+ return self.set_handlers[qname]
+ name = typ.__name__
+ if name in self.set_handlers:
+ return self.set_handlers[name]
+ if None in self.set_handlers:
+ return self.set_handlers[None]
+ raise KeyError(full_path_for_error)
+
+
+raw_data_manager = ContentManager()
+
+
+def get_text_content(msg, errors='replace'):
+ content = msg.get_payload(decode=True)
+ charset = msg.get_param('charset', 'ASCII')
+ return content.decode(charset, errors=errors)
+raw_data_manager.add_get_handler('text', get_text_content)
+
+
+def get_non_text_content(msg):
+ return msg.get_payload(decode=True)
+for maintype in 'audio image video application'.split():
+ raw_data_manager.add_get_handler(maintype, get_non_text_content)
+
+
+def get_message_content(msg):
+ return msg.get_payload(0)
+for subtype in 'rfc822 external-body'.split():
+ raw_data_manager.add_get_handler('message/'+subtype, get_message_content)
+
+
+def get_and_fixup_unknown_message_content(msg):
+ # If we don't understand a message subtype, we are supposed to treat it as
+ # if it were application/octet-stream, per
+ # tools.ietf.org/html/rfc2046#section-5.2.4. Feedparser doesn't do that,
+ # so do our best to fix things up. Note that it is *not* appropriate to
+ # model message/partial content as Message objects, so they are handled
+ # here as well. (How to reassemble them is out of scope for this comment :)
+ return bytes(msg.get_payload(0))
+raw_data_manager.add_get_handler('message',
+ get_and_fixup_unknown_message_content)
+
+
+def _prepare_set(msg, maintype, subtype, headers):
+ msg['Content-Type'] = '/'.join((maintype, subtype))
+ if headers:
+ if not hasattr(headers[0], 'name'):
+ mp = msg.policy
+ headers = [mp.header_factory(*mp.header_source_parse([header]))
+ for header in headers]
+ try:
+ for header in headers:
+ if header.defects:
+ raise header.defects[0]
+ msg[header.name] = header
+ except email.errors.HeaderDefect as exc:
+ raise ValueError("Invalid header: {}".format(
+ header.fold(policy=msg.policy))) from exc
+
+
+def _finalize_set(msg, disposition, filename, cid, params):
+ if disposition is None and filename is not None:
+ disposition = 'attachment'
+ if disposition is not None:
+ msg['Content-Disposition'] = disposition
+ if filename is not None:
+ msg.set_param('filename',
+ filename,
+ header='Content-Disposition',
+ replace=True)
+ if cid is not None:
+ msg['Content-ID'] = cid
+ if params is not None:
+ for key, value in params.items():
+ msg.set_param(key, value)
+
+
+# XXX: This is a cleaned-up version of base64mime.body_encode. It would
+# be nice to drop both this and quoprimime.body_encode in favor of
+# enhanced binascii routines that accepted a max_line_length parameter.
+def _encode_base64(data, max_line_length):
+ encoded_lines = []
+ unencoded_bytes_per_line = max_line_length * 3 // 4
+ for i in range(0, len(data), unencoded_bytes_per_line):
+ thisline = data[i:i+unencoded_bytes_per_line]
+ encoded_lines.append(binascii.b2a_base64(thisline).decode('ascii'))
+ return ''.join(encoded_lines)
+
+
+def _encode_text(string, charset, cte, policy):
+ lines = string.encode(charset).splitlines()
+ linesep = policy.linesep.encode('ascii')
+ def embeded_body(lines): return linesep.join(lines) + linesep
+ def normal_body(lines): return b'\n'.join(lines) + b'\n'
+ if cte==None:
+ # Use heuristics to decide on the "best" encoding.
+ try:
+ return '7bit', normal_body(lines).decode('ascii')
+ except UnicodeDecodeError:
+ pass
+ if (policy.cte_type == '8bit' and
+ max(len(x) for x in lines) <= policy.max_line_length):
+ return '8bit', normal_body(lines).decode('ascii', 'surrogateescape')
+ sniff = embeded_body(lines[:10])
+ sniff_qp = quoprimime.body_encode(sniff.decode('latin-1'),
+ policy.max_line_length)
+ sniff_base64 = binascii.b2a_base64(sniff)
+ # This is a little unfair to qp; it includes lineseps, base64 doesn't.
+ if len(sniff_qp) > len(sniff_base64):
+ cte = 'base64'
+ else:
+ cte = 'quoted-printable'
+ if len(lines) <= 10:
+ return cte, sniff_qp
+ if cte == '7bit':
+ data = normal_body(lines).decode('ascii')
+ elif cte == '8bit':
+ data = normal_body(lines).decode('ascii', 'surrogateescape')
+ elif cte == 'quoted-printable':
+ data = quoprimime.body_encode(normal_body(lines).decode('latin-1'),
+ policy.max_line_length)
+ elif cte == 'base64':
+ data = _encode_base64(embeded_body(lines), policy.max_line_length)
+ else:
+ raise ValueError("Unknown content transfer encoding {}".format(cte))
+ return cte, data
+
+
+def set_text_content(msg, string, subtype="plain", charset='utf-8', cte=None,
+ disposition=None, filename=None, cid=None,
+ params=None, headers=None):
+ _prepare_set(msg, 'text', subtype, headers)
+ cte, payload = _encode_text(string, charset, cte, msg.policy)
+ msg.set_payload(payload)
+ msg.set_param('charset',
+ email.charset.ALIASES.get(charset, charset),
+ replace=True)
+ msg['Content-Transfer-Encoding'] = cte
+ _finalize_set(msg, disposition, filename, cid, params)
+raw_data_manager.add_set_handler(str, set_text_content)
+
+
+def set_message_content(msg, message, subtype="rfc822", cte=None,
+ disposition=None, filename=None, cid=None,
+ params=None, headers=None):
+ if subtype == 'partial':
+ raise ValueError("message/partial is not supported for Message objects")
+ if subtype == 'rfc822':
+ if cte not in (None, '7bit', '8bit', 'binary'):
+ # http://tools.ietf.org/html/rfc2046#section-5.2.1 mandate.
+ raise ValueError(
+ "message/rfc822 parts do not support cte={}".format(cte))
+ # 8bit will get coerced on serialization if policy.cte_type='7bit'. We
+ # may end up claiming 8bit when it isn't needed, but the only negative
+ # result of that should be a gateway that needs to coerce to 7bit
+ # having to look through the whole embedded message to discover whether
+ # or not it actually has to do anything.
+ cte = '8bit' if cte is None else cte
+ elif subtype == 'external-body':
+ if cte not in (None, '7bit'):
+ # http://tools.ietf.org/html/rfc2046#section-5.2.3 mandate.
+ raise ValueError(
+ "message/external-body parts do not support cte={}".format(cte))
+ cte = '7bit'
+ elif cte is None:
+ # http://tools.ietf.org/html/rfc2046#section-5.2.4 says all future
+ # subtypes should be restricted to 7bit, so assume that.
+ cte = '7bit'
+ _prepare_set(msg, 'message', subtype, headers)
+ msg.set_payload([message])
+ msg['Content-Transfer-Encoding'] = cte
+ _finalize_set(msg, disposition, filename, cid, params)
+raw_data_manager.add_set_handler(email.message.Message, set_message_content)
+
+
+def set_bytes_content(msg, data, maintype, subtype, cte='base64',
+ disposition=None, filename=None, cid=None,
+ params=None, headers=None):
+ _prepare_set(msg, maintype, subtype, headers)
+ if cte == 'base64':
+ data = _encode_base64(data, max_line_length=msg.policy.max_line_length)
+ elif cte == 'quoted-printable':
+ # XXX: quoprimime.body_encode won't encode newline characters in data,
+ # so we can't use it. This means max_line_length is ignored. Another
+ # bug to fix later. (Note: encoders.quopri is broken on line ends.)
+ data = binascii.b2a_qp(data, istext=False, header=False, quotetabs=True)
+ data = data.decode('ascii')
+ elif cte == '7bit':
+ # Make sure it really is only ASCII. The early warning here seems
+ # worth the overhead...if you care, write your own content manager :).
+ data.encode('ascii')
+ elif cte in ('8bit', 'binary'):
+ data = data.decode('ascii', 'surrogateescape')
+ msg.set_payload(data)
+ msg['Content-Transfer-Encoding'] = cte
+ _finalize_set(msg, disposition, filename, cid, params)
+for typ in (bytes, bytearray, memoryview):
+ raw_data_manager.add_set_handler(typ, set_bytes_content)
diff --git a/Lib/email/encoders.py b/Lib/email/encoders.py
index f9657f0..0a66acb 100644
--- a/Lib/email/encoders.py
+++ b/Lib/email/encoders.py
@@ -54,21 +54,12 @@ def encode_7or8bit(msg):
# There's no payload. For backwards compatibility we use 7bit
msg['Content-Transfer-Encoding'] = '7bit'
return
- # We play a trick to make this go fast. If encoding/decode to ASCII
- # succeeds, we know the data must be 7bit, otherwise treat it as 8bit.
+ # We play a trick to make this go fast. If decoding from ASCII succeeds,
+ # we know the data must be 7bit, otherwise treat it as 8bit.
try:
- if isinstance(orig, str):
- orig.encode('ascii')
- else:
- orig.decode('ascii')
+ orig.decode('ascii')
except UnicodeError:
- charset = msg.get_charset()
- output_cset = charset and charset.output_charset
- # iso-2022-* is non-ASCII but encodes to a 7-bit representation
- if output_cset and output_cset.lower().startswith('iso-2022-'):
- msg['Content-Transfer-Encoding'] = '7bit'
- else:
- msg['Content-Transfer-Encoding'] = '8bit'
+ msg['Content-Transfer-Encoding'] = '8bit'
else:
msg['Content-Transfer-Encoding'] = '7bit'
diff --git a/Lib/email/feedparser.py b/Lib/email/feedparser.py
index ea41e95..c95b27f 100644
--- a/Lib/email/feedparser.py
+++ b/Lib/email/feedparser.py
@@ -33,7 +33,7 @@ NLCRE_eol = re.compile('(\r\n|\r|\n)\Z')
NLCRE_crack = re.compile('(\r\n|\r|\n)')
# RFC 2822 $3.6.8 Optional fields. ftext is %d33-57 / %d59-126, Any character
# except controls, SP, and ":".
-headerRE = re.compile(r'^(From |[\041-\071\073-\176]{1,}:|[\t ])')
+headerRE = re.compile(r'^(From |[\041-\071\073-\176]*:|[\t ])')
EMPTYSTRING = ''
NL = '\n'
@@ -50,8 +50,8 @@ class BufferedSubFile(object):
simple abstraction -- it parses until EOF closes the current message.
"""
def __init__(self):
- # The last partial line pushed into this object.
- self._partial = ''
+ # Chunks of the last partial line pushed into this object.
+ self._partial = []
# The list of full, pushed lines, in reverse order
self._lines = []
# The stack of false-EOF checking predicates.
@@ -67,8 +67,8 @@ class BufferedSubFile(object):
def close(self):
# Don't forget any trailing partial line.
- self._lines.append(self._partial)
- self._partial = ''
+ self.pushlines(''.join(self._partial).splitlines(True))
+ self._partial = []
self._closed = True
def readline(self):
@@ -96,26 +96,27 @@ class BufferedSubFile(object):
def push(self, data):
"""Push some new data into this object."""
- # Handle any previous leftovers
- data, self._partial = self._partial + data, ''
- # Crack into lines, but preserve the newlines on the end of each
- parts = NLCRE_crack.split(data)
- # The *ahem* interesting behaviour of re.split when supplied grouping
- # parentheses is that the last element of the resulting list is the
- # data after the final RE. In the case of a NL/CR terminated string,
- # this is the empty string.
- self._partial = parts.pop()
- #GAN 29Mar09 bugs 1555570, 1721862 Confusion at 8K boundary ending with \r:
- # is there a \n to follow later?
- if not self._partial and parts and parts[-1].endswith('\r'):
- self._partial = parts.pop(-2)+parts.pop()
- # parts is a list of strings, alternating between the line contents
- # and the eol character(s). Gather up a list of lines after
- # re-attaching the newlines.
- lines = []
- for i in range(len(parts) // 2):
- lines.append(parts[i*2] + parts[i*2+1])
- self.pushlines(lines)
+ # Crack into lines, but preserve the linesep characters on the end of each
+ parts = data.splitlines(True)
+
+ if not parts or not parts[0].endswith(('\n', '\r')):
+ # No new complete lines, so just accumulate partials
+ self._partial += parts
+ return
+
+ if self._partial:
+ # If there are previous leftovers, complete them now
+ self._partial.append(parts[0])
+ parts[0:1] = ''.join(self._partial).splitlines(True)
+ del self._partial[:]
+
+ # If the last element of the list does not end in a newline, then treat
+ # it as a partial line. We only check for '\n' here because a line
+ # ending with '\r' might be a line that was split in the middle of a
+ # '\r\n' sequence (see bugs 1555570 and 1721862).
+ if not parts[-1].endswith('\n'):
+ self._partial = [parts.pop()]
+ self.pushlines(parts)
def pushlines(self, lines):
# Reverse and insert at the front of the lines.
@@ -135,7 +136,7 @@ class BufferedSubFile(object):
class FeedParser:
"""A feed-style parser of email."""
- def __init__(self, _factory=message.Message, *, policy=compat32):
+ def __init__(self, _factory=None, *, policy=compat32):
"""_factory is called with no arguments to create a new message obj
The policy keyword specifies a policy object that controls a number of
@@ -143,14 +144,23 @@ class FeedParser:
backward compatibility.
"""
- self._factory = _factory
self.policy = policy
- try:
- _factory(policy=self.policy)
- self._factory_kwds = lambda: {'policy': self.policy}
- except TypeError:
- # Assume this is an old-style factory
- self._factory_kwds = lambda: {}
+ self._factory_kwds = lambda: {'policy': self.policy}
+ if _factory is None:
+ # What this should be:
+ #self._factory = policy.default_message_factory
+ # but, because we are post 3.4 feature freeze, fix with temp hack:
+ if self.policy is compat32:
+ self._factory = message.Message
+ else:
+ self._factory = message.EmailMessage
+ else:
+ self._factory = _factory
+ try:
+ _factory(policy=self.policy)
+ except TypeError:
+ # Assume this is an old-style factory
+ self._factory_kwds = lambda: {}
self._input = BufferedSubFile()
self._msgstack = []
self._parse = self._parsegen().__next__
@@ -501,6 +511,15 @@ class FeedParser:
# There will always be a colon, because if there wasn't the part of
# the parser that calls us would have started parsing the body.
i = line.find(':')
+
+ # If the colon is on the start of the line the header is clearly
+ # malformed, but we might be able to salvage the rest of the
+ # message. Track the error but keep going.
+ if i == 0:
+ defect = errors.InvalidHeaderDefect("Missing header name.")
+ self._cur.defects.append(defect)
+ continue
+
assert i>0, "_parse_headers fed line with no : and no leading WS"
lastheader = line[:i]
lastvalue = [line]
diff --git a/Lib/email/generator.py b/Lib/email/generator.py
index e4a86d4..4735721 100644
--- a/Lib/email/generator.py
+++ b/Lib/email/generator.py
@@ -10,14 +10,10 @@ import re
import sys
import time
import random
-import warnings
from copy import deepcopy
from io import StringIO, BytesIO
-from email._policybase import compat32
-from email.header import Header
from email.utils import _has_surrogates
-import email.charset as _charset
UNDERSCORE = '_'
NL = '\n' # XXX: no longer used by the code below.
@@ -55,8 +51,9 @@ class Generator:
by RFC 2822.
The policy keyword specifies a policy object that controls a number of
- aspects of the generator's operation. The default policy maintains
- backward compatibility.
+ aspects of the generator's operation. If no policy is specified,
+ the policy associated with the Message object passed to the
+ flatten method is used.
"""
self._fp = outfp
@@ -80,7 +77,9 @@ class Generator:
Note that for subobjects, no From_ line is printed.
linesep specifies the characters used to indicate a new line in
- the output. The default value is determined by the policy.
+ the output. The default value is determined by the policy specified
+ when the Generator instance was created or, if none was specified,
+ from the policy associated with the msg.
"""
# We use the _XXX constants for operating on data that comes directly
diff --git a/Lib/email/header.py b/Lib/email/header.py
index 5bd0638..9c89589 100644
--- a/Lib/email/header.py
+++ b/Lib/email/header.py
@@ -100,7 +100,6 @@ def decode_header(header):
words.append((encoded, encoding, charset))
# Now loop over words and remove words that consist of whitespace
# between two encoded strings.
- import sys
droplist = []
for n, w in enumerate(words):
if n>1 and w[1] and words[n-2][1] and words[n-1][0].isspace():
@@ -362,7 +361,6 @@ class Header:
for string, charset in self._chunks:
if hasspace is not None:
hasspace = string and self._nonctext(string[0])
- import sys
if lastcs not in (None, 'us-ascii'):
if not hasspace or charset not in (None, 'us-ascii'):
formatter.add_transition()
diff --git a/Lib/email/headerregistry.py b/Lib/email/headerregistry.py
index 1fae950..911a2af 100644
--- a/Lib/email/headerregistry.py
+++ b/Lib/email/headerregistry.py
@@ -7,6 +7,7 @@ Eventually HeaderRegistry will be a public API, but it isn't yet,
and will probably change some before that happens.
"""
+from types import MappingProxyType
from email import utils
from email import errors
@@ -454,7 +455,7 @@ class ParameterizedMIMEHeader:
@property
def params(self):
- return self._params.copy()
+ return MappingProxyType(self._params)
class ContentTypeHeader(ParameterizedMIMEHeader):
diff --git a/Lib/email/iterators.py b/Lib/email/iterators.py
index 3adc4a0..b5502ee 100644
--- a/Lib/email/iterators.py
+++ b/Lib/email/iterators.py
@@ -26,8 +26,7 @@ def walk(self):
yield self
if self.is_multipart():
for subpart in self.get_payload():
- for subsubpart in subpart.walk():
- yield subsubpart
+ yield from subpart.walk()
@@ -40,8 +39,7 @@ def body_line_iterator(msg, decode=False):
for subpart in msg.walk():
payload = subpart.get_payload(decode=decode)
if isinstance(payload, str):
- for line in StringIO(payload):
- yield line
+ yield from StringIO(payload)
def typed_subpart_iterator(msg, maintype='text', subtype=None):
diff --git a/Lib/email/message.py b/Lib/email/message.py
index afe350c..2f37dbb 100644
--- a/Lib/email/message.py
+++ b/Lib/email/message.py
@@ -8,8 +8,8 @@ __all__ = ['Message']
import re
import uu
-import base64
-import binascii
+import quopri
+import warnings
from io import BytesIO, StringIO
# Intrapackage imports
@@ -132,22 +132,50 @@ class Message:
def __str__(self):
"""Return the entire formatted message as a string.
- This includes the headers, body, and envelope header.
"""
return self.as_string()
- def as_string(self, unixfrom=False, maxheaderlen=0):
+ def as_string(self, unixfrom=False, maxheaderlen=0, policy=None):
"""Return the entire formatted message as a string.
- Optional `unixfrom' when True, means include the Unix From_ envelope
- header.
- This is a convenience method and may not generate the message exactly
- as you intend. For more flexibility, use the flatten() method of a
- Generator instance.
+ Optional 'unixfrom', when true, means include the Unix From_ envelope
+ header. For backward compatibility reasons, if maxheaderlen is
+ not specified it defaults to 0, so you must override it explicitly
+ if you want a different maxheaderlen. 'policy' is passed to the
+ Generator instance used to serialize the message; if it is not
+ specified the policy associated with the message instance is used.
+
+ If the message object contains binary data that is not encoded
+ according to RFC standards, the non-compliant data will be replaced by
+ unicode "unknown character" code points.
"""
from email.generator import Generator
+ policy = self.policy if policy is None else policy
fp = StringIO()
- g = Generator(fp, mangle_from_=False, maxheaderlen=maxheaderlen)
+ g = Generator(fp,
+ mangle_from_=False,
+ maxheaderlen=maxheaderlen,
+ policy=policy)
+ g.flatten(self, unixfrom=unixfrom)
+ return fp.getvalue()
+
+ def __bytes__(self):
+ """Return the entire formatted message as a bytes object.
+ """
+ return self.as_bytes()
+
+ def as_bytes(self, unixfrom=False, policy=None):
+ """Return the entire formatted message as a bytes object.
+
+ Optional 'unixfrom', when true, means include the Unix From_ envelope
+ header. 'policy' is passed to the BytesGenerator instance used to
+ serialize the message; if not specified the policy associated with
+ the message instance is used.
+ """
+ from email.generator import BytesGenerator
+ policy = self.policy if policy is None else policy
+ fp = BytesIO()
+ g = BytesGenerator(fp, mangle_from_=False, policy=policy)
g.flatten(self, unixfrom=unixfrom)
return fp.getvalue()
@@ -177,7 +205,11 @@ class Message:
if self._payload is None:
self._payload = [payload]
else:
- self._payload.append(payload)
+ try:
+ self._payload.append(payload)
+ except AttributeError:
+ raise TypeError("Attach is not valid on a message with a"
+ " non-multipart payload")
def get_payload(self, i=None, decode=False):
"""Return a reference to the payload.
@@ -241,14 +273,14 @@ class Message:
bpayload = payload.encode('ascii')
except UnicodeError:
# This won't happen for RFC compliant messages (messages
- # containing only ASCII codepoints in the unicode input).
+ # containing only ASCII code points in the unicode input).
# If it does happen, turn the string into bytes in a way
# guaranteed not to fail.
bpayload = payload.encode('raw-unicode-escape')
if not decode:
return payload
if cte == 'quoted-printable':
- return utils._qdecode(bpayload)
+ return quopri.decodestring(bpayload)
elif cte == 'base64':
# XXX: this is a bit of a hack; decode_b should probably be factored
# out somewhere, but I haven't figured out where yet.
@@ -668,7 +700,7 @@ class Message:
return failobj
def set_param(self, param, value, header='Content-Type', requote=True,
- charset=None, language=''):
+ charset=None, language='', replace=False):
"""Set a parameter in the Content-Type header.
If the parameter already exists in the header, its value will be
@@ -712,8 +744,11 @@ class Message:
else:
ctype = SEMISPACE.join([ctype, append_param])
if ctype != self.get(header):
- del self[header]
- self[header] = ctype
+ if replace:
+ self.replace_header(header, ctype)
+ else:
+ del self[header]
+ self[header] = ctype
def del_param(self, param, header='content-type', requote=True):
"""Remove the given parameter completely from the Content-Type header.
@@ -894,3 +929,219 @@ class Message:
# I.e. def walk(self): ...
from email.iterators import walk
+
+# XXX Support for temporary deprecation hack for is_attachment property.
+class _IsAttachment:
+ def __init__(self, value):
+ self.value = value
+ def __call__(self):
+ return self.value
+ def __bool__(self):
+ warnings.warn("is_attachment will be a method, not a property, in 3.5",
+ DeprecationWarning,
+ stacklevel=3)
+ return self.value
+
+class MIMEPart(Message):
+
+ def __init__(self, policy=None):
+ if policy is None:
+ from email.policy import default
+ policy = default
+ Message.__init__(self, policy)
+
+ @property
+ def is_attachment(self):
+ c_d = self.get('content-disposition')
+ result = False if c_d is None else c_d.content_disposition == 'attachment'
+ # XXX transitional hack to raise deprecation if not called.
+ return _IsAttachment(result)
+
+ def _find_body(self, part, preferencelist):
+ if part.is_attachment():
+ return
+ maintype, subtype = part.get_content_type().split('/')
+ if maintype == 'text':
+ if subtype in preferencelist:
+ yield (preferencelist.index(subtype), part)
+ return
+ if maintype != 'multipart':
+ return
+ if subtype != 'related':
+ for subpart in part.iter_parts():
+ yield from self._find_body(subpart, preferencelist)
+ return
+ if 'related' in preferencelist:
+ yield (preferencelist.index('related'), part)
+ candidate = None
+ start = part.get_param('start')
+ if start:
+ for subpart in part.iter_parts():
+ if subpart['content-id'] == start:
+ candidate = subpart
+ break
+ if candidate is None:
+ subparts = part.get_payload()
+ candidate = subparts[0] if subparts else None
+ if candidate is not None:
+ yield from self._find_body(candidate, preferencelist)
+
+ def get_body(self, preferencelist=('related', 'html', 'plain')):
+ """Return best candidate mime part for display as 'body' of message.
+
+ Do a depth first search, starting with self, looking for the first part
+ matching each of the items in preferencelist, and return the part
+ corresponding to the first item that has a match, or None if no items
+ have a match. If 'related' is not included in preferencelist, consider
+ the root part of any multipart/related encountered as a candidate
+ match. Ignore parts with 'Content-Disposition: attachment'.
+ """
+ best_prio = len(preferencelist)
+ body = None
+ for prio, part in self._find_body(self, preferencelist):
+ if prio < best_prio:
+ best_prio = prio
+ body = part
+ if prio == 0:
+ break
+ return body
+
+ _body_types = {('text', 'plain'),
+ ('text', 'html'),
+ ('multipart', 'related'),
+ ('multipart', 'alternative')}
+ def iter_attachments(self):
+ """Return an iterator over the non-main parts of a multipart.
+
+ Skip the first of each occurrence of text/plain, text/html,
+ multipart/related, or multipart/alternative in the multipart (unless
+ they have a 'Content-Disposition: attachment' header) and include all
+ remaining subparts in the returned iterator. When applied to a
+ multipart/related, return all parts except the root part. Return an
+ empty iterator when applied to a multipart/alternative or a
+ non-multipart.
+ """
+ maintype, subtype = self.get_content_type().split('/')
+ if maintype != 'multipart' or subtype == 'alternative':
+ return
+ parts = self.get_payload()
+ if maintype == 'multipart' and subtype == 'related':
+ # For related, we treat everything but the root as an attachment.
+ # The root may be indicated by 'start'; if there's no start or we
+ # can't find the named start, treat the first subpart as the root.
+ start = self.get_param('start')
+ if start:
+ found = False
+ attachments = []
+ for part in parts:
+ if part.get('content-id') == start:
+ found = True
+ else:
+ attachments.append(part)
+ if found:
+ yield from attachments
+ return
+ parts.pop(0)
+ yield from parts
+ return
+ # Otherwise we more or less invert the remaining logic in get_body.
+ # This only really works in edge cases (ex: non-text relateds or
+ # alternatives) if the sending agent sets content-disposition.
+ seen = [] # Only skip the first example of each candidate type.
+ for part in parts:
+ maintype, subtype = part.get_content_type().split('/')
+ if ((maintype, subtype) in self._body_types and
+ not part.is_attachment() and subtype not in seen):
+ seen.append(subtype)
+ continue
+ yield part
+
+ def iter_parts(self):
+ """Return an iterator over all immediate subparts of a multipart.
+
+ Return an empty iterator for a non-multipart.
+ """
+ if self.get_content_maintype() == 'multipart':
+ yield from self.get_payload()
+
+ def get_content(self, *args, content_manager=None, **kw):
+ if content_manager is None:
+ content_manager = self.policy.content_manager
+ return content_manager.get_content(self, *args, **kw)
+
+ def set_content(self, *args, content_manager=None, **kw):
+ if content_manager is None:
+ content_manager = self.policy.content_manager
+ content_manager.set_content(self, *args, **kw)
+
+ def _make_multipart(self, subtype, disallowed_subtypes, boundary):
+ if self.get_content_maintype() == 'multipart':
+ existing_subtype = self.get_content_subtype()
+ disallowed_subtypes = disallowed_subtypes + (subtype,)
+ if existing_subtype in disallowed_subtypes:
+ raise ValueError("Cannot convert {} to {}".format(
+ existing_subtype, subtype))
+ keep_headers = []
+ part_headers = []
+ for name, value in self._headers:
+ if name.lower().startswith('content-'):
+ part_headers.append((name, value))
+ else:
+ keep_headers.append((name, value))
+ if part_headers:
+ # There is existing content, move it to the first subpart.
+ part = type(self)(policy=self.policy)
+ part._headers = part_headers
+ part._payload = self._payload
+ self._payload = [part]
+ else:
+ self._payload = []
+ self._headers = keep_headers
+ self['Content-Type'] = 'multipart/' + subtype
+ if boundary is not None:
+ self.set_param('boundary', boundary)
+
+ def make_related(self, boundary=None):
+ self._make_multipart('related', ('alternative', 'mixed'), boundary)
+
+ def make_alternative(self, boundary=None):
+ self._make_multipart('alternative', ('mixed',), boundary)
+
+ def make_mixed(self, boundary=None):
+ self._make_multipart('mixed', (), boundary)
+
+ def _add_multipart(self, _subtype, *args, _disp=None, **kw):
+ if (self.get_content_maintype() != 'multipart' or
+ self.get_content_subtype() != _subtype):
+ getattr(self, 'make_' + _subtype)()
+ part = type(self)(policy=self.policy)
+ part.set_content(*args, **kw)
+ if _disp and 'content-disposition' not in part:
+ part['Content-Disposition'] = _disp
+ self.attach(part)
+
+ def add_related(self, *args, **kw):
+ self._add_multipart('related', *args, _disp='inline', **kw)
+
+ def add_alternative(self, *args, **kw):
+ self._add_multipart('alternative', *args, **kw)
+
+ def add_attachment(self, *args, **kw):
+ self._add_multipart('mixed', *args, _disp='attachment', **kw)
+
+ def clear(self):
+ self._headers = []
+ self._payload = None
+
+ def clear_content(self):
+ self._headers = [(n, v) for n, v in self._headers
+ if not n.lower().startswith('content-')]
+ self._payload = None
+
+
+class EmailMessage(MIMEPart):
+
+ def set_content(self, *args, **kw):
+ super().set_content(*args, **kw)
+ if 'MIME-Version' not in self:
+ self['MIME-Version'] = '1.0'
diff --git a/Lib/email/mime/nonmultipart.py b/Lib/email/mime/nonmultipart.py
index fc3b9eb..e1f5196 100644
--- a/Lib/email/mime/nonmultipart.py
+++ b/Lib/email/mime/nonmultipart.py
@@ -12,7 +12,7 @@ from email.mime.base import MIMEBase
class MIMENonMultipart(MIMEBase):
- """Base class for MIME multipart/* type messages."""
+ """Base class for MIME non-multipart type messages."""
def attach(self, payload):
# The public API prohibits attaching multiple subparts to MIMEBase
diff --git a/Lib/email/mime/text.py b/Lib/email/mime/text.py
index 3b5b09f..ec18b85 100644
--- a/Lib/email/mime/text.py
+++ b/Lib/email/mime/text.py
@@ -6,7 +6,6 @@
__all__ = ['MIMEText']
-from email.encoders import encode_7or8bit
from email.mime.nonmultipart import MIMENonMultipart
diff --git a/Lib/email/parser.py b/Lib/email/parser.py
index 752bf35..8c9bc9e 100644
--- a/Lib/email/parser.py
+++ b/Lib/email/parser.py
@@ -4,19 +4,18 @@
"""A parser of RFC 2822 and MIME email messages."""
-__all__ = ['Parser', 'HeaderParser', 'BytesParser', 'BytesHeaderParser']
+__all__ = ['Parser', 'HeaderParser', 'BytesParser', 'BytesHeaderParser',
+ 'FeedParser', 'BytesFeedParser']
-import warnings
from io import StringIO, TextIOWrapper
from email.feedparser import FeedParser, BytesFeedParser
-from email.message import Message
from email._policybase import compat32
class Parser:
- def __init__(self, _class=Message, *, policy=compat32):
+ def __init__(self, _class=None, *, policy=compat32):
"""Parser of RFC 2822 and MIME email messages.
Creates an in-memory object tree representing the email message, which
@@ -107,8 +106,10 @@ class BytesParser:
meaning it parses the entire contents of the file.
"""
fp = TextIOWrapper(fp, encoding='ascii', errors='surrogateescape')
- with fp:
+ try:
return self.parser.parse(fp, headersonly)
+ finally:
+ fp.detach()
def parsebytes(self, text, headersonly=False):
diff --git a/Lib/email/policy.py b/Lib/email/policy.py
index 38e88af..f0b20f4 100644
--- a/Lib/email/policy.py
+++ b/Lib/email/policy.py
@@ -5,6 +5,7 @@ code that adds all the email6 features.
from email._policybase import Policy, Compat32, compat32, _extend_docstrings
from email.utils import _has_surrogates
from email.headerregistry import HeaderRegistry as HeaderRegistry
+from email.contentmanager import raw_data_manager
__all__ = [
'Compat32',
@@ -58,10 +59,22 @@ class EmailPolicy(Policy):
special treatment, while all other fields are
treated as unstructured. This list will be
completed before the extension is marked stable.)
+
+ content_manager -- an object with at least two methods: get_content
+ and set_content. When the get_content or
+ set_content method of a Message object is called,
+ it calls the corresponding method of this object,
+ passing it the message object as its first argument,
+ and any arguments or keywords that were passed to
+ it as additional arguments. The default
+ content_manager is
+ :data:`~email.contentmanager.raw_data_manager`.
+
"""
refold_source = 'long'
header_factory = HeaderRegistry()
+ content_manager = raw_data_manager
def __init__(self, **kw):
# Ensure that each new instance gets a unique header factory
diff --git a/Lib/email/quoprimime.py b/Lib/email/quoprimime.py
index 30bf916..c1fe2b4 100644
--- a/Lib/email/quoprimime.py
+++ b/Lib/email/quoprimime.py
@@ -40,7 +40,6 @@ __all__ = [
]
import re
-import io
from string import ascii_letters, digits, hexdigits
diff --git a/Lib/email/utils.py b/Lib/email/utils.py
index f76c21e..5080d81 100644
--- a/Lib/email/utils.py
+++ b/Lib/email/utils.py
@@ -25,13 +25,10 @@ __all__ = [
import os
import re
import time
-import base64
import random
import socket
import datetime
import urllib.parse
-import warnings
-from io import StringIO
from email._parseaddr import quote
from email._parseaddr import AddressList as _AddressList
@@ -39,10 +36,7 @@ from email._parseaddr import mktime_tz
from email._parseaddr import parsedate, parsedate_tz, _parsedate_tz
-from quopri import decodestring as _qdecode
-
# Intrapackage imports
-from email.encoders import _bencode, _qencode
from email.charset import Charset
COMMASPACE = ', '
@@ -54,17 +48,27 @@ TICK = "'"
specialsre = re.compile(r'[][\\()<>@,:;".]')
escapesre = re.compile(r'[\\"]')
-# How to figure out if we are processing strings that come from a byte
-# source with undecodable characters.
-_has_surrogates = re.compile(
- '([^\ud800-\udbff]|\A)[\udc00-\udfff]([^\udc00-\udfff]|\Z)').search
+def _has_surrogates(s):
+ """Return True if s contains surrogate-escaped binary data."""
+ # This check is based on the fact that unless there are surrogates, utf8
+ # (Python's default encoding) can encode any string. This is the fastest
+ # way to check for surrogates, see issue 11454 for timings.
+ try:
+ s.encode()
+ return False
+ except UnicodeEncodeError:
+ return True
# How to deal with a string containing bytes before handing it to the
# application through the 'normal' interface.
def _sanitize(string):
- # Turn any escaped bytes into unicode 'unknown' char.
- original_bytes = string.encode('ascii', 'surrogateescape')
- return original_bytes.decode('ascii', 'replace')
+ # Turn any escaped bytes into unicode 'unknown' char. If the escaped
+ # bytes happen to be utf-8 they will instead get decoded, even if they
+ # were invalid in the charset the source was supposed to be in. This
+ # seems like it is not a bad thing; a defect was still registered.
+ original_bytes = string.encode('utf-8', 'surrogateescape')
+ return original_bytes.decode('utf-8', 'replace')
+
# Helpers
@@ -151,30 +155,14 @@ def formatdate(timeval=None, localtime=False, usegmt=False):
# 2822 requires that day and month names be the English abbreviations.
if timeval is None:
timeval = time.time()
- if localtime:
- now = time.localtime(timeval)
- # Calculate timezone offset, based on whether the local zone has
- # daylight savings time, and whether DST is in effect.
- if time.daylight and now[-1]:
- offset = time.altzone
- else:
- offset = time.timezone
- hours, minutes = divmod(abs(offset), 3600)
- # Remember offset is in seconds west of UTC, but the timezone is in
- # minutes east of UTC, so the signs differ.
- if offset > 0:
- sign = '-'
- else:
- sign = '+'
- zone = '%s%02d%02d' % (sign, hours, minutes // 60)
+ if localtime or usegmt:
+ dt = datetime.datetime.fromtimestamp(timeval, datetime.timezone.utc)
else:
- now = time.gmtime(timeval)
- # Timezone offset is always -0000
- if usegmt:
- zone = 'GMT'
- else:
- zone = '-0000'
- return _format_timetuple_and_zone(now, zone)
+ dt = datetime.datetime.utcfromtimestamp(timeval)
+ if localtime:
+ dt = dt.astimezone()
+ usegmt = False
+ return format_datetime(dt, usegmt)
def format_datetime(dt, usegmt=False):
"""Turn a datetime into a date string as specified in RFC 2822.
@@ -198,24 +186,23 @@ def format_datetime(dt, usegmt=False):
def make_msgid(idstring=None, domain=None):
"""Returns a string suitable for RFC 2822 compliant Message-ID, e.g:
- <20020201195627.33539.96671@nightshade.la.mastaler.com>
+ <142480216486.20800.16526388040877946887@nightshade.la.mastaler.com>
Optional idstring if given is a string used to strengthen the
uniqueness of the message id. Optional domain if given provides the
portion of the message id after the '@'. It defaults to the locally
defined hostname.
"""
- timeval = time.time()
- utcdate = time.strftime('%Y%m%d%H%M%S', time.gmtime(timeval))
+ timeval = int(time.time()*100)
pid = os.getpid()
- randint = random.randrange(100000)
+ randint = random.getrandbits(64)
if idstring is None:
idstring = ''
else:
idstring = '.' + idstring
if domain is None:
domain = socket.getfqdn()
- msgid = '<%s.%s.%s%s@%s>' % (utcdate, pid, randint, idstring, domain)
+ msgid = '<%d.%d.%d%s@%s>' % (timeval, pid, randint, idstring, domain)
return msgid