summary | refs | log | tree | commit | diff | stats
path: root/Lib/email
diff options
context:
space:
mode:
Diffstat (limited to 'Lib/email')
-rw-r--r-- Lib/email/__init__.py 2
-rw-r--r-- Lib/email/_header_value_parser.py 23
-rw-r--r-- Lib/email/_policybase.py 8
-rw-r--r-- Lib/email/charset.py 3
-rw-r--r-- Lib/email/feedparser.py 25
-rw-r--r-- Lib/email/generator.py 13
-rw-r--r-- Lib/email/header.py 3
-rw-r--r-- Lib/email/headerregistry.py 10
-rw-r--r-- Lib/email/message.py 30
-rw-r--r-- Lib/email/mime/text.py 3
-rw-r--r-- Lib/email/parser.py 4
-rw-r--r-- Lib/email/policy.py 15
-rw-r--r-- Lib/email/utils.py 2
13 files changed, 83 insertions, 58 deletions
diff --git a/Lib/email/__init__.py b/Lib/email/__init__.py
index ff16f6a..fae8724 100644
--- a/Lib/email/__init__.py
+++ b/Lib/email/__init__.py
@@ -4,8 +4,6 @@
"""A package for parsing, handling, and generating email messages."""
-__version__ = '5.1.0'
-
__all__ = [
'base64mime',
'charset',
diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py
index a9bdf44..5df9511 100644
--- a/Lib/email/_header_value_parser.py
+++ b/Lib/email/_header_value_parser.py
@@ -320,17 +320,18 @@ class TokenList(list):
return ''.join(res)
def _fold(self, folded):
+ encoding = 'utf-8' if folded.policy.utf8 else 'ascii'
for part in self.parts:
tstr = str(part)
tlen = len(tstr)
try:
- str(part).encode('us-ascii')
+ str(part).encode(encoding)
except UnicodeEncodeError:
if any(isinstance(x, errors.UndecodableBytesDefect)
for x in part.all_defects):
charset = 'unknown-8bit'
else:
- # XXX: this should be a policy setting
+ # XXX: this should be a policy setting when utf8 is False.
charset = 'utf-8'
tstr = part.cte_encode(charset, folded.policy)
tlen = len(tstr)
@@ -394,11 +395,12 @@ class UnstructuredTokenList(TokenList):
def _fold(self, folded):
last_ew = None
+ encoding = 'utf-8' if folded.policy.utf8 else 'ascii'
for part in self.parts:
tstr = str(part)
is_ew = False
try:
- str(part).encode('us-ascii')
+ str(part).encode(encoding)
except UnicodeEncodeError:
if any(isinstance(x, errors.UndecodableBytesDefect)
for x in part.all_defects):
@@ -437,7 +439,7 @@ class UnstructuredTokenList(TokenList):
if folded.append_if_fits(part):
continue
if part.has_fws:
- part.fold(folded)
+ part._fold(folded)
continue
# It can't be split...we just have to put it on its own line.
folded.append(tstr)
@@ -458,7 +460,7 @@ class UnstructuredTokenList(TokenList):
last_ew = len(res)
else:
tl = get_unstructured(''.join(res[last_ew:] + [spart]))
- res.append(tl.as_encoded_word())
+ res.append(tl.as_encoded_word(charset))
return ''.join(res)
@@ -475,12 +477,13 @@ class Phrase(TokenList):
# comment that becomes a barrier across which we can't compose encoded
# words.
last_ew = None
+ encoding = 'utf-8' if folded.policy.utf8 else 'ascii'
for part in self.parts:
tstr = str(part)
tlen = len(tstr)
has_ew = False
try:
- str(part).encode('us-ascii')
+ str(part).encode(encoding)
except UnicodeEncodeError:
if any(isinstance(x, errors.UndecodableBytesDefect)
for x in part.all_defects):
@@ -1519,7 +1522,7 @@ def get_qp_ctext(value):
This is not the RFC ctext, since we are handling nested comments in comment
and unquoting quoted-pairs here. We allow anything except the '()'
characters, but if we find any ASCII other than the RFC defined printable
- ASCII an NonPrintableDefect is added to the token's defects list. Since
+ ASCII, a NonPrintableDefect is added to the token's defects list. Since
quoted pairs are converted to their unquoted values, what is returned is
a 'ptext' token. In this case it is a WhiteSpaceTerminal, so its value
is ' '.
@@ -1534,7 +1537,7 @@ def get_qcontent(value):
"""qcontent = qtext / quoted-pair
We allow anything except the DQUOTE character, but if we find any ASCII
- other than the RFC defined printable ASCII an NonPrintableDefect is
+ other than the RFC defined printable ASCII, a NonPrintableDefect is
added to the token's defects list. Any quoted pairs are converted to their
unquoted values, so what is returned is a 'ptext' token. In this case it
is a ValueTerminal.
@@ -1879,7 +1882,7 @@ def get_dtext(value):
obs-dtext = obs-NO-WS-CTL / quoted-pair
We allow anything except the excluded characters, but if we find any
- ASCII other than the RFC defined printable ASCII an NonPrintableDefect is
+ ASCII other than the RFC defined printable ASCII, a NonPrintableDefect is
added to the token's defects list. Quoted pairs are converted to their
unquoted values, so what is returned is a ptext token, in this case a
ValueTerminal. If there were quoted-printables, an ObsoleteHeaderDefect is
@@ -2869,7 +2872,7 @@ def parse_content_type_header(value):
_find_mime_parameters(ctype, value)
return ctype
ctype.append(token)
- # XXX: If we really want to follow the formal grammer we should make
+ # XXX: If we really want to follow the formal grammar we should make
# maintype and subtype specialized TokenLists here. Probably not worth it.
if not value or value[0] != '/':
ctype.defects.append(errors.InvalidHeaderDefect(
diff --git a/Lib/email/_policybase.py b/Lib/email/_policybase.py
index 8106114..c0d98a4 100644
--- a/Lib/email/_policybase.py
+++ b/Lib/email/_policybase.py
@@ -149,12 +149,18 @@ class Policy(_PolicyBase, metaclass=abc.ABCMeta):
during serialization. None or 0 means no line
wrapping is done. Default is 78.
+ mangle_from_ -- a flag that, when True, escapes From_ lines in the
+ body of the message by putting a `>' in front of
+ them. This is used when the message is being
+ serialized by a generator. Default: False.
+
"""
raise_on_defect = False
linesep = '\n'
cte_type = '8bit'
max_line_length = 78
+ mangle_from_ = False
def handle_defect(self, obj, defect):
"""Based on policy, either raise defect or call register_defect.
@@ -266,6 +272,8 @@ class Compat32(Policy):
replicates the behavior of the email package version 5.1.
"""
+ mangle_from_ = True
+
def _sanitize_header(self, name, value):
# If the header value contains surrogates, return a Header using
# the unknown-8bit charset to encode the bytes as encoded words.
diff --git a/Lib/email/charset.py b/Lib/email/charset.py
index e999472..ee56404 100644
--- a/Lib/email/charset.py
+++ b/Lib/email/charset.py
@@ -249,9 +249,6 @@ class Charset:
def __eq__(self, other):
return str(self) == str(other).lower()
- def __ne__(self, other):
- return not self.__eq__(other)
-
def get_body_encoding(self):
"""Return the content-transfer-encoding used for body encoding.
diff --git a/Lib/email/feedparser.py b/Lib/email/feedparser.py
index c95b27f..c542018 100644
--- a/Lib/email/feedparser.py
+++ b/Lib/email/feedparser.py
@@ -26,6 +26,7 @@ import re
from email import errors
from email import message
from email._policybase import compat32
+from collections import deque
NLCRE = re.compile('\r\n|\r|\n')
NLCRE_bol = re.compile('(\r\n|\r|\n)')
@@ -52,8 +53,8 @@ class BufferedSubFile(object):
def __init__(self):
# Chunks of the last partial line pushed into this object.
self._partial = []
- # The list of full, pushed lines, in reverse order
- self._lines = []
+ # A deque of full, pushed lines
+ self._lines = deque()
# The stack of false-EOF checking predicates.
self._eofstack = []
# A flag indicating whether the file has been closed or not.
@@ -78,21 +79,21 @@ class BufferedSubFile(object):
return NeedMoreData
# Pop the line off the stack and see if it matches the current
# false-EOF predicate.
- line = self._lines.pop()
+ line = self._lines.popleft()
# RFC 2046, section 5.1.2 requires us to recognize outer level
# boundaries at any level of inner nesting. Do this, but be sure it's
# in the order of most to least nested.
- for ateof in self._eofstack[::-1]:
+ for ateof in reversed(self._eofstack):
if ateof(line):
# We're at the false EOF. But push the last line back first.
- self._lines.append(line)
+ self._lines.appendleft(line)
return ''
return line
def unreadline(self, line):
# Let the consumer push a line back into the buffer.
assert line is not NeedMoreData
- self._lines.append(line)
+ self._lines.appendleft(line)
def push(self, data):
"""Push some new data into this object."""
@@ -119,8 +120,7 @@ class BufferedSubFile(object):
self.pushlines(parts)
def pushlines(self, lines):
- # Reverse and insert at the front of the lines.
- self._lines[:0] = lines[::-1]
+ self._lines.extend(lines)
def __iter__(self):
return self
@@ -145,7 +145,7 @@ class FeedParser:
"""
self.policy = policy
- self._factory_kwds = lambda: {'policy': self.policy}
+ self._old_style_factory = False
if _factory is None:
# What this should be:
#self._factory = policy.default_message_factory
@@ -160,7 +160,7 @@ class FeedParser:
_factory(policy=self.policy)
except TypeError:
# Assume this is an old-style factory
- self._factory_kwds = lambda: {}
+ self._old_style_factory = True
self._input = BufferedSubFile()
self._msgstack = []
self._parse = self._parsegen().__next__
@@ -197,7 +197,10 @@ class FeedParser:
return root
def _new_message(self):
- msg = self._factory(**self._factory_kwds())
+ if self._old_style_factory:
+ msg = self._factory()
+ else:
+ msg = self._factory(policy=self.policy)
if self._cur and self._cur.get_content_type() == 'multipart/digest':
msg.set_default_type('message/rfc822')
if self._msgstack:
diff --git a/Lib/email/generator.py b/Lib/email/generator.py
index 4735721..11ff16d 100644
--- a/Lib/email/generator.py
+++ b/Lib/email/generator.py
@@ -32,16 +32,16 @@ class Generator:
# Public interface
#
- def __init__(self, outfp, mangle_from_=True, maxheaderlen=None, *,
+ def __init__(self, outfp, mangle_from_=None, maxheaderlen=None, *,
policy=None):
"""Create the generator for message flattening.
outfp is the output file-like object for writing the message to. It
must have a write() method.
- Optional mangle_from_ is a flag that, when True (the default), escapes
- From_ lines in the body of the message by putting a `>' in front of
- them.
+ Optional mangle_from_ is a flag that, when True (the default if policy
+ is not set), escapes From_ lines in the body of the message by putting
+ a `>' in front of them.
Optional maxheaderlen specifies the longest length for a non-continued
header. When a header line is longer (in characters, with tabs
@@ -56,6 +56,9 @@ class Generator:
flatten method is used.
"""
+
+ if mangle_from_ is None:
+ mangle_from_ = True if policy is None else policy.mangle_from_
self._fp = outfp
self._mangle_from_ = mangle_from_
self.maxheaderlen = maxheaderlen
@@ -449,7 +452,7 @@ class DecodedGenerator(Generator):
Like the Generator base class, except that non-text parts are substituted
with a format string representing the part.
"""
- def __init__(self, outfp, mangle_from_=True, maxheaderlen=78, fmt=None):
+ def __init__(self, outfp, mangle_from_=None, maxheaderlen=78, fmt=None):
"""Like Generator.__init__() except that an additional optional
argument is allowed.
diff --git a/Lib/email/header.py b/Lib/email/header.py
index 9c89589..6820ea1 100644
--- a/Lib/email/header.py
+++ b/Lib/email/header.py
@@ -262,9 +262,6 @@ class Header:
# args and do another comparison.
return other == str(self)
- def __ne__(self, other):
- return not self == other
-
def append(self, s, charset=None, errors='strict'):
"""Append a string to the MIME header.
diff --git a/Lib/email/headerregistry.py b/Lib/email/headerregistry.py
index 911a2af..0fc2231 100644
--- a/Lib/email/headerregistry.py
+++ b/Lib/email/headerregistry.py
@@ -16,7 +16,7 @@ from email import _header_value_parser as parser
class Address:
def __init__(self, display_name='', username='', domain='', addr_spec=None):
- """Create an object represeting a full email address.
+ """Create an object representing a full email address.
An address can have a 'display_name', a 'username', and a 'domain'. In
addition to specifying the username and domain separately, they may be
@@ -81,7 +81,8 @@ class Address:
return lp
def __repr__(self):
- return "Address(display_name={!r}, username={!r}, domain={!r})".format(
+ return "{}(display_name={!r}, username={!r}, domain={!r})".format(
+ self.__class__.__name__,
self.display_name, self.username, self.domain)
def __str__(self):
@@ -108,7 +109,7 @@ class Group:
def __init__(self, display_name=None, addresses=None):
"""Create an object representing an address group.
- An address group consists of a display_name followed by colon and an
+ An address group consists of a display_name followed by colon and a
list of addresses (see Address) terminated by a semi-colon. The Group
is created by specifying a display_name and a possibly empty list of
Address objects. A Group can also be used to represent a single
@@ -132,7 +133,8 @@ class Group:
return self._addresses
def __repr__(self):
- return "Group(display_name={!r}, addresses={!r}".format(
+ return "{}(display_name={!r}, addresses={!r}".format(
+ self.__class__.__name__,
self.display_name, self.addresses)
def __str__(self):
diff --git a/Lib/email/message.py b/Lib/email/message.py
index 2f37dbb..aefaf57 100644
--- a/Lib/email/message.py
+++ b/Lib/email/message.py
@@ -710,7 +710,7 @@ class Message:
message, it will be set to "text/plain" and the new parameter and
value will be appended as per RFC 2045.
- An alternate header can specified in the header argument, and all
+ An alternate header can be specified in the header argument, and all
parameters will be quoted as necessary unless requote is False.
If charset is specified, the parameter will be encoded according to RFC
@@ -927,20 +927,21 @@ class Message:
"""
return [part.get_content_charset(failobj) for part in self.walk()]
+ def get_content_disposition(self):
+ """Return the message's content-disposition if it exists, or None.
+
+ The return value can be either 'inline', 'attachment' or None,
+ according to RFC 2183.
+ """
+ value = self.get('content-disposition')
+ if value is None:
+ return None
+ c_d = _splitparam(value)[0].lower()
+ return c_d
+
# I.e. def walk(self): ...
from email.iterators import walk
-# XXX Support for temporary deprecation hack for is_attachment property.
-class _IsAttachment:
- def __init__(self, value):
- self.value = value
- def __call__(self):
- return self.value
- def __bool__(self):
- warnings.warn("is_attachment will be a method, not a property, in 3.5",
- DeprecationWarning,
- stacklevel=3)
- return self.value
class MIMEPart(Message):
@@ -950,12 +951,9 @@ class MIMEPart(Message):
policy = default
Message.__init__(self, policy)
- @property
def is_attachment(self):
c_d = self.get('content-disposition')
- result = False if c_d is None else c_d.content_disposition == 'attachment'
- # XXX transitional hack to raise deprecation if not called.
- return _IsAttachment(result)
+ return False if c_d is None else c_d.content_disposition == 'attachment'
def _find_body(self, part, preferencelist):
if part.is_attachment():
diff --git a/Lib/email/mime/text.py b/Lib/email/mime/text.py
index ec18b85..479928e 100644
--- a/Lib/email/mime/text.py
+++ b/Lib/email/mime/text.py
@@ -6,6 +6,7 @@
__all__ = ['MIMEText']
+from email.charset import Charset
from email.mime.nonmultipart import MIMENonMultipart
@@ -34,6 +35,8 @@ class MIMEText(MIMENonMultipart):
_charset = 'us-ascii'
except UnicodeEncodeError:
_charset = 'utf-8'
+ if isinstance(_charset, Charset):
+ _charset = str(_charset)
MIMENonMultipart.__init__(self, 'text', _subtype,
**{'charset': _charset})
diff --git a/Lib/email/parser.py b/Lib/email/parser.py
index 8c9bc9e..555b172 100644
--- a/Lib/email/parser.py
+++ b/Lib/email/parser.py
@@ -23,7 +23,7 @@ class Parser:
textual representation of the message.
The string must be formatted as a block of RFC 2822 headers and header
- continuation lines, optionally preceeded by a `Unix-from' header. The
+ continuation lines, optionally preceded by a `Unix-from' header. The
header block is terminated either by the end of the string or by a
blank line.
@@ -87,7 +87,7 @@ class BytesParser:
textual representation of the message.
The input must be formatted as a block of RFC 2822 headers and header
- continuation lines, optionally preceeded by a `Unix-from' header. The
+ continuation lines, optionally preceded by a `Unix-from' header. The
header block is terminated either by the end of the input or by a
blank line.
diff --git a/Lib/email/policy.py b/Lib/email/policy.py
index f0b20f4..6ac64a5 100644
--- a/Lib/email/policy.py
+++ b/Lib/email/policy.py
@@ -35,6 +35,13 @@ class EmailPolicy(Policy):
In addition to the settable attributes listed above that apply to
all Policies, this policy adds the following additional attributes:
+ utf8 -- if False (the default) message headers will be
+ serialized as ASCII, using encoded words to encode
+ any non-ASCII characters in the source strings. If
+ True, the message headers will be serialized using
+ utf8 and will not contain encoded words (see RFC
+ 6532 for more on this serialization format).
+
refold_source -- if the value for a header in the Message object
came from the parsing of some source, this attribute
indicates whether or not a generator should refold
@@ -72,6 +79,7 @@ class EmailPolicy(Policy):
"""
+ utf8 = False
refold_source = 'long'
header_factory = HeaderRegistry()
content_manager = raw_data_manager
@@ -175,9 +183,13 @@ class EmailPolicy(Policy):
refold_header setting, since there is no way to know whether the binary
data consists of single byte characters or multibyte characters.
+ If utf8 is true, headers are encoded to utf8, otherwise to ascii with
+ non-ASCII unicode rendered as encoded words.
+
"""
folded = self._fold(name, value, refold_binary=self.cte_type=='7bit')
- return folded.encode('ascii', 'surrogateescape')
+ charset = 'utf8' if self.utf8 else 'ascii'
+ return folded.encode(charset, 'surrogateescape')
def _fold(self, name, value, refold_binary=False):
if hasattr(value, 'name'):
@@ -199,3 +211,4 @@ del default.header_factory
strict = default.clone(raise_on_defect=True)
SMTP = default.clone(linesep='\r\n')
HTTP = default.clone(linesep='\r\n', max_line_length=None)
+SMTPUTF8 = SMTP.clone(utf8=True)
diff --git a/Lib/email/utils.py b/Lib/email/utils.py
index 5080d81..a759d23 100644
--- a/Lib/email/utils.py
+++ b/Lib/email/utils.py
@@ -87,7 +87,7 @@ def formataddr(pair, charset='utf-8'):
'utf-8'.
"""
name, address = pair
- # The address MUST (per RFC) be ascii, so raise an UnicodeError if it isn't.
+ # The address MUST (per RFC) be ascii, so raise a UnicodeError if it isn't.
address.encode('ascii')
if name:
try: