summaryrefslogtreecommitdiffstats
path: root/Lib/email/header.py
diff options
context:
space:
mode:
Diffstat (limited to 'Lib/email/header.py')
-rw-r--r--Lib/email/header.py29
1 files changed, 21 insertions, 8 deletions
diff --git a/Lib/email/header.py b/Lib/email/header.py
index da739d5..8c32514 100644
--- a/Lib/email/header.py
+++ b/Lib/email/header.py
@@ -17,7 +17,8 @@ import email.quoprimime
import email.base64mime
from email.errors import HeaderParseError
-from email.charset import Charset
+from email import charset as _charset
+Charset = _charset.Charset
NL = '\n'
SPACE = ' '
@@ -65,7 +66,7 @@ def decode_header(header):
otherwise a lower-case string containing the name of the character set
specified in the encoded string.
- An email.Errors.HeaderParseError may be raised when certain decoding error
+ An email.errors.HeaderParseError may be raised when certain decoding error
occurs (e.g. a base64 decoding exception).
"""
# If no encoding, just return the header with no charset.
@@ -214,6 +215,9 @@ class Header:
# from a charset to None/us-ascii, or from None/us-ascii to a
# charset. Only do this for the second and subsequent chunks.
nextcs = charset
+ if nextcs == _charset.UNKNOWN8BIT:
+ original_bytes = string.encode('ascii', 'surrogateescape')
+ string = original_bytes.decode('ascii', 'replace')
if uchunks:
if lastcs not in (None, 'us-ascii'):
if nextcs in (None, 'us-ascii'):
@@ -267,11 +271,12 @@ class Header:
# Ensure that the bytes we're storing can be decoded to the output
# character set, otherwise an early error is thrown.
output_charset = charset.output_codec or 'us-ascii'
- s.encode(output_charset, errors)
+ if output_charset != _charset.UNKNOWN8BIT:
+ s.encode(output_charset, errors)
self._chunks.append((s, charset))
- def encode(self, splitchars=';, \t', maxlinelen=None):
- """Encode a message header into an RFC-compliant format.
+ def encode(self, splitchars=';, \t', maxlinelen=None, linesep='\n'):
+ r"""Encode a message header into an RFC-compliant format.
There are many issues involved in converting a given string for use in
an email header. Only certain character sets are readable in most
@@ -291,6 +296,11 @@ class Header:
Optional splitchars is a string containing characters to split long
ASCII lines on, in rough support of RFC 2822's `highest level
syntactic breaks'. This doesn't affect RFC 2047 encoded lines.
+
+ Optional linesep is a string to be used to separate the lines of
+ the value. The default value is the most useful for typical
+ Python applications, but it can be set to \r\n to produce RFC-compliant
+ line separators when needed.
"""
self._normalize()
if maxlinelen is None:
@@ -314,7 +324,7 @@ class Header:
if len(lines) > 1:
formatter.newline()
formatter.add_transition()
- value = str(formatter)
+ value = formatter._str(linesep)
if _embeded_header.search(value):
raise HeaderParseError("header value appears to contain "
"an embedded header: {!r}".format(value))
@@ -349,9 +359,12 @@ class _ValueFormatter:
self._lines = []
self._current_line = _Accumulator(headerlen)
- def __str__(self):
+ def _str(self, linesep):
self.newline()
- return NL.join(self._lines)
+ return linesep.join(self._lines)
+
+ def __str__(self):
+ return self._str(NL)
def newline(self):
end_of_line = self._current_line.pop()