diff options
Diffstat (limited to 'Lib/email/header.py')
-rw-r--r-- | Lib/email/header.py | 29 |
1 files changed, 21 insertions, 8 deletions
diff --git a/Lib/email/header.py b/Lib/email/header.py index da739d5..8c32514 100644 --- a/Lib/email/header.py +++ b/Lib/email/header.py @@ -17,7 +17,8 @@ import email.quoprimime import email.base64mime from email.errors import HeaderParseError -from email.charset import Charset +from email import charset as _charset +Charset = _charset.Charset NL = '\n' SPACE = ' ' @@ -65,7 +66,7 @@ def decode_header(header): otherwise a lower-case string containing the name of the character set specified in the encoded string. - An email.Errors.HeaderParseError may be raised when certain decoding error + An email.errors.HeaderParseError may be raised when certain decoding error occurs (e.g. a base64 decoding exception). """ # If no encoding, just return the header with no charset. @@ -214,6 +215,9 @@ class Header: # from a charset to None/us-ascii, or from None/us-ascii to a # charset. Only do this for the second and subsequent chunks. nextcs = charset + if nextcs == _charset.UNKNOWN8BIT: + original_bytes = string.encode('ascii', 'surrogateescape') + string = original_bytes.decode('ascii', 'replace') if uchunks: if lastcs not in (None, 'us-ascii'): if nextcs in (None, 'us-ascii'): @@ -267,11 +271,12 @@ class Header: # Ensure that the bytes we're storing can be decoded to the output # character set, otherwise an early error is thrown. output_charset = charset.output_codec or 'us-ascii' - s.encode(output_charset, errors) + if output_charset != _charset.UNKNOWN8BIT: + s.encode(output_charset, errors) self._chunks.append((s, charset)) - def encode(self, splitchars=';, \t', maxlinelen=None): - """Encode a message header into an RFC-compliant format. + def encode(self, splitchars=';, \t', maxlinelen=None, linesep='\n'): + r"""Encode a message header into an RFC-compliant format. There are many issues involved in converting a given string for use in an email header. Only certain character sets are readable in most @@ -291,6 +296,11 @@ class Header: Optional splitchars is a string containing characters to split long ASCII lines on, in rough support of RFC 2822's `highest level syntactic breaks'. This doesn't affect RFC 2047 encoded lines. + + Optional linesep is a string to be used to separate the lines of + the value. The default value is the most useful for typical + Python applications, but it can be set to \r\n to produce RFC-compliant + line separators when needed. """ self._normalize() if maxlinelen is None: @@ -314,7 +324,7 @@ class Header: if len(lines) > 1: formatter.newline() formatter.add_transition() - value = str(formatter) + value = formatter._str(linesep) if _embeded_header.search(value): raise HeaderParseError("header value appears to contain " "an embedded header: {!r}".format(value)) @@ -349,9 +359,12 @@ class _ValueFormatter: self._lines = [] self._current_line = _Accumulator(headerlen) - def __str__(self): + def _str(self, linesep): self.newline() - return NL.join(self._lines) + return linesep.join(self._lines) + + def __str__(self): + return self._str(NL) def newline(self): end_of_line = self._current_line.pop() |