diff options
author | R. David Murray <rdmurray@bitdance.com> | 2011-01-07 23:25:30 (GMT) |
---|---|---|
committer | R. David Murray <rdmurray@bitdance.com> | 2011-01-07 23:25:30 (GMT) |
commit | 9253214fd9fe22b8b2b4ca5bb28952df8cab3e8c (patch) | |
tree | 30d925a75c0b3bd542c00d6dbd667e72178056a7 /Lib/email/header.py | |
parent | 6f0022d84af15d51ffa1606991f2b6e9e56448ed (diff) | |
download | cpython-9253214fd9fe22b8b2b4ca5bb28952df8cab3e8c.zip cpython-9253214fd9fe22b8b2b4ca5bb28952df8cab3e8c.tar.gz cpython-9253214fd9fe22b8b2b4ca5bb28952df8cab3e8c.tar.bz2 |
#10686: recode non-ASCII headers to 'unknown-8bit' instead of ?s.
This applies only when generating strings from non-RFC-compliant binary
input; it makes the existing recoding behavior more consistent (i.e.,
no data is lost when recoding anymore).
Diffstat (limited to 'Lib/email/header.py')
-rw-r--r-- | Lib/email/header.py | 9 |
1 file changed, 7 insertions, 2 deletions
diff --git a/Lib/email/header.py b/Lib/email/header.py index d462bf0..f90883f 100644 --- a/Lib/email/header.py +++ b/Lib/email/header.py @@ -17,7 +17,8 @@ import email.quoprimime import email.base64mime from email.errors import HeaderParseError -from email.charset import Charset +from email import charset as _charset +Charset = _charset.Charset NL = '\n' SPACE = ' ' @@ -210,6 +211,9 @@ class Header: # from a charset to None/us-ascii, or from None/us-ascii to a # charset. Only do this for the second and subsequent chunks. nextcs = charset + if nextcs == _charset.UNKNOWN8BIT: + original_bytes = string.encode('ascii', 'surrogateescape') + string = original_bytes.decode('ascii', 'replace') if uchunks: if lastcs not in (None, 'us-ascii'): if nextcs in (None, 'us-ascii'): @@ -263,7 +267,8 @@ class Header: # Ensure that the bytes we're storing can be decoded to the output # character set, otherwise an early error is thrown. output_charset = charset.output_codec or 'us-ascii' - s.encode(output_charset, errors) + if output_charset != _charset.UNKNOWN8BIT: + s.encode(output_charset, errors) self._chunks.append((s, charset)) def encode(self, splitchars=';, \t', maxlinelen=None, linesep='\n'): |