summaryrefslogtreecommitdiffstats
path: root/Lib/email/header.py
diff options
context:
space:
mode:
author R. David Murray <rdmurray@bitdance.com> 2011-01-07 23:25:30 (GMT)
committer R. David Murray <rdmurray@bitdance.com> 2011-01-07 23:25:30 (GMT)
commit 9253214fd9fe22b8b2b4ca5bb28952df8cab3e8c (patch)
tree 30d925a75c0b3bd542c00d6dbd667e72178056a7 /Lib/email/header.py
parent 6f0022d84af15d51ffa1606991f2b6e9e56448ed (diff)
downloadcpython-9253214fd9fe22b8b2b4ca5bb28952df8cab3e8c.zip
cpython-9253214fd9fe22b8b2b4ca5bb28952df8cab3e8c.tar.gz
cpython-9253214fd9fe22b8b2b4ca5bb28952df8cab3e8c.tar.bz2
#10686: recode non-ASCII headers to 'unknown-8bit' instead of replacing the non-ASCII bytes with '?' characters.
This applies only when generating strings from non-RFC-compliant binary input; it makes the existing recoding behavior more consistent (i.e., no data is lost when recoding any more).
Diffstat (limited to 'Lib/email/header.py')
-rw-r--r-- Lib/email/header.py 9
1 file changed, 7 insertions, 2 deletions
diff --git a/Lib/email/header.py b/Lib/email/header.py
index d462bf0..f90883f 100644
--- a/Lib/email/header.py
+++ b/Lib/email/header.py
@@ -17,7 +17,8 @@ import email.quoprimime
import email.base64mime
from email.errors import HeaderParseError
-from email.charset import Charset
+from email import charset as _charset
+Charset = _charset.Charset
NL = '\n'
SPACE = ' '
@@ -210,6 +211,9 @@ class Header:
# from a charset to None/us-ascii, or from None/us-ascii to a
# charset. Only do this for the second and subsequent chunks.
nextcs = charset
+ if nextcs == _charset.UNKNOWN8BIT:
+ original_bytes = string.encode('ascii', 'surrogateescape')
+ string = original_bytes.decode('ascii', 'replace')
if uchunks:
if lastcs not in (None, 'us-ascii'):
if nextcs in (None, 'us-ascii'):
@@ -263,7 +267,8 @@ class Header:
# Ensure that the bytes we're storing can be decoded to the output
# character set, otherwise an early error is thrown.
output_charset = charset.output_codec or 'us-ascii'
- s.encode(output_charset, errors)
+ if output_charset != _charset.UNKNOWN8BIT:
+ s.encode(output_charset, errors)
self._chunks.append((s, charset))
def encode(self, splitchars=';, \t', maxlinelen=None, linesep='\n'):