#10686: recode non-ASCII headers to 'unknown-8bit' instead of ?s.

This applies only when generating strings from non-RFC-compliant binary input; it makes the existing recoding behavior more consistent (i.e., no data is lost when recoding anymore).
author: R. David Murray <rdmurray@bitdance.com> 2011-01-07 23:25:30 (GMT)
committer: R. David Murray <rdmurray@bitdance.com> 2011-01-07 23:25:30 (GMT)
commit: 9253214fd9fe22b8b2b4ca5bb28952df8cab3e8c (patch)
tree: 30d925a75c0b3bd542c00d6dbd667e72178056a7 /Lib/email/header.py
parent: 6f0022d84af15d51ffa1606991f2b6e9e56448ed (diff)
download: cpython-9253214fd9fe22b8b2b4ca5bb28952df8cab3e8c.zip
cpython-9253214fd9fe22b8b2b4ca5bb28952df8cab3e8c.tar.gz
cpython-9253214fd9fe22b8b2b4ca5bb28952df8cab3e8c.tar.bz2
1 files changed, 7 insertions, 2 deletions
diff --git a/Lib/email/header.py b/Lib/email/header.py
index d462bf0..f90883f 100644
--- a/Lib/email/header.py
+++ b/Lib/email/header.py
@@ -17,7 +17,8 @@ import email.quoprimime
 import email.base64mime
 
 from email.errors import HeaderParseError
-from email.charset import Charset
+from email import charset as _charset
+Charset = _charset.Charset
 
 NL = '\n'
 SPACE = ' '
@@ -210,6 +211,9 @@ class Header:
             # from a charset to None/us-ascii, or from None/us-ascii to a
             # charset.  Only do this for the second and subsequent chunks.
             nextcs = charset
+            if nextcs == _charset.UNKNOWN8BIT:
+                original_bytes = string.encode('ascii', 'surrogateescape')
+                string = original_bytes.decode('ascii', 'replace')
             if uchunks:
                 if lastcs not in (None, 'us-ascii'):
                     if nextcs in (None, 'us-ascii'):
@@ -263,7 +267,8 @@ class Header:
         # Ensure that the bytes we're storing can be decoded to the output
         # character set, otherwise an early error is thrown.
         output_charset = charset.output_codec or 'us-ascii'
-        s.encode(output_charset, errors)
+        if output_charset != _charset.UNKNOWN8BIT:
+            s.encode(output_charset, errors)
         self._chunks.append((s, charset))
 
     def encode(self, splitchars=';, \t', maxlinelen=None, linesep='\n'):
author	R. David Murray <rdmurray@bitdance.com>	2011-01-07 23:25:30 (GMT)
committer	R. David Murray <rdmurray@bitdance.com>	2011-01-07 23:25:30 (GMT)
commit	9253214fd9fe22b8b2b4ca5bb28952df8cab3e8c (patch)
tree	30d925a75c0b3bd542c00d6dbd667e72178056a7 /Lib/email/header.py
parent	6f0022d84af15d51ffa1606991f2b6e9e56448ed (diff)
download	cpython-9253214fd9fe22b8b2b4ca5bb28952df8cab3e8c.zip cpython-9253214fd9fe22b8b2b4ca5bb28952df8cab3e8c.tar.gz cpython-9253214fd9fe22b8b2b4ca5bb28952df8cab3e8c.tar.bz2