__init__(): Fix an invariant: the charset item in a chunk tuple must be a Charset instance, not a string. The bug here was that self._charset wasn't being converted to a Charset instance, so later .append() calls which used the default charset would break.

_split(): If the charset of the chunk is '8bit', return the chunk unchanged. We can't safely split it, so this is the avenue of least harm.
author: Barry Warsaw <barry@python.org> 2002-10-14 15:13:17 (GMT)
committer: Barry Warsaw <barry@python.org> 2002-10-14 15:13:17 (GMT)
commit: 5e3bcff651f77bd7504751a581b4db7d4b937cac (patch)
tree: 69fb9946c3d51ae9534211e9f99d6d2d965bd9ca
parent: 6c2bc4635539765dc267094b95e0ef8f0ce9053a (diff)
download: cpython-5e3bcff651f77bd7504751a581b4db7d4b937cac.zip
cpython-5e3bcff651f77bd7504751a581b4db7d4b937cac.tar.gz
cpython-5e3bcff651f77bd7504751a581b4db7d4b937cac.tar.bz2
1 files changed, 11 insertions, 2 deletions
diff --git a/Lib/email/Header.py b/Lib/email/Header.py
index 378b3dd..9bbc32f 100644
--- a/Lib/email/Header.py
+++ b/Lib/email/Header.py
@@ -153,6 +153,8 @@ class Header:
         """
         if charset is None:
             charset = USASCII
+        if not isinstance(charset, Charset):
+            charset = Charset(charset)
         self._charset = charset
         self._continuation_ws = continuation_ws
         cws_expanded_len = len(continuation_ws.replace('\t', SPACE8))
@@ -233,14 +235,21 @@ class Header:
         self._chunks.append((s, charset))
 
     def _split(self, s, charset, firstline=False):
-        # Split up a header safely for use with encode_chunks.  BAW: this
-        # appears to be a private convenience method.
+        # Split up a header safely for use with encode_chunks.
         splittable = charset.to_splittable(s)
         encoded = charset.from_splittable(splittable)
         elen = charset.encoded_header_len(encoded)
 
         if elen <= self._maxlinelen:
             return [(encoded, charset)]
+        # If we have undetermined raw 8bit characters sitting in a byte
+        # string, we really don't know what the right thing to do is.  We
+        # can't really split it because it might be multibyte data which we
+        # could break if we split it between pairs.  The least harm seems to
+        # be to not split the header at all, but that means they could go out
+        # longer than maxlinelen.
+        elif charset == '8bit':
+            return [(s, charset)]
         # BAW: I'm not sure what the right test here is.  What we're trying to
         # do is be faithful to RFC 2822's recommendation that ($2.2.3):
         #
author	Barry Warsaw <barry@python.org>	2002-10-14 15:13:17 (GMT)
committer	Barry Warsaw <barry@python.org>	2002-10-14 15:13:17 (GMT)
commit	5e3bcff651f77bd7504751a581b4db7d4b937cac (patch)
tree	69fb9946c3d51ae9534211e9f99d6d2d965bd9ca
parent	6c2bc4635539765dc267094b95e0ef8f0ce9053a (diff)
download	cpython-5e3bcff651f77bd7504751a581b4db7d4b937cac.zip cpython-5e3bcff651f77bd7504751a581b4db7d4b937cac.tar.gz cpython-5e3bcff651f77bd7504751a581b4db7d4b937cac.tar.bz2