diff options
author | Barry Warsaw <barry@python.org> | 2002-10-14 15:13:17 (GMT) |
---|---|---|
committer | Barry Warsaw <barry@python.org> | 2002-10-14 15:13:17 (GMT) |
commit | 5e3bcff651f77bd7504751a581b4db7d4b937cac (patch) | |
tree | 69fb9946c3d51ae9534211e9f99d6d2d965bd9ca | |
parent | 6c2bc4635539765dc267094b95e0ef8f0ce9053a (diff) | |
download | cpython-5e3bcff651f77bd7504751a581b4db7d4b937cac.zip cpython-5e3bcff651f77bd7504751a581b4db7d4b937cac.tar.gz cpython-5e3bcff651f77bd7504751a581b4db7d4b937cac.tar.bz2 |
__init__(): Restore the invariant that the charset item in a chunk tuple
must be a Charset instance, not a string. The bug was that self._charset
was not being converted to a Charset instance, so later .append() calls
that fell back to the default charset would break.
_split(): If the charset of the chunk is '8bit', return the chunk
unchanged. We can't safely split it, so this is the avenue of least
harm.
-rw-r--r-- | Lib/email/Header.py | 13 |
1 files changed, 11 insertions, 2 deletions
diff --git a/Lib/email/Header.py b/Lib/email/Header.py
index 378b3dd..9bbc32f 100644
--- a/Lib/email/Header.py
+++ b/Lib/email/Header.py
@@ -153,6 +153,8 @@ class Header:
         """
         if charset is None:
             charset = USASCII
+        if not isinstance(charset, Charset):
+            charset = Charset(charset)
         self._charset = charset
         self._continuation_ws = continuation_ws
         cws_expanded_len = len(continuation_ws.replace('\t', SPACE8))
@@ -233,14 +235,21 @@ class Header:
         self._chunks.append((s, charset))
 
     def _split(self, s, charset, firstline=False):
-        # Split up a header safely for use with encode_chunks. BAW: this
-        # appears to be a private convenience method.
+        # Split up a header safely for use with encode_chunks.
         splittable = charset.to_splittable(s)
         encoded = charset.from_splittable(splittable)
         elen = charset.encoded_header_len(encoded)
 
         if elen <= self._maxlinelen:
             return [(encoded, charset)]
+        # If we have undetermined raw 8bit characters sitting in a byte
+        # string, we really don't know what the right thing to do is. We
+        # can't really split it because it might be multibyte data which we
+        # could break if we split it between pairs. The least harm seems to
+        # be to not split the header at all, but that means they could go out
+        # longer than maxlinelen.
+        elif charset == '8bit':
+            return [(s, charset)]
         # BAW: I'm not sure what the right test here is. What we're trying to
         # do is be faithful to RFC 2822's recommendation that ($2.2.3):
         #