summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- Doc/library/email.header.rst 17
-rw-r--r-- Lib/email/header.py 26
-rw-r--r-- Lib/email/test/test_email.py 4
-rw-r--r-- Misc/NEWS 3
4 files changed, 26 insertions, 24 deletions
diff --git a/Doc/library/email.header.rst b/Doc/library/email.header.rst
index 113a6b8..808f7e5 100644
--- a/Doc/library/email.header.rst
+++ b/Doc/library/email.header.rst
@@ -94,14 +94,15 @@ Here is the :class:`Header` class description:
decoded with that character set.
If *s* is an instance of :class:`str`, then *charset* is a hint specifying
- the character set of the characters in the string. In this case, when
- producing an :rfc:`2822`\ -compliant header using :rfc:`2047` rules, the
- Unicode string will be encoded using the following charsets in order:
- ``us-ascii``, the *charset* hint, ``utf-8``. The first character set to
- not provoke a :exc:`UnicodeError` is used.
-
- Optional *errors* is passed through to any :func:`encode` or
- :func:`ustr.encode` call, and defaults to "strict".
+ the character set of the characters in the string.
+
+ In either case, when producing an :rfc:`2822`\ -compliant header using
+ :rfc:`2047` rules, the string will be encoded using the output codec of
+ the charset. If the string cannot be encoded using the output codec, a
+ :exc:`UnicodeError` will be raised.
+
+ Optional *errors* is passed as the errors argument to the decode call
+ if *s* is a byte string.
.. method:: encode(splitchars=';, \\t', maxlinelen=None, linesep='\\n')
diff --git a/Lib/email/header.py b/Lib/email/header.py
index bb2c5ee..94eb1a9 100644
--- a/Lib/email/header.py
+++ b/Lib/email/header.py
@@ -245,32 +245,26 @@ class Header:
that byte string, and a UnicodeError will be raised if the string
cannot be decoded with that charset. If s is a Unicode string, then
charset is a hint specifying the character set of the characters in
- the string. In this case, when producing an RFC 2822 compliant header
- using RFC 2047 rules, the Unicode string will be encoded using the
- following charsets in order: us-ascii, the charset hint, utf-8. The
- first character set not to provoke a UnicodeError is used.
+ the string. In either case, when producing an RFC 2822 compliant
+ header using RFC 2047 rules, the string will be encoded using the
+ output codec of the charset. If the string cannot be encoded to the
+ output codec, a UnicodeError will be raised.
- Optional `errors' is passed as the third argument to any unicode() or
- ustr.encode() call.
+ Optional `errors' is passed as the errors argument to the decode
+ call if s is a byte string.
"""
if charset is None:
charset = self._charset
elif not isinstance(charset, Charset):
charset = Charset(charset)
- if isinstance(s, str):
- # Convert the string from the input character set to the output
- # character set and store the resulting bytes and the charset for
- # composition later.
+ if not isinstance(s, str):
input_charset = charset.input_codec or 'us-ascii'
- input_bytes = s.encode(input_charset, errors)
- else:
- # We already have the bytes we will store internally.
- input_bytes = s
+ s = s.decode(input_charset, errors)
# Ensure that the string we're storing can be encoded to the output
# character set, otherwise an early error is raised.
output_charset = charset.output_codec or 'us-ascii'
- output_string = input_bytes.decode(output_charset, errors)
- self._chunks.append((output_string, charset))
+ s.encode(output_charset, errors)
+ self._chunks.append((s, charset))
def encode(self, splitchars=';, \t', maxlinelen=None, linesep='\n'):
"""Encode a message header into an RFC-compliant format.
diff --git a/Lib/email/test/test_email.py b/Lib/email/test/test_email.py
index 73ac347..e7fcee3 100644
--- a/Lib/email/test/test_email.py
+++ b/Lib/email/test/test_email.py
@@ -3620,6 +3620,10 @@ A very long line that must get split to something other than at the
s = 'Subject: =?EUC-KR?B?CSixpLDtKSC/7Liuvsax4iC6uLmwMcijIKHaILzSwd/H0SC8+LCjwLsgv7W/+Mj3I ?='
raises(errors.HeaderParseError, decode_header, s)
+ def test_shift_jis_charset(self):
+ h = Header('文', charset='shift_jis')
+ self.assertEqual(h.encode(), '=?iso-2022-jp?b?GyRCSjgbKEI=?=')
+
# Test RFC 2231 header parameters (en/de)coding
diff --git a/Misc/NEWS b/Misc/NEWS
index 5dd389c..7ce7445 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -30,6 +30,9 @@ Core and Builtins
Library
-------
+- Issue #10790: email.header.Header.append's charset logic now works correctly
+ for charsets whose output codec is different from their input codec.
+
- Issue #10819: SocketIO.name property returns -1 when it's closed, instead of
raising a ValueError, to fix repr().