summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorR David Murray <rdmurray@bitdance.com>2013-02-05 15:49:49 (GMT)
committerR David Murray <rdmurray@bitdance.com>2013-02-05 15:49:49 (GMT)
commitf581b372003de0ae604c14a1f1dc2e8c36ea277b (patch)
treeed6202c27c0c8f2fb739acdfa6e3b6eaaed620d2
parent43536e9e373f395a047403831c08acedf3c5f258 (diff)
downloadcpython-f581b372003de0ae604c14a1f1dc2e8c36ea277b.zip
cpython-f581b372003de0ae604c14a1f1dc2e8c36ea277b.tar.gz
cpython-f581b372003de0ae604c14a1f1dc2e8c36ea277b.tar.bz2
#16948: Fix quopri encoding of non-latin1 character sets.
-rw-r--r--Lib/email/charset.py13
-rw-r--r--Lib/email/test/test_email.py21
-rw-r--r--Misc/NEWS4
3 files changed, 38 insertions, 0 deletions
diff --git a/Lib/email/charset.py b/Lib/email/charset.py
index f22be2c..c106649 100644
--- a/Lib/email/charset.py
+++ b/Lib/email/charset.py
@@ -392,6 +392,19 @@ class Charset:
string = string.encode(self.output_charset)
return email.base64mime.body_encode(string)
elif self.body_encoding is QP:
+ # quoprimime.body_encode takes a string, but operates on it as if
+ # it were a list of byte codes. For a (minimal) history on why
+ # this is so, see changeset 0cf700464177. To correctly encode a
+ # character set, then, we must turn it into pseudo bytes via the
+ # latin1 charset, which will encode any byte as a single code point
+ # between 0 and 255, which is what body_encode is expecting.
+ #
+ # Note that this clause doesn't handle the case of a _payload that
+ # is already bytes. It never did, and the semantics of _payload
+ # being bytes have never been nailed down, so fixing that is a
+ # longer term TODO.
+ if isinstance(string, str):
+ string = string.encode(self.output_charset).decode('latin1')
return email.quoprimime.body_encode(string)
else:
if isinstance(string, str):
diff --git a/Lib/email/test/test_email.py b/Lib/email/test/test_email.py
index 352b9b1..2fa4aa8 100644
--- a/Lib/email/test/test_email.py
+++ b/Lib/email/test/test_email.py
@@ -670,6 +670,27 @@ class TestEncoders(unittest.TestCase):
msg = MIMEText('文', _charset='euc-jp')
eq(msg['content-transfer-encoding'], '7bit')
+ def test_qp_encode_latin1(self):
+ msg = MIMEText('\xe1\xf6\n', 'text', 'ISO-8859-1')
+ self.assertEqual(str(msg), textwrap.dedent("""\
+ MIME-Version: 1.0
+ Content-Type: text/text; charset="iso-8859-1"
+ Content-Transfer-Encoding: quoted-printable
+
+ =E1=F6
+ """))
+
+ def test_qp_encode_non_latin1(self):
+ # Issue 16948
+ msg = MIMEText('\u017c\n', 'text', 'ISO-8859-2')
+ self.assertEqual(str(msg), textwrap.dedent("""\
+ MIME-Version: 1.0
+ Content-Type: text/text; charset="iso-8859-2"
+ Content-Transfer-Encoding: quoted-printable
+
+ =BF
+ """))
+
# Test long header wrapping
class TestLongHeaders(TestEmailBase):
diff --git a/Misc/NEWS b/Misc/NEWS
index 07a33ce..b7eabd7 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -212,6 +212,10 @@ Core and Builtins
Library
-------
+
+- Issue #16948: Fix quoted-printable body encoding for non-latin1 character
+ sets in the email package.
+
- Issue #17089: Expat parser now correctly works with string input not only when
an internal XML encoding is UTF-8 or US-ASCII. It now accepts bytes and
strings larger than 2 GiB.