From e5e366c856b677d568b4ee35194ed9b8c7d4bbb4 Mon Sep 17 00:00:00 2001 From: R David Murray Date: Sat, 18 Jun 2011 12:57:28 -0400 Subject: #11584: make Header and make_header handle binary unknown-8bit input Analogous to the decode_header fix, this fix makes Header.append and make_header correctly handle the unknown-8bit charset introduced by email5.1, when the input to them is binary strings. Previous to this fix the make_header(decode_header(x)) == x invariant was broken in the face of the unknown-8bit charset. --- Lib/email/header.py | 5 ++++- Lib/email/test/test_email.py | 15 +++++++++++++++ Misc/NEWS | 3 ++- 3 files changed, 21 insertions(+), 2 deletions(-) diff --git a/Lib/email/header.py b/Lib/email/header.py index 0670885..2e687b7 100644 --- a/Lib/email/header.py +++ b/Lib/email/header.py @@ -275,7 +275,10 @@ class Header: charset = Charset(charset) if not isinstance(s, str): input_charset = charset.input_codec or 'us-ascii' - s = s.decode(input_charset, errors) + if input_charset == _charset.UNKNOWN8BIT: + s = s.decode('us-ascii', 'surrogateescape') + else: + s = s.decode(input_charset, errors) # Ensure that the bytes we're storing can be decoded to the output # character set, otherwise an early error is thrown. output_charset = charset.output_codec or 'us-ascii' diff --git a/Lib/email/test/test_email.py b/Lib/email/test/test_email.py index 97a1e86..102e15b 100644 --- a/Lib/email/test/test_email.py +++ b/Lib/email/test/test_email.py @@ -4182,6 +4182,21 @@ A very long line that must get split to something other than at the 'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big') self.assertEqual(email.header.decode_header(h), [(x, 'unknown-8bit')]) + def test_header_handles_binary_unknown8bit(self): + x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big' + h = Header(x, charset=email.charset.UNKNOWN8BIT) + self.assertEqual(str(h), + 'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big') + self.assertEqual(email.header.decode_header(h), [(x, 'unknown-8bit')]) + + def test_make_header_handles_binary_unknown8bit(self): + x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big' + h = Header(x, charset=email.charset.UNKNOWN8BIT) + h2 = email.header.make_header(email.header.decode_header(h)) + self.assertEqual(str(h2), + 'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big') + self.assertEqual(email.header.decode_header(h2), [(x, 'unknown-8bit')]) + def test_modify_returned_list_does_not_change_header(self): h = Header('test') chunks = email.header.decode_header(h) diff --git a/Misc/NEWS b/Misc/NEWS index 9c3f693..0ec55b6 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -26,7 +26,8 @@ Library ------- - Issue #11584: email.header.decode_header no longer fails if the header - passed to it is a Header object. + passed to it is a Header object, and Header/make_header no longer fail + if given binary unknown-8bit input. - Issue #11700: mailbox proxy object close methods can now be called multiple times without error. -- cgit v0.12