summary | refs | log | tree | commit | diff | stats
path: root/Lib
diff options
context:
space:
mode:
author: R David Murray <rdmurray@bitdance.com> 2011-06-18 17:02:42 (GMT)
committer: R David Murray <rdmurray@bitdance.com> 2011-06-18 17:02:42 (GMT)
commit: e76ff4081aeabf2da9b04c70f031e34e53933110 (patch)
tree: c5dca5bc8589411507a01161af40e557e18b5869 /Lib
parent: 7df08379c65b623df96a534fdacd7302c6a24476 (diff)
parent: e5e366c856b677d568b4ee35194ed9b8c7d4bbb4 (diff)
download: cpython-e76ff4081aeabf2da9b04c70f031e34e53933110.zip
cpython-e76ff4081aeabf2da9b04c70f031e34e53933110.tar.gz
cpython-e76ff4081aeabf2da9b04c70f031e34e53933110.tar.bz2
merge #11584: make Header and make_header handle binary unknown-8bit input
Diffstat (limited to 'Lib')
-rw-r--r-- Lib/email/header.py 5
-rw-r--r-- Lib/test/test_email/test_email.py 15
2 files changed, 19 insertions, 1 deletions
diff --git a/Lib/email/header.py b/Lib/email/header.py
index 0670885..2e687b7 100644
--- a/Lib/email/header.py
+++ b/Lib/email/header.py
@@ -275,7 +275,10 @@ class Header:
charset = Charset(charset)
if not isinstance(s, str):
input_charset = charset.input_codec or 'us-ascii'
- s = s.decode(input_charset, errors)
+ if input_charset == _charset.UNKNOWN8BIT:
+ s = s.decode('us-ascii', 'surrogateescape')
+ else:
+ s = s.decode(input_charset, errors)
# Ensure that the bytes we're storing can be decoded to the output
# character set, otherwise an early error is thrown.
output_charset = charset.output_codec or 'us-ascii'
diff --git a/Lib/test/test_email/test_email.py b/Lib/test/test_email/test_email.py
index aad565c..17451f3 100644
--- a/Lib/test/test_email/test_email.py
+++ b/Lib/test/test_email/test_email.py
@@ -4330,6 +4330,21 @@ A very long line that must get split to something other than at the
'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
self.assertEqual(email.header.decode_header(h), [(x, 'unknown-8bit')])
+ def test_header_handles_binary_unknown8bit(self):
+ x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
+ h = Header(x, charset=email.charset.UNKNOWN8BIT)
+ self.assertEqual(str(h),
+ 'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
+ self.assertEqual(email.header.decode_header(h), [(x, 'unknown-8bit')])
+
+ def test_make_header_handles_binary_unknown8bit(self):
+ x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
+ h = Header(x, charset=email.charset.UNKNOWN8BIT)
+ h2 = email.header.make_header(email.header.decode_header(h))
+ self.assertEqual(str(h2),
+ 'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
+ self.assertEqual(email.header.decode_header(h2), [(x, 'unknown-8bit')])
+
def test_modify_returned_list_does_not_change_header(self):
h = Header('test')
chunks = email.header.decode_header(h)