From 1e949890f618867b7eabc1c08873611e960f5d03 Mon Sep 17 00:00:00 2001 From: R David Murray Date: Fri, 7 Feb 2014 15:02:19 -0500 Subject: #17369: Improve handling of broken RFC2231 values in get_filename. This fixes a regression relative to python2. --- Lib/email/utils.py | 4 ++++ Lib/test/test_email/test_email.py | 20 ++++++++++++++++++++ Misc/NEWS | 4 ++++ 3 files changed, 28 insertions(+) diff --git a/Lib/email/utils.py b/Lib/email/utils.py index 93a625c..f76c21e 100644 --- a/Lib/email/utils.py +++ b/Lib/email/utils.py @@ -337,6 +337,10 @@ def collapse_rfc2231_value(value, errors='replace', # object. We do not want bytes() normal utf-8 decoder, we want a straight # interpretation of the string as character bytes. charset, language, text = value + if charset is None: + # Issue 17369: if charset/lang is None, decode_rfc2231 couldn't parse + # the value, so use the fallback_charset. + charset = fallback_charset rawbytes = bytes(text, 'raw-unicode-escape') try: return str(rawbytes, charset, errors) diff --git a/Lib/test/test_email/test_email.py b/Lib/test/test_email/test_email.py index c787695..4157a06 100644 --- a/Lib/test/test_email/test_email.py +++ b/Lib/test/test_email/test_email.py @@ -5018,6 +5018,26 @@ Content-Type: application/x-foo; name*0=\"Frank's\"; name*1=\" Document\" self.assertNotIsInstance(param, tuple) self.assertEqual(param, "Frank's Document") + def test_rfc2231_missing_tick(self): + m = '''\ +Content-Disposition: inline; +\tfilename*0*="'This%20is%20broken"; +''' + msg = email.message_from_string(m) + self.assertEqual( + msg.get_filename(), + "'This is broken") + + def test_rfc2231_missing_tick_with_encoded_non_ascii(self): + m = '''\ +Content-Disposition: inline; +\tfilename*0*="'This%20is%E2broken"; +''' + msg = email.message_from_string(m) + self.assertEqual( + msg.get_filename(), + "'This is\ufffdbroken") + # test_headerregistry.TestContentTypeHeader.rfc2231_single_quote_in_value_with_charset_and_lang def 
test_rfc2231_tick_attack_extended(self): eq = self.assertEqual diff --git a/Misc/NEWS b/Misc/NEWS index bf7781f..e663bfe 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -45,6 +45,10 @@ Core and Builtins Library ------- +- Issue #17369: get_filename was raising an exception if the filename + parameter's RFC2231 encoding was broken in certain ways. This was + a regression relative to Python 2. + - Issue #20013: Some imap servers disconnect if the current mailbox is deleted, and imaplib did not handle that case gracefully. Now it handles the 'bye' correctly. -- cgit v0.12