diff options
author | R David Murray <rdmurray@bitdance.com> | 2014-02-07 20:02:19 (GMT) |
---|---|---|
committer | R David Murray <rdmurray@bitdance.com> | 2014-02-07 20:02:19 (GMT) |
commit | 1e949890f618867b7eabc1c08873611e960f5d03 (patch) | |
tree | 04554726253eecaf972503d7f8660a93a2539f98 | |
parent | bd3a11ba34ff57e1a376d6a6eff2e636bdee2160 (diff) | |
download | cpython-1e949890f618867b7eabc1c08873611e960f5d03.zip cpython-1e949890f618867b7eabc1c08873611e960f5d03.tar.gz cpython-1e949890f618867b7eabc1c08873611e960f5d03.tar.bz2 |
#17369: Improve handling of broken RFC2231 values in get_filename.
This fixes a regression relative to Python 2.
-rw-r--r-- | Lib/email/utils.py | 4 | ||||
-rw-r--r-- | Lib/test/test_email/test_email.py | 20 | ||||
-rw-r--r-- | Misc/NEWS | 4 |
3 files changed, 28 insertions, 0 deletions
diff --git a/Lib/email/utils.py b/Lib/email/utils.py index 93a625c..f76c21e 100644 --- a/Lib/email/utils.py +++ b/Lib/email/utils.py @@ -337,6 +337,10 @@ def collapse_rfc2231_value(value, errors='replace', # object. We do not want bytes() normal utf-8 decoder, we want a straight # interpretation of the string as character bytes. charset, language, text = value + if charset is None: + # Issue 17369: if charset/lang is None, decode_rfc2231 couldn't parse + # the value, so use the fallback_charset. + charset = fallback_charset rawbytes = bytes(text, 'raw-unicode-escape') try: return str(rawbytes, charset, errors) diff --git a/Lib/test/test_email/test_email.py b/Lib/test/test_email/test_email.py index c787695..4157a06 100644 --- a/Lib/test/test_email/test_email.py +++ b/Lib/test/test_email/test_email.py @@ -5018,6 +5018,26 @@ Content-Type: application/x-foo; name*0=\"Frank's\"; name*1=\" Document\" self.assertNotIsInstance(param, tuple) self.assertEqual(param, "Frank's Document") + def test_rfc2231_missing_tick(self): + m = '''\ +Content-Disposition: inline; +\tfilename*0*="'This%20is%20broken"; +''' + msg = email.message_from_string(m) + self.assertEqual( + msg.get_filename(), + "'This is broken") + + def test_rfc2231_missing_tick_with_encoded_non_ascii(self): + m = '''\ +Content-Disposition: inline; +\tfilename*0*="'This%20is%E2broken"; +''' + msg = email.message_from_string(m) + self.assertEqual( + msg.get_filename(), + "'This is\ufffdbroken") + # test_headerregistry.TestContentTypeHeader.rfc2231_single_quote_in_value_with_charset_and_lang def test_rfc2231_tick_attack_extended(self): eq = self.assertEqual @@ -45,6 +45,10 @@ Core and Builtins Library ------- +- Issue #17369: get_filename was raising an exception if the filename + parameter's RFC2231 encoding was broken in certain ways. This was + a regression relative to python2. + - Issue #20013: Some imap servers disconnect if the current mailbox is deleted, and imaplib did not handle that case gracefully. 
Now it handles the 'bye' correctly.