summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: R David Murray <rdmurray@bitdance.com> 2014-02-07 20:04:26 (GMT)
committer: R David Murray <rdmurray@bitdance.com> 2014-02-07 20:04:26 (GMT)
commit: c489e83432ef29c9c2a638c4ca290b308e5921e4 (patch)
tree: 37b9499524794f5aba72cc3d4a5df59d09306609
parent: f1e953364ca4ee2715cbeaf85cc67a445770951e (diff)
parent: 1e949890f618867b7eabc1c08873611e960f5d03 (diff)
download: cpython-c489e83432ef29c9c2a638c4ca290b308e5921e4.zip
cpython-c489e83432ef29c9c2a638c4ca290b308e5921e4.tar.gz
cpython-c489e83432ef29c9c2a638c4ca290b308e5921e4.tar.bz2
Merge: #17369: Improve handling of broken RFC2231 values in get_filename.
-rw-r--r-- Lib/email/utils.py 4
-rw-r--r-- Lib/test/test_email/test_email.py 20
-rw-r--r-- Misc/NEWS 4
3 files changed, 28 insertions, 0 deletions
diff --git a/Lib/email/utils.py b/Lib/email/utils.py
index 25b0d56..95855d8 100644
--- a/Lib/email/utils.py
+++ b/Lib/email/utils.py
@@ -347,6 +347,10 @@ def collapse_rfc2231_value(value, errors='replace',
# object. We do not want bytes() normal utf-8 decoder, we want a straight
# interpretation of the string as character bytes.
charset, language, text = value
+ if charset is None:
+ # Issue 17369: if charset/lang is None, decode_rfc2231 couldn't parse
+ # the value, so use the fallback_charset.
+ charset = fallback_charset
rawbytes = bytes(text, 'raw-unicode-escape')
try:
return str(rawbytes, charset, errors)
diff --git a/Lib/test/test_email/test_email.py b/Lib/test/test_email/test_email.py
index ce91476..31fd83a 100644
--- a/Lib/test/test_email/test_email.py
+++ b/Lib/test/test_email/test_email.py
@@ -5052,6 +5052,26 @@ Content-Type: application/x-foo; name*0=\"Frank's\"; name*1=\" Document\"
self.assertNotIsInstance(param, tuple)
self.assertEqual(param, "Frank's Document")
+ def test_rfc2231_missing_tick(self):
+ m = '''\
+Content-Disposition: inline;
+\tfilename*0*="'This%20is%20broken";
+'''
+ msg = email.message_from_string(m)
+ self.assertEqual(
+ msg.get_filename(),
+ "'This is broken")
+
+ def test_rfc2231_missing_tick_with_encoded_non_ascii(self):
+ m = '''\
+Content-Disposition: inline;
+\tfilename*0*="'This%20is%E2broken";
+'''
+ msg = email.message_from_string(m)
+ self.assertEqual(
+ msg.get_filename(),
+ "'This is\ufffdbroken")
+
# test_headerregistry.TestContentTypeHeader.rfc2231_single_quote_in_value_with_charset_and_lang
def test_rfc2231_tick_attack_extended(self):
eq = self.assertEqual
diff --git a/Misc/NEWS b/Misc/NEWS
index 828cceb..495b292 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -24,6 +24,10 @@ Core and Builtins
Library
-------
+- Issue #17369: get_filename was raising an exception if the filename
+ parameter's RFC2231 encoding was broken in certain ways. This was
+ a regression relative to python2.
+
- Issue #20013: Some imap servers disconnect if the current mailbox is
deleted, and imaplib did not handle that case gracefully. Now it
handles the 'bye' correctly.