diff options
author | RanKKI <hliu86.me@gmail.com> | 2025-01-06 01:32:16 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2025-01-06 01:32:16 (GMT) |
commit | a62ba52f1439c1f878a3ff9b8544caf9aeef9b90 (patch) | |
tree | 1497bc9199ac370221eff8d042cdf4ff17e5d9a6 /Lib/test | |
parent | 3b231be8f000ae59faa04d5a2f1af11beafee866 (diff) | |
download | cpython-a62ba52f1439c1f878a3ff9b8544caf9aeef9b90.zip cpython-a62ba52f1439c1f878a3ff9b8544caf9aeef9b90.tar.gz cpython-a62ba52f1439c1f878a3ff9b8544caf9aeef9b90.tar.bz2 |
gh-98188: Fix EmailMessage.get_payload to decode data when CTE value has extra text (#127547)
Up to this point message handling has been very strict with regards to content encoding values: mixed case was accepted, but trailing blanks or other text would cause decoding failure, even if the first token was a valid encoding. By Postel's Rule we should go ahead and decode as long as we can recognize that first token. We have not thought of any security or backward compatibility concerns with this fix.
This fix does introduce a new technique/pattern to the Message code: we look to see if the header has a 'cte' attribute, and if so we use that. This effectively promotes the header API exposed by HeaderRegistry to an API that any header parser "should" support. This seems like a reasonable thing to do. It is not, however, a requirement, as the string value of the header is still used if there is no cte attribute.
The full fix (ignore any trailing blanks or blank-separated trailing text) applies only to the non-compat32 API. compat32 is only fixed to the extent that it now ignores trailing spaces. Note that the HeaderRegistry parsing still records a HeaderDefect if there is extra text.
Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com>
Diffstat (limited to 'Lib/test')
-rw-r--r-- | Lib/test/test_email/test_email.py | 44 | ||||
-rw-r--r-- | Lib/test/test_email/test_headerregistry.py | 5 |
2 files changed, 49 insertions, 0 deletions
diff --git a/Lib/test/test_email/test_email.py b/Lib/test/test_email/test_email.py index abe9ef2..2deb357 100644 --- a/Lib/test/test_email/test_email.py +++ b/Lib/test/test_email/test_email.py @@ -810,6 +810,16 @@ class TestMessageAPI(TestEmailBase): w4kgdGVzdGFiYwo= """)) + def test_string_payload_with_base64_cte(self): + msg = email.message_from_string(textwrap.dedent("""\ + Content-Transfer-Encoding: base64 + + SGVsbG8uIFRlc3Rpbmc= + """), policy=email.policy.default) + self.assertEqual(msg.get_payload(decode=True), b"Hello. Testing") + self.assertDefectsEqual(msg['content-transfer-encoding'].defects, []) + + # Test the email.encoders module class TestEncoders(unittest.TestCase): @@ -2352,6 +2362,40 @@ counter to RFC 2822, there's no separating newline here self.assertDefectsEqual(msg.defects, [errors.MissingHeaderBodySeparatorDefect]) + def test_string_payload_with_extra_space_after_cte(self): + # https://github.com/python/cpython/issues/98188 + cte = "base64 " + msg = email.message_from_string(textwrap.dedent(f"""\ + Content-Transfer-Encoding: {cte} + + SGVsbG8uIFRlc3Rpbmc= + """), policy=email.policy.default) + self.assertEqual(msg.get_payload(decode=True), b"Hello. Testing") + self.assertDefectsEqual(msg['content-transfer-encoding'].defects, []) + + def test_string_payload_with_extra_text_after_cte(self): + msg = email.message_from_string(textwrap.dedent("""\ + Content-Transfer-Encoding: base64 some text + + SGVsbG8uIFRlc3Rpbmc= + """), policy=email.policy.default) + self.assertEqual(msg.get_payload(decode=True), b"Hello. Testing") + cte = msg['content-transfer-encoding'] + self.assertDefectsEqual(cte.defects, [email.errors.InvalidHeaderDefect]) + + def test_string_payload_with_extra_space_after_cte_compat32(self): + cte = "base64 " + msg = email.message_from_string(textwrap.dedent(f"""\ + Content-Transfer-Encoding: {cte} + + SGVsbG8uIFRlc3Rpbmc= + """), policy=email.policy.compat32) + pasted_cte = msg['content-transfer-encoding'] + self.assertEqual(pasted_cte, cte) + self.assertEqual(msg.get_payload(decode=True), b"Hello. Testing") + self.assertDefectsEqual(msg.defects, []) + + # Test RFC 2047 header encoding and decoding class TestRFC2047(TestEmailBase): diff --git a/Lib/test/test_email/test_headerregistry.py b/Lib/test/test_email/test_headerregistry.py index 4c0523f..ff7a6da 100644 --- a/Lib/test/test_email/test_headerregistry.py +++ b/Lib/test/test_email/test_headerregistry.py @@ -837,6 +837,11 @@ class TestContentTransferEncoding(TestHeaderBase): '7bit', [errors.InvalidHeaderDefect]), + 'extra_space_after_cte': ( + 'base64 ', + 'base64', + []), + } |