summaryrefslogtreecommitdiffstats
path: root/Lib/test
diff options
context:
space:
mode:
authorMiss Islington (bot) <31488909+miss-islington@users.noreply.github.com>2024-01-10 13:21:50 (GMT)
committerGitHub <noreply@github.com>2024-01-10 13:21:50 (GMT)
commit84677ff19c0ffdc328203cdb25d16053e9c5a910 (patch)
tree42aa3119f1ee7c5f165a9391faa7850b7a85cd6f /Lib/test
parent082998c3af5bfd163da5c0198cd10acf588a44ba (diff)
downloadcpython-84677ff19c0ffdc328203cdb25d16053e9c5a910.zip
cpython-84677ff19c0ffdc328203cdb25d16053e9c5a910.tar.gz
cpython-84677ff19c0ffdc328203cdb25d16053e9c5a910.tar.bz2
[3.12] gh-113594: Fix UnicodeEncodeError in TokenList.fold() (GH-113730) (GH-113907)
It occurred when trying to re-encode an unknown-8bit part combined with a non-unknown-8bit part. (cherry picked from commit e9d5b6ea2d68564f176fdf70c2d7028e060c62b5) Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
Diffstat (limited to 'Lib/test')
-rw-r--r--Lib/test/test_email/test__header_value_parser.py39
1 files changed, 39 insertions, 0 deletions
diff --git a/Lib/test/test_email/test__header_value_parser.py b/Lib/test/test_email/test__header_value_parser.py
index 854f2ff..bdb0e55 100644
--- a/Lib/test/test_email/test__header_value_parser.py
+++ b/Lib/test/test_email/test__header_value_parser.py
@@ -2915,6 +2915,45 @@ class TestFolding(TestEmailBase):
"mich. And that's\n"
" all I'm sayin.\n")
+ def test_unicode_after_unknown_not_combined(self):
+ self._test(parser.get_unstructured("=?unknown-8bit?q?=A4?=\xa4"),
+ "=?unknown-8bit?q?=A4?==?utf-8?q?=C2=A4?=\n")
+ prefix = "0123456789 "*5
+ self._test(parser.get_unstructured(prefix + "=?unknown-8bit?q?=A4?=\xa4"),
+ prefix + "=?unknown-8bit?q?=A4?=\n =?utf-8?q?=C2=A4?=\n")
+
+ def test_ascii_after_unknown_not_combined(self):
+ self._test(parser.get_unstructured("=?unknown-8bit?q?=A4?=abc"),
+ "=?unknown-8bit?q?=A4?=abc\n")
+ prefix = "0123456789 "*5
+ self._test(parser.get_unstructured(prefix + "=?unknown-8bit?q?=A4?=abc"),
+ prefix + "=?unknown-8bit?q?=A4?=\n =?utf-8?q?abc?=\n")
+
+ def test_unknown_after_unicode_not_combined(self):
+ self._test(parser.get_unstructured("\xa4"
+ "=?unknown-8bit?q?=A4?="),
+ "=?utf-8?q?=C2=A4?==?unknown-8bit?q?=A4?=\n")
+ prefix = "0123456789 "*5
+ self._test(parser.get_unstructured(prefix + "\xa4=?unknown-8bit?q?=A4?="),
+ prefix + "=?utf-8?q?=C2=A4?=\n =?unknown-8bit?q?=A4?=\n")
+
+ def test_unknown_after_ascii_not_combined(self):
+ self._test(parser.get_unstructured("abc"
+ "=?unknown-8bit?q?=A4?="),
+ "abc=?unknown-8bit?q?=A4?=\n")
+ prefix = "0123456789 "*5
+ self._test(parser.get_unstructured(prefix + "abcd=?unknown-8bit?q?=A4?="),
+ prefix + "abcd\n =?unknown-8bit?q?=A4?=\n")
+
+ def test_unknown_after_unknown(self):
+ self._test(parser.get_unstructured("=?unknown-8bit?q?=C2?="
+ "=?unknown-8bit?q?=A4?="),
+ "=?unknown-8bit?q?=C2=A4?=\n")
+ prefix = "0123456789 "*5
+ self._test(parser.get_unstructured(prefix + "=?unknown-8bit?q?=C2?="
+ "=?unknown-8bit?q?=A4?="),
+ prefix + "=?unknown-8bit?q?=C2?=\n =?unknown-8bit?q?=A4?=\n")
+
# XXX Need test of an encoded word so long that it needs to be wrapped
def test_simple_address(self):