diff options
author | Serhiy Storchaka <storchaka@gmail.com> | 2024-01-10 12:54:36 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-01-10 12:54:36 (GMT) |
commit | e9d5b6ea2d68564f176fdf70c2d7028e060c62b5 (patch) | |
tree | c309da0c1f4a51289b8c7ff6f581c68783c08329 /Lib/email | |
parent | 568d220993fa9b4b812ff1b425edd80dbe17dda9 (diff) | |
download | cpython-e9d5b6ea2d68564f176fdf70c2d7028e060c62b5.zip cpython-e9d5b6ea2d68564f176fdf70c2d7028e060c62b5.tar.gz cpython-e9d5b6ea2d68564f176fdf70c2d7028e060c62b5.tar.bz2 |
gh-113594: Fix UnicodeEncodeError in TokenList.fold() (GH-113730)
It occurred when trying to re-encode an unknown-8bit part combined with a non-unknown-8bit part.
Diffstat (limited to 'Lib/email')
-rw-r--r-- | Lib/email/_header_value_parser.py | 7 |
1 file changed, 7 insertions, 0 deletions
diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py index 0d6bd81..5b653f6 100644 --- a/Lib/email/_header_value_parser.py +++ b/Lib/email/_header_value_parser.py @@ -2766,6 +2766,7 @@ def _refold_parse_tree(parse_tree, *, policy): encoding = 'utf-8' if policy.utf8 else 'us-ascii' lines = [''] last_ew = None + last_charset = None wrap_as_ew_blocked = 0 want_encoding = False end_ew_not_allowed = Terminal('', 'wrap_as_ew_blocked') @@ -2820,8 +2821,14 @@ def _refold_parse_tree(parse_tree, *, policy): else: # It's a terminal, wrap it as an encoded word, possibly # combining it with previously encoded words if allowed. + if (last_ew is not None and + charset != last_charset and + (last_charset == 'unknown-8bit' or + last_charset == 'utf-8' and charset != 'us-ascii')): + last_ew = None last_ew = _fold_as_ew(tstr, lines, maxlen, last_ew, part.ew_combine_allowed, charset) + last_charset = charset want_encoding = False continue if len(tstr) <= maxlen - len(lines[-1]): |