diff options
author | Serhiy Storchaka <storchaka@gmail.com> | 2022-04-30 10:17:23 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-04-30 10:17:23 (GMT) |
commit | e91dee87edcf6dee5dd78053004d76e5f05456d4 (patch) | |
tree | 39267dc065a5ce04396423837df258e7c808056b /Lib/email | |
parent | 3483299a24e41a7f2e958369cb3573d7c2253e33 (diff) | |
download | cpython-e91dee87edcf6dee5dd78053004d76e5f05456d4.zip cpython-e91dee87edcf6dee5dd78053004d76e5f05456d4.tar.gz cpython-e91dee87edcf6dee5dd78053004d76e5f05456d4.tar.bz2 |
bpo-43323: Fix UnicodeEncodeError in the email module (GH-32137)
The error was raised when the charset name itself contained characters
that cannot be encoded in UTF-8 (in particular, \udcxx surrogate
characters representing non-decodable bytes in the source).
Diffstat (limited to 'Lib/email')
-rw-r--r-- | Lib/email/_encoded_words.py | 10 | ||||
-rw-r--r-- | Lib/email/_header_value_parser.py | 2 |
2 files changed, 6 insertions, 6 deletions
diff --git a/Lib/email/_encoded_words.py b/Lib/email/_encoded_words.py index 295ae7e..6795a60 100644 --- a/Lib/email/_encoded_words.py +++ b/Lib/email/_encoded_words.py @@ -179,15 +179,15 @@ def decode(ew): # Turn the CTE decoded bytes into unicode. try: string = bstring.decode(charset) - except UnicodeError: + except UnicodeDecodeError: defects.append(errors.UndecodableBytesDefect("Encoded word " - "contains bytes not decodable using {} charset".format(charset))) + f"contains bytes not decodable using {charset!r} charset")) string = bstring.decode(charset, 'surrogateescape') - except LookupError: + except (LookupError, UnicodeEncodeError): string = bstring.decode('ascii', 'surrogateescape') if charset.lower() != 'unknown-8bit': - defects.append(errors.CharsetError("Unknown charset {} " - "in encoded word; decoded as unknown bytes".format(charset))) + defects.append(errors.CharsetError(f"Unknown charset {charset!r} " + f"in encoded word; decoded as unknown bytes")) return string, charset, lang, defects diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py index 51d355f..8a8fb8b 100644 --- a/Lib/email/_header_value_parser.py +++ b/Lib/email/_header_value_parser.py @@ -781,7 +781,7 @@ class MimeParameters(TokenList): else: try: value = value.decode(charset, 'surrogateescape') - except LookupError: + except (LookupError, UnicodeEncodeError): # XXX: there should really be a custom defect for # unknown character set to make it easy to find, # because otherwise unknown charset is a silent |