summary refs log tree commit diff stats
path: root/Lib/email
diff options
context:
space:
mode:
author Serhiy Storchaka <storchaka@gmail.com> 2022-04-30 10:17:23 (GMT)
committer GitHub <noreply@github.com> 2022-04-30 10:17:23 (GMT)
commit e91dee87edcf6dee5dd78053004d76e5f05456d4 (patch)
tree 39267dc065a5ce04396423837df258e7c808056b /Lib/email
parent 3483299a24e41a7f2e958369cb3573d7c2253e33 (diff)
download cpython-e91dee87edcf6dee5dd78053004d76e5f05456d4.zip
cpython-e91dee87edcf6dee5dd78053004d76e5f05456d4.tar.gz
cpython-e91dee87edcf6dee5dd78053004d76e5f05456d4.tar.bz2
bpo-43323: Fix UnicodeEncodeError in the email module (GH-32137)
It was raised if the charset itself contains characters not encodable in UTF-8 (in particular \udcxx characters representing non-decodable bytes in the source).
Diffstat (limited to 'Lib/email')
-rw-r--r-- Lib/email/_encoded_words.py 10
-rw-r--r-- Lib/email/_header_value_parser.py 2
2 files changed, 6 insertions, 6 deletions
diff --git a/Lib/email/_encoded_words.py b/Lib/email/_encoded_words.py
index 295ae7e..6795a60 100644
--- a/Lib/email/_encoded_words.py
+++ b/Lib/email/_encoded_words.py
@@ -179,15 +179,15 @@ def decode(ew):
# Turn the CTE decoded bytes into unicode.
try:
string = bstring.decode(charset)
- except UnicodeError:
+ except UnicodeDecodeError:
defects.append(errors.UndecodableBytesDefect("Encoded word "
- "contains bytes not decodable using {} charset".format(charset)))
+ f"contains bytes not decodable using {charset!r} charset"))
string = bstring.decode(charset, 'surrogateescape')
- except LookupError:
+ except (LookupError, UnicodeEncodeError):
string = bstring.decode('ascii', 'surrogateescape')
if charset.lower() != 'unknown-8bit':
- defects.append(errors.CharsetError("Unknown charset {} "
- "in encoded word; decoded as unknown bytes".format(charset)))
+ defects.append(errors.CharsetError(f"Unknown charset {charset!r} "
+ f"in encoded word; decoded as unknown bytes"))
return string, charset, lang, defects
diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py
index 51d355f..8a8fb8b 100644
--- a/Lib/email/_header_value_parser.py
+++ b/Lib/email/_header_value_parser.py
@@ -781,7 +781,7 @@ class MimeParameters(TokenList):
else:
try:
value = value.decode(charset, 'surrogateescape')
- except LookupError:
+ except (LookupError, UnicodeEncodeError):
# XXX: there should really be a custom defect for
# unknown character set to make it easy to find,
# because otherwise unknown charset is a silent