summary | refs | log | tree | commit | diff | stats
path: root/Lib/email/utils.py
diff options
context:
space:
mode:
author R David Murray <rdmurray@bitdance.com> 2013-10-17 02:48:40 (GMT)
committer R David Murray <rdmurray@bitdance.com> 2013-10-17 02:48:40 (GMT)
commit 3da240fd01e02a31e516847ba865a9f56d7cfcbc (patch)
tree 056628df84b2e949fb793d75235e38766a16fbcc /Lib/email/utils.py
parent 1a1628819779110854d4e1ab490db2cab80e26b4 (diff)
download cpython-3da240fd01e02a31e516847ba865a9f56d7cfcbc.zip
cpython-3da240fd01e02a31e516847ba865a9f56d7cfcbc.tar.gz
cpython-3da240fd01e02a31e516847ba865a9f56d7cfcbc.tar.bz2
#18891: Complete new provisional email API.
This adds EmailMessage and MIMEPart, subclasses of Message with new API methods, and a ContentManager class used by the new methods. It also adds a new policy setting, content_manager. The patch was reviewed by Stephen J. Turnbull and Serhiy Storchaka, and reflects their feedback. I will ideally add some examples of using the new API to the documentation before the final release.
Diffstat (limited to 'Lib/email/utils.py')
-rw-r--r-- Lib/email/utils.py 10
1 files changed, 7 insertions, 3 deletions
diff --git a/Lib/email/utils.py b/Lib/email/utils.py
index b3b42bb..25b0d56 100644
--- a/Lib/email/utils.py
+++ b/Lib/email/utils.py
@@ -68,9 +68,13 @@ def _has_surrogates(s):
# How to deal with a string containing bytes before handing it to the
# application through the 'normal' interface.
def _sanitize(string):
- # Turn any escaped bytes into unicode 'unknown' char.
- original_bytes = string.encode('ascii', 'surrogateescape')
- return original_bytes.decode('ascii', 'replace')
+ # Turn any escaped bytes into unicode 'unknown' char. If the escaped
+ # bytes happen to be utf-8 they will instead get decoded, even if they
+ # were invalid in the charset the source was supposed to be in. This
+ # seems like it is not a bad thing; a defect was still registered.
+ original_bytes = string.encode('utf-8', 'surrogateescape')
+ return original_bytes.decode('utf-8', 'replace')
+
# Helpers