diff options
author | Barry Warsaw <barry@python.org> | 2006-07-26 05:54:46 (GMT) |
---|---|---|
committer | Barry Warsaw <barry@python.org> | 2006-07-26 05:54:46 (GMT) |
commit | d92ae78bdbab63a68e88fb561a2bc9555d8cef6c (patch) | |
tree | 309a26b8b0749aac42b3b947db4df0cebfbe3258 /Lib/email/message.py | |
parent | 9815f8b25238e22fc14f8305b0bb53711bbb3de6 (diff) | |
download | cpython-d92ae78bdbab63a68e88fb561a2bc9555d8cef6c.zip cpython-d92ae78bdbab63a68e88fb561a2bc9555d8cef6c.tar.gz cpython-d92ae78bdbab63a68e88fb561a2bc9555d8cef6c.tar.bz2 |
Forward port some fixes that were in email 2.5 but for some reason didn't make
it into email 4.0. Specifically, in Message.get_content_charset(), handle RFC
2231 headers that contain an encoding not known to Python, or a character in
the data that isn't in the charset encoding. Also forward port the
appropriate unit tests.
Diffstat (limited to 'Lib/email/message.py')
-rw-r--r-- | Lib/email/message.py | 13 |
1 files changed, 12 insertions, 1 deletions
```diff
diff --git a/Lib/email/message.py b/Lib/email/message.py
index 50d90b4..79c5c4c 100644
--- a/Lib/email/message.py
+++ b/Lib/email/message.py
@@ -747,7 +747,18 @@ class Message:
         if isinstance(charset, tuple):
             # RFC 2231 encoded, so decode it, and it better end up as ascii.
             pcharset = charset[0] or 'us-ascii'
-            charset = unicode(charset[2], pcharset).encode('us-ascii')
+            try:
+                # LookupError will be raised if the charset isn't known to
+                # Python. UnicodeError will be raised if the encoded text
+                # contains a character not in the charset.
+                charset = unicode(charset[2], pcharset).encode('us-ascii')
+            except (LookupError, UnicodeError):
+                charset = charset[2]
+        # charset character must be in us-ascii range
+        try:
+            charset = unicode(charset, 'us-ascii').encode('us-ascii')
+        except UnicodeError:
+            return failobj
         # RFC 2046, $4.1.2 says charsets are not case sensitive
         return charset.lower()
```