diff options
Diffstat (limited to 'Lib/email/Utils.py')
-rw-r--r-- | Lib/email/Utils.py | 104 |
1 files changed, 104 insertions, 0 deletions
diff --git a/Lib/email/Utils.py b/Lib/email/Utils.py new file mode 100644 index 0000000..6bbf2d3 --- /dev/null +++ b/Lib/email/Utils.py @@ -0,0 +1,104 @@ +# Copyright (C) 2001 Python Software Foundation +# Author: barry@zope.com (Barry Warsaw) + +"""Miscellaneous utilities. +""" + +import re + +from rfc822 import unquote, quote, parseaddr +from rfc822 import dump_address_pair +from rfc822 import AddrlistClass as _AddrlistClass +from rfc822 import parsedate_tz, parsedate, mktime_tz, formatdate + +from quopri import decodestring as _qdecode +import base64 + +# Intrapackage imports +from Encoders import _bencode, _qencode + +COMMASPACE = ', ' +UEMPTYSTRING = u'' + + + +# Helpers + +def _identity(s): + return s + + +def _bdecode(s): + if not s: + return s + # We can't quite use base64.encodestring() since it tacks on a "courtesy + # newline". Blech! + if not s: + return s + hasnewline = (s[-1] == '\n') + value = base64.decodestring(s) + if not hasnewline and value[-1] == '\n': + return value[:-1] + return value + + + +def getaddresses(fieldvalues): + """Return a list of (REALNAME, EMAIL) for each fieldvalue.""" + all = COMMASPACE.join(fieldvalues) + a = _AddrlistClass(all) + return a.getaddrlist() + + + +ecre = re.compile(r''' + =\? # literal =? + (?P<charset>[^?]*?) # non-greedy up to the next ? is the charset + \? # literal ? + (?P<encoding>[qb]) # either a "q" or a "b", case insensitive + \? # literal ? + (?P<atom>.*?) # non-greedy up to the next ?= is the atom + \?= # literal ?= + ''', re.VERBOSE | re.IGNORECASE) + + +def decode(s): + """Return a decoded string according to RFC 2047, as a unicode string.""" + rtn = [] + parts = ecre.split(s, 1) + while parts: + # If there are less than 4 parts, it can't be encoded and we're done + if len(parts) < 5: + rtn.extend(parts) + break + # The first element is any non-encoded leading text + rtn.append(parts[0]) + charset = parts[1] + encoding = parts[2] + atom = parts[3] + # The next chunk to decode should be in parts[4] + parts = ecre.split(parts[4]) + # The encoding must be either `q' or `b', case-insensitive + if encoding.lower() == 'q': + func = _qdecode + elif encoding.lower() == 'b': + func = _bdecode + else: + func = _identity + # Decode and get the unicode in the charset + rtn.append(unicode(func(atom), charset)) + # Now that we've decoded everything, we just need to join all the parts + # together into the final string. + return UEMPTYSTRING.join(rtn) + + + +def encode(s, charset='iso-8859-1', encoding='q'): + """Encode a string according to RFC 2047.""" + if encoding.lower() == 'q': + estr = _qencode(s) + elif encoding.lower() == 'b': + estr = _bencode(s) + else: + raise ValueError, 'Illegal encoding code: ' + encoding + return '=?%s?%s?%s?=' % (charset.lower(), encoding.lower(), estr) |