diff options
author | Barry Warsaw <barry@python.org> | 2002-04-10 21:01:31 (GMT) |
---|---|---|
committer | Barry Warsaw <barry@python.org> | 2002-04-10 21:01:31 (GMT) |
commit | 409a4c08b545aa064cf8fe3b8de51404756a301e (patch) | |
tree | 06cf8fe44e1fe28fbc0147635ec41961f2df6515 /Lib/email/Utils.py | |
parent | 68e69338ae19c37bd3e69cb76e107bfa76231e06 (diff) | |
download | cpython-409a4c08b545aa064cf8fe3b8de51404756a301e.zip cpython-409a4c08b545aa064cf8fe3b8de51404756a301e.tar.gz cpython-409a4c08b545aa064cf8fe3b8de51404756a301e.tar.bz2 |
Sync'ing with standalone email package 2.0.1. This adds support for
non-us-ascii character sets in headers and bodies. Some API changes
(with DeprecationWarnings for the old APIs). Better RFC-compliant
implementations of base64 and quoted-printable.
Updated test cases. Documentation updates to follow (after I finish
writing them ;).
Diffstat (limited to 'Lib/email/Utils.py')
-rw-r--r-- | Lib/email/Utils.py | 143 |
1 files changed, 117 insertions, 26 deletions
diff --git a/Lib/email/Utils.py b/Lib/email/Utils.py index 3d48287..887be55 100644 --- a/Lib/email/Utils.py +++ b/Lib/email/Utils.py @@ -1,16 +1,26 @@ -# Copyright (C) 2001 Python Software Foundation +# Copyright (C) 2001,2002 Python Software Foundation # Author: barry@zope.com (Barry Warsaw) """Miscellaneous utilities. """ import time +import socket import re +import random +import os +import warnings +from cStringIO import StringIO +from types import ListType -from rfc822 import unquote, quote, parseaddr -from rfc822 import dump_address_pair +from rfc822 import unquote, quote from rfc822 import AddrlistClass as _AddrlistClass -from rfc822 import parsedate_tz, parsedate, mktime_tz +from rfc822 import mktime_tz + +# We need wormarounds for bugs in these methods in older Pythons (see below) +from rfc822 import parsedate as _parsedate +from rfc822 import parsedate_tz as _parsedate_tz +from rfc822 import parseaddr as _parseaddr from quopri import decodestring as _qdecode import base64 @@ -20,6 +30,10 @@ from Encoders import _bencode, _qencode COMMASPACE = ', ' UEMPTYSTRING = u'' +CRLF = '\r\n' + +specialsre = re.compile(r'[][\()<>@,:;".]') +escapesre = re.compile(r'[][\()"]') @@ -44,6 +58,41 @@ def _bdecode(s): +def fix_eols(s): + """Replace all line-ending characters with \r\n.""" + # Fix newlines with no preceding carriage return + s = re.sub(r'(?<!\r)\n', CRLF, s) + # Fix carriage returns with no following newline + s = re.sub(r'\r(?!\n)', CRLF, s) + return s + + + +def formataddr(pair): + """The inverse of parseaddr(), this takes a 2-tuple of the form + (realname, email_address) and returns the string value suitable + for an RFC 2822 From:, To: or Cc:. + + If the first element of pair is false, then the second element is + returned unmodified. + """ + name, address = pair + if name: + quotes = '' + if specialsre.search(name): + quotes = '"' + name = escapesre.sub(r'\\\g<0>', name) + return '%s%s%s <%s>' % (quotes, name, quotes, address) + return address + +# For backwards compatibility +def dump_address_pair(pair): + warnings.warn('Use email.Utils.formataddr() instead', + DeprecationWarning, 2) + return formataddr(pair) + + + def getaddresses(fieldvalues): """Return a list of (REALNAME, EMAIL) for each fieldvalue.""" all = COMMASPACE.join(fieldvalues) @@ -64,30 +113,26 @@ ecre = re.compile(r''' def decode(s): - """Return a decoded string according to RFC 2047, as a unicode string.""" + """Return a decoded string according to RFC 2047, as a unicode string. + + NOTE: This function is deprecated. Use Header.decode_header() instead. + """ + warnings.warn('Use Header.decode_header() instead.', DeprecationWarning, 2) + # Intra-package import here to avoid circular import problems. + from Header import decode_header + L = decode_header(s) + if not isinstance(L, ListType): + # s wasn't decoded + return s + rtn = [] - parts = ecre.split(s, 1) - while parts: - # If there are less than 4 parts, it can't be encoded and we're done - if len(parts) < 5: - rtn.extend(parts) - break - # The first element is any non-encoded leading text - rtn.append(parts[0]) - charset = parts[1] - encoding = parts[2].lower() - atom = parts[3] - # The next chunk to decode should be in parts[4] - parts = ecre.split(parts[4]) - # The encoding must be either `q' or `b', case-insensitive - if encoding == 'q': - func = _qdecode - elif encoding == 'b': - func = _bdecode + for atom, charset in L: + if charset is None: + rtn.append(atom) else: - func = _identity - # Decode and get the unicode in the charset - rtn.append(unicode(func(atom), charset)) + # Convert the string to Unicode using the given encoding. Leave + # Unicode conversion errors to strict. + rtn.append(unicode(atom, charset)) # Now that we've decoded everything, we just need to join all the parts # together into the final string. return UEMPTYSTRING.join(rtn) @@ -96,6 +141,7 @@ def decode(s): def encode(s, charset='iso-8859-1', encoding='q'): """Encode a string according to RFC 2047.""" + warnings.warn('Use Header.Header.encode() instead.', DeprecationWarning, 2) encoding = encoding.lower() if encoding == 'q': estr = _qencode(s) @@ -150,3 +196,48 @@ def formatdate(timeval=None, localtime=0): 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'][now[1] - 1], now[0], now[3], now[4], now[5], zone) + + + +def make_msgid(idstring=None): + """Returns a string suitable for RFC 2822 compliant Message-ID:, e.g: + + <20020201195627.33539.96671@nightshade.la.mastaler.com> + + Optional idstring if given is a string used to strengthen the + uniqueness of the Message-ID, otherwise an empty string is used. + """ + timeval = time.time() + utcdate = time.strftime('%Y%m%d%H%M%S', time.gmtime(timeval)) + pid = os.getpid() + randint = random.randrange(100000) + if idstring is None: + idstring = '' + else: + idstring = '.' + idstring + idhost = socket.getfqdn() + msgid = '<%s.%s.%s%s@%s>' % (utcdate, pid, randint, idstring, idhost) + return msgid + + + +# These functions are in the standalone mimelib version only because they've +# subsequently been fixed in the latest Python versions. We use this to worm +# around broken older Pythons. +def parsedate(data): + if not data: + return None + return _parsedate(data) + + +def parsedate_tz(data): + if not data: + return None + return _parsedate_tz(data) + + +def parseaddr(addr): + realname, emailaddr = _parseaddr(addr) + if realname == '' and emailaddr is None: + return '', '' + return realname, emailaddr |