From 875048bd4c95ae90c3e541cad681b11436ce1f3f Mon Sep 17 00:00:00 2001 From: R David Murray Date: Wed, 20 Jul 2011 11:41:21 -0400 Subject: #665194: support roundtripping RFC2822 date stamps in the email.utils module --- Doc/library/email.util.rst | 28 ++++++++++++++++++++++++ Lib/email/_parseaddr.py | 19 +++++++++++++++- Lib/email/utils.py | 46 +++++++++++++++++++++++++++++++++------ Lib/test/test_email/test_utils.py | 45 ++++++++++++++++++++++++++++++++++++++ Misc/NEWS | 3 +++ 5 files changed, 133 insertions(+), 8 deletions(-) create mode 100644 Lib/test/test_email/test_utils.py diff --git a/Doc/library/email.util.rst b/Doc/library/email.util.rst index 4d96857..2f9ef89 100644 --- a/Doc/library/email.util.rst +++ b/Doc/library/email.util.rst @@ -81,6 +81,20 @@ There are several useful utilities provided in the :mod:`email.utils` module: indexes 6, 7, and 8 of the result tuple are not usable. +.. function:: parsedate_to_datetime(date) + + The inverse of :func:`format_datetime`. Performs the same function as + :func:`parsedate`, but on success returns a :class:`~datetime.datetime`. If + the input date has a timezone of ``-0000``, the ``datetime`` will be a naive + ``datetime``, and if the date conforms to the RFCs it will represent a + time in UTC but with no indication of the actual source timezone of the + message the date comes from. If the input date has any other valid timezone + offset, the ``datetime`` will be an aware ``datetime`` with the + corresponding :class:`~datetime.timezone` :class:`~datetime.tzinfo`. + + .. versionadded:: 3.3 + + .. function:: mktime_tz(tuple) Turn a 10-tuple as returned by :func:`parsedate_tz` into a UTC timestamp. It @@ -112,6 +126,20 @@ There are several useful utilities provided in the :mod:`email.utils` module: ``False``. The default is ``False``. +.. function:: format_datetime(dt, usegmt=False) + + Like ``formatdate``, but the input is a :class:`~datetime.datetime` instance. 
If it is + a naive datetime, it is assumed to be "UTC with no information about the + source timezone", and the conventional ``-0000`` is used for the timezone. + If it is an aware ``datetime``, then the numeric timezone offset is used. + If it is an aware ``datetime`` with offset zero, then *usegmt* may be set to + ``True``, in which case the string ``GMT`` is used instead of the numeric + timezone offset. This provides a way to generate standards-conformant HTTP + date headers. + + .. versionadded:: 3.3 + + .. function:: make_msgid(idstring=None, domain=None) Returns a string suitable for an :rfc:`2822`\ -compliant diff --git a/Lib/email/_parseaddr.py b/Lib/email/_parseaddr.py index 4b2f5c6..c455e05 100644 --- a/Lib/email/_parseaddr.py +++ b/Lib/email/_parseaddr.py @@ -47,6 +47,21 @@ def parsedate_tz(data): Accounts for military timezones. """ + res = _parsedate_tz(data) + if res[9] is None: + res[9] = 0 + return tuple(res) + +def _parsedate_tz(data): + """Convert date to extended time tuple. + + The last (additional) element is the time zone offset in seconds, except if + the timezone was specified as -0000. In that case the last element is + None. This indicates a UTC timestamp that explicitly disclaims knowledge of + the source timezone, as opposed to a +0000 timestamp that indicates the + source timezone really was UTC. + + """ data = data.split() # The FWS after the comma after the day-of-week is optional, so search and # adjust for this. @@ -138,6 +153,8 @@ def parsedate_tz(data): tzoffset = int(tz) except ValueError: pass + if tzoffset==0 and tz.startswith('-'): + tzoffset = None # Convert a timezone offset into seconds ; -0500 -> -18000 if tzoffset: if tzoffset < 0: @@ -147,7 +164,7 @@ def parsedate_tz(data): tzsign = 1 tzoffset = tzsign * ( (tzoffset//100)*3600 + (tzoffset % 100)*60) # Daylight Saving Time flag is set to -1, since DST is unknown. 
- return yy, mm, dd, thh, tmm, tss, 0, 1, -1, tzoffset + return [yy, mm, dd, thh, tmm, tss, 0, 1, -1, tzoffset] def parsedate(data): diff --git a/Lib/email/utils.py b/Lib/email/utils.py index 82f7283..aecea65 100644 --- a/Lib/email/utils.py +++ b/Lib/email/utils.py @@ -11,12 +11,14 @@ __all__ = [ 'encode_rfc2231', 'formataddr', 'formatdate', + 'format_datetime', 'getaddresses', 'make_msgid', 'mktime_tz', 'parseaddr', 'parsedate', 'parsedate_tz', + 'parsedate_to_datetime', 'unquote', ] @@ -26,6 +28,7 @@ import time import base64 import random import socket +import datetime import urllib.parse import warnings from io import StringIO @@ -37,6 +40,7 @@ from email._parseaddr import mktime_tz # We need wormarounds for bugs in these methods in older Pythons (see below) from email._parseaddr import parsedate as _parsedate from email._parseaddr import parsedate_tz as _parsedate_tz +from email._parseaddr import _parsedate_tz as __parsedate_tz from quopri import decodestring as _qdecode @@ -110,6 +114,14 @@ ecre = re.compile(r''' ''', re.VERBOSE | re.IGNORECASE) +def _format_timetuple_and_zone(timetuple, zone): + return '%s, %02d %s %04d %02d:%02d:%02d %s' % ( + ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'][timetuple[6]], + timetuple[2], + ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', + 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'][timetuple[1] - 1], + timetuple[0], timetuple[3], timetuple[4], timetuple[5], + zone) def formatdate(timeval=None, localtime=False, usegmt=False): """Returns a date string as specified by RFC 2822, e.g.: @@ -154,14 +166,25 @@ def formatdate(timeval=None, localtime=False, usegmt=False): zone = 'GMT' else: zone = '-0000' - return '%s, %02d %s %04d %02d:%02d:%02d %s' % ( - ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'][now[6]], - now[2], - ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', - 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'][now[1] - 1], - now[0], now[3], now[4], now[5], - zone) + return _format_timetuple_and_zone(now, zone) +def format_datetime(dt, 
usegmt=False): + """Turn a datetime into a date string as specified in RFC 2822. + + If usegmt is True, dt must be an aware datetime with an offset of zero. In + this case 'GMT' will be rendered instead of the normal +0000 required by + RFC2822. This is to support HTTP headers involving date stamps. + """ + now = dt.timetuple() + if usegmt: + if dt.tzinfo is None or dt.tzinfo != datetime.timezone.utc: + raise ValueError("usegmt option requires a UTC datetime") + zone = 'GMT' + elif dt.tzinfo is None: + zone = '-0000' + else: + zone = dt.strftime("%z") + return _format_timetuple_and_zone(now, zone) def make_msgid(idstring=None, domain=None): @@ -203,6 +226,15 @@ def parsedate_tz(data): return None return _parsedate_tz(data) +def parsedate_to_datetime(data): + if not data: + return None + *dtuple, tz = __parsedate_tz(data) + if tz is None: + return datetime.datetime(*dtuple[:6]) + return datetime.datetime(*dtuple[:6], + tzinfo=datetime.timezone(datetime.timedelta(seconds=tz))) + def parseaddr(addr): addrs = _AddressList(addr).addresslist diff --git a/Lib/test/test_email/test_utils.py b/Lib/test/test_email/test_utils.py new file mode 100644 index 0000000..e003a64 --- /dev/null +++ b/Lib/test/test_email/test_utils.py @@ -0,0 +1,45 @@ +import datetime +from email import utils +import unittest + +class DateTimeTests(unittest.TestCase): + + datestring = 'Sun, 23 Sep 2001 20:10:55' + dateargs = (2001, 9, 23, 20, 10, 55) + offsetstring = ' -0700' + utcoffset = datetime.timedelta(hours=-7) + tz = datetime.timezone(utcoffset) + naive_dt = datetime.datetime(*dateargs) + aware_dt = datetime.datetime(*dateargs, tzinfo=tz) + + def test_naive_datetime(self): + self.assertEqual(utils.format_datetime(self.naive_dt), + self.datestring + ' -0000') + + def test_aware_datetime(self): + self.assertEqual(utils.format_datetime(self.aware_dt), + self.datestring + self.offsetstring) + + def test_usegmt(self): + utc_dt = datetime.datetime(*self.dateargs, + tzinfo=datetime.timezone.utc) + 
self.assertEqual(utils.format_datetime(utc_dt, usegmt=True), + self.datestring + ' GMT') + + def test_usegmt_with_naive_datetime_raises(self): + with self.assertRaises(ValueError): + utils.format_datetime(self.naive_dt, usegmt=True) + + def test_usegmt_with_non_utc_datetime_raises(self): + with self.assertRaises(ValueError): + utils.format_datetime(self.aware_dt, usegmt=True) + + def test_parsedate_to_datetime(self): + self.assertEqual( + utils.parsedate_to_datetime(self.datestring + self.offsetstring), + self.aware_dt) + + def test_parsedate_to_datetime_naive(self): + self.assertEqual( + utils.parsedate_to_datetime(self.datestring + ' -0000'), + self.naive_dt) diff --git a/Misc/NEWS b/Misc/NEWS index 91f3883..d648501 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -234,6 +234,9 @@ Core and Builtins Library ------- +- Issue #665194: email.utils now has format_datetime and parsedate_to_datetime + functions, allowing for round tripping of RFC2822 format dates. + - Issue #12571: Add a plat-linux3 directory mirroring the plat-linux2 directory, so that "import DLFCN" and other similar imports work on Linux 3.0. -- cgit v0.12