From db01ee0e22c348b642143dbb728517ca7e13f526 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Wed, 23 Dec 1998 22:22:10 +0000 Subject: Patch by Mike Meyer: Extended the rfc822 parsedate routines to handle the cases they failed on in an archive of ~37,000 messages. I believe the changes are compatible, in that all previously correct parses are still correct. [I still see problems with some messages, but no showstoppers.] --- Lib/rfc822.py | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/Lib/rfc822.py b/Lib/rfc822.py index b466fae..d8de862 100644 --- a/Lib/rfc822.py +++ b/Lib/rfc822.py @@ -748,9 +748,11 @@ def dump_address_pair(pair): # Parse a date field -_monthnames = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', - 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'] -_daynames = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'] +_monthnames = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul', + 'aug', 'sep', 'oct', 'nov', 'dec', + 'january', 'february', 'march', 'april', 'may', 'june', 'july', + 'august', 'september', 'october', 'november', 'december'] +_daynames = ['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun'] # The timezone table does not include the military time zones defined # in RFC822, other than Z. According to RFC1123, the description in @@ -773,7 +775,7 @@ def parsedate_tz(data): Accounts for military timezones. """ data = string.split(data) - if data[0][-1] == ',' or data[0] in _daynames: + if data[0][-1] in (',', '.') or string.lower(data[0]) in _daynames: # There's a dayname here.
Skip it del data[0] if len(data) == 3: # RFC 850 date, deprecated @@ -791,11 +793,23 @@ def parsedate_tz(data): return None data = data[:5] [dd, mm, yy, tm, tz] = data + mm = string.lower(mm) if not mm in _monthnames: - dd, mm, yy, tm, tz = mm, dd, tm, yy, tz + dd, mm = mm, string.lower(dd) if not mm in _monthnames: return None mm = _monthnames.index(mm)+1 + if dd[-1] == ',': + dd = dd[:-1] + i = string.find(yy, ':') + if i > 0: + yy, tm = tm, yy + if yy[-1] == ',': + yy = yy[:-1] + if yy[0] not in string.digits: + yy, tz = tz, yy + if tm[-1] == ',': + tm = tm[:-1] tm = string.splitfields(tm, ':') if len(tm) == 2: [thh, tmm] = tm -- cgit v0.12