summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Guido van Rossum <guido@python.org>, 1998-12-23 22:22:10 (GMT)
committer: Guido van Rossum <guido@python.org>, 1998-12-23 22:22:10 (GMT)
commit: db01ee0e22c348b642143dbb728517ca7e13f526 (patch)
tree: 1392df8e443d72074f809d574a0507a232a987c3
parent: 9a4d63730ed3d93a1f059bb10f546609e2290cfb (diff)
download: cpython-db01ee0e22c348b642143dbb728517ca7e13f526.zip
cpython-db01ee0e22c348b642143dbb728517ca7e13f526.tar.gz
cpython-db01ee0e22c348b642143dbb728517ca7e13f526.tar.bz2
Patch by Mike Meyer:
Extended the rfc822 parsedate routines to handle the cases they failed on in an archive of ~37,000 messages. I believe the changes are backward compatible, in that every date that was previously parsed correctly is still parsed correctly. [I still see problems with some messages, but no showstoppers.]
-rw-r--r-- Lib/rfc822.py 24
1 file changed, 19 insertions, 5 deletions
diff --git a/Lib/rfc822.py b/Lib/rfc822.py
index b466fae..d8de862 100644
--- a/Lib/rfc822.py
+++ b/Lib/rfc822.py
@@ -748,9 +748,11 @@ def dump_address_pair(pair):
# Parse a date field
-_monthnames = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul',
- 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
-_daynames = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
+_monthnames = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul',
+ 'aug', 'sep', 'oct', 'nov', 'dec',
+ 'january', 'february', 'march', 'april', 'may', 'june', 'july',
+ 'august', 'september', 'october', 'november', 'december']
+_daynames = ['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun']
# The timezone table does not include the military time zones defined
# in RFC822, other than Z. According to RFC1123, the description in
@@ -773,7 +775,7 @@ def parsedate_tz(data):
Accounts for military timezones.
"""
data = string.split(data)
- if data[0][-1] == ',' or data[0] in _daynames:
+ if data[0][-1] in (',', '.') or string.lower(data[0]) in _daynames:
# There's a dayname here. Skip it
del data[0]
if len(data) == 3: # RFC 850 date, deprecated
@@ -791,11 +793,23 @@ def parsedate_tz(data):
return None
data = data[:5]
[dd, mm, yy, tm, tz] = data
+ mm = string.lower(mm)
if not mm in _monthnames:
- dd, mm, yy, tm, tz = mm, dd, tm, yy, tz
+ dd, mm = mm, string.lower(dd)
if not mm in _monthnames:
return None
mm = _monthnames.index(mm)+1
+ if dd[-1] == ',':
+ dd = dd[:-1]
+ i = string.find(yy, ':')
+ if i > 0:
+ yy, tm = tm, yy
+ if yy[-1] == ',':
+ yy = yy[:-1]
+ if yy[0] not in string.digits:
+ yy, tz = tz, yy
+ if tm[-1] == ',':
+ tm = tm[:-1]
tm = string.splitfields(tm, ':')
if len(tm) == 2:
[thh, tmm] = tm