summary | refs | log | tree | commit | diff | stats
path: root/Lib/email
diff options
context:
space:
mode:
author: Barry Warsaw <barry@python.org> 2007-03-14 04:59:50 (GMT)
committer: Barry Warsaw <barry@python.org> 2007-03-14 04:59:50 (GMT)
commit: dcd24ae5015bb94ef83015a4f584e5b8f173e999 (patch)
tree: f0de750ea1f48bb6de48669130272296391f04ad /Lib/email
parent: 47c52a8b60444e4f0aae9c0e91794d214b68bd45 (diff)
download: cpython-dcd24ae5015bb94ef83015a4f584e5b8f173e999.zip
cpython-dcd24ae5015bb94ef83015a4f584e5b8f173e999.tar.gz
cpython-dcd24ae5015bb94ef83015a4f584e5b8f173e999.tar.bz2
SF bug #1582282; decode_header() incorrectly splits non-conformant RFC
2047-like headers where there is no whitespace between encoded words. This fix changes the matching regexp to include a trailing lookahead assertion that the closing ?= must be followed by whitespace, newline, or end-of-string. This also changes the regexp to add the MULTILINE flag.
Diffstat (limited to 'Lib/email')
-rw-r--r-- Lib/email/header.py 3
-rw-r--r-- Lib/email/test/test_email.py 12
-rw-r--r-- Lib/email/test/test_email_renamed.py 12
3 files changed, 26 insertions, 1 deletions
diff --git a/Lib/email/header.py b/Lib/email/header.py
index 183c337..e139ccf 100644
--- a/Lib/email/header.py
+++ b/Lib/email/header.py
@@ -39,7 +39,8 @@ ecre = re.compile(r'''
\? # literal ?
(?P<encoded>.*?) # non-greedy up to the next ?= is the encoded string
\?= # literal ?=
- ''', re.VERBOSE | re.IGNORECASE)
+ (?=[ \t]|$) # whitespace or the end of the string
+ ''', re.VERBOSE | re.IGNORECASE | re.MULTILINE)
# Field name regexp, including trailing colon, but not separating whitespace,
# according to RFC 2822. Character range is from tilde to exclamation mark.
diff --git a/Lib/email/test/test_email.py b/Lib/email/test/test_email.py
index 14b8a1b..a032999 100644
--- a/Lib/email/test/test_email.py
+++ b/Lib/email/test/test_email.py
@@ -1527,6 +1527,18 @@ class TestRFC2047(unittest.TestCase):
hu = make_header(dh).__unicode__()
eq(hu, u'The quick brown fox jumped over the lazy dog')
+ def test_rfc2047_without_whitespace(self):
+ s = 'Sm=?ISO-8859-1?B?9g==?=rg=?ISO-8859-1?B?5Q==?=sbord'
+ dh = decode_header(s)
+ self.assertEqual(dh, [(s, None)])
+
+ def test_rfc2047_with_whitespace(self):
+ s = 'Sm =?ISO-8859-1?B?9g==?= rg =?ISO-8859-1?B?5Q==?= sbord'
+ dh = decode_header(s)
+ self.assertEqual(dh, [('Sm', None), ('\xf6', 'iso-8859-1'),
+ ('rg', None), ('\xe5', 'iso-8859-1'),
+ ('sbord', None)])
+
# Test the MIMEMessage class
diff --git a/Lib/email/test/test_email_renamed.py b/Lib/email/test/test_email_renamed.py
index 1995040..44238c7 100644
--- a/Lib/email/test/test_email_renamed.py
+++ b/Lib/email/test/test_email_renamed.py
@@ -1525,6 +1525,18 @@ class TestRFC2047(unittest.TestCase):
hu = make_header(dh).__unicode__()
eq(hu, u'The quick brown fox jumped over the lazy dog')
+ def test_rfc2047_missing_whitespace(self):
+ s = 'Sm=?ISO-8859-1?B?9g==?=rg=?ISO-8859-1?B?5Q==?=sbord'
+ dh = decode_header(s)
+ self.assertEqual(dh, [(s, None)])
+
+ def test_rfc2047_with_whitespace(self):
+ s = 'Sm =?ISO-8859-1?B?9g==?= rg =?ISO-8859-1?B?5Q==?= sbord'
+ dh = decode_header(s)
+ self.assertEqual(dh, [('Sm', None), ('\xf6', 'iso-8859-1'),
+ ('rg', None), ('\xe5', 'iso-8859-1'),
+ ('sbord', None)])
+
# Test the MIMEMessage class