diff options
author | R David Murray <rdmurray@bitdance.com> | 2012-06-02 21:56:49 (GMT) |
---|---|---|
committer | R David Murray <rdmurray@bitdance.com> | 2012-06-02 21:56:49 (GMT) |
commit | 07ea53cb218812404cdbde820647ce6e4b2d0f8e (patch) | |
tree | 153fbb31a5056379715475ed55a5c91a0fcbd8a9 /Lib/test/test_email | |
parent | e11eb0f21b8107d7cf61efd37ff3555258577d51 (diff) | |
download | cpython-07ea53cb218812404cdbde820647ce6e4b2d0f8e.zip cpython-07ea53cb218812404cdbde820647ce6e4b2d0f8e.tar.gz cpython-07ea53cb218812404cdbde820647ce6e4b2d0f8e.tar.bz2 |
#1079: Fix parsing of encoded words.
This is a behavior change: previously, leading and trailing spaces were
stripped from ASCII parts; now they are preserved. Without this fix we didn't
parse the examples in RFC 2047 correctly, so I think breaking backward
compatibility here is justified.
Patch by Ralf Schlatterbeck.
Diffstat (limited to 'Lib/test/test_email')
-rw-r--r-- | Lib/test/test_email/test_asian_codecs.py | 2 | ||||
-rw-r--r-- | Lib/test/test_email/test_email.py | 79 |
2 files changed, 67 insertions, 14 deletions
diff --git a/Lib/test/test_email/test_asian_codecs.py b/Lib/test/test_email/test_asian_codecs.py index a4dd9a9..089269f 100644 --- a/Lib/test/test_email/test_asian_codecs.py +++ b/Lib/test/test_email/test_asian_codecs.py @@ -41,7 +41,7 @@ class TestEmailAsianCodecs(TestEmailBase): Hello World! =?iso-2022-jp?b?GyRCJU8lbSE8JW8hPCVrJUkhKhsoQg==?= =?iso-8859-1?q?Gr=FC=DF_Gott!?=""") eq(decode_header(h.encode()), - [(b'Hello World!', None), + [(b'Hello World! ', None), (b'\x1b$B%O%m!<%o!<%k%I!*\x1b(B', 'iso-2022-jp'), (b'Gr\xfc\xdf Gott!', gcode)]) subject_bytes = (b'test-ja \xa4\xd8\xc5\xea\xb9\xc6\xa4\xb5' diff --git a/Lib/test/test_email/test_email.py b/Lib/test/test_email/test_email.py index 3dda921..b7ad667 100644 --- a/Lib/test/test_email/test_email.py +++ b/Lib/test/test_email/test_email.py @@ -1994,9 +1994,9 @@ class TestRFC2047(TestEmailBase): foo bar =?mac-iceland?q?r=8Aksm=9Arg=8Cs?=""" dh = decode_header(s) eq(dh, [ - (b'Re:', None), + (b'Re: ', None), (b'r\x8aksm\x9arg\x8cs', 'mac-iceland'), - (b'baz foo bar', None), + (b' baz foo bar ', None), (b'r\x8aksm\x9arg\x8cs', 'mac-iceland')]) header = make_header(dh) eq(str(header), @@ -2005,35 +2005,37 @@ class TestRFC2047(TestEmailBase): Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz foo bar =?mac-iceland?q?r=8Aksm?= =?mac-iceland?q?=9Arg=8Cs?=""") - def test_whitespace_eater_unicode(self): + def test_whitespace_keeper_unicode(self): eq = self.assertEqual s = '=?ISO-8859-1?Q?Andr=E9?= Pirard <pirard@dom.ain>' dh = decode_header(s) eq(dh, [(b'Andr\xe9', 'iso-8859-1'), - (b'Pirard <pirard@dom.ain>', None)]) + (b' Pirard <pirard@dom.ain>', None)]) header = str(make_header(dh)) eq(header, 'Andr\xe9 Pirard <pirard@dom.ain>') - def test_whitespace_eater_unicode_2(self): + def test_whitespace_keeper_unicode_2(self): eq = self.assertEqual s = 'The =?iso-8859-1?b?cXVpY2sgYnJvd24gZm94?= jumped over the =?iso-8859-1?b?bGF6eSBkb2c=?=' dh = decode_header(s) - eq(dh, [(b'The', None), (b'quick brown fox', 'iso-8859-1'), - 
(b'jumped over the', None), (b'lazy dog', 'iso-8859-1')]) + eq(dh, [(b'The ', None), (b'quick brown fox', 'iso-8859-1'), + (b' jumped over the ', None), (b'lazy dog', 'iso-8859-1')]) hu = str(make_header(dh)) eq(hu, 'The quick brown fox jumped over the lazy dog') def test_rfc2047_missing_whitespace(self): s = 'Sm=?ISO-8859-1?B?9g==?=rg=?ISO-8859-1?B?5Q==?=sbord' dh = decode_header(s) - self.assertEqual(dh, [(s, None)]) + self.assertEqual(dh, [(b'Sm', None), (b'\xf6', 'iso-8859-1'), + (b'rg', None), (b'\xe5', 'iso-8859-1'), + (b'sbord', None)]) def test_rfc2047_with_whitespace(self): s = 'Sm =?ISO-8859-1?B?9g==?= rg =?ISO-8859-1?B?5Q==?= sbord' dh = decode_header(s) - self.assertEqual(dh, [(b'Sm', None), (b'\xf6', 'iso-8859-1'), - (b'rg', None), (b'\xe5', 'iso-8859-1'), - (b'sbord', None)]) + self.assertEqual(dh, [(b'Sm ', None), (b'\xf6', 'iso-8859-1'), + (b' rg ', None), (b'\xe5', 'iso-8859-1'), + (b' sbord', None)]) def test_rfc2047_B_bad_padding(self): s = '=?iso-8859-1?B?%s?=' @@ -2051,6 +2053,57 @@ Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz foo bar =?mac-iceland?q?r=8Aksm?= self.assertEqual(decode_header(s), [(b'andr\xe9=zz', 'iso-8659-1')]) + def test_rfc2047_rfc2047_1(self): + # 1st testcase at end of rfc2047 + s = '(=?ISO-8859-1?Q?a?=)' + self.assertEqual(decode_header(s), + [(b'(', None), (b'a', 'iso-8859-1'), (b')', None)]) + + def test_rfc2047_rfc2047_2(self): + # 2nd testcase at end of rfc2047 + s = '(=?ISO-8859-1?Q?a?= b)' + self.assertEqual(decode_header(s), + [(b'(', None), (b'a', 'iso-8859-1'), (b' b)', None)]) + + def test_rfc2047_rfc2047_3(self): + # 3rd testcase at end of rfc2047 + s = '(=?ISO-8859-1?Q?a?= =?ISO-8859-1?Q?b?=)' + self.assertEqual(decode_header(s), + [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)]) + + def test_rfc2047_rfc2047_4(self): + # 4th testcase at end of rfc2047 + s = '(=?ISO-8859-1?Q?a?= =?ISO-8859-1?Q?b?=)' + self.assertEqual(decode_header(s), + [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)]) + + def 
test_rfc2047_rfc2047_5a(self): + # 5th testcase at end of rfc2047 newline is \r\n + s = '(=?ISO-8859-1?Q?a?=\r\n =?ISO-8859-1?Q?b?=)' + self.assertEqual(decode_header(s), + [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)]) + + def test_rfc2047_rfc2047_5b(self): + # 5th testcase at end of rfc2047 newline is \n + s = '(=?ISO-8859-1?Q?a?=\n =?ISO-8859-1?Q?b?=)' + self.assertEqual(decode_header(s), + [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)]) + + def test_rfc2047_rfc2047_6(self): + # 6th testcase at end of rfc2047 + s = '(=?ISO-8859-1?Q?a_b?=)' + self.assertEqual(decode_header(s), + [(b'(', None), (b'a b', 'iso-8859-1'), (b')', None)]) + + def test_rfc2047_rfc2047_7(self): + # 7th testcase at end of rfc2047 + s = '(=?ISO-8859-1?Q?a?= =?ISO-8859-2?Q?_b?=)' + self.assertEqual(decode_header(s), + [(b'(', None), (b'a', 'iso-8859-1'), (b' b', 'iso-8859-2'), + (b')', None)]) + self.assertEqual(make_header(decode_header(s)).encode(), s.lower()) + self.assertEqual(str(make_header(decode_header(s))), '(a b)') + # Test the MIMEMessage class class TestMIMEMessage(TestEmailBase): @@ -4388,11 +4441,11 @@ A very long line that must get split to something other than at the h = make_header(decode_header(s)) eq(h.encode(), s) - def test_whitespace_eater(self): + def test_whitespace_keeper(self): eq = self.assertEqual s = 'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztk=?= =?koi8-r?q?=CA?= zz.' parts = decode_header(s) - eq(parts, [(b'Subject:', None), (b'\xf0\xd2\xcf\xd7\xc5\xd2\xcb\xc1 \xce\xc1 \xc6\xc9\xce\xc1\xcc\xd8\xce\xd9\xca', 'koi8-r'), (b'zz.', None)]) + eq(parts, [(b'Subject: ', None), (b'\xf0\xd2\xcf\xd7\xc5\xd2\xcb\xc1 \xce\xc1 \xc6\xc9\xce\xc1\xcc\xd8\xce\xd9\xca', 'koi8-r'), (b' zz.', None)]) hdr = make_header(parts) eq(hdr.encode(), 'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztnK?= zz.') |