diff options
-rw-r--r-- | Lib/email/test/test_email.py | 209 | ||||
-rw-r--r-- | Lib/email/test/test_email_codecs.py | 32 |
2 files changed, 165 insertions, 76 deletions
diff --git a/Lib/email/test/test_email.py b/Lib/email/test/test_email.py index 59e6724..0ffc957 100644 --- a/Lib/email/test/test_email.py +++ b/Lib/email/test/test_email.py @@ -1,15 +1,15 @@ # Copyright (C) 2001,2002,2003 Python Software Foundation # email package unit tests -import sys import os +import sys import time -import unittest import base64 import difflib +import unittest +import warnings from cStringIO import StringIO from types import StringType, ListType -import warnings import email @@ -566,7 +566,7 @@ bug demonstration g_head = "Die Mieter treten hier ein werden mit einem Foerderband komfortabel den Korridor entlang, an s\xfcdl\xfcndischen Wandgem\xe4lden vorbei, gegen die rotierenden Klingen bef\xf6rdert. " cz_head = "Finan\xe8ni metropole se hroutily pod tlakem jejich d\xf9vtipu.. " utf8_head = u"\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066\u3044\u307e\u3059\u3002".encode("utf-8") - h = Header(g_head, g) + h = Header(g_head, g, header_name='Subject') h.append(cz_head, cz) h.append(utf8_head, utf8) msg = Message() @@ -574,40 +574,32 @@ bug demonstration sfp = StringIO() g = Generator(sfp) g.flatten(msg) - eq(sfp.getvalue(), '''\ -Subject: =?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_eine?= - =?iso-8859-1?q?m_Foerderband_komfortabel_den_Korridor_ent?= - =?iso-8859-1?q?lang=2C_an_s=FCdl=FCndischen_Wandgem=E4lden_vorbei?= - =?iso-8859-1?q?=2C_gegen_die_rotierenden_Klingen_bef=F6rdert=2E_?= - =?iso-8859-2?q?Finan=E8ni_metropole_se_hroutil?= - =?iso-8859-2?q?y_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= - =?utf-8?b?5q2j56K644Gr6KiA44GG44Go57+76Kiz44Gv?= - =?utf-8?b?44GV44KM44Gm44GE44G+44Gb44KT44CC5LiA?= - =?utf-8?b?6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM?= - =?utf-8?b?44CB44GC44Go44Gv44Gn44Gf44KJ44KB44Gn?= - =?utf-8?b?44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGE=?= - =?utf-8?q?s_Nunstuck_git_und?= - =?utf-8?q?_Slotermeyer=3F_Ja!_Beiherhund_das_Ode?= - =?utf-8?q?r_die_Flipperwaldt?= - =?utf-8?b?IGdlcnNwdXQu44CN44Go6KiA44Gj44Gm44GE44G+44GZ44CC?= + eq(sfp.getvalue(), """\ +Subject: =?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerd?= + =?iso-8859-1?q?erband_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndi?= + =?iso-8859-1?q?schen_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Kling?= + =?iso-8859-1?q?en_bef=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_met?= + =?iso-8859-2?q?ropole_se_hroutily_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= + =?utf-8?b?5q2j56K644Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE?= + =?utf-8?b?44G+44Gb44KT44CC5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB?= + =?utf-8?b?44GC44Go44Gv44Gn44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CM?= + =?utf-8?q?Wenn_ist_das_Nunstuck_git_und_Slotermeyer=3F_Ja!_Beiherhund_das?= + =?utf-8?b?IE9kZXIgZGllIEZsaXBwZXJ3YWxkdCBnZXJzcHV0LuOAjeOBqOiogOOBow==?= + =?utf-8?b?44Gm44GE44G+44GZ44CC?= -''') - eq(h.encode(), '''\ -=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_eine?= - =?iso-8859-1?q?m_Foerderband_komfortabel_den_Korridor_ent?= - =?iso-8859-1?q?lang=2C_an_s=FCdl=FCndischen_Wandgem=E4lden_vorbei?= - =?iso-8859-1?q?=2C_gegen_die_rotierenden_Klingen_bef=F6rdert=2E_?= - =?iso-8859-2?q?Finan=E8ni_metropole_se_hroutil?= - =?iso-8859-2?q?y_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= - =?utf-8?b?5q2j56K644Gr6KiA44GG44Go57+76Kiz44Gv?= - =?utf-8?b?44GV44KM44Gm44GE44G+44Gb44KT44CC5LiA?= - =?utf-8?b?6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM?= - =?utf-8?b?44CB44GC44Go44Gv44Gn44Gf44KJ44KB44Gn?= - =?utf-8?b?44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGE=?= - =?utf-8?q?s_Nunstuck_git_und?= - =?utf-8?q?_Slotermeyer=3F_Ja!_Beiherhund_das_Ode?= - =?utf-8?q?r_die_Flipperwaldt?= - =?utf-8?b?IGdlcnNwdXQu44CN44Go6KiA44Gj44Gm44GE44G+44GZ44CC?=''') +""") + eq(h.encode(), """\ +=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerd?= + =?iso-8859-1?q?erband_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndi?= + =?iso-8859-1?q?schen_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Kling?= + =?iso-8859-1?q?en_bef=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_met?= + =?iso-8859-2?q?ropole_se_hroutily_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= + =?utf-8?b?5q2j56K644Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE?= + =?utf-8?b?44G+44Gb44KT44CC5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB?= + =?utf-8?b?44GC44Go44Gv44Gn44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CM?= + =?utf-8?q?Wenn_ist_das_Nunstuck_git_und_Slotermeyer=3F_Ja!_Beiherhund_das?= + =?utf-8?b?IE9kZXIgZGllIEZsaXBwZXJ3YWxkdCBnZXJzcHV0LuOAjeOBqOiogOOBow==?= + =?utf-8?b?44Gm44GE44G+44GZ44CC?=""") def test_long_header_encode(self): eq = self.ndiffAssertEqual @@ -712,12 +704,13 @@ from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93] def test_long_8bit_header(self): eq = self.ndiffAssertEqual msg = Message() - h = Header('Britische Regierung gibt', 'iso-8859-1') + h = Header('Britische Regierung gibt', 'iso-8859-1', + header_name='Subject') h.append('gr\xfcnes Licht f\xfcr Offshore-Windkraftprojekte') msg['Subject'] = h eq(msg.as_string(), """\ -Subject: =?iso-8859-1?q?Britische_Regierung_gibt?= - =?iso-8859-1?q?gr=FCnes_Licht_f=FCr_Offshore-Windkraftprojekte?= +Subject: =?iso-8859-1?q?Britische_Regierung_gibt?= =?iso-8859-1?q?gr=FCnes?= + =?iso-8859-1?q?_Licht_f=FCr_Offshore-Windkraftprojekte?= """) @@ -730,6 +723,48 @@ Reply-To: Britische Regierung gibt gr\xfcnes Licht f\xfcr Offshore-Windkraftproj """) + def test_long_to_header(self): + eq = self.ndiffAssertEqual + to = '"Someone Test #A" <someone@eecs.umich.edu>,<someone@eecs.umich.edu>,"Someone Test #B" <someone@umich.edu>, "Someone Test #C" <someone@eecs.umich.edu>, "Someone Test #D" <someone@eecs.umich.edu>' + msg = Message() + msg['To'] = to + eq(msg.as_string(0), '''\ +To: "Someone Test #A" <someone@eecs.umich.edu>, <someone@eecs.umich.edu>, +\t"Someone Test #B" <someone@umich.edu>, +\t"Someone Test #C" <someone@eecs.umich.edu>, +\t"Someone Test #D" <someone@eecs.umich.edu> + +''') + + def test_long_line_after_append(self): + eq = self.ndiffAssertEqual + s = 'This is an example of string which has almost the limit of header length.' + h = Header(s) + h.append('Add another line.') + eq(h.encode(), """\ +This is an example of string which has almost the limit of header length. + Add another line.""") + + def test_shorter_line_with_append(self): + eq = self.ndiffAssertEqual + s = 'This is a shorter line.' + h = Header(s) + h.append('Add another sentence. (Surprise?)') + eq(h.encode(), + 'This is a shorter line. Add another sentence. (Surprise?)') + + def test_long_field_name(self): + eq = self.ndiffAssertEqual + fn = 'X-Very-Very-Very-Long-Header-Name' + gs = "Die Mieter treten hier ein werden mit einem Foerderband komfortabel den Korridor entlang, an s\xfcdl\xfcndischen Wandgem\xe4lden vorbei, gegen die rotierenden Klingen bef\xf6rdert. " + h = Header(gs, 'iso-8859-1', header_name=fn) + # BAW: this seems broken because the first line is too long + eq(h.encode(), """\ +=?iso-8859-1?q?Die_Mieter_treten_hier_?= + =?iso-8859-1?q?ein_werden_mit_einem_Foerderband_komfortabel_den_Korridor_?= + =?iso-8859-1?q?entlang=2C_an_s=FCdl=FCndischen_Wandgem=E4lden_vorbei=2C_g?= + =?iso-8859-1?q?egen_die_rotierenden_Klingen_bef=F6rdert=2E_?=""") + # Test mangling of "From " lines in the body of a message @@ -1309,6 +1344,43 @@ Your message cannot be delivered to the following recipients: g.flatten(msg) self.assertEqual(sfp.getvalue(), text) + def test_no_nl_preamble(self): + eq = self.ndiffAssertEqual + msg = Message() + msg['From'] = 'aperson@dom.ain' + msg['To'] = 'bperson@dom.ain' + msg['Subject'] = 'Test' + msg.preamble = 'MIME message' + msg.epilogue = '' + msg1 = MIMEText('One') + msg2 = MIMEText('Two') + msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY') + msg.attach(msg1) + msg.attach(msg2) + eq(msg.as_string(), """\ +From: aperson@dom.ain +To: bperson@dom.ain +Subject: Test +Content-Type: multipart/mixed; boundary="BOUNDARY" + +MIME message +--BOUNDARY +Content-Type: text/plain; charset="us-ascii" +MIME-Version: 1.0 +Content-Transfer-Encoding: 7bit + +One + +--BOUNDARY +Content-Type: text/plain; charset="us-ascii" +MIME-Version: 1.0 +Content-Transfer-Encoding: 7bit + +Two + +--BOUNDARY-- +""") + def test_default_type(self): eq = self.assertEqual fp = openfile('msg_30.txt') @@ -2181,14 +2253,14 @@ class TestHeader(TestEmailBase): h = Header('Hello World!') eq(h.encode(), 'Hello World!') h.append(' Goodbye World!') - eq(h.encode(), 'Hello World! Goodbye World!') + eq(h.encode(), 'Hello World! Goodbye World!') def test_simple_surprise(self): eq = self.ndiffAssertEqual h = Header('Hello World!') eq(h.encode(), 'Hello World!') h.append('Goodbye World!') - eq(h.encode(), 'Hello World!Goodbye World!') + eq(h.encode(), 'Hello World! Goodbye World!') def test_header_needs_no_decoding(self): h = 'no decoding needed' @@ -2197,7 +2269,7 @@ class TestHeader(TestEmailBase): def test_long(self): h = Header("I am the very model of a modern Major-General; I've information vegetable, animal, and mineral; I know the kings of England, and I quote the fights historical from Marathon to Waterloo, in order categorical; I'm very well acquainted, too, with matters mathematical; I understand equations, both the simple and quadratical; about binomial theorem I'm teeming with a lot o' news, with many cheerful facts about the square of the hypotenuse.", maxlinelen=76) - for l in h.encode().split('\n '): + for l in h.encode(splitchars=' ').split('\n '): self.failUnless(len(l) <= 76) def test_multilingual(self): @@ -2212,21 +2284,18 @@ class TestHeader(TestEmailBase): h.append(cz_head, cz) h.append(utf8_head, utf8) enc = h.encode() - eq(enc, """=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_eine?= - =?iso-8859-1?q?m_Foerderband_komfortabel_den_Korridor_ent?= - =?iso-8859-1?q?lang=2C_an_s=FCdl=FCndischen_Wandgem=E4lden_vorbei?= - =?iso-8859-1?q?=2C_gegen_die_rotierenden_Klingen_bef=F6rdert=2E_?= - =?iso-8859-2?q?Finan=E8ni_metropole_se_hroutil?= - =?iso-8859-2?q?y_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= - =?utf-8?b?5q2j56K644Gr6KiA44GG44Go57+76Kiz44Gv?= - =?utf-8?b?44GV44KM44Gm44GE44G+44Gb44KT44CC5LiA?= - =?utf-8?b?6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM?= - =?utf-8?b?44CB44GC44Go44Gv44Gn44Gf44KJ44KB44Gn?= - =?utf-8?b?44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGE=?= - =?utf-8?q?s_Nunstuck_git_und?= - =?utf-8?q?_Slotermeyer=3F_Ja!_Beiherhund_das_Ode?= - =?utf-8?q?r_die_Flipperwaldt?= - =?utf-8?b?IGdlcnNwdXQu44CN44Go6KiA44Gj44Gm44GE44G+44GZ44CC?=""") + eq(enc, """\ +=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderband_ko?= + =?iso-8859-1?q?mfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen_Wan?= + =?iso-8859-1?q?dgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef=F6?= + =?iso-8859-1?q?rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hroutily?= + =?iso-8859-2?q?_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?= + =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC?= + =?utf-8?b?5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn?= + =?utf-8?b?44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFz?= + =?utf-8?q?_Nunstuck_git_und_Slotermeyer=3F_Ja!_Beiherhund_das_Oder_die_Fl?= + =?utf-8?b?aXBwZXJ3YWxkdCBnZXJzcHV0LuOAjeOBqOiogOOBo+OBpuOBhOOBvuOBmQ==?= + =?utf-8?b?44CC?=""") eq(decode_header(enc), [(g_head, "iso-8859-1"), (cz_head, "iso-8859-2"), (utf8_head, "utf-8")]) @@ -2318,6 +2387,30 @@ A very long line that must get split to something other than at the h.append(x, errors='replace') eq(str(h), x) + def test_encoded_adjacent_nonencoded(self): + eq = self.assertEqual + h = Header() + h.append('hello', 'iso-8859-1') + h.append('world') + s = h.encode() + eq(s, '=?iso-8859-1?q?hello?= world') + h = make_header(decode_header(s)) + eq(h.encode(), s) + + def test_whitespace_eater(self): + eq = self.assertEqual + s = 'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztk=?= =?koi8-r?q?=CA?= zz.' + parts = decode_header(s) + eq(parts, [('Subject:', None), ('\xf0\xd2\xcf\xd7\xc5\xd2\xcb\xc1 \xce\xc1 \xc6\xc9\xce\xc1\xcc\xd8\xce\xd9\xca', 'koi8-r'), ('zz.', None)]) + hdr = make_header(parts) + eq(hdr.encode(), + 'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztnK?= zz.') + + def test_broken_base64_header(self): + raises = self.assertRaises + s = 'Subject: =?EUC-KR?B?CSixpLDtKSC/7Liuvsax4iC6uLmwMcijIKHaILzSwd/H0SC8+LCjwLsgv7W/+Mj3IQ?=' + raises(Errors.HeaderParseError, decode_header, s) + # Test RFC 2231 header parameters (en/de)coding diff --git a/Lib/email/test/test_email_codecs.py b/Lib/email/test/test_email_codecs.py index 0f68151..99a3227 100644 --- a/Lib/email/test/test_email_codecs.py +++ b/Lib/email/test/test_email_codecs.py @@ -26,7 +26,14 @@ class TestEmailAsianCodecs(TestEmailBase): ghello = 'Gr\xfc\xdf Gott!' h.append(jhello, j) h.append(ghello, g) - eq(h.encode(), 'Hello World! =?iso-2022-jp?b?GyRCJU8lbSE8JW8hPCVrJUkhKhsoQg==?=\n =?iso-8859-1?q?Gr=FC=DF_Gott!?=') + # BAW: This used to -- and maybe should -- fold the two iso-8859-1 + # chunks into a single encoded word. However it doesn't violate the + # standard to have them as two encoded chunks and maybe it's + # reasonable <wink> for each .append() call to result in a separate + # encoded word. + eq(h.encode(), """\ +Hello World! =?iso-2022-jp?b?GyRCJU8lbSE8JW8hPCVrJUkhKhsoQg==?= + =?iso-8859-1?q?Gr=FC=DF?= =?iso-8859-1?q?_Gott!?=""") eq(decode_header(h.encode()), [('Hello World!', None), ('\x1b$B%O%m!<%o!<%k%I!*\x1b(B', 'iso-2022-jp'), @@ -35,23 +42,12 @@ class TestEmailAsianCodecs(TestEmailBase): h = Header(long, j, header_name="Subject") # test a very long header enc = h.encode() - # BAW: The following used to pass. Sadly, the test afterwards is what - # happens now. I've no idea which is right. Please, any Japanese and - # RFC 2047 experts, please verify! -## eq(enc, '''\ -##=?iso-2022-jp?b?dGVzdC1qYSAbJEIkWEVqOUYkNSRsJD8lYRsoQg==?= -## =?iso-2022-jp?b?GyRCITwlayRPO0oycTxUJE4+NRsoQg==?= -## =?iso-2022-jp?b?GyRCRyckckJUJEMkRiQkJF4kORsoQg==?=''') - eq(enc, """\ -=?iso-2022-jp?b?dGVzdC1qYSAbJEIkWEVqOUYkNSRsJD8lYRsoQg==?= - =?iso-2022-jp?b?GyRCITwlayRPO0oycTxUJE4+NUcnJHJCVCRDJEYkJCReJDkbKEI=?=""") - # BAW: same deal here. :( -## self.assertEqual( -## decode_header(enc), -## [("test-ja \x1b$B$XEj9F$5$l$?%a\x1b(B\x1b$B!<%k$O;J2q<T$N>5\x1b(B\x1b$BG'$rBT$C$F$$$^$9\x1b(B", 'iso-2022-jp')]) - self.assertEqual( - decode_header(enc), - [("test-ja \x1b$B$XEj9F$5$l$?%a\x1b(B\x1b$B!<%k$O;J2q<T$N>5G'$rBT$C$F$$$^$9\x1b(B", 'iso-2022-jp')]) + # TK: splitting point may differ by codec design and/or Header encoding + eq(enc , """\ +=?iso-2022-jp?b?dGVzdC1qYSAbJEIkWEVqOUYkNSRsJD8lYSE8JWskTztKGyhC?= + =?iso-2022-jp?b?GyRCMnE8VCROPjVHJyRyQlQkQyRGJCQkXiQ5GyhC?=""") + # TK: full decode comparison + eq(h.__unicode__().encode('euc-jp'), long) |