summaryrefslogtreecommitdiffstats
path: root/Lib/test/test_email/test__encoded_words.py
blob: 0b8b1de3359aa6e68e04110e6b40de4dcd94f6a8 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
import unittest
from email import _encoded_words as _ew
from email import errors
from test.test_email import TestEmailBase


class TestDecodeQ(TestEmailBase):

    def _test(self, source, ex_result, ex_defects=[]):
        result, defects = _ew.decode_q(source)
        self.assertEqual(result, ex_result)
        self.assertDefectsEqual(defects, ex_defects)

    def test_no_encoded(self):
        self._test(b'foobar', b'foobar')

    def test_spaces(self):
        self._test(b'foo=20bar=20', b'foo bar ')
        self._test(b'foo_bar_', b'foo bar ')

    def test_run_of_encoded(self):
        self._test(b'foo=20=20=21=2Cbar', b'foo  !,bar')


class TestDecodeB(TestEmailBase):

    def _test(self, source, ex_result, ex_defects=[]):
        result, defects = _ew.decode_b(source)
        self.assertEqual(result, ex_result)
        self.assertDefectsEqual(defects, ex_defects)

    def test_simple(self):
        self._test(b'Zm9v', b'foo')

    def test_missing_padding(self):
        # 1 missing padding character
        self._test(b'dmk', b'vi', [errors.InvalidBase64PaddingDefect])
        # 2 missing padding characters
        self._test(b'dg', b'v', [errors.InvalidBase64PaddingDefect])

    def test_invalid_character(self):
        self._test(b'dm\x01k===', b'vi', [errors.InvalidBase64CharactersDefect])

    def test_invalid_character_and_bad_padding(self):
        self._test(b'dm\x01k', b'vi', [errors.InvalidBase64CharactersDefect,
                                       errors.InvalidBase64PaddingDefect])

    def test_invalid_length(self):
        self._test(b'abcde', b'abcde', [errors.InvalidBase64LengthDefect])


class TestDecode(TestEmailBase):

    def test_wrong_format_input_raises(self):
        with self.assertRaises(ValueError):
            _ew.decode('=?badone?=')
        with self.assertRaises(ValueError):
            _ew.decode('=?')
        with self.assertRaises(ValueError):
            _ew.decode('')
        with self.assertRaises(KeyError):
            _ew.decode('=?utf-8?X?somevalue?=')

    def _test(self, source, result, charset='us-ascii', lang='', defects=[]):
        res, char, l, d = _ew.decode(source)
        self.assertEqual(res, result)
        self.assertEqual(char, charset)
        self.assertEqual(l, lang)
        self.assertDefectsEqual(d, defects)

    def test_simple_q(self):
        self._test('=?us-ascii?q?foo?=', 'foo')

    def test_simple_b(self):
        self._test('=?us-ascii?b?dmk=?=', 'vi')

    def test_q_case_ignored(self):
        self._test('=?us-ascii?Q?foo?=', 'foo')

    def test_b_case_ignored(self):
        self._test('=?us-ascii?B?dmk=?=', 'vi')

    def test_non_trivial_q(self):
        self._test('=?latin-1?q?=20F=fcr=20Elise=20?=', ' Für Elise ', 'latin-1')

    def test_q_escaped_bytes_preserved(self):
        self._test(b'=?us-ascii?q?=20\xACfoo?='.decode('us-ascii',
                                                       'surrogateescape'),
                   ' \uDCACfoo',
                   defects = [errors.UndecodableBytesDefect])

    def test_b_undecodable_bytes_ignored_with_defect(self):
        self._test(b'=?us-ascii?b?dm\xACk?='.decode('us-ascii',
                                                   'surrogateescape'),
                   'vi',
                   defects = [
                    errors.InvalidBase64CharactersDefect,
                    errors.InvalidBase64PaddingDefect])

    def test_b_invalid_bytes_ignored_with_defect(self):
        self._test('=?us-ascii?b?dm\x01k===?=',
                   'vi',
                   defects = [errors.InvalidBase64CharactersDefect])

    def test_b_invalid_bytes_incorrect_padding(self):
        self._test('=?us-ascii?b?dm\x01k?=',
                   'vi',
                   defects = [
                    errors.InvalidBase64CharactersDefect,
                    errors.InvalidBase64PaddingDefect])

    def test_b_padding_defect(self):
        self._test('=?us-ascii?b?dmk?=',
                   'vi',
                    defects = [errors.InvalidBase64PaddingDefect])

    def test_nonnull_lang(self):
        self._test('=?us-ascii*jive?q?test?=', 'test', lang='jive')

    def test_unknown_8bit_charset(self):
        self._test('=?unknown-8bit?q?foo=ACbar?=',
                   b'foo\xacbar'.decode('ascii', 'surrogateescape'),
                   charset = 'unknown-8bit',
                   defects = [])

    def test_unknown_charset(self):
        self._test('=?foobar?q?foo=ACbar?=',
                   b'foo\xacbar'.decode('ascii', 'surrogateescape'),
                   charset = 'foobar',
                   # XXX Should this be a new Defect instead?
                   defects = [errors.CharsetError])

    def test_q_nonascii(self):
        self._test('=?utf-8?q?=C3=89ric?=',
                   'Éric',
                   charset='utf-8')


class TestEncodeQ(TestEmailBase):

    def _test(self, src, expected):
        self.assertEqual(_ew.encode_q(src), expected)

    def test_all_safe(self):
        self._test(b'foobar', 'foobar')

    def test_spaces(self):
        self._test(b'foo bar ', 'foo_bar_')

    def test_run_of_encodables(self):
        self._test(b'foo  ,,bar', 'foo__=2C=2Cbar')


class TestEncodeB(TestEmailBase):

    def test_simple(self):
        self.assertEqual(_ew.encode_b(b'foo'), 'Zm9v')

    def test_padding(self):
        self.assertEqual(_ew.encode_b(b'vi'), 'dmk=')


class TestEncode(TestEmailBase):

    def test_q(self):
        self.assertEqual(_ew.encode('foo', 'utf-8', 'q'), '=?utf-8?q?foo?=')

    def test_b(self):
        self.assertEqual(_ew.encode('foo', 'utf-8', 'b'), '=?utf-8?b?Zm9v?=')

    def test_auto_q(self):
        self.assertEqual(_ew.encode('foo', 'utf-8'), '=?utf-8?q?foo?=')

    def test_auto_q_if_short_mostly_safe(self):
        self.assertEqual(_ew.encode('vi.', 'utf-8'), '=?utf-8?q?vi=2E?=')

    def test_auto_b_if_enough_unsafe(self):
        self.assertEqual(_ew.encode('.....', 'utf-8'), '=?utf-8?b?Li4uLi4=?=')

    def test_auto_b_if_long_unsafe(self):
        self.assertEqual(_ew.encode('vi.vi.vi.vi.vi.', 'utf-8'),
                         '=?utf-8?b?dmkudmkudmkudmkudmku?=')

    def test_auto_q_if_long_mostly_safe(self):
        self.assertEqual(_ew.encode('vi vi vi.vi ', 'utf-8'),
                         '=?utf-8?q?vi_vi_vi=2Evi_?=')

    def test_utf8_default(self):
        self.assertEqual(_ew.encode('foo'), '=?utf-8?q?foo?=')

    def test_lang(self):
        self.assertEqual(_ew.encode('foo', lang='jive'), '=?utf-8*jive?q?foo?=')

    def test_unknown_8bit(self):
        self.assertEqual(_ew.encode('foo\uDCACbar', charset='unknown-8bit'),
                         '=?unknown-8bit?q?foo=ACbar?=')


if __name__ == '__main__':
    unittest.main()