summary | refs | log | tree | commit | diff | stats
path: root/Lib/test/test_email
diff options
context:
space:
mode:
author: R David Murray <rdmurray@bitdance.com> 2013-07-11 19:52:57 (GMT)
committer: R David Murray <rdmurray@bitdance.com> 2013-07-11 19:52:57 (GMT)
commit: 65171b28e77f589a490335c8749a24151e1d8817 (patch)
tree: 6377dcdda4bad3baec23587cf016587858bbeaca /Lib/test/test_email
parent: 3641a74e1c03ce153042d2c21639e5b6b9604f3b (diff)
download: cpython-65171b28e77f589a490335c8749a24151e1d8817.zip
cpython-65171b28e77f589a490335c8749a24151e1d8817.tar.gz
cpython-65171b28e77f589a490335c8749a24151e1d8817.tar.bz2
#18044: Fix parsing of encoded words of the form =?utf8?q?=XX...?=
The problem was that I was only checking for decimal digits after the third '?', not for *hex* digits :(. This changeset also fixes a couple of comment typos, deletes an unused function relating to encoded word parsing, and removes an invalid 'if' test from the folding function that was revealed by the tests written to validate this issue.
Diffstat (limited to 'Lib/test/test_email')
-rw-r--r-- Lib/test/test_email/test__encoded_words.py | 5
-rw-r--r-- Lib/test/test_email/test__header_value_parser.py | 9
-rw-r--r-- Lib/test/test_email/test_headerregistry.py | 41
3 files changed, 51 insertions, 4 deletions
diff --git a/Lib/test/test_email/test__encoded_words.py b/Lib/test/test_email/test__encoded_words.py
index 14395fe..f8e380d 100644
--- a/Lib/test/test_email/test__encoded_words.py
+++ b/Lib/test/test_email/test__encoded_words.py
@@ -122,6 +122,11 @@ class TestDecode(TestEmailBase):
# XXX Should this be a new Defect instead?
defects = [errors.CharsetError])
+ def test_q_nonascii(self):
+ self._test('=?utf-8?q?=C3=89ric?=',
+ 'Éric',
+ charset='utf-8')
+
class TestEncodeQ(TestEmailBase):
diff --git a/Lib/test/test_email/test__header_value_parser.py b/Lib/test/test_email/test__header_value_parser.py
index 6101e19..8917447 100644
--- a/Lib/test/test_email/test__header_value_parser.py
+++ b/Lib/test/test_email/test__header_value_parser.py
@@ -170,6 +170,15 @@ class TestParser(TestParserMixin, TestEmailBase):
[],
'')
+ def test_get_encoded_word_quopri_utf_escape_follows_cte(self):
+ # Issue 18044
+ self._test_get_x(parser.get_encoded_word,
+ '=?utf-8?q?=C3=89ric?=',
+ 'Éric',
+ 'Éric',
+ [],
+ '')
+
# get_unstructured
def _get_unst(self, value):
diff --git a/Lib/test/test_email/test_headerregistry.py b/Lib/test/test_email/test_headerregistry.py
index c0c81c1..80f1c02 100644
--- a/Lib/test/test_email/test_headerregistry.py
+++ b/Lib/test/test_email/test_headerregistry.py
@@ -123,12 +123,45 @@ class TestBaseHeaderFeatures(TestHeaderBase):
# self.assertEqual(h, value)
# self.assertDefectsEqual(h.defects, [errors.ObsoleteHeaderDefect])
- def test_RFC2047_value_decoded(self):
- value = '=?utf-8?q?this_is_a_test?='
- h = self.make_header('subject', value)
- self.assertEqual(h, 'this is a test')
+@parameterize
+class TestUnstructuredHeader(TestHeaderBase):
+ def string_as_value(self,
+ source,
+ decoded,
+ *args):
+ l = len(args)
+ defects = args[0] if l>0 else []
+ header = 'Subject:' + (' ' if source else '')
+ folded = header + (args[1] if l>1 else source) + '\n'
+ h = self.make_header('Subject', source)
+ self.assertEqual(h, decoded)
+ self.assertDefectsEqual(h.defects, defects)
+ self.assertEqual(h.fold(policy=policy.default), folded)
+
+ string_params = {
+
+ 'rfc2047_simple_quopri': (
+ '=?utf-8?q?this_is_a_test?=',
+ 'this is a test',
+ [],
+ 'this is a test'),
+
+ 'rfc2047_gb2312_base64': (
+ '=?gb2312?b?1eLKx9bQzsSy4srUo6E=?=',
+ '\u8fd9\u662f\u4e2d\u6587\u6d4b\u8bd5\uff01',
+ [],
+ '=?utf-8?b?6L+Z5piv5Lit5paH5rWL6K+V77yB?='),
+
+ 'rfc2047_simple_nonascii_quopri': (
+ '=?utf-8?q?=C3=89ric?=',
+ 'Éric'),
+
+ }
+
+
+@parameterize
class TestDateHeader(TestHeaderBase):
datestring = 'Sun, 23 Sep 2001 20:10:55 -0700'