diff options
-rw-r--r-- | Lib/email/_parseaddr.py | 16 | ||||
-rw-r--r-- | Lib/email/test/test_email.py | 18 | ||||
-rw-r--r-- | Misc/NEWS | 3 |
3 files changed, 35 insertions, 2 deletions
diff --git a/Lib/email/_parseaddr.py b/Lib/email/_parseaddr.py index 3bd4ba4..699d418 100644 --- a/Lib/email/_parseaddr.py +++ b/Lib/email/_parseaddr.py @@ -199,14 +199,18 @@ class AddrlistClass: self.commentlist = [] def gotonext(self): - """Parse up to the start of the next address.""" + """Skip white space and extract comments.""" + wslist = [] while self.pos < len(self.field): if self.field[self.pos] in self.LWS + '\n\r': + if self.field[self.pos] not in '\n\r': + wslist.append(self.field[self.pos]) self.pos += 1 elif self.field[self.pos] == '(': self.commentlist.append(self.getcomment()) else: break + return EMPTYSTRING.join(wslist) def getaddrlist(self): """Parse all addresses. @@ -319,16 +323,24 @@ class AddrlistClass: self.gotonext() while self.pos < len(self.field): + preserve_ws = True if self.field[self.pos] == '.': + if aslist and not aslist[-1].strip(): + aslist.pop() aslist.append('.') self.pos += 1 + preserve_ws = False elif self.field[self.pos] == '"': aslist.append('"%s"' % quote(self.getquote())) elif self.field[self.pos] in self.atomends: + if aslist and not aslist[-1].strip(): + aslist.pop() break else: aslist.append(self.getatom()) - self.gotonext() + ws = self.gotonext() + if preserve_ws and ws: + aslist.append(ws) if self.pos >= len(self.field) or self.field[self.pos] != '@': return EMPTYSTRING.join(aslist) diff --git a/Lib/email/test/test_email.py b/Lib/email/test/test_email.py index 78fb961..e5eece2 100644 --- a/Lib/email/test/test_email.py +++ b/Lib/email/test/test_email.py @@ -2342,6 +2342,24 @@ class TestMiscellaneous(TestEmailBase): eq(utils.parseaddr('"\\\\"example\\\\" example"@example.com'), ('', '"\\\\"example\\\\" example"@example.com')) + def test_parseaddr_preserves_spaces_in_local_part(self): + # issue 9286. A normal RFC5322 local part should not contain any + # folding white space, but legacy local parts can (they are a sequence + # of atoms, not dotatoms). 
On the other hand we strip whitespace from + # before the @ and around dots, on the assumption that the whitespace + # around the punctuation is a mistake in what would otherwise be + # an RFC5322 local part. Leading whitespace is, as usual, stripped as well. + self.assertEqual(('', "merwok wok@xample.com"), + utils.parseaddr("merwok wok@xample.com")) + self.assertEqual(('', "merwok wok@xample.com"), + utils.parseaddr("merwok wok@xample.com")) + self.assertEqual(('', "merwok wok@xample.com"), + utils.parseaddr(" merwok wok @xample.com")) + self.assertEqual(('', 'merwok"wok" wok@xample.com'), + utils.parseaddr('merwok"wok" wok@xample.com')) + self.assertEqual(('', 'merwok.wok.wok@xample.com'), + utils.parseaddr('merwok. wok . wok@xample.com')) + def test_multiline_from_comment(self): x = """\ Foo @@ -23,6 +23,9 @@ Core and Builtins Library ------- +- Issue #9286: email.utils.parseaddr no longer concatenates blank-separated + words in the local part of email addresses, thereby preserving the input. + - Issue #6791: Limit header line length (to 65535 bytes) in http.client and http.server, to avoid denial of services from the other party. |