summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Lib/email/_parseaddr.py16
-rw-r--r--Lib/email/test/test_email.py18
-rw-r--r--Misc/NEWS3
3 files changed, 35 insertions, 2 deletions
diff --git a/Lib/email/_parseaddr.py b/Lib/email/_parseaddr.py
index 3bd4ba4..699d418 100644
--- a/Lib/email/_parseaddr.py
+++ b/Lib/email/_parseaddr.py
@@ -199,14 +199,18 @@ class AddrlistClass:
self.commentlist = []
def gotonext(self):
- """Parse up to the start of the next address."""
+ """Skip white space and extract comments."""
+ wslist = []
while self.pos < len(self.field):
if self.field[self.pos] in self.LWS + '\n\r':
+ if self.field[self.pos] not in '\n\r':
+ wslist.append(self.field[self.pos])
self.pos += 1
elif self.field[self.pos] == '(':
self.commentlist.append(self.getcomment())
else:
break
+ return EMPTYSTRING.join(wslist)
def getaddrlist(self):
"""Parse all addresses.
@@ -319,16 +323,24 @@ class AddrlistClass:
self.gotonext()
while self.pos < len(self.field):
+ preserve_ws = True
if self.field[self.pos] == '.':
+ if aslist and not aslist[-1].strip():
+ aslist.pop()
aslist.append('.')
self.pos += 1
+ preserve_ws = False
elif self.field[self.pos] == '"':
aslist.append('"%s"' % quote(self.getquote()))
elif self.field[self.pos] in self.atomends:
+ if aslist and not aslist[-1].strip():
+ aslist.pop()
break
else:
aslist.append(self.getatom())
- self.gotonext()
+ ws = self.gotonext()
+ if preserve_ws and ws:
+ aslist.append(ws)
if self.pos >= len(self.field) or self.field[self.pos] != '@':
return EMPTYSTRING.join(aslist)
diff --git a/Lib/email/test/test_email.py b/Lib/email/test/test_email.py
index 78fb961..e5eece2 100644
--- a/Lib/email/test/test_email.py
+++ b/Lib/email/test/test_email.py
@@ -2342,6 +2342,24 @@ class TestMiscellaneous(TestEmailBase):
eq(utils.parseaddr('"\\\\"example\\\\" example"@example.com'),
('', '"\\\\"example\\\\" example"@example.com'))
+ def test_parseaddr_preserves_spaces_in_local_part(self):
+ # issue 9286. A normal RFC5322 local part should not contain any
+ # folding white space, but legacy local parts can (they are a sequence
+ # of atoms, not dotatoms). On the other hand we strip whitespace from
+ # before the @ and around dots, on the assumption that the whitespace
+ # around the punctuation is a mistake in what would otherwise be
+ # an RFC5322 local part. Leading whitespace is, usual, stripped as well.
+ self.assertEqual(('', "merwok wok@xample.com"),
+ utils.parseaddr("merwok wok@xample.com"))
+ self.assertEqual(('', "merwok wok@xample.com"),
+ utils.parseaddr("merwok wok@xample.com"))
+ self.assertEqual(('', "merwok wok@xample.com"),
+ utils.parseaddr(" merwok wok @xample.com"))
+ self.assertEqual(('', 'merwok"wok" wok@xample.com'),
+ utils.parseaddr('merwok"wok" wok@xample.com'))
+ self.assertEqual(('', 'merwok.wok.wok@xample.com'),
+ utils.parseaddr('merwok. wok . wok@xample.com'))
+
def test_multiline_from_comment(self):
x = """\
Foo
diff --git a/Misc/NEWS b/Misc/NEWS
index 0e90db2..9428d1b 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -23,6 +23,9 @@ Core and Builtins
Library
-------
+- Issue #9286: email.utils.parseaddr no longer concatenates blank-separated
+ words in the local part of email addresses, thereby preserving the input.
+
- Issue #6791: Limit header line length (to 65535 bytes) in http.client
and http.server, to avoid denial of services from the other party.