diff options
author | Michael Selik <mike@selik.org> | 2019-09-20 03:25:55 (GMT) |
---|---|---|
committer | Abhilash Raj <maxking@users.noreply.github.com> | 2019-09-20 03:25:55 (GMT) |
commit | 2702638eabe5f7b25f36d295f0ad78cb8d4eda05 (patch) | |
tree | 1231f4f3c8ca7667c98aafd33136dab024c980a0 | |
parent | 3368f3c6ae4140a0883e19350e672fd09c9db616 (diff) | |
download | cpython-2702638eabe5f7b25f36d295f0ad78cb8d4eda05.zip cpython-2702638eabe5f7b25f36d295f0ad78cb8d4eda05.tar.gz cpython-2702638eabe5f7b25f36d295f0ad78cb8d4eda05.tar.bz2 |
bpo-34002: Minor efficiency and clarity improvements in email package. (GH-7999)
* Check intersection of two sets explicitly
Comparing ``len(a) > len(a - b)`` is essentially looking for an
intersection between the two sets. If set ``b`` does not intersect ``a``
then ``len(a - b)`` will be equal to ``len(a)``. This logic is more
clearly expressed as ``a & b``.
* Change while/pop to a for-loop
Copying the list, then repeatedly popping the first element was
unnecessarily slow. I also cleaned up a couple of other inefficiencies.
There's no need to unpack a tuple, then re-pack and append it. The list
can be created with the first element instead of empty. Secondly, the
``endswith`` method returns a bool, so there's no need for an if-
statement to set ``encoding`` to True or False.
* Use set.intersection to check for intersections
The ``a.intersection(b)`` method is clearer in purpose than ``not
a.isdisjoint(b)`` and avoids the unnecessary set construction that ``a &
set(b)`` performs.
* Use not isdisjoint instead of intersection
While it reads slightly worse, the isdisjoint method will stop when it
finds a counterexample and returns a bool, rather than looping over the
entire iterable and constructing a new set.
-rw-r--r-- | Lib/email/headerregistry.py | 22 | ||||
-rw-r--r-- | Lib/email/utils.py | 14 | ||||
-rw-r--r-- | Misc/NEWS.d/next/Library/2019-09-19-19-58-33.bpo-34002.KBnaVX.rst | 2 |
3 files changed, 13 insertions, 25 deletions
diff --git a/Lib/email/headerregistry.py b/Lib/email/headerregistry.py index dcc960b..cc1d191 100644 --- a/Lib/email/headerregistry.py +++ b/Lib/email/headerregistry.py @@ -69,11 +69,9 @@ class Address: """The addr_spec (username@domain) portion of the address, quoted according to RFC 5322 rules, but with no Content Transfer Encoding. """ - nameset = set(self.username) - if len(nameset) > len(nameset-parser.DOT_ATOM_ENDS): - lp = parser.quote_string(self.username) - else: - lp = self.username + lp = self.username + if not parser.DOT_ATOM_ENDS.isdisjoint(lp): + lp = parser.quote_string(lp) if self.domain: return lp + '@' + self.domain if not lp: @@ -86,11 +84,9 @@ class Address: self.display_name, self.username, self.domain) def __str__(self): - nameset = set(self.display_name) - if len(nameset) > len(nameset-parser.SPECIALS): - disp = parser.quote_string(self.display_name) - else: - disp = self.display_name + disp = self.display_name + if not parser.SPECIALS.isdisjoint(disp): + disp = parser.quote_string(disp) if disp: addr_spec = '' if self.addr_spec=='<>' else self.addr_spec return "{} <{}>".format(disp, addr_spec) @@ -141,10 +137,8 @@ class Group: if self.display_name is None and len(self.addresses)==1: return str(self.addresses[0]) disp = self.display_name - if disp is not None: - nameset = set(disp) - if len(nameset) > len(nameset-parser.SPECIALS): - disp = parser.quote_string(disp) + if disp is not None and not parser.SPECIALS.isdisjoint(disp): + disp = parser.quote_string(disp) adrstr = ", ".join(str(x) for x in self.addresses) adrstr = ' ' + adrstr if adrstr else adrstr return "{}:{};".format(disp, adrstr) diff --git a/Lib/email/utils.py b/Lib/email/utils.py index 858f620..b137ce3 100644 --- a/Lib/email/utils.py +++ b/Lib/email/utils.py @@ -259,21 +259,13 @@ def decode_params(params): params is a sequence of 2-tuples containing (param name, string value). 
""" - # Copy params so we don't mess with the original - params = params[:] - new_params = [] + new_params = [params[0]] # Map parameter's name to a list of continuations. The values are a # 3-tuple of the continuation number, the string value, and a flag # specifying whether a particular segment is %-encoded. rfc2231_params = {} - name, value = params.pop(0) - new_params.append((name, value)) - while params: - name, value = params.pop(0) - if name.endswith('*'): - encoded = True - else: - encoded = False + for name, value in params[1:]: + encoded = name.endswith('*') value = unquote(value) mo = rfc2231_continuation.match(name) if mo: diff --git a/Misc/NEWS.d/next/Library/2019-09-19-19-58-33.bpo-34002.KBnaVX.rst b/Misc/NEWS.d/next/Library/2019-09-19-19-58-33.bpo-34002.KBnaVX.rst new file mode 100644 index 0000000..5d4c231 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2019-09-19-19-58-33.bpo-34002.KBnaVX.rst @@ -0,0 +1,2 @@ +Improve efficiency in parts of email package by changing while-pop to a for +loop, using isdisjoint instead of set intersections. |