From 60398512c86c5535edd817c99ccb50453b3b0471 Mon Sep 17 00:00:00 2001
From: Raymond Hettinger <rhettinger@users.noreply.github.com>
Date: Thu, 28 May 2020 08:35:46 -0700
Subject: bpo-40755: Add missing multiset operations to Counter() (GH-20339)

---
 Doc/library/collections.rst                        |  41 ++++++++
 Lib/collections/__init__.py                        | 110 +++++++++++++++++++--
 Lib/test/test_collections.py                       |  59 +++++++++++
 .../2020-05-23-18-24-13.bpo-22533.k64XGo.rst       |   2 +
 4 files changed, 206 insertions(+), 6 deletions(-)
 create mode 100644 Misc/NEWS.d/next/Library/2020-05-23-18-24-13.bpo-22533.k64XGo.rst

diff --git a/Doc/library/collections.rst b/Doc/library/collections.rst
index 549ac1b..ea2b420 100644
--- a/Doc/library/collections.rst
+++ b/Doc/library/collections.rst
@@ -290,6 +290,47 @@ For example::
             >>> sorted(c.elements())
             ['a', 'a', 'a', 'a', 'b', 'b']
 
+    .. method:: isdisjoint(other)
+
+        True if none of the elements in *self* overlap with those in *other*.
+        Negative or missing counts are ignored.
+        Logically equivalent to:  ``not (+self) & (+other)``
+
+        .. versionadded:: 3.10
+
+    .. method:: isequal(other)
+
+        Test whether counts agree exactly.
+        Negative or missing counts are treated as zero.
+
+        This method works differently than the inherited :meth:`__eq__` method
+        which treats negative or missing counts as distinct from zero::
+
+            >>> Counter(a=1, b=0).isequal(Counter(a=1))
+            True
+            >>> Counter(a=1, b=0) == Counter(a=1)
+            False
+
+        Logically equivalent to:  ``+self == +other``
+
+        .. versionadded:: 3.10
+
+    .. method:: issubset(other)
+
+        True if the counts in *self* are less than or equal to those in *other*.
+        Negative or missing counts are treated as zero.
+        Logically equivalent to:  ``not self - (+other)``
+
+        .. versionadded:: 3.10
+
+    .. method:: issuperset(other)
+
+        True if the counts in *self* are greater than or equal to those in *other*.
+        Negative or missing counts are treated as zero.
+        Logically equivalent to:  ``not other - (+self)``
+
+        .. versionadded:: 3.10
+
     .. method:: most_common([n])
 
         Return a list of the *n* most common elements and their counts from the
diff --git a/Lib/collections/__init__.py b/Lib/collections/__init__.py
index 011a0c1..55fb46c 100644
--- a/Lib/collections/__init__.py
+++ b/Lib/collections/__init__.py
@@ -710,12 +710,24 @@ class Counter(dict):
     # To strip negative and zero counts, add-in an empty counter:
     #       c += Counter()
     #
-    # Rich comparison operators for multiset subset and superset tests
-    # are deliberately omitted due to semantic conflicts with the
-    # existing inherited dict equality method.  Subset and superset
-    # semantics ignore zero counts and require that p≤q ∧ p≥q → p=q;
-    # however, that would not be the case for p=Counter(a=1, b=0)
-    # and q=Counter(a=1) where the dictionaries are not equal.
+    # When the multiplicities are all zero or one, multiset operations
+    # are guaranteed to be equivalent to the corresponding operations
+    # for regular sets.
+    #     Given counter multisets such as:
+    #         cp = Counter(a=1, b=0, c=1)
+    #         cq = Counter(c=1, d=0, e=1)
+    #     The corresponding regular sets would be:
+    #         sp = {'a', 'c'}
+    #         sq = {'c', 'e'}
+    #     All of the following relations would hold:
+    #         set(cp + cq) == sp | sq
+    #         set(cp - cq) == sp - sq
+    #         set(cp | cq) == sp | sq
+    #         set(cp & cq) == sp & sq
+    #         cp.isequal(cq) == (sp == sq)
+    #         cp.issubset(cq) == sp.issubset(sq)
+    #         cp.issuperset(cq) == sp.issuperset(sq)
+    #         cp.isdisjoint(cq) == sp.isdisjoint(sq)
 
     def __add__(self, other):
         '''Add counts from two counters.
@@ -874,6 +886,92 @@ class Counter(dict):
                 self[elem] = other_count
         return self._keep_positive()
 
+    def isequal(self, other):
+        ''' Test whether counts agree exactly.
+
+        Negative or missing counts are treated as zero.
+
+        This is different than the inherited __eq__() method which
+        treats negative or missing counts as distinct from zero:
+
+            >>> Counter(a=1, b=0).isequal(Counter(a=1))
+            True
+            >>> Counter(a=1, b=0) == Counter(a=1)
+            False
+
+        Logically equivalent to:  +self == +other
+        '''
+        if not isinstance(other, Counter):
+            other = Counter(other)
+        for elem in set(self) | set(other):
+            left = self[elem]
+            right = other[elem]
+            if left == right:
+                continue
+            if left < 0:
+                left = 0
+            if right < 0:
+                right = 0
+            if left != right:
+                return False
+        return True
+
+    def issubset(self, other):
+        '''True if the counts in self are less than or equal to those in other.
+
+        Negative or missing counts are treated as zero.
+
+        Logically equivalent to:  not self - (+other)
+        '''
+        if not isinstance(other, Counter):
+            other = Counter(other)
+        for elem, count in self.items():
+            other_count = other[elem]
+            if other_count < 0:
+                other_count = 0
+            if count > other_count:
+                return False
+        return True
+
+    def issuperset(self, other):
+        '''True if the counts in self are greater than or equal to those in other.
+
+        Negative or missing counts are treated as zero.
+
+        Logically equivalent to:  not other - (+self)
+        '''
+        if not isinstance(other, Counter):
+            other = Counter(other)
+        return other.issubset(self)
+
+    def isdisjoint(self, other):
+        '''True if none of the elements in self overlap with those in other.
+
+        Negative or missing counts are ignored.
+
+        Logically equivalent to:  not (+self) & (+other)
+        '''
+        if not isinstance(other, Counter):
+            other = Counter(other)
+        for elem, count in self.items():
+            if count > 0 and other[elem] > 0:
+                return False
+        return True
+
+    # Rich comparison operators for multiset subset and superset tests
+    # have been deliberately omitted due to semantic conflicts with the
+    # existing inherited dict equality method.  Subset and superset
+    # semantics ignore zero counts and require that p⊆q ∧ p⊇q ⇔ p=q;
+    # however, that would not be the case for p=Counter(a=1, b=0)
+    # and q=Counter(a=1) where the dictionaries are not equal.
+
+    def _omitted(self, other):
+        raise TypeError(
+            'Rich comparison operators have been deliberately omitted. '
+            'Use the isequal(), issubset(), and issuperset() methods instead.')
+
+    __lt__ = __le__ = __gt__ = __ge__ = __lt__ = _omitted
+
 
 ########################################################################
 ###  ChainMap
diff --git a/Lib/test/test_collections.py b/Lib/test/test_collections.py
index a8d3337..8d80e88 100644
--- a/Lib/test/test_collections.py
+++ b/Lib/test/test_collections.py
@@ -7,6 +7,7 @@ import inspect
 import operator
 import pickle
 from random import choice, randrange
+from itertools import product, chain, combinations
 import string
 import sys
 from test import support
@@ -2219,6 +2220,64 @@ class TestCounter(unittest.TestCase):
         self.assertTrue(c.called)
         self.assertEqual(dict(c), {'a': 5, 'b': 2, 'c': 1, 'd': 1, 'r':2 })
 
+    def test_multiset_operations_equivalent_to_set_operations(self):
+        # When the multiplicities are all zero or one, multiset operations
+        # are guaranteed to be equivalent to the corresponding operations
+        # for regular sets.
+        s = list(product(('a', 'b', 'c'), range(2)))
+        powerset = chain.from_iterable(combinations(s, r) for r in range(len(s)+1))
+        counters = [Counter(dict(groups)) for groups in powerset]
+        for cp, cq in product(counters, repeat=2):
+            sp = set(cp.elements())
+            sq = set(cq.elements())
+            self.assertEqual(set(cp + cq), sp | sq)
+            self.assertEqual(set(cp - cq), sp - sq)
+            self.assertEqual(set(cp | cq), sp | sq)
+            self.assertEqual(set(cp & cq), sp & sq)
+            self.assertEqual(cp.isequal(cq), sp == sq)
+            self.assertEqual(cp.issubset(cq), sp.issubset(sq))
+            self.assertEqual(cp.issuperset(cq), sp.issuperset(sq))
+            self.assertEqual(cp.isdisjoint(cq), sp.isdisjoint(sq))
+
+    def test_multiset_equal(self):
+        self.assertTrue(Counter(a=3, b=2, c=0).isequal('ababa'))
+        self.assertFalse(Counter(a=3, b=2).isequal('babab'))
+
+    def test_multiset_subset(self):
+        self.assertTrue(Counter(a=3, b=2, c=0).issubset('ababa'))
+        self.assertFalse(Counter(a=3, b=2).issubset('babab'))
+
+    def test_multiset_superset(self):
+        self.assertTrue(Counter(a=3, b=2, c=0).issuperset('aab'))
+        self.assertFalse(Counter(a=3, b=2, c=0).issuperset('aabd'))
+
+    def test_multiset_disjoint(self):
+        self.assertTrue(Counter(a=3, b=2, c=0).isdisjoint('cde'))
+        self.assertFalse(Counter(a=3, b=2, c=0).isdisjoint('bcd'))
+
+    def test_multiset_predicates_with_negative_counts(self):
+        # Multiset predicates run on the output of the elements() method,
+        # meaning that zero counts and negative counts are ignored.
+        # The tests below confirm that we get that same results as the
+        # tests above, even after a negative count has been included
+        # in either *self* or *other*.
+        self.assertTrue(Counter(a=3, b=2, c=0, d=-1).isequal('ababa'))
+        self.assertFalse(Counter(a=3, b=2, d=-1).isequal('babab'))
+        self.assertTrue(Counter(a=3, b=2, c=0, d=-1).issubset('ababa'))
+        self.assertFalse(Counter(a=3, b=2, d=-1).issubset('babab'))
+        self.assertTrue(Counter(a=3, b=2, c=0, d=-1).issuperset('aab'))
+        self.assertFalse(Counter(a=3, b=2, c=0, d=-1).issuperset('aabd'))
+        self.assertTrue(Counter(a=3, b=2, c=0, d=-1).isdisjoint('cde'))
+        self.assertFalse(Counter(a=3, b=2, c=0, d=-1).isdisjoint('bcd'))
+        self.assertTrue(Counter(a=3, b=2, c=0, d=-1).isequal(Counter(a=3, b=2, c=-1)))
+        self.assertFalse(Counter(a=3, b=2, d=-1).isequal(Counter(a=2, b=3, c=-1)))
+        self.assertTrue(Counter(a=3, b=2, c=0, d=-1).issubset(Counter(a=3, b=2, c=-1)))
+        self.assertFalse(Counter(a=3, b=2, d=-1).issubset(Counter(a=2, b=3, c=-1)))
+        self.assertTrue(Counter(a=3, b=2, c=0, d=-1).issuperset(Counter(a=2, b=1, c=-1)))
+        self.assertFalse(Counter(a=3, b=2, c=0, d=-1).issuperset(Counter(a=2, b=1, c=-1, d=1)))
+        self.assertTrue(Counter(a=3, b=2, c=0, d=-1).isdisjoint(Counter(c=1, d=2, e=3, f=-1)))
+        self.assertFalse(Counter(a=3, b=2, c=0, d=-1).isdisjoint(Counter(b=1, c=1, d=1, e=-1)))
+
 
 ################################################################################
 ### Run tests
diff --git a/Misc/NEWS.d/next/Library/2020-05-23-18-24-13.bpo-22533.k64XGo.rst b/Misc/NEWS.d/next/Library/2020-05-23-18-24-13.bpo-22533.k64XGo.rst
new file mode 100644
index 0000000..737162f
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2020-05-23-18-24-13.bpo-22533.k64XGo.rst
@@ -0,0 +1,2 @@
+Add multiset comparison methods to collections.Counter(): isequal(),
+issubset(), issuperset(), and isdisjoint().
-- 
cgit v0.12