From b7d79b4f36787874128c439d38397fe95c48429b Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Sun, 31 May 2020 14:57:42 -0700 Subject: bpo-40755: Add rich comparisons to Counter (GH-20548) --- Doc/library/collections.rst | 54 +++------ Lib/collections/__init__.py | 122 ++++++--------------- Lib/test/test_collections.py | 92 +++++----------- .../2020-05-23-18-24-13.bpo-22533.k64XGo.rst | 2 - .../2020-05-30-18-48-58.bpo-40755.IyOe2J.rst | 1 + 5 files changed, 76 insertions(+), 195 deletions(-) delete mode 100644 Misc/NEWS.d/next/Library/2020-05-23-18-24-13.bpo-22533.k64XGo.rst create mode 100644 Misc/NEWS.d/next/Library/2020-05-30-18-48-58.bpo-40755.IyOe2J.rst diff --git a/Doc/library/collections.rst b/Doc/library/collections.rst index ea2b420..f538da5 100644 --- a/Doc/library/collections.rst +++ b/Doc/library/collections.rst @@ -290,47 +290,6 @@ For example:: >>> sorted(c.elements()) ['a', 'a', 'a', 'a', 'b', 'b'] - .. method:: isdisjoint(other) - - True if none of the elements in *self* overlap with those in *other*. - Negative or missing counts are ignored. - Logically equivalent to: ``not (+self) & (+other)`` - - .. versionadded:: 3.10 - - .. method:: isequal(other) - - Test whether counts agree exactly. - Negative or missing counts are treated as zero. - - This method works differently than the inherited :meth:`__eq__` method - which treats negative or missing counts as distinct from zero:: - - >>> Counter(a=1, b=0).isequal(Counter(a=1)) - True - >>> Counter(a=1, b=0) == Counter(a=1) - False - - Logically equivalent to: ``+self == +other`` - - .. versionadded:: 3.10 - - .. method:: issubset(other) - - True if the counts in *self* are less than or equal to those in *other*. - Negative or missing counts are treated as zero. - Logically equivalent to: ``not self - (+other)`` - - .. versionadded:: 3.10 - - .. method:: issuperset(other) - - True if the counts in *self* are greater than or equal to those in *other*. - Negative or missing counts are treated as zero. - Logically equivalent to: ``not other - (+self)`` - - .. versionadded:: 3.10 - .. method:: most_common([n]) Return a list of the *n* most common elements and their counts from the @@ -369,6 +328,19 @@ For example:: instead of replacing them. Also, the *iterable* is expected to be a sequence of elements, not a sequence of ``(key, value)`` pairs. +Counters support rich comparison operators for equality, subset, and +superset relationships: ``==``, ``!=``, ``<``, ``<=``, ``>``, ``>=``. +All of those tests treat missing elements as having zero counts so that +``Counter(a=1) == Counter(a=1, b=0)`` returns true. + +.. versionadded:: 3.10 + Rich comparison operations we were added + +.. versionchanged:: 3.10 + In equality tests, missing elements are treated as having zero counts. + Formerly, ``Counter(a=3)`` and ``Counter(a=3, b=0)`` were considered + distinct. + Common patterns for working with :class:`Counter` objects:: sum(c.values()) # total of all counts diff --git a/Lib/collections/__init__.py b/Lib/collections/__init__.py index 55fb46c..2acf672 100644 --- a/Lib/collections/__init__.py +++ b/Lib/collections/__init__.py @@ -691,6 +691,42 @@ class Counter(dict): if elem in self: super().__delitem__(elem) + def __eq__(self, other): + 'True if all counts agree. Missing counts are treated as zero.' + if not isinstance(other, Counter): + return NotImplemented + return all(self[e] == other[e] for c in (self, other) for e in c) + + def __ne__(self, other): + 'True if any counts disagree. Missing counts are treated as zero.' + if not isinstance(other, Counter): + return NotImplemented + return not self == other + + def __le__(self, other): + 'True if all counts in self are a subset of those in other.' + if not isinstance(other, Counter): + return NotImplemented + return all(self[e] <= other[e] for c in (self, other) for e in c) + + def __lt__(self, other): + 'True if all counts in self are a proper subset of those in other.' + if not isinstance(other, Counter): + return NotImplemented + return self <= other and self != other + + def __ge__(self, other): + 'True if all counts in self are a superset of those in other.' + if not isinstance(other, Counter): + return NotImplemented + return all(self[e] >= other[e] for c in (self, other) for e in c) + + def __gt__(self, other): + 'True if all counts in self are a proper superset of those in other.' + if not isinstance(other, Counter): + return NotImplemented + return self >= other and self != other + def __repr__(self): if not self: return '%s()' % self.__class__.__name__ @@ -886,92 +922,6 @@ class Counter(dict): self[elem] = other_count return self._keep_positive() - def isequal(self, other): - ''' Test whether counts agree exactly. - - Negative or missing counts are treated as zero. - - This is different than the inherited __eq__() method which - treats negative or missing counts as distinct from zero: - - >>> Counter(a=1, b=0).isequal(Counter(a=1)) - True - >>> Counter(a=1, b=0) == Counter(a=1) - False - - Logically equivalent to: +self == +other - ''' - if not isinstance(other, Counter): - other = Counter(other) - for elem in set(self) | set(other): - left = self[elem] - right = other[elem] - if left == right: - continue - if left < 0: - left = 0 - if right < 0: - right = 0 - if left != right: - return False - return True - - def issubset(self, other): - '''True if the counts in self are less than or equal to those in other. - - Negative or missing counts are treated as zero. - - Logically equivalent to: not self - (+other) - ''' - if not isinstance(other, Counter): - other = Counter(other) - for elem, count in self.items(): - other_count = other[elem] - if other_count < 0: - other_count = 0 - if count > other_count: - return False - return True - - def issuperset(self, other): - '''True if the counts in self are greater than or equal to those in other. - - Negative or missing counts are treated as zero. - - Logically equivalent to: not other - (+self) - ''' - if not isinstance(other, Counter): - other = Counter(other) - return other.issubset(self) - - def isdisjoint(self, other): - '''True if none of the elements in self overlap with those in other. - - Negative or missing counts are ignored. - - Logically equivalent to: not (+self) & (+other) - ''' - if not isinstance(other, Counter): - other = Counter(other) - for elem, count in self.items(): - if count > 0 and other[elem] > 0: - return False - return True - - # Rich comparison operators for multiset subset and superset tests - # have been deliberately omitted due to semantic conflicts with the - # existing inherited dict equality method. Subset and superset - # semantics ignore zero counts and require that p⊆q ∧ p⊇q ⇔ p=q; - # however, that would not be the case for p=Counter(a=1, b=0) - # and q=Counter(a=1) where the dictionaries are not equal. - - def _omitted(self, other): - raise TypeError( - 'Rich comparison operators have been deliberately omitted. ' - 'Use the isequal(), issubset(), and issuperset() methods instead.') - - __lt__ = __le__ = __gt__ = __ge__ = __lt__ = _omitted - ######################################################################## ### ChainMap diff --git a/Lib/test/test_collections.py b/Lib/test/test_collections.py index 8d80e88..7c7f865 100644 --- a/Lib/test/test_collections.py +++ b/Lib/test/test_collections.py @@ -2123,29 +2123,6 @@ class TestCounter(unittest.TestCase): set_result = setop(set(p.elements()), set(q.elements())) self.assertEqual(counter_result, dict.fromkeys(set_result, 1)) - def test_subset_superset_not_implemented(self): - # Verify that multiset comparison operations are not implemented. - - # These operations were intentionally omitted because multiset - # comparison semantics conflict with existing dict equality semantics. - - # For multisets, we would expect that if p<=q and p>=q are both true, - # then p==q. However, dict equality semantics require that p!=q when - # one of sets contains an element with a zero count and the other - # doesn't. - - p = Counter(a=1, b=0) - q = Counter(a=1, c=0) - self.assertNotEqual(p, q) - with self.assertRaises(TypeError): - p < q - with self.assertRaises(TypeError): - p <= q - with self.assertRaises(TypeError): - p > q - with self.assertRaises(TypeError): - p >= q - def test_inplace_operations(self): elements = 'abcd' for i in range(1000): @@ -2234,49 +2211,32 @@ class TestCounter(unittest.TestCase): self.assertEqual(set(cp - cq), sp - sq) self.assertEqual(set(cp | cq), sp | sq) self.assertEqual(set(cp & cq), sp & sq) - self.assertEqual(cp.isequal(cq), sp == sq) - self.assertEqual(cp.issubset(cq), sp.issubset(sq)) - self.assertEqual(cp.issuperset(cq), sp.issuperset(sq)) - self.assertEqual(cp.isdisjoint(cq), sp.isdisjoint(sq)) - - def test_multiset_equal(self): - self.assertTrue(Counter(a=3, b=2, c=0).isequal('ababa')) - self.assertFalse(Counter(a=3, b=2).isequal('babab')) - - def test_multiset_subset(self): - self.assertTrue(Counter(a=3, b=2, c=0).issubset('ababa')) - self.assertFalse(Counter(a=3, b=2).issubset('babab')) - - def test_multiset_superset(self): - self.assertTrue(Counter(a=3, b=2, c=0).issuperset('aab')) - self.assertFalse(Counter(a=3, b=2, c=0).issuperset('aabd')) - - def test_multiset_disjoint(self): - self.assertTrue(Counter(a=3, b=2, c=0).isdisjoint('cde')) - self.assertFalse(Counter(a=3, b=2, c=0).isdisjoint('bcd')) - - def test_multiset_predicates_with_negative_counts(self): - # Multiset predicates run on the output of the elements() method, - # meaning that zero counts and negative counts are ignored. - # The tests below confirm that we get that same results as the - # tests above, even after a negative count has been included - # in either *self* or *other*. - self.assertTrue(Counter(a=3, b=2, c=0, d=-1).isequal('ababa')) - self.assertFalse(Counter(a=3, b=2, d=-1).isequal('babab')) - self.assertTrue(Counter(a=3, b=2, c=0, d=-1).issubset('ababa')) - self.assertFalse(Counter(a=3, b=2, d=-1).issubset('babab')) - self.assertTrue(Counter(a=3, b=2, c=0, d=-1).issuperset('aab')) - self.assertFalse(Counter(a=3, b=2, c=0, d=-1).issuperset('aabd')) - self.assertTrue(Counter(a=3, b=2, c=0, d=-1).isdisjoint('cde')) - self.assertFalse(Counter(a=3, b=2, c=0, d=-1).isdisjoint('bcd')) - self.assertTrue(Counter(a=3, b=2, c=0, d=-1).isequal(Counter(a=3, b=2, c=-1))) - self.assertFalse(Counter(a=3, b=2, d=-1).isequal(Counter(a=2, b=3, c=-1))) - self.assertTrue(Counter(a=3, b=2, c=0, d=-1).issubset(Counter(a=3, b=2, c=-1))) - self.assertFalse(Counter(a=3, b=2, d=-1).issubset(Counter(a=2, b=3, c=-1))) - self.assertTrue(Counter(a=3, b=2, c=0, d=-1).issuperset(Counter(a=2, b=1, c=-1))) - self.assertFalse(Counter(a=3, b=2, c=0, d=-1).issuperset(Counter(a=2, b=1, c=-1, d=1))) - self.assertTrue(Counter(a=3, b=2, c=0, d=-1).isdisjoint(Counter(c=1, d=2, e=3, f=-1))) - self.assertFalse(Counter(a=3, b=2, c=0, d=-1).isdisjoint(Counter(b=1, c=1, d=1, e=-1))) + self.assertEqual(cp == cq, sp == sq) + self.assertEqual(cp != cq, sp != sq) + self.assertEqual(cp <= cq, sp <= sq) + self.assertEqual(cp >= cq, sp >= sq) + self.assertEqual(cp < cq, sp < sq) + self.assertEqual(cp > cq, sp > sq) + + def test_eq(self): + self.assertEqual(Counter(a=3, b=2, c=0), Counter('ababa')) + self.assertNotEqual(Counter(a=3, b=2), Counter('babab')) + + def test_le(self): + self.assertTrue(Counter(a=3, b=2, c=0) <= Counter('ababa')) + self.assertFalse(Counter(a=3, b=2) <= Counter('babab')) + + def test_lt(self): + self.assertTrue(Counter(a=3, b=1, c=0) < Counter('ababa')) + self.assertFalse(Counter(a=3, b=2, c=0) < Counter('ababa')) + + def test_ge(self): + self.assertTrue(Counter(a=2, b=1, c=0) >= Counter('aab')) + self.assertFalse(Counter(a=3, b=2, c=0) >= Counter('aabd')) + + def test_gt(self): + self.assertTrue(Counter(a=3, b=2, c=0) > Counter('aab')) + self.assertFalse(Counter(a=2, b=1, c=0) > Counter('aab')) ################################################################################ diff --git a/Misc/NEWS.d/next/Library/2020-05-23-18-24-13.bpo-22533.k64XGo.rst b/Misc/NEWS.d/next/Library/2020-05-23-18-24-13.bpo-22533.k64XGo.rst deleted file mode 100644 index 737162f..0000000 --- a/Misc/NEWS.d/next/Library/2020-05-23-18-24-13.bpo-22533.k64XGo.rst +++ /dev/null @@ -1,2 +0,0 @@ -Add multiset comparison methods to collections.Counter(): isequal(), -issubset(), issuperset(), and isdisjoint(). diff --git a/Misc/NEWS.d/next/Library/2020-05-30-18-48-58.bpo-40755.IyOe2J.rst b/Misc/NEWS.d/next/Library/2020-05-30-18-48-58.bpo-40755.IyOe2J.rst new file mode 100644 index 0000000..be5653e --- /dev/null +++ b/Misc/NEWS.d/next/Library/2020-05-30-18-48-58.bpo-40755.IyOe2J.rst @@ -0,0 +1 @@ +Add rich comparisons to collections.Counter(). -- cgit v0.12