From 60398512c86c5535edd817c99ccb50453b3b0471 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Thu, 28 May 2020 08:35:46 -0700 Subject: bpo-40755: Add missing multiset operations to Counter() (GH-20339) --- Doc/library/collections.rst | 41 ++++++++ Lib/collections/__init__.py | 110 +++++++++++++++++++-- Lib/test/test_collections.py | 59 +++++++++++ .../2020-05-23-18-24-13.bpo-22533.k64XGo.rst | 2 + 4 files changed, 206 insertions(+), 6 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2020-05-23-18-24-13.bpo-22533.k64XGo.rst diff --git a/Doc/library/collections.rst b/Doc/library/collections.rst index 549ac1b..ea2b420 100644 --- a/Doc/library/collections.rst +++ b/Doc/library/collections.rst @@ -290,6 +290,47 @@ For example:: >>> sorted(c.elements()) ['a', 'a', 'a', 'a', 'b', 'b'] + .. method:: isdisjoint(other) + + True if none of the elements in *self* overlap with those in *other*. + Negative or missing counts are ignored. + Logically equivalent to: ``not (+self) & (+other)`` + + .. versionadded:: 3.10 + + .. method:: isequal(other) + + Test whether counts agree exactly. + Negative or missing counts are treated as zero. + + This method works differently than the inherited :meth:`__eq__` method + which treats negative or missing counts as distinct from zero:: + + >>> Counter(a=1, b=0).isequal(Counter(a=1)) + True + >>> Counter(a=1, b=0) == Counter(a=1) + False + + Logically equivalent to: ``+self == +other`` + + .. versionadded:: 3.10 + + .. method:: issubset(other) + + True if the counts in *self* are less than or equal to those in *other*. + Negative or missing counts are treated as zero. + Logically equivalent to: ``not self - (+other)`` + + .. versionadded:: 3.10 + + .. method:: issuperset(other) + + True if the counts in *self* are greater than or equal to those in *other*. + Negative or missing counts are treated as zero. + Logically equivalent to: ``not other - (+self)`` + + .. versionadded:: 3.10 + .. method:: most_common([n]) Return a list of the *n* most common elements and their counts from the diff --git a/Lib/collections/__init__.py b/Lib/collections/__init__.py index 011a0c1..55fb46c 100644 --- a/Lib/collections/__init__.py +++ b/Lib/collections/__init__.py @@ -710,12 +710,24 @@ class Counter(dict): # To strip negative and zero counts, add-in an empty counter: # c += Counter() # - # Rich comparison operators for multiset subset and superset tests - # are deliberately omitted due to semantic conflicts with the - # existing inherited dict equality method. Subset and superset - # semantics ignore zero counts and require that p≤q ∧ p≥q → p=q; - # however, that would not be the case for p=Counter(a=1, b=0) - # and q=Counter(a=1) where the dictionaries are not equal. + # When the multiplicities are all zero or one, multiset operations + # are guaranteed to be equivalent to the corresponding operations + # for regular sets. + # Given counter multisets such as: + # cp = Counter(a=1, b=0, c=1) + # cq = Counter(c=1, d=0, e=1) + # The corresponding regular sets would be: + # sp = {'a', 'c'} + # sq = {'c', 'e'} + # All of the following relations would hold: + # set(cp + cq) == sp | sq + # set(cp - cq) == sp - sq + # set(cp | cq) == sp | sq + # set(cp & cq) == sp & sq + # cp.isequal(cq) == (sp == sq) + # cp.issubset(cq) == sp.issubset(sq) + # cp.issuperset(cq) == sp.issuperset(sq) + # cp.isdisjoint(cq) == sp.isdisjoint(sq) def __add__(self, other): '''Add counts from two counters. @@ -874,6 +886,92 @@ class Counter(dict): self[elem] = other_count return self._keep_positive() + def isequal(self, other): + ''' Test whether counts agree exactly. + + Negative or missing counts are treated as zero. + + This is different than the inherited __eq__() method which + treats negative or missing counts as distinct from zero: + + >>> Counter(a=1, b=0).isequal(Counter(a=1)) + True + >>> Counter(a=1, b=0) == Counter(a=1) + False + + Logically equivalent to: +self == +other + ''' + if not isinstance(other, Counter): + other = Counter(other) + for elem in set(self) | set(other): + left = self[elem] + right = other[elem] + if left == right: + continue + if left < 0: + left = 0 + if right < 0: + right = 0 + if left != right: + return False + return True + + def issubset(self, other): + '''True if the counts in self are less than or equal to those in other. + + Negative or missing counts are treated as zero. + + Logically equivalent to: not self - (+other) + ''' + if not isinstance(other, Counter): + other = Counter(other) + for elem, count in self.items(): + other_count = other[elem] + if other_count < 0: + other_count = 0 + if count > other_count: + return False + return True + + def issuperset(self, other): + '''True if the counts in self are greater than or equal to those in other. + + Negative or missing counts are treated as zero. + + Logically equivalent to: not other - (+self) + ''' + if not isinstance(other, Counter): + other = Counter(other) + return other.issubset(self) + + def isdisjoint(self, other): + '''True if none of the elements in self overlap with those in other. + + Negative or missing counts are ignored. + + Logically equivalent to: not (+self) & (+other) + ''' + if not isinstance(other, Counter): + other = Counter(other) + for elem, count in self.items(): + if count > 0 and other[elem] > 0: + return False + return True + + # Rich comparison operators for multiset subset and superset tests + # have been deliberately omitted due to semantic conflicts with the + # existing inherited dict equality method. Subset and superset + # semantics ignore zero counts and require that p⊆q ∧ p⊇q ⇔ p=q; + # however, that would not be the case for p=Counter(a=1, b=0) + # and q=Counter(a=1) where the dictionaries are not equal. + + def _omitted(self, other): + raise TypeError( + 'Rich comparison operators have been deliberately omitted. ' + 'Use the isequal(), issubset(), and issuperset() methods instead.') + + __lt__ = __le__ = __gt__ = __ge__ = __lt__ = _omitted + ######################################################################## ### ChainMap diff --git a/Lib/test/test_collections.py b/Lib/test/test_collections.py index a8d3337..8d80e88 100644 --- a/Lib/test/test_collections.py +++ b/Lib/test/test_collections.py @@ -7,6 +7,7 @@ import inspect import operator import pickle from random import choice, randrange +from itertools import product, chain, combinations import string import sys from test import support @@ -2219,6 +2220,64 @@ class TestCounter(unittest.TestCase): self.assertTrue(c.called) self.assertEqual(dict(c), {'a': 5, 'b': 2, 'c': 1, 'd': 1, 'r':2 }) + def test_multiset_operations_equivalent_to_set_operations(self): + # When the multiplicities are all zero or one, multiset operations + # are guaranteed to be equivalent to the corresponding operations + # for regular sets. + s = list(product(('a', 'b', 'c'), range(2))) + powerset = chain.from_iterable(combinations(s, r) for r in range(len(s)+1)) + counters = [Counter(dict(groups)) for groups in powerset] + for cp, cq in product(counters, repeat=2): + sp = set(cp.elements()) + sq = set(cq.elements()) + self.assertEqual(set(cp + cq), sp | sq) + self.assertEqual(set(cp - cq), sp - sq) + self.assertEqual(set(cp | cq), sp | sq) + self.assertEqual(set(cp & cq), sp & sq) + self.assertEqual(cp.isequal(cq), sp == sq) + self.assertEqual(cp.issubset(cq), sp.issubset(sq)) + self.assertEqual(cp.issuperset(cq), sp.issuperset(sq)) + self.assertEqual(cp.isdisjoint(cq), sp.isdisjoint(sq)) + + def test_multiset_equal(self): + self.assertTrue(Counter(a=3, b=2, c=0).isequal('ababa')) + self.assertFalse(Counter(a=3, b=2).isequal('babab')) + + def test_multiset_subset(self): + self.assertTrue(Counter(a=3, b=2, c=0).issubset('ababa')) + self.assertFalse(Counter(a=3, b=2).issubset('babab')) + + def test_multiset_superset(self): + self.assertTrue(Counter(a=3, b=2, c=0).issuperset('aab')) + self.assertFalse(Counter(a=3, b=2, c=0).issuperset('aabd')) + + def test_multiset_disjoint(self): + self.assertTrue(Counter(a=3, b=2, c=0).isdisjoint('cde')) + self.assertFalse(Counter(a=3, b=2, c=0).isdisjoint('bcd')) + + def test_multiset_predicates_with_negative_counts(self): + # Multiset predicates run on the output of the elements() method, + # meaning that zero counts and negative counts are ignored. + # The tests below confirm that we get that same results as the + # tests above, even after a negative count has been included + # in either *self* or *other*. + self.assertTrue(Counter(a=3, b=2, c=0, d=-1).isequal('ababa')) + self.assertFalse(Counter(a=3, b=2, d=-1).isequal('babab')) + self.assertTrue(Counter(a=3, b=2, c=0, d=-1).issubset('ababa')) + self.assertFalse(Counter(a=3, b=2, d=-1).issubset('babab')) + self.assertTrue(Counter(a=3, b=2, c=0, d=-1).issuperset('aab')) + self.assertFalse(Counter(a=3, b=2, c=0, d=-1).issuperset('aabd')) + self.assertTrue(Counter(a=3, b=2, c=0, d=-1).isdisjoint('cde')) + self.assertFalse(Counter(a=3, b=2, c=0, d=-1).isdisjoint('bcd')) + self.assertTrue(Counter(a=3, b=2, c=0, d=-1).isequal(Counter(a=3, b=2, c=-1))) + self.assertFalse(Counter(a=3, b=2, d=-1).isequal(Counter(a=2, b=3, c=-1))) + self.assertTrue(Counter(a=3, b=2, c=0, d=-1).issubset(Counter(a=3, b=2, c=-1))) + self.assertFalse(Counter(a=3, b=2, d=-1).issubset(Counter(a=2, b=3, c=-1))) + self.assertTrue(Counter(a=3, b=2, c=0, d=-1).issuperset(Counter(a=2, b=1, c=-1))) + self.assertFalse(Counter(a=3, b=2, c=0, d=-1).issuperset(Counter(a=2, b=1, c=-1, d=1))) + self.assertTrue(Counter(a=3, b=2, c=0, d=-1).isdisjoint(Counter(c=1, d=2, e=3, f=-1))) + self.assertFalse(Counter(a=3, b=2, c=0, d=-1).isdisjoint(Counter(b=1, c=1, d=1, e=-1))) + ################################################################################ ### Run tests diff --git a/Misc/NEWS.d/next/Library/2020-05-23-18-24-13.bpo-22533.k64XGo.rst b/Misc/NEWS.d/next/Library/2020-05-23-18-24-13.bpo-22533.k64XGo.rst new file mode 100644 index 0000000..737162f --- /dev/null +++ b/Misc/NEWS.d/next/Library/2020-05-23-18-24-13.bpo-22533.k64XGo.rst @@ -0,0 +1,2 @@ +Add multiset comparison methods to collections.Counter(): isequal(), +issubset(), issuperset(), and isdisjoint(). -- cgit v0.12