Forward port r68792 and r68789 putting Counter in __all__ and adding Counter buildouts.

author: Raymond Hettinger <python@rcn.com> 2009-01-20 03:41:22 (GMT)
committer: Raymond Hettinger <python@rcn.com> 2009-01-20 03:41:22 (GMT)
commit: 4d2073a0731f14be65e6a033685923bb1effe33d (patch)
tree: 0ef0d6f7a036fbf7f8f3c6c53bcad1f43c1e5b6c
parent: afccd63ac9541630953cd4e59a421696d3869311 (diff)
download: cpython-4d2073a0731f14be65e6a033685923bb1effe33d.zip
cpython-4d2073a0731f14be65e6a033685923bb1effe33d.tar.gz
cpython-4d2073a0731f14be65e6a033685923bb1effe33d.tar.bz2
3 files changed, 169 insertions, 14 deletions
diff --git a/Doc/library/collections.rst b/Doc/library/collections.rst
index 816d814..0984751 100644
--- a/Doc/library/collections.rst
+++ b/Doc/library/collections.rst
@@ -177,6 +177,7 @@ For example::
        >>> c = Counter()                            # a new, empty counter
        >>> c = Counter('gallahad')                  # a new counter from an iterable
        >>> c = Counter({'red': 4, 'blue': 2})       # a new counter from a mapping
+       >>> c = Counter(spam=8, eggs=1)              # a new counter from keyword args
 
    The returned object has a dictionary style interface except that it returns
    a zero count for missing items (instead of raising a :exc:`KeyError` like a
@@ -207,7 +208,7 @@ For example::
       Elements are returned in arbitrary order.  If an element's count has been
       set to zero or a negative number, :meth:`elements` will ignore it.
 
-            >>> c = Counter({'a': 4, 'b': 2, 'd': 0, 'e': -2})
+            >>> c = Counter(a=4, b=2, c=0, d=-2)
             >>> list(c.elements())
             ['a', 'a', 'a', 'a', 'b', 'b']
 
@@ -232,10 +233,10 @@ For example::
 
    .. method:: update([iterable-or-mapping])
 
-       Like :meth:`dict.update` but adds-in counts instead of replacing them.
-
        Elements are counted from an *iterable* or added-in from another
-       *mapping* (or counter)::
+       *mapping* (or counter).  Like :meth:`dict.update` but adds-in counts
+       instead of replacing them, and the *iterable* is expected to be a
+       sequence of elements, not a sequence of ``(key, value)`` pairs::
 
             >>> c = Counter('which')
             >>> c.update('witch')           # add elements from another iterable
@@ -255,6 +256,34 @@ Common patterns for working with :class:`Counter` objects::
     Counter(dict(list_of_pairs))  # convert from a list of (elem, cnt) pairs
     c.most_common()[:-n:-1]       # n least common elements
 
+Several multiset mathematical operations are provided for combining
+:class:`Counter` objects.  Multisets are like regular sets but are allowed to
+contain repeated elements (with counts of one or more).  Addition and
+subtraction combine counters by adding or subtracting the counts of
+corresponding elements.  Intersection and union return the minimum and maximum
+of corresponding counts::
+
+    >>> c = Counter({'a': 3, 'b': 1})
+    >>> d = Counter({'a': 1, 'b': 2})
+    >>> c + d                           # add two counters together:  c[x] + d[x]
+    Counter({'a': 4, 'b': 3})
+    >>> c - d                           # subtract (keeping only positive counts)
+    Counter({'a': 2})
+    >>> c & d                           # intersection:  min(c[x], d[x])
+    Counter({'a': 1, 'b': 1})
+    >>> c | d                           # union:  max(c[x], d[x])
+    Counter({'a': 3, 'b': 2})
+
+All four multiset operations produce only positive counts (negative and zero
+results are skipped). If inputs include negative counts, addition will sum
+both counts and then exclude non-positive results.  The other three operations
+are undefined for negative inputs::
+
+    >>> e = Counter(a=8, b=-2, c=0)
+    >>> e += Counter()                  # remove zero and negative counts
+    >>> e
+    Counter({'a': 8})
+
 **References**:
 
 * Wikipedia entry for `Multisets <http://en.wikipedia.org/wiki/Multiset>`_
diff --git a/Lib/collections.py b/Lib/collections.py
index 6c1abce..45558f9 100644
--- a/Lib/collections.py
+++ b/Lib/collections.py
@@ -1,5 +1,5 @@
 __all__ = ['deque', 'defaultdict', 'namedtuple', 'UserDict', 'UserList',
-            'UserString']
+            'UserString', 'Counter']
 # For bootstrapping reasons, the collection ABCs are defined in _abcoll.py.
 # They should however be considered an integral part of collections.py.
 from _abcoll import *
@@ -171,7 +171,7 @@ class Counter(dict):
     #   http://code.activestate.com/recipes/259174/
     #   Knuth, TAOCP Vol. II section 4.6.3
 
-    def __init__(self, iterable=None):
+    def __init__(self, iterable=None, **kwds):
         '''Create a new, empty Counter object.  And if given, count elements
         from an input iterable.  Or, initialize the count from another mapping
         of elements to their counts.
@@ -179,9 +179,10 @@ class Counter(dict):
         >>> c = Counter()                           # a new, empty counter
         >>> c = Counter('gallahad')                 # a new counter from an iterable
         >>> c = Counter({'a': 4, 'b': 2})           # a new counter from a mapping
+        >>> c = Counter(a=4, b=2)                   # a new counter from keyword args
 
         '''
-        self.update(iterable)
+        self.update(iterable, **kwds)
 
     def __missing__(self, key):
         'The count of elements not in the Counter is zero.'
@@ -232,7 +233,7 @@ class Counter(dict):
         raise NotImplementedError(
             'Counter.fromkeys() is undefined.  Use Counter(iterable) instead.')
 
-    def update(self, iterable=None):
+    def update(self, iterable=None, **kwds):
         '''Like dict.update() but add counts instead of replacing them.
 
         Source can be an iterable, a dictionary, or another Counter instance.
@@ -249,10 +250,8 @@ class Counter(dict):
         # replace behavior results in the some of original untouched counts
         # being mixed-in with all of the other counts for a mismash that
         # doesn't have a straight-forward interpretation in most counting
-        # contexts.  Instead, we look to Knuth for suggested operations on
-        # multisets and implement the union-add operation discussed in
-        # TAOCP Volume II section 4.6.3 exercise 19.  The Wikipedia entry for
-        # multisets calls that operation a sum or join.
+        # contexts.  Instead, we implement straight-addition.  Both the inputs
+        # and outputs are allowed to contain zero and negative counts.
 
         if iterable is not None:
             if isinstance(iterable, Mapping):
@@ -261,17 +260,102 @@ class Counter(dict):
             else:
                 for elem in iterable:
                     self[elem] += 1
+        if kwds:
+            self.update(kwds)
 
     def copy(self):
         'Like dict.copy() but returns a Counter instance instead of a dict.'
         return Counter(self)
 
+    def __delitem__(self, elem):
+        'Like dict.__delitem__() but does not raise KeyError for missing values.'
+        if elem in self:
+            dict.__delitem__(self, elem)
+
     def __repr__(self):
         if not self:
             return '%s()' % self.__class__.__name__
         items = ', '.join(map('%r: %r'.__mod__, self.most_common()))
         return '%s({%s})' % (self.__class__.__name__, items)
 
+    # Multiset-style mathematical operations discussed in:
+    #       Knuth TAOCP Volume II section 4.6.3 exercise 19
+    #       and at http://en.wikipedia.org/wiki/Multiset
+    #
+    # Results are undefined when inputs contain negative counts.
+    # Outputs guaranteed to only include positive counts.
+    #
+    # To strip negative and zero counts, add-in an empty counter:
+    #       c += Counter()
+
+    def __add__(self, other):
+        '''Add counts from two counters.
+
+        >>> Counter('abbb') + Counter('bcc')
+        Counter({'b': 4, 'c': 2, 'a': 1})
+
+        '''
+        if not isinstance(other, Counter):
+            return NotImplemented
+        result = Counter()
+        for elem in set(self) | set(other):
+            newcount = self[elem] + other[elem]
+            if newcount > 0:
+                result[elem] = newcount
+        return result
+
+    def __sub__(self, other):
+        ''' Subtract count, but keep only results with positive counts.
+
+        >>> Counter('abbbc') - Counter('bccd')
+        Counter({'b': 2, 'a': 1})
+
+        '''
+        if not isinstance(other, Counter):
+            return NotImplemented
+        result = Counter()
+        for elem, count in self.items():
+            newcount = count - other[elem]
+            if newcount > 0:
+                result[elem] = newcount
+        return result
+
+    def __or__(self, other):
+        '''Union is the maximum of the values in either of the input counters.
+
+        >>> Counter('abbb') | Counter('bcc')
+        Counter({'b': 3, 'c': 2, 'a': 1})
+
+        '''
+        if not isinstance(other, Counter):
+            return NotImplemented
+        _max = max
+        result = Counter()
+        for elem in set(self) | set(other):
+            newcount = _max(self[elem], other[elem])
+            if newcount > 0:
+                result[elem] = newcount
+        return result
+
+    def __and__(self, other):
+        ''' Intersection is the minimum of corresponding counts.
+
+        >>> Counter('abbb') & Counter('bcc')
+        Counter({'b': 1})
+
+        '''
+        if not isinstance(other, Counter):
+            return NotImplemented
+        _min = min
+        result = Counter()
+        if len(self) < len(other):
+            self, other = other, self
+        for elem in filter(self.__contains__, other):
+            newcount = _min(self[elem], other[elem])
+            if newcount > 0:
+                result[elem] = newcount
+        return result
+
 
 ################################################################################
 ### UserDict
diff --git a/Lib/test/test_collections.py b/Lib/test/test_collections.py
index 153059a..6630c4e 100644
--- a/Lib/test/test_collections.py
+++ b/Lib/test/test_collections.py
@@ -4,6 +4,8 @@ import unittest, doctest
 from test import support
 from collections import namedtuple, Counter, Mapping
 import pickle, copy
+from random import randrange
+import operator
 from collections import Hashable, Iterable, Iterator
 from collections import Sized, Container, Callable
 from collections import Set, MutableSet
@@ -361,6 +363,8 @@ class TestCounter(unittest.TestCase):
 
     def test_basics(self):
         c = Counter('abcaba')
+        self.assertEqual(c, Counter({'a':3 , 'b': 2, 'c': 1}))
+        self.assertEqual(c, Counter(a=3, b=2, c=1))
         self.assert_(isinstance(c, dict))
         self.assert_(isinstance(c, Mapping))
         self.assert_(issubclass(Counter, dict))
@@ -388,6 +392,7 @@ class TestCounter(unittest.TestCase):
         c['a'] += 1         # increment an existing value
         c['b'] -= 2         # sub existing value to zero
         del c['c']          # remove an entry
+        del c['c']          # make sure that del doesn't raise KeyError
         c['d'] -= 2         # sub from a missing value
         c['e'] = -5         # directly assign a missing value
         c['f'] += 4         # add to a missing value
@@ -403,7 +408,8 @@ class TestCounter(unittest.TestCase):
         self.assertEqual(repr(c), 'Counter()')
         self.assertRaises(NotImplementedError, Counter.fromkeys, 'abc')
         self.assertRaises(TypeError, hash, c)
-        c.update(dict(a=5, b=3, c=1))
+        c.update(dict(a=5, b=3))
+        c.update(c=1)
         c.update(Counter('a' * 50 + 'b' * 30))
         c.update()          # test case with no args
         c.__init__('a' * 500 + 'b' * 300)
@@ -447,7 +453,43 @@ class TestCounter(unittest.TestCase):
         self.assertEqual(dict(Counter(s)), dict(Counter(s).items()))
         self.assertEqual(set(Counter(s)), set(s))
 
-
+    def test_multiset_operations(self):
+        # Verify that adding a zero counter will strip zeros and negatives
+        c = Counter(a=10, b=-2, c=0) + Counter()
+        self.assertEqual(dict(c), dict(a=10))
+
+        elements = 'abcd'
+        for i in range(1000):
+            # test random pairs of multisets
+            p = Counter(dict((elem, randrange(-2,4)) for elem in elements))
+            q = Counter(dict((elem, randrange(-2,4)) for elem in elements))
+            for counterop, numberop, defneg in [
+                (Counter.__add__, lambda x, y: x+y if x+y>0 else 0, True),
+                (Counter.__sub__, lambda x, y: x-y if x-y>0 else 0, False),
+                (Counter.__or__, max, False),
+                (Counter.__and__, min, False),
+            ]:
+                result = counterop(p, q)
+                for x in elements:
+                    # all except __add__ are undefined for negative inputs
+                    if defneg or (p[x] >= 0 and q[x] >= 0):
+                        self.assertEqual(numberop(p[x], q[x]), result[x])
+                # verify that results exclude non-positive counts
+                self.assert_(x>0 for x in result.values())
+
+        elements = 'abcdef'
+        for i in range(100):
+            # verify that random multisets with no repeats are exactly like sets
+            p = Counter(dict((elem, randrange(0, 2)) for elem in elements))
+            q = Counter(dict((elem, randrange(0, 2)) for elem in elements))
+            for counterop, setop in [
+                (Counter.__sub__, set.__sub__),
+                (Counter.__or__, set.__or__),
+                (Counter.__and__, set.__and__),
+            ]:
+                counter_result = counterop(p, q)
+                set_result = setop(set(p.elements()), set(q.elements()))
+                self.assertEqual(counter_result, dict.fromkeys(set_result, 1))
 
 import doctest, collections
author	Raymond Hettinger <python@rcn.com>	2009-01-20 03:41:22 (GMT)
committer	Raymond Hettinger <python@rcn.com>	2009-01-20 03:41:22 (GMT)
commit	4d2073a0731f14be65e6a033685923bb1effe33d (patch)
tree	0ef0d6f7a036fbf7f8f3c6c53bcad1f43c1e5b6c
parent	afccd63ac9541630953cd4e59a421696d3869311 (diff)
download	cpython-4d2073a0731f14be65e6a033685923bb1effe33d.zip cpython-4d2073a0731f14be65e6a033685923bb1effe33d.tar.gz cpython-4d2073a0731f14be65e6a033685923bb1effe33d.tar.bz2