diff options
author | Raymond Hettinger <python@rcn.com> | 2009-01-20 03:41:22 (GMT) |
---|---|---|
committer | Raymond Hettinger <python@rcn.com> | 2009-01-20 03:41:22 (GMT) |
commit | 4d2073a0731f14be65e6a033685923bb1effe33d (patch) | |
tree | 0ef0d6f7a036fbf7f8f3c6c53bcad1f43c1e5b6c /Lib | |
parent | afccd63ac9541630953cd4e59a421696d3869311 (diff) | |
download | cpython-4d2073a0731f14be65e6a033685923bb1effe33d.zip cpython-4d2073a0731f14be65e6a033685923bb1effe33d.tar.gz cpython-4d2073a0731f14be65e6a033685923bb1effe33d.tar.bz2 |
Forward port r68792 and r68789 putting Counter in __all__ and adding Counter buildouts.
Diffstat (limited to 'Lib')
-rw-r--r-- | Lib/collections.py | 100 |
-rw-r--r-- | Lib/test/test_collections.py | 46 |
2 files changed, 136 insertions, 10 deletions
diff --git a/Lib/collections.py b/Lib/collections.py index 6c1abce..45558f9 100644 --- a/Lib/collections.py +++ b/Lib/collections.py @@ -1,5 +1,5 @@ __all__ = ['deque', 'defaultdict', 'namedtuple', 'UserDict', 'UserList', - 'UserString'] + 'UserString', 'Counter'] # For bootstrapping reasons, the collection ABCs are defined in _abcoll.py. # They should however be considered an integral part of collections.py. from _abcoll import * @@ -171,7 +171,7 @@ class Counter(dict): # http://code.activestate.com/recipes/259174/ # Knuth, TAOCP Vol. II section 4.6.3 - def __init__(self, iterable=None): + def __init__(self, iterable=None, **kwds): '''Create a new, empty Counter object. And if given, count elements from an input iterable. Or, initialize the count from another mapping of elements to their counts. @@ -179,9 +179,10 @@ class Counter(dict): >>> c = Counter() # a new, empty counter >>> c = Counter('gallahad') # a new counter from an iterable >>> c = Counter({'a': 4, 'b': 2}) # a new counter from a mapping + >>> c = Counter(a=4, b=2) # a new counter from keyword args ''' - self.update(iterable) + self.update(iterable, **kwds) def __missing__(self, key): 'The count of elements not in the Counter is zero.' @@ -232,7 +233,7 @@ class Counter(dict): raise NotImplementedError( 'Counter.fromkeys() is undefined. Use Counter(iterable) instead.') - def update(self, iterable=None): + def update(self, iterable=None, **kwds): '''Like dict.update() but add counts instead of replacing them. Source can be an iterable, a dictionary, or another Counter instance. @@ -249,10 +250,8 @@ class Counter(dict): # replace behavior results in the some of original untouched counts # being mixed-in with all of the other counts for a mismash that # doesn't have a straight-forward interpretation in most counting - # contexts. Instead, we look to Knuth for suggested operations on - # multisets and implement the union-add operation discussed in - # TAOCP Volume II section 4.6.3 exercise 19. 
The Wikipedia entry for - # multisets calls that operation a sum or join. + # contexts. Instead, we implement straight-addition. Both the inputs + # and outputs are allowed to contain zero and negative counts. if iterable is not None: if isinstance(iterable, Mapping): @@ -261,17 +260,102 @@ class Counter(dict): else: for elem in iterable: self[elem] += 1 + if kwds: + self.update(kwds) def copy(self): 'Like dict.copy() but returns a Counter instance instead of a dict.' return Counter(self) + def __delitem__(self, elem): + 'Like dict.__delitem__() but does not raise KeyError for missing values.' + if elem in self: + dict.__delitem__(self, elem) + def __repr__(self): if not self: return '%s()' % self.__class__.__name__ items = ', '.join(map('%r: %r'.__mod__, self.most_common())) return '%s({%s})' % (self.__class__.__name__, items) + # Multiset-style mathematical operations discussed in: + # Knuth TAOCP Volume II section 4.6.3 exercise 19 + # and at http://en.wikipedia.org/wiki/Multiset + # + # Results are undefined when inputs contain negative counts. + # Outputs guaranteed to only include positive counts. + # + # To strip negative and zero counts, add-in an empty counter: + # c += Counter() + + def __add__(self, other): + '''Add counts from two counters. + + >>> Counter('abbb') + Counter('bcc') + Counter({'b': 4, 'c': 2, 'a': 1}) + + ''' + if not isinstance(other, Counter): + return NotImplemented + result = Counter() + for elem in set(self) | set(other): + newcount = self[elem] + other[elem] + if newcount > 0: + result[elem] = newcount + return result + + def __sub__(self, other): + ''' Subtract count, but keep only results with positive counts. 
+ + >>> Counter('abbbc') - Counter('bccd') + Counter({'b': 2, 'a': 1}) + + ''' + if not isinstance(other, Counter): + return NotImplemented + result = Counter() + for elem, count in self.items(): + newcount = count - other[elem] + if newcount > 0: + result[elem] = newcount + return result + + def __or__(self, other): + '''Union is the maximum of value in either of the input counters. + + >>> Counter('abbb') | Counter('bcc') + Counter({'b': 3, 'c': 2, 'a': 1}) + + ''' + if not isinstance(other, Counter): + return NotImplemented + _max = max + result = Counter() + for elem in set(self) | set(other): + newcount = _max(self[elem], other[elem]) + if newcount > 0: + result[elem] = newcount + return result + + def __and__(self, other): + ''' Intersection is the minimum of corresponding counts. + + >>> Counter('abbb') & Counter('bcc') + Counter({'b': 1}) + + ''' + if not isinstance(other, Counter): + return NotImplemented + _min = min + result = Counter() + if len(self) < len(other): + self, other = other, self + for elem in filter(self.__contains__, other): + newcount = _min(self[elem], other[elem]) + if newcount > 0: + result[elem] = newcount + return result + ################################################################################ ### UserDict diff --git a/Lib/test/test_collections.py b/Lib/test/test_collections.py index 153059a..6630c4e 100644 --- a/Lib/test/test_collections.py +++ b/Lib/test/test_collections.py @@ -4,6 +4,8 @@ import unittest, doctest from test import support from collections import namedtuple, Counter, Mapping import pickle, copy +from random import randrange +import operator from collections import Hashable, Iterable, Iterator from collections import Sized, Container, Callable from collections import Set, MutableSet @@ -361,6 +363,8 @@ class TestCounter(unittest.TestCase): def test_basics(self): c = Counter('abcaba') + self.assertEqual(c, Counter({'a':3 , 'b': 2, 'c': 1})) + self.assertEqual(c, Counter(a=3, b=2, c=1)) 
self.assert_(isinstance(c, dict)) self.assert_(isinstance(c, Mapping)) self.assert_(issubclass(Counter, dict)) @@ -388,6 +392,7 @@ class TestCounter(unittest.TestCase): c['a'] += 1 # increment an existing value c['b'] -= 2 # sub existing value to zero del c['c'] # remove an entry + del c['c'] # make sure that del doesn't raise KeyError c['d'] -= 2 # sub from a missing value c['e'] = -5 # directly assign a missing value c['f'] += 4 # add to a missing value @@ -403,7 +408,8 @@ class TestCounter(unittest.TestCase): self.assertEqual(repr(c), 'Counter()') self.assertRaises(NotImplementedError, Counter.fromkeys, 'abc') self.assertRaises(TypeError, hash, c) - c.update(dict(a=5, b=3, c=1)) + c.update(dict(a=5, b=3)) + c.update(c=1) c.update(Counter('a' * 50 + 'b' * 30)) c.update() # test case with no args c.__init__('a' * 500 + 'b' * 300) @@ -447,7 +453,43 @@ class TestCounter(unittest.TestCase): self.assertEqual(dict(Counter(s)), dict(Counter(s).items())) self.assertEqual(set(Counter(s)), set(s)) - + def test_multiset_operations(self): + # Verify that adding a zero counter will strip zeros and negatives + c = Counter(a=10, b=-2, c=0) + Counter() + self.assertEqual(dict(c), dict(a=10)) + + elements = 'abcd' + for i in range(1000): + # test random pairs of multisets + p = Counter(dict((elem, randrange(-2,4)) for elem in elements)) + q = Counter(dict((elem, randrange(-2,4)) for elem in elements)) + for counterop, numberop, defneg in [ + (Counter.__add__, lambda x, y: x+y if x+y>0 else 0, True), + (Counter.__sub__, lambda x, y: x-y if x-y>0 else 0, False), + (Counter.__or__, max, False), + (Counter.__and__, min, False), + ]: + result = counterop(p, q) + for x in elements: + # all except __add__ are undefined for negative inputs + if defneg or (p[x] >= 0 and q[x] >= 0): + self.assertEqual(numberop(p[x], q[x]), result[x]) + # verify that results exclude non-positive counts + self.assert_(x>0 for x in result.values()) + + elements = 'abcdef' + for i in range(100): + # verify 
that random multisets with no repeats are exactly like sets + p = Counter(dict((elem, randrange(0, 2)) for elem in elements)) + q = Counter(dict((elem, randrange(0, 2)) for elem in elements)) + for counterop, setop in [ + (Counter.__sub__, set.__sub__), + (Counter.__or__, set.__or__), + (Counter.__and__, set.__and__), + ]: + counter_result = counterop(p, q) + set_result = setop(set(p.elements()), set(q.elements())) + self.assertEqual(counter_result, dict.fromkeys(set_result, 1)) import doctest, collections |