diff options
author | Raymond Hettinger <python@rcn.com> | 2009-01-20 01:19:26 (GMT) |
---|---|---|
committer | Raymond Hettinger <python@rcn.com> | 2009-01-20 01:19:26 (GMT) |
commit | bad1eb2ff3815909978fd71520980f67fd6501a3 (patch) | |
tree | 57ef913765c930112557be3e2d8cb4a91b4dde07 /Lib/collections.py | |
parent | e8b619c152b67a1e0926bc50141d92a601dccd46 (diff) | |
download | cpython-bad1eb2ff3815909978fd71520980f67fd6501a3.zip cpython-bad1eb2ff3815909978fd71520980f67fd6501a3.tar.gz cpython-bad1eb2ff3815909978fd71520980f67fd6501a3.tar.bz2 |
Build-outs for Counter() class:
* Constructor and update() support keyword args (like their dict counterparts).
* The 'del' statement no longer raises KeyError for missing values.
* Add multiset operations: __add__, __sub__, __and__, __or__.
Diffstat (limited to 'Lib/collections.py')
-rw-r--r-- | Lib/collections.py | 99 |
1 files changed, 91 insertions, 8 deletions
diff --git a/Lib/collections.py b/Lib/collections.py index e5656b1..fe33a12 100644 --- a/Lib/collections.py +++ b/Lib/collections.py @@ -10,7 +10,7 @@ from operator import itemgetter as _itemgetter from keyword import iskeyword as _iskeyword import sys as _sys import heapq as _heapq -from itertools import repeat as _repeat, chain as _chain, starmap as _starmap +from itertools import repeat as _repeat, chain as _chain, starmap as _starmap, ifilter as _ifilter ######################################################################## ### namedtuple ####################################################### @@ -167,7 +167,7 @@ class Counter(dict): # http://code.activestate.com/recipes/259174/ # Knuth, TAOCP Vol. II section 4.6.3 - def __init__(self, iterable=None): + def __init__(self, iterable=None, **kwds): '''Create a new, empty Counter object. And if given, count elements from an input iterable. Or, initialize the count from another mapping of elements to their counts. @@ -175,9 +175,10 @@ class Counter(dict): >>> c = Counter() # a new, empty counter >>> c = Counter('gallahad') # a new counter from an iterable >>> c = Counter({'a': 4, 'b': 2}) # a new counter from a mapping + >>> c = Counter(a=4, b=2) # a new counter from keyword args ''' - self.update(iterable) + self.update(iterable, **kwds) def __missing__(self, key): 'The count of elements not in the Counter is zero.' @@ -228,7 +229,7 @@ class Counter(dict): raise NotImplementedError( 'Counter.fromkeys() is undefined. Use Counter(iterable) instead.') - def update(self, iterable=None): + def update(self, iterable=None, **kwds): '''Like dict.update() but add counts instead of replacing them. Source can be an iterable, a dictionary, or another Counter instance. @@ -245,10 +246,8 @@ class Counter(dict): # replace behavior results in the some of original untouched counts # being mixed-in with all of the other counts for a mismash that # doesn't have a straight-forward interpretation in most counting - # contexts. Instead, we look to Knuth for suggested operations on - # multisets and implement the union-add operation discussed in - # TAOCP Volume II section 4.6.3 exercise 19. The Wikipedia entry for - # multisets calls that operation a sum or join. + # contexts. Instead, we implement straight-addition. Both the inputs + # and outputs are allowed to contain zero and negative counts. if iterable is not None: if isinstance(iterable, Mapping): @@ -257,17 +256,101 @@ class Counter(dict): else: for elem in iterable: self[elem] += 1 + if kwds: + self.update(kwds) def copy(self): 'Like dict.copy() but returns a Counter instance instead of a dict.' return Counter(self) + def __delitem__(self, elem): + 'Like dict.__delitem__() but does not raise KeyError for missing values.' + if elem in self: + dict.__delitem__(self, elem) + def __repr__(self): if not self: return '%s()' % self.__class__.__name__ items = ', '.join(map('%r: %r'.__mod__, self.most_common())) return '%s({%s})' % (self.__class__.__name__, items) + # Multiset-style mathematical operations discussed in: + # Knuth TAOCP Volume II section 4.6.3 exercise 19 + # and at http://en.wikipedia.org/wiki/Multiset + # + # Results are undefined when inputs contain negative counts. + # Outputs guaranteed to only include positive counts. + # + # To strip negative and zero counts, add-in an empty counter: + # c += Counter() + + def __add__(self, other): + '''Add counts from two counters. + + >>> Counter('abbb') + Counter('bcc') + Counter({'b': 4, 'c': 2, 'a': 1}) + + ''' + if not isinstance(other, Counter): + return NotImplemented + result = Counter() + for elem in set(self) | set(other): + newcount = self[elem] + other[elem] + if newcount > 0: + result[elem] = newcount + return result + + def __sub__(self, other): + ''' Subtract count, but keep only results with positive counts. + + >>> Counter('abbbc') - Counter('bccd') + Counter({'b': 2, 'a': 1}) + + ''' + if not isinstance(other, Counter): + return NotImplemented + result = Counter() + for elem, count in self.iteritems(): + newcount = count - other[elem] + if newcount > 0: + result[elem] = newcount + return result + + def __or__(self, other): + '''Union is the maximum of value in either of the input counters. + + >>> Counter('abbb') | Counter('bcc') + Counter({'b': 3, 'c': 2, 'a': 1}) + + ''' + if not isinstance(other, Counter): + return NotImplemented + _max = max + result = Counter() + for elem in set(self) | set(other): + newcount = _max(self[elem], other[elem]) + if newcount > 0: + result[elem] = newcount + return result + + def __and__(self, other): + ''' Intersection is the minimum of corresponding counts. + + >>> Counter('abbb') & Counter('bcc') + Counter({'b': 1}) + + ''' + if not isinstance(other, Counter): + return NotImplemented + _min = min + result = Counter() + if len(self) < len(other): + self, other = other, self + for elem in _ifilter(self.__contains__, other): + newcount = _min(self[elem], other[elem]) + if newcount > 0: + result[elem] = newcount + return result if __name__ == '__main__': |