summaryrefslogtreecommitdiffstats
path: root/Lib/collections.py
diff options
context:
space:
mode:
authorRaymond Hettinger <python@rcn.com>2009-01-14 02:20:07 (GMT)
committerRaymond Hettinger <python@rcn.com>2009-01-14 02:20:07 (GMT)
commitb8baf6360f7e93f5caa2a20292a868f890056215 (patch)
tree02aa4cb923d36aa3b72e5d9657c1bd509e1c8b4e /Lib/collections.py
parent3bc0f7ebeed6b165c2694845256cf0f62e8fddfe (diff)
downloadcpython-b8baf6360f7e93f5caa2a20292a868f890056215.zip
cpython-b8baf6360f7e93f5caa2a20292a868f890056215.tar.gz
cpython-b8baf6360f7e93f5caa2a20292a868f890056215.tar.bz2
Issue #1696199: Add collections.Counter().
Forward port from Py2.7.
Diffstat (limited to 'Lib/collections.py')
-rw-r--r--Lib/collections.py159
1 files changed, 159 insertions, 0 deletions
diff --git a/Lib/collections.py b/Lib/collections.py
index c3faa9a..6c1abce 100644
--- a/Lib/collections.py
+++ b/Lib/collections.py
@@ -10,6 +10,9 @@ from _collections import deque, defaultdict
from operator import itemgetter as _itemgetter
from keyword import iskeyword as _iskeyword
import sys as _sys
+import heapq as _heapq
+from itertools import repeat as _repeat, chain as _chain, starmap as _starmap
+
################################################################################
### namedtuple
@@ -113,6 +116,162 @@ def namedtuple(typename, field_names, verbose=False):
return result
+########################################################################
+### Counter
+########################################################################
+
+class Counter(dict):
+ '''Dict subclass for counting hashable items. Sometimes called a bag
+ or multiset. Elements are stored as dictionary keys and their counts
+ are stored as dictionary values.
+
+ >>> c = Counter('abracadabra') # count elements from a string
+
+ >>> c.most_common(3) # three most common elements
+ [('a', 5), ('r', 2), ('b', 2)]
+ >>> sorted(c) # list all unique elements
+ ['a', 'b', 'c', 'd', 'r']
+ >>> ''.join(sorted(c.elements())) # list elements with repetitions
+ 'aaaaabbcdrr'
+ >>> sum(c.values()) # total of all counts
+ 11
+
+ >>> c['a'] # count of letter 'a'
+ 5
+ >>> for elem in 'shazam': # update counts from an iterable
+ ... c[elem] += 1 # by adding 1 to each element's count
+ >>> c['a'] # now there are seven 'a'
+ 7
+ >>> del c['r'] # remove all 'r'
+ >>> c['r'] # now there are zero 'r'
+ 0
+
+ >>> d = Counter('simsalabim') # make another counter
+ >>> c.update(d) # add in the second counter
+ >>> c['a'] # now there are nine 'a'
+ 9
+
+ >>> c.clear() # empty the counter
+ >>> c
+ Counter()
+
+ Note: If a count is set to zero or reduced to zero, it will remain
+ in the counter until the entry is deleted or the counter is cleared:
+
+ >>> c = Counter('aaabbc')
+ >>> c['b'] -= 2 # reduce the count of 'b' by two
+ >>> c.most_common() # 'b' is still in, but its count is zero
+ [('a', 3), ('c', 1), ('b', 0)]
+
+ '''
+ # References:
+ # http://en.wikipedia.org/wiki/Multiset
+ # http://www.gnu.org/software/smalltalk/manual-base/html_node/Bag.html
+ # http://www.demo2s.com/Tutorial/Cpp/0380__set-multiset/Catalog0380__set-multiset.htm
+ # http://code.activestate.com/recipes/259174/
+ # Knuth, TAOCP Vol. II section 4.6.3
+
+ def __init__(self, iterable=None):
+ '''Create a new, empty Counter object. And if given, count elements
+ from an input iterable. Or, initialize the count from another mapping
+ of elements to their counts.
+
+ >>> c = Counter() # a new, empty counter
+ >>> c = Counter('gallahad') # a new counter from an iterable
+ >>> c = Counter({'a': 4, 'b': 2}) # a new counter from a mapping
+
+ '''
+ self.update(iterable)
+
+ def __missing__(self, key):
+ 'The count of elements not in the Counter is zero.'
+ # Needed so that self[missing_item] does not raise KeyError
+ return 0
+
+ def most_common(self, n=None):
+ '''List the n most common elements and their counts from the most
+ common to the least. If n is None, then list all element counts.
+
+ >>> Counter('abracadabra').most_common(3)
+ [('a', 5), ('r', 2), ('b', 2)]
+
+ '''
+ # Emulate Bag.sortedByCount from Smalltalk
+ if n is None:
+ return sorted(self.items(), key=_itemgetter(1), reverse=True)
+ return _heapq.nlargest(n, self.items(), key=_itemgetter(1))
+
+ def elements(self):
+ '''Iterator over elements repeating each as many times as its count.
+
+ >>> c = Counter('ABCABC')
+ >>> sorted(c.elements())
+ ['A', 'A', 'B', 'B', 'C', 'C']
+
+ # Knuth's example for prime factors of 1836: 2**2 * 3**3 * 17**1
+ >>> prime_factors = Counter({2: 2, 3: 3, 17: 1})
+ >>> product = 1
+ >>> for factor in prime_factors.elements(): # loop over factors
+ ... product *= factor # and multiply them
+ >>> product
+ 1836
+
+ Note, if an element's count has been set to zero or is a negative
+ number, elements() will ignore it.
+
+ '''
+ # Emulate Bag.do from Smalltalk and Multiset.begin from C++.
+ return _chain.from_iterable(_starmap(_repeat, self.items()))
+
+ # Override dict methods where necessary
+
+ @classmethod
+ def fromkeys(cls, iterable, v=None):
+ # There is no equivalent method for counters because setting v=1
+ # means that no element can have a count greater than one.
+ raise NotImplementedError(
+ 'Counter.fromkeys() is undefined. Use Counter(iterable) instead.')
+
+ def update(self, iterable=None):
+ '''Like dict.update() but add counts instead of replacing them.
+
+ Source can be an iterable, a dictionary, or another Counter instance.
+
+ >>> c = Counter('which')
+ >>> c.update('witch') # add elements from another iterable
+ >>> d = Counter('watch')
+ >>> c.update(d) # add elements from another counter
+ >>> c['h'] # four 'h' in which, witch, and watch
+ 4
+
+ '''
+ # The regular dict.update() operation makes no sense here because the
+ # replace behavior results in the some of original untouched counts
+ # being mixed-in with all of the other counts for a mismash that
+ # doesn't have a straight-forward interpretation in most counting
+ # contexts. Instead, we look to Knuth for suggested operations on
+ # multisets and implement the union-add operation discussed in
+ # TAOCP Volume II section 4.6.3 exercise 19. The Wikipedia entry for
+ # multisets calls that operation a sum or join.
+
+ if iterable is not None:
+ if isinstance(iterable, Mapping):
+ for elem, count in iterable.items():
+ self[elem] += count
+ else:
+ for elem in iterable:
+ self[elem] += 1
+
+ def copy(self):
+ 'Like dict.copy() but returns a Counter instance instead of a dict.'
+ return Counter(self)
+
+ def __repr__(self):
+ if not self:
+ return '%s()' % self.__class__.__name__
+ items = ', '.join(map('%r: %r'.__mod__, self.most_common()))
+ return '%s({%s})' % (self.__class__.__name__, items)
+
################################################################################
### UserDict