diff options
author | Raymond Hettinger <python@rcn.com> | 2014-05-30 09:28:36 (GMT) |
---|---|---|
committer | Raymond Hettinger <python@rcn.com> | 2014-05-30 09:28:36 (GMT) |
commit | 35db43955cf231a4040d32c77b1ff5a4b639039f (patch) | |
tree | 670f590844d11613b16ab61b018caaeb93e9bab1 | |
parent | e7bfe13635e4201660c9d016b62de10c2f7c9de3 (diff) | |
download | cpython-35db43955cf231a4040d32c77b1ff5a4b639039f.zip cpython-35db43955cf231a4040d32c77b1ff5a4b639039f.tar.gz cpython-35db43955cf231a4040d32c77b1ff5a4b639039f.tar.bz2 |
Issue #13742: Add key and reverse parameters to heapq.merge()
-rw-r--r-- | Doc/glossary.rst | 7 | ||||
-rw-r--r-- | Doc/library/heapq.rst | 14 | ||||
-rw-r--r-- | Lib/heapq.py | 74 | ||||
-rw-r--r-- | Lib/test/test_heapq.py | 19 | ||||
-rw-r--r-- | Misc/NEWS | 3 |
5 files changed, 98 insertions, 19 deletions
diff --git a/Doc/glossary.rst b/Doc/glossary.rst index f71a1f7..08ee9db 100644 --- a/Doc/glossary.rst +++ b/Doc/glossary.rst @@ -444,12 +444,13 @@ Glossary A number of tools in Python accept key functions to control how elements are ordered or grouped. They include :func:`min`, :func:`max`, - :func:`sorted`, :meth:`list.sort`, :func:`heapq.nsmallest`, - :func:`heapq.nlargest`, and :func:`itertools.groupby`. + :func:`sorted`, :meth:`list.sort`, :func:`heapq.merge`, + :func:`heapq.nsmallest`, :func:`heapq.nlargest`, and + :func:`itertools.groupby`. There are several ways to create a key function. For example. the :meth:`str.lower` method can serve as a key function for case insensitive - sorts. Alternatively, an ad-hoc key function can be built from a + sorts. Alternatively, a key function can be built from a :keyword:`lambda` expression such as ``lambda r: (r[0], r[2])``. Also, the :mod:`operator` module provides three key function constructors: :func:`~operator.attrgetter`, :func:`~operator.itemgetter`, and diff --git a/Doc/library/heapq.rst b/Doc/library/heapq.rst index 4f1a682..2a41434 100644 --- a/Doc/library/heapq.rst +++ b/Doc/library/heapq.rst @@ -81,7 +81,7 @@ The following functions are provided: The module also offers three general purpose functions based on heaps. -.. function:: merge(*iterables) +.. function:: merge(*iterables, key=None, reverse=False) Merge multiple sorted inputs into a single sorted output (for example, merge timestamped entries from multiple log files). Returns an :term:`iterator` @@ -91,6 +91,18 @@ The module also offers three general purpose functions based on heaps. not pull the data into memory all at once, and assumes that each of the input streams is already sorted (smallest to largest). + Has two optional arguments which must be specified as keyword arguments. + + *key* specifies a :term:`key function` of one argument that is used to + extract a comparison key from each input element. The default value is + ``None`` (compare the elements directly). + + *reverse* is a boolean value. If set to ``True``, then the input elements + are merged as if each comparison were reversed. + + .. versionchanged:: 3.5 + Added the optional *key* and *reverse* parameters. + .. function:: nlargest(n, iterable, key=None) diff --git a/Lib/heapq.py b/Lib/heapq.py index ae7ac96..79b46fe 100644 --- a/Lib/heapq.py +++ b/Lib/heapq.py @@ -176,6 +176,16 @@ def heapify(x): for i in reversed(range(n//2)): _siftup(x, i) +def _heappop_max(heap): + """Maxheap version of a heappop.""" + lastelt = heap.pop() # raises appropriate IndexError if heap is empty + if heap: + returnitem = heap[0] + heap[0] = lastelt + _siftup_max(heap, 0) + return returnitem + return lastelt + def _heapreplace_max(heap, item): """Maxheap version of a heappop followed by a heappush.""" returnitem = heap[0] # raises appropriate IndexError if heap is empty @@ -311,7 +321,7 @@ try: except ImportError: pass -def merge(*iterables): +def merge(*iterables, key=None, reverse=False): '''Merge multiple sorted inputs into a single sorted output. Similar to sorted(itertools.chain(*iterables)) but returns a generator, @@ -321,31 +331,73 @@ def merge(*iterables): >>> list(merge([1,3,5,7], [0,2,4,8], [5,10,15,20], [], [25])) [0, 1, 2, 3, 4, 5, 5, 7, 8, 10, 15, 20, 25] + If *key* is not None, applies a key function to each element to determine + its sort order. + + >>> list(merge(['dog', 'horse'], ['cat', 'fish', 'kangaroo'], key=len)) + ['dog', 'cat', 'fish', 'horse', 'kangaroo'] + ''' h = [] h_append = h.append + + if reverse: + _heapify = _heapify_max + _heappop = _heappop_max + _heapreplace = _heapreplace_max + direction = -1 + else: + _heapify = heapify + _heappop = heappop + _heapreplace = heapreplace + direction = 1 + + if key is None: + for order, it in enumerate(map(iter, iterables)): + try: + next = it.__next__ + h_append([next(), order * direction, next]) + except StopIteration: + pass + _heapify(h) + while len(h) > 1: + try: + while True: + value, order, next = s = h[0] + yield value + s[0] = next() # raises StopIteration when exhausted + _heapreplace(h, s) # restore heap condition + except StopIteration: + _heappop(h) # remove empty iterator + if h: + # fast case when only a single iterator remains + value, order, next = h[0] + yield value + yield from next.__self__ + return + for order, it in enumerate(map(iter, iterables)): try: next = it.__next__ - h_append([next(), order, next]) + value = next() + h_append([key(value), order * direction, value, next]) except StopIteration: pass - heapify(h) - - _heapreplace = heapreplace + _heapify(h) while len(h) > 1: try: while True: - value, order, next = s = h[0] + key_value, order, value, next = s = h[0] yield value - s[0] = next() # raises StopIteration when exhausted - _heapreplace(h, s) # restore heap condition + value = next() + s[0] = key(value) + s[2] = value + _heapreplace(h, s) except StopIteration: - heappop(h) # remove empty iterator + _heappop(h) if h: - # fast case when only a single iterator remains - value, order, next = h[0] + key_value, order, value, next = h[0] yield value yield from next.__self__ diff --git a/Lib/test/test_heapq.py b/Lib/test/test_heapq.py index 59c7029..685797a 100644 --- a/Lib/test/test_heapq.py +++ b/Lib/test/test_heapq.py @@ -6,6 +6,7 @@ import unittest from test import support from unittest import TestCase, skipUnless +from operator import itemgetter py_heapq = support.import_fresh_module('heapq', blocked=['_heapq']) c_heapq = support.import_fresh_module('heapq', fresh=['_heapq']) @@ -152,11 +153,21 @@ class TestHeap: def test_merge(self): inputs = [] - for i in range(random.randrange(5)): - row = sorted(random.randrange(1000) for j in range(random.randrange(10))) + for i in range(random.randrange(25)): + row = [] + for j in range(random.randrange(100)): + tup = random.choice('ABC'), random.randrange(-500, 500) + row.append(tup) inputs.append(row) - self.assertEqual(sorted(chain(*inputs)), list(self.module.merge(*inputs))) - self.assertEqual(list(self.module.merge()), []) + + for key in [None, itemgetter(0), itemgetter(1), itemgetter(1, 0)]: + for reverse in [False, True]: + seqs = [] + for seq in inputs: + seqs.append(sorted(seq, key=key, reverse=reverse)) + self.assertEqual(sorted(chain(*inputs), key=key, reverse=reverse), + list(self.module.merge(*seqs, key=key, reverse=reverse))) + self.assertEqual(list(self.module.merge()), []) def test_merge_does_not_suppress_index_error(self): # Issue 19018: Heapq.merge suppresses IndexError from user generator @@ -94,6 +94,9 @@ Library error bubble up as this "bad data" appears in many real world zip files in the wild and is ignored by other zip tools. +- Issue #13742: Added "key" and "reverse" parameters to heapq.merge(). + (First draft of patch contributed by Simon Sapin.) + - Issue #21402: tkinter.ttk now works when default root window is not set. - Issue #3015: _tkinter.create() now creates tkapp object with wantobject=1 by |