From 35db43955cf231a4040d32c77b1ff5a4b639039f Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Fri, 30 May 2014 02:28:36 -0700 Subject: Issue #13742: Add key and reverse parameters to heapq.merge() --- Doc/glossary.rst | 7 +++-- Doc/library/heapq.rst | 14 +++++++++- Lib/heapq.py | 74 ++++++++++++++++++++++++++++++++++++++++++-------- Lib/test/test_heapq.py | 19 ++++++++++--- Misc/NEWS | 3 ++ 5 files changed, 98 insertions(+), 19 deletions(-) diff --git a/Doc/glossary.rst b/Doc/glossary.rst index f71a1f7..08ee9db 100644 --- a/Doc/glossary.rst +++ b/Doc/glossary.rst @@ -444,12 +444,13 @@ Glossary A number of tools in Python accept key functions to control how elements are ordered or grouped. They include :func:`min`, :func:`max`, - :func:`sorted`, :meth:`list.sort`, :func:`heapq.nsmallest`, - :func:`heapq.nlargest`, and :func:`itertools.groupby`. + :func:`sorted`, :meth:`list.sort`, :func:`heapq.merge`, + :func:`heapq.nsmallest`, :func:`heapq.nlargest`, and + :func:`itertools.groupby`. There are several ways to create a key function. For example. the :meth:`str.lower` method can serve as a key function for case insensitive - sorts. Alternatively, an ad-hoc key function can be built from a + sorts. Alternatively, a key function can be built from a :keyword:`lambda` expression such as ``lambda r: (r[0], r[2])``. Also, the :mod:`operator` module provides three key function constructors: :func:`~operator.attrgetter`, :func:`~operator.itemgetter`, and diff --git a/Doc/library/heapq.rst b/Doc/library/heapq.rst index 4f1a682..2a41434 100644 --- a/Doc/library/heapq.rst +++ b/Doc/library/heapq.rst @@ -81,7 +81,7 @@ The following functions are provided: The module also offers three general purpose functions based on heaps. -.. function:: merge(*iterables) +.. function:: merge(*iterables, key=None, reverse=False) Merge multiple sorted inputs into a single sorted output (for example, merge timestamped entries from multiple log files). Returns an :term:`iterator` @@ -91,6 +91,18 @@ The module also offers three general purpose functions based on heaps. not pull the data into memory all at once, and assumes that each of the input streams is already sorted (smallest to largest). + Has two optional arguments which must be specified as keyword arguments. + + *key* specifies a :term:`key function` of one argument that is used to + extract a comparison key from each input element. The default value is + ``None`` (compare the elements directly). + + *reverse* is a boolean value. If set to ``True``, then the input elements + are merged as if each comparison were reversed. + + .. versionchanged:: 3.5 + Added the optional *key* and *reverse* parameters. + .. function:: nlargest(n, iterable, key=None) diff --git a/Lib/heapq.py b/Lib/heapq.py index ae7ac96..79b46fe 100644 --- a/Lib/heapq.py +++ b/Lib/heapq.py @@ -176,6 +176,16 @@ def heapify(x): for i in reversed(range(n//2)): _siftup(x, i) +def _heappop_max(heap): + """Maxheap version of a heappop.""" + lastelt = heap.pop() # raises appropriate IndexError if heap is empty + if heap: + returnitem = heap[0] + heap[0] = lastelt + _siftup_max(heap, 0) + return returnitem + return lastelt + def _heapreplace_max(heap, item): """Maxheap version of a heappop followed by a heappush.""" returnitem = heap[0] # raises appropriate IndexError if heap is empty @@ -311,7 +321,7 @@ try: except ImportError: pass -def merge(*iterables): +def merge(*iterables, key=None, reverse=False): '''Merge multiple sorted inputs into a single sorted output. Similar to sorted(itertools.chain(*iterables)) but returns a generator, @@ -321,31 +331,73 @@ def merge(*iterables): >>> list(merge([1,3,5,7], [0,2,4,8], [5,10,15,20], [], [25])) [0, 1, 2, 3, 4, 5, 5, 7, 8, 10, 15, 20, 25] + If *key* is not None, applies a key function to each element to determine + its sort order. + + >>> list(merge(['dog', 'horse'], ['cat', 'fish', 'kangaroo'], key=len)) + ['dog', 'cat', 'fish', 'horse', 'kangaroo'] + ''' h = [] h_append = h.append + + if reverse: + _heapify = _heapify_max + _heappop = _heappop_max + _heapreplace = _heapreplace_max + direction = -1 + else: + _heapify = heapify + _heappop = heappop + _heapreplace = heapreplace + direction = 1 + + if key is None: + for order, it in enumerate(map(iter, iterables)): + try: + next = it.__next__ + h_append([next(), order * direction, next]) + except StopIteration: + pass + _heapify(h) + while len(h) > 1: + try: + while True: + value, order, next = s = h[0] + yield value + s[0] = next() # raises StopIteration when exhausted + _heapreplace(h, s) # restore heap condition + except StopIteration: + _heappop(h) # remove empty iterator + if h: + # fast case when only a single iterator remains + value, order, next = h[0] + yield value + yield from next.__self__ + return + for order, it in enumerate(map(iter, iterables)): try: next = it.__next__ - h_append([next(), order, next]) + value = next() + h_append([key(value), order * direction, value, next]) except StopIteration: pass - heapify(h) - - _heapreplace = heapreplace + _heapify(h) while len(h) > 1: try: while True: - value, order, next = s = h[0] + key_value, order, value, next = s = h[0] yield value - s[0] = next() # raises StopIteration when exhausted - _heapreplace(h, s) # restore heap condition + value = next() + s[0] = key(value) + s[2] = value + _heapreplace(h, s) except StopIteration: - heappop(h) # remove empty iterator + _heappop(h) if h: - # fast case when only a single iterator remains - value, order, next = h[0] + key_value, order, value, next = h[0] yield value yield from next.__self__ diff --git a/Lib/test/test_heapq.py b/Lib/test/test_heapq.py index 59c7029..685797a 100644 --- a/Lib/test/test_heapq.py +++ b/Lib/test/test_heapq.py @@ -6,6 +6,7 @@ import unittest from test import support from unittest import TestCase, skipUnless +from operator import itemgetter py_heapq = support.import_fresh_module('heapq', blocked=['_heapq']) c_heapq = support.import_fresh_module('heapq', fresh=['_heapq']) @@ -152,11 +153,21 @@ class TestHeap: def test_merge(self): inputs = [] - for i in range(random.randrange(5)): - row = sorted(random.randrange(1000) for j in range(random.randrange(10))) + for i in range(random.randrange(25)): + row = [] + for j in range(random.randrange(100)): + tup = random.choice('ABC'), random.randrange(-500, 500) + row.append(tup) inputs.append(row) - self.assertEqual(sorted(chain(*inputs)), list(self.module.merge(*inputs))) - self.assertEqual(list(self.module.merge()), []) + + for key in [None, itemgetter(0), itemgetter(1), itemgetter(1, 0)]: + for reverse in [False, True]: + seqs = [] + for seq in inputs: + seqs.append(sorted(seq, key=key, reverse=reverse)) + self.assertEqual(sorted(chain(*inputs), key=key, reverse=reverse), + list(self.module.merge(*seqs, key=key, reverse=reverse))) + self.assertEqual(list(self.module.merge()), []) def test_merge_does_not_suppress_index_error(self): # Issue 19018: Heapq.merge suppresses IndexError from user generator diff --git a/Misc/NEWS b/Misc/NEWS index 7afb332..9047b2b 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -94,6 +94,9 @@ Library error bubble up as this "bad data" appears in many real world zip files in the wild and is ignored by other zip tools. +- Issue #13742: Added "key" and "reverse" parameters to heapq.merge(). + (First draft of patch contributed by Simon Sapin.) + - Issue #21402: tkinter.ttk now works when default root window is not set. - Issue #3015: _tkinter.create() now creates tkapp object with wantobject=1 by -- cgit v0.12