diff options
author | Raymond Hettinger <python@rcn.com> | 2007-02-19 04:08:43 (GMT) |
---|---|---|
committer | Raymond Hettinger <python@rcn.com> | 2007-02-19 04:08:43 (GMT) |
commit | 00166c5532fc7562c07383a0ae2985b3ffaf253a (patch) | |
tree | 6f9c77ecb9639ad781c83d22d63e8014a71d5a76 /Lib | |
parent | d6fc72a5ae27048dae56c773896ccbd6152d9b9b (diff) | |
download | cpython-00166c5532fc7562c07383a0ae2985b3ffaf253a.zip cpython-00166c5532fc7562c07383a0ae2985b3ffaf253a.tar.gz cpython-00166c5532fc7562c07383a0ae2985b3ffaf253a.tar.bz2 |
Add merge() function to heapq.
Diffstat (limited to 'Lib')
-rw-r--r-- | Lib/heapq.py | 42 | ||||
-rw-r--r-- | Lib/test/test_heapq.py | 10 |
2 files changed, 49 insertions, 3 deletions
diff --git a/Lib/heapq.py b/Lib/heapq.py index 753c3b7..b56d0f9 100644 --- a/Lib/heapq.py +++ b/Lib/heapq.py @@ -126,8 +126,8 @@ Believe me, real good tape sorts were quite spectacular to watch! From all times, sorting has always been a Great Art! :-) """ -__all__ = ['heappush', 'heappop', 'heapify', 'heapreplace', 'nlargest', - 'nsmallest'] +__all__ = ['heappush', 'heappop', 'heapify', 'heapreplace', 'merge', + 'nlargest', 'nsmallest'] from itertools import islice, repeat, count, imap, izip, tee from operator import itemgetter, neg @@ -308,6 +308,41 @@ try: except ImportError: pass +def merge(*iterables): + '''Merge multiple sorted inputs into a single sorted output. + + Similar to sorted(itertools.chain(*iterables)) but returns an iterable, + does not pull the data into memory all at once, and reduces the number + of comparisons by assuming that each of the input streams is already sorted. + + >>> list(merge([1,3,5,7], [0,2,4,8], [5,10,15,20], [], [25])) + [0, 1, 2, 3, 4, 5, 5, 7, 8, 10, 15, 20, 25] + + ''' + _heappop, siftup, _StopIteration = heappop, _siftup, StopIteration + + h = [] + h_append = h.append + for it in map(iter, iterables): + try: + next = it.next + h_append([next(), next]) + except _StopIteration: + pass + heapify(h) + + while 1: + try: + while 1: + v, next = s = h[0] # raises IndexError when h is empty + yield v + s[0] = next() # raises StopIteration when exhausted + siftup(h, 0) # restore heap condition + except _StopIteration: + _heappop(h) # remove empty iterator + except IndexError: + return + # Extend the implementations of nsmallest and nlargest to use a key= argument _nsmallest = nsmallest def nsmallest(n, iterable, key=None): @@ -341,3 +376,6 @@ if __name__ == "__main__": while heap: sort.append(heappop(heap)) print sort + + import doctest + doctest.testmod() diff --git a/Lib/test/test_heapq.py b/Lib/test/test_heapq.py index e9f2798..b7c3ab2 100644 --- a/Lib/test/test_heapq.py +++ b/Lib/test/test_heapq.py @@ -1,6 +1,6 @@ """Unittests for heapq.""" -from heapq import heappush, heappop, heapify, heapreplace, nlargest, nsmallest +from heapq import heappush, heappop, heapify, heapreplace, merge, nlargest, nsmallest import random import unittest from test import test_support @@ -103,6 +103,14 @@ class TestHeap(unittest.TestCase): heap_sorted = [heappop(heap) for i in range(size)] self.assertEqual(heap_sorted, sorted(data)) + def test_merge(self): + inputs = [] + for i in xrange(random.randrange(5)): + row = sorted(random.randrange(1000) for j in range(random.randrange(10))) + inputs.append(row) + self.assertEqual(sorted(chain(*inputs)), list(merge(*inputs))) + self.assertEqual(list(merge()), []) + def test_nsmallest(self): data = [(random.randrange(2000), i) for i in range(1000)] for f in (None, lambda x: x[0] * 547 % 2000): |