summaryrefslogtreecommitdiffstats
path: root/Lib
diff options
context:
space:
mode:
authorRaymond Hettinger <python@rcn.com>2007-02-19 04:08:43 (GMT)
committerRaymond Hettinger <python@rcn.com>2007-02-19 04:08:43 (GMT)
commit00166c5532fc7562c07383a0ae2985b3ffaf253a (patch)
tree6f9c77ecb9639ad781c83d22d63e8014a71d5a76 /Lib
parentd6fc72a5ae27048dae56c773896ccbd6152d9b9b (diff)
downloadcpython-00166c5532fc7562c07383a0ae2985b3ffaf253a.zip
cpython-00166c5532fc7562c07383a0ae2985b3ffaf253a.tar.gz
cpython-00166c5532fc7562c07383a0ae2985b3ffaf253a.tar.bz2
Add merge() function to heapq.
Diffstat (limited to 'Lib')
-rw-r--r--Lib/heapq.py42
-rw-r--r--Lib/test/test_heapq.py10
2 files changed, 49 insertions, 3 deletions
diff --git a/Lib/heapq.py b/Lib/heapq.py
index 753c3b7..b56d0f9 100644
--- a/Lib/heapq.py
+++ b/Lib/heapq.py
@@ -126,8 +126,8 @@ Believe me, real good tape sorts were quite spectacular to watch!
From all times, sorting has always been a Great Art! :-)
"""
-__all__ = ['heappush', 'heappop', 'heapify', 'heapreplace', 'nlargest',
- 'nsmallest']
+__all__ = ['heappush', 'heappop', 'heapify', 'heapreplace', 'merge',
+ 'nlargest', 'nsmallest']
from itertools import islice, repeat, count, imap, izip, tee
from operator import itemgetter, neg
@@ -308,6 +308,41 @@ try:
except ImportError:
pass
+def merge(*iterables):
+ '''Merge multiple sorted inputs into a single sorted output.
+
+ Similar to sorted(itertools.chain(*iterables)) but returns an iterable,
+ does not pull the data into memory all at once, and reduces the number
+ of comparisons by assuming that each of the input streams is already sorted.
+
+ >>> list(merge([1,3,5,7], [0,2,4,8], [5,10,15,20], [], [25]))
+ [0, 1, 2, 3, 4, 5, 5, 7, 8, 10, 15, 20, 25]
+
+ '''
+ _heappop, siftup, _StopIteration = heappop, _siftup, StopIteration
+
+ h = []
+ h_append = h.append
+ for it in map(iter, iterables):
+ try:
+ next = it.next
+ h_append([next(), next])
+ except _StopIteration:
+ pass
+ heapify(h)
+
+ while 1:
+ try:
+ while 1:
+ v, next = s = h[0] # raises IndexError when h is empty
+ yield v
+ s[0] = next() # raises StopIteration when exhausted
+ siftup(h, 0) # restore heap condition
+ except _StopIteration:
+ _heappop(h) # remove empty iterator
+ except IndexError:
+ return
+
# Extend the implementations of nsmallest and nlargest to use a key= argument
_nsmallest = nsmallest
def nsmallest(n, iterable, key=None):
@@ -341,3 +376,6 @@ if __name__ == "__main__":
while heap:
sort.append(heappop(heap))
print sort
+
+ import doctest
+ doctest.testmod()
diff --git a/Lib/test/test_heapq.py b/Lib/test/test_heapq.py
index e9f2798..b7c3ab2 100644
--- a/Lib/test/test_heapq.py
+++ b/Lib/test/test_heapq.py
@@ -1,6 +1,6 @@
"""Unittests for heapq."""
-from heapq import heappush, heappop, heapify, heapreplace, nlargest, nsmallest
+from heapq import heappush, heappop, heapify, heapreplace, merge, nlargest, nsmallest
import random
import unittest
from test import test_support
@@ -103,6 +103,14 @@ class TestHeap(unittest.TestCase):
heap_sorted = [heappop(heap) for i in range(size)]
self.assertEqual(heap_sorted, sorted(data))
+ def test_merge(self):
+ inputs = []
+ for i in xrange(random.randrange(5)):
+ row = sorted(random.randrange(1000) for j in range(random.randrange(10)))
+ inputs.append(row)
+ self.assertEqual(sorted(chain(*inputs)), list(merge(*inputs)))
+ self.assertEqual(list(merge()), [])
+
def test_nsmallest(self):
data = [(random.randrange(2000), i) for i in range(1000)]
for f in (None, lambda x: x[0] * 547 % 2000):