diff options
author | Raymond Hettinger <python@rcn.com> | 2004-06-10 05:03:17 (GMT) |
---|---|---|
committer | Raymond Hettinger <python@rcn.com> | 2004-06-10 05:03:17 (GMT) |
commit | 33ecffb65ae43ece95e4d828f95819395187d579 (patch) | |
tree | 499adce5b4fc964973a8e72baf2c6214bcef89e3 /Lib | |
parent | 7d019664d7fcd3692eafef668fbc2e17126dee14 (diff) | |
download | cpython-33ecffb65ae43ece95e4d828f95819395187d579.zip cpython-33ecffb65ae43ece95e4d828f95819395187d579.tar.gz cpython-33ecffb65ae43ece95e4d828f95819395187d579.tar.bz2 |
SF patch #969791: Add nlargest() and nsmallest() to heapq.
Diffstat (limited to 'Lib')
-rw-r--r-- | Lib/heapq.py | 36 | ||||
-rw-r--r-- | Lib/test/test_heapq.py | 11 |
2 files changed, 44 insertions, 3 deletions
diff --git a/Lib/heapq.py b/Lib/heapq.py index 3eb69d8..d1aad98 100644 --- a/Lib/heapq.py +++ b/Lib/heapq.py @@ -30,7 +30,7 @@ without surprises: heap[0] is the smallest item, and heap.sort() maintains the heap invariant! """ -# Original code by Kevin O'Connor, augmented by Tim Peters +# Original code by Kevin O'Connor, augmented by Tim Peters and Raymond Hettinger __about__ = """Heap queues @@ -126,7 +126,10 @@ Believe me, real good tape sorts were quite spectacular to watch! From all times, sorting has always been a Great Art! :-) """ -__all__ = ['heappush', 'heappop', 'heapify', 'heapreplace'] +__all__ = ['heappush', 'heappop', 'heapify', 'heapreplace', 'nlargest', + 'nsmallest'] + +from itertools import islice, repeat def heappush(heap, item): """Push item onto heap, maintaining the heap invariant.""" @@ -168,6 +171,35 @@ def heapify(x): for i in reversed(xrange(n//2)): _siftup(x, i) +def nlargest(iterable, n): + """Find the n largest elements in a dataset. + + Equivalent to: sorted(iterable, reverse=True)[:n] + """ + it = iter(iterable) + result = list(islice(it, n)) + if not result: + return result + heapify(result) + _heapreplace = heapreplace + sol = result[0] # sol --> smallest of the nlargest + for elem in it: + if elem <= sol: + continue + _heapreplace(result, elem) + sol = result[0] + result.sort(reverse=True) + return result + +def nsmallest(iterable, n): + """Find the n smallest elements in a dataset. + + Equivalent to: sorted(iterable)[:n] + """ + h = list(iterable) + heapify(h) + return map(heappop, repeat(h, min(n, len(h)))) + # 'heap' is a heap at all indices >= startpos, except possibly for pos. pos # is the index of a leaf with a possibly out-of-order value. Restore the # heap invariant. diff --git a/Lib/test/test_heapq.py b/Lib/test/test_heapq.py index 8f3c6f9..f04ea94 100644 --- a/Lib/test/test_heapq.py +++ b/Lib/test/test_heapq.py @@ -2,7 +2,7 @@ from test.test_support import verify, vereq, verbose, TestFailed -from heapq import heappush, heappop, heapify, heapreplace +from heapq import heappush, heappop, heapify, heapreplace, nlargest, nsmallest import random def check_invariant(heap): @@ -84,6 +84,15 @@ def test_main(): data.sort() sorted = [heappop(heap) for i in range(size)] vereq(data, sorted) + + # 7) Check nlargest() and nsmallest() + data = [random.randrange(2000) for i in range(1000)] + copy = data[:] + copy.sort(reverse=True) + vereq(nlargest(data, 400), copy[:400]) + copy.sort() + vereq(nsmallest(data, 400), copy[:400]) + # Make user happy if verbose: print "All OK" |