diff options
author | Tim Peters <tim.peters@gmail.com> | 2002-08-02 21:48:06 (GMT) |
---|---|---|
committer | Tim Peters <tim.peters@gmail.com> | 2002-08-02 21:48:06 (GMT) |
commit | 28c25527c25babcf31e4b00dea316ca6f8612079 (patch) | |
tree | 79ec807bc9edd9bbcf44768a5b78dd3d104ce8b3 | |
parent | 940dc922c0be94c622d8e2cf1474471397073f4e (diff) | |
download | cpython-28c25527c25babcf31e4b00dea316ca6f8612079.zip cpython-28c25527c25babcf31e4b00dea316ca6f8612079.tar.gz cpython-28c25527c25babcf31e4b00dea316ca6f8612079.tar.bz2 |
Hmm! I thought I checked this in before! Oh well.
Added new heapify() function, which transforms an arbitrary list into a
heap in linear time; that's a fundamental tool for using heaps in real
life <wink>.
Added heapify() test. Added a "less naive" N-best algorithm to the test
suite, and noted that this could actually go much faster (building on
heapify()) if we had max-heaps instead of min-heaps (the iterative method
is appropriate when all the data isn't known in advance, but when it is
known in advance the tradeoffs get murkier).
-rw-r--r-- | Lib/heapq.py | 38 | ||||
-rw-r--r-- | Lib/test/test_heapq.py | 20 |
2 files changed, 48 insertions, 10 deletions
```diff
diff --git a/Lib/heapq.py b/Lib/heapq.py
index cb22a19..f30ce30 100644
--- a/Lib/heapq.py
+++ b/Lib/heapq.py
@@ -13,6 +13,7 @@ heap = [] # creates an empty heap
 heappush(heap, item) # pushes a new item on the heap
 item = heappop(heap) # pops the smallest item from the heap
 item = heap[0] # smallest item on the heap without popping it
+heapify(heap) # transform list into a heap, in-place, in linear time

 Our API differs from textbook heap algorithms as follows:

@@ -136,15 +137,13 @@ def heappush(heap, item):
         pos = parentpos
     heap[pos] = item

-def heappop(heap):
-    """Pop the smallest item off the heap, maintaining the heap invariant."""
-    endpos = len(heap) - 1
-    if endpos <= 0:
-        return heap.pop()
-    returnitem = heap[0]
-    item = heap.pop()
-    pos = 0
-    # Sift item into position, down from the root, moving the smaller
+# The child indices of heap index pos are already heaps, and we want to make
+# a heap at index pos too.
+def _siftdown(heap, pos):
+    endpos = len(heap)
+    assert pos < endpos
+    item = heap[pos]
+    # Sift item into position, down from pos, moving the smaller
     # child up, until finding pos such that item <= pos's children.
     childpos = 2*pos + 1 # leftmost child position
     while childpos < endpos:
@@ -164,8 +163,29 @@ def heappop(heap):
         pos = childpos
         childpos = 2*pos + 1
     heap[pos] = item
+
+def heappop(heap):
+    """Pop the smallest item off the heap, maintaining the heap invariant."""
+    lastelt = heap.pop() # raises appropriate IndexError if heap is empty
+    if heap:
+        returnitem = heap[0]
+        heap[0] = lastelt
+        _siftdown(heap, 0)
+    else:
+        returnitem = lastelt
     return returnitem

+def heapify(heap):
+    """Transform list heap into a heap, in-place, in O(len(heap)) time."""
+    n = len(heap)
+    # Transform bottom-up. The largest index there's any point to looking at
+    # is the largest with a child index in-range, so must have 2*i + 1 < n,
+    # or i < (n-1)/2. If n is even = 2*j, this is (2*j-1)/2 = j-1/2 so
+    # j-1 is the largest, which is n//2 - 1. If n is odd = 2*j+1, this is
+    # (2*j+1-1)/2 = j so j-1 is the largest, and that's again n//2-1.
+    for i in xrange(n//2 - 1, -1, -1):
+        _siftdown(heap, i)
+
 if __name__ == "__main__":
     # Simple sanity test
     heap = []
diff --git a/Lib/test/test_heapq.py b/Lib/test/test_heapq.py
index 879899e..1330f12 100644
--- a/Lib/test/test_heapq.py
+++ b/Lib/test/test_heapq.py
@@ -2,7 +2,7 @@

 from test.test_support import verify, vereq, verbose, TestFailed

-from heapq import heappush, heappop
+from heapq import heappush, heappop, heapify
 import random

 def check_invariant(heap):
@@ -40,6 +40,24 @@ def test_main():
             heappop(heap)
     heap.sort()
     vereq(heap, data_sorted[-10:])
+    # 4) Test heapify.
+    for size in range(30):
+        heap = [random.random() for dummy in range(size)]
+        heapify(heap)
+        check_invariant(heap)
+    # 5) Less-naive "N-best" algorithm, much faster (if len(data) is big
+    # enough <wink>) than sorting all of data. However, if we had a max
+    # heap instead of a min heap, it would go much faster still via
+    # heapify'ing all of data (linear time), then doing 10 heappops
+    # (10 log-time steps).
+    heap = data[:10]
+    heapify(heap)
+    for item in data[10:]:
+        if item > heap[0]: # this gets rarer and rarer the longer we run
+            heappush(heap, item)
+            heappop(heap)
+    heap.sort()
+    vereq(heap, data_sorted[-10:])
     # Make user happy
     if verbose:
         print "All OK"
```