summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Tim Peters <tim.peters@gmail.com> 2002-08-02 21:48:06 (GMT)
committer: Tim Peters <tim.peters@gmail.com> 2002-08-02 21:48:06 (GMT)
commit: 28c25527c25babcf31e4b00dea316ca6f8612079 (patch)
tree: 79ec807bc9edd9bbcf44768a5b78dd3d104ce8b3
parent: 940dc922c0be94c622d8e2cf1474471397073f4e (diff)
download: cpython-28c25527c25babcf31e4b00dea316ca6f8612079.zip
cpython-28c25527c25babcf31e4b00dea316ca6f8612079.tar.gz
cpython-28c25527c25babcf31e4b00dea316ca6f8612079.tar.bz2
Hmm! I thought I checked this in before! Oh well.
Added new heapify() function, which transforms an arbitrary list into a heap in linear time; that's a fundamental tool for using heaps in real life <wink>. Added heapify() test. Added a "less naive" N-best algorithm to the test suite, and noted that this could actually go much faster (building on heapify()) if we had max-heaps instead of min-heaps (the iterative method is appropriate when all the data isn't known in advance, but when it is known in advance the tradeoffs get murkier).
-rw-r--r-- Lib/heapq.py 38
-rw-r--r-- Lib/test/test_heapq.py 20
2 files changed, 48 insertions, 10 deletions
diff --git a/Lib/heapq.py b/Lib/heapq.py
index cb22a19..f30ce30 100644
--- a/Lib/heapq.py
+++ b/Lib/heapq.py
@@ -13,6 +13,7 @@ heap = [] # creates an empty heap
heappush(heap, item) # pushes a new item on the heap
item = heappop(heap) # pops the smallest item from the heap
item = heap[0] # smallest item on the heap without popping it
+heapify(heap) # transform list into a heap, in-place, in linear time
Our API differs from textbook heap algorithms as follows:
@@ -136,15 +137,13 @@ def heappush(heap, item):
pos = parentpos
heap[pos] = item
-def heappop(heap):
- """Pop the smallest item off the heap, maintaining the heap invariant."""
- endpos = len(heap) - 1
- if endpos <= 0:
- return heap.pop()
- returnitem = heap[0]
- item = heap.pop()
- pos = 0
- # Sift item into position, down from the root, moving the smaller
+# The child indices of heap index pos are already heaps, and we want to make
+# a heap at index pos too.
+def _siftdown(heap, pos):
+ endpos = len(heap)
+ assert pos < endpos
+ item = heap[pos]
+ # Sift item into position, down from pos, moving the smaller
# child up, until finding pos such that item <= pos's children.
childpos = 2*pos + 1 # leftmost child position
while childpos < endpos:
@@ -164,8 +163,29 @@ def heappop(heap):
pos = childpos
childpos = 2*pos + 1
heap[pos] = item
+
+def heappop(heap):
+ """Pop the smallest item off the heap, maintaining the heap invariant."""
+ lastelt = heap.pop() # raises appropriate IndexError if heap is empty
+ if heap:
+ returnitem = heap[0]
+ heap[0] = lastelt
+ _siftdown(heap, 0)
+ else:
+ returnitem = lastelt
return returnitem
+def heapify(heap):
+ """Transform list heap into a heap, in-place, in O(len(heap)) time."""
+ n = len(heap)
+ # Transform bottom-up. The largest index there's any point to looking at
+ # is the largest with a child index in-range, so must have 2*i + 1 < n,
+ # or i < (n-1)/2. If n is even = 2*j, this is (2*j-1)/2 = j-1/2 so
+ # j-1 is the largest, which is n//2 - 1. If n is odd = 2*j+1, this is
+ # (2*j+1-1)/2 = j so j-1 is the largest, and that's again n//2-1.
+ for i in xrange(n//2 - 1, -1, -1):
+ _siftdown(heap, i)
+
if __name__ == "__main__":
# Simple sanity test
heap = []
diff --git a/Lib/test/test_heapq.py b/Lib/test/test_heapq.py
index 879899e..1330f12 100644
--- a/Lib/test/test_heapq.py
+++ b/Lib/test/test_heapq.py
@@ -2,7 +2,7 @@
from test.test_support import verify, vereq, verbose, TestFailed
-from heapq import heappush, heappop
+from heapq import heappush, heappop, heapify
import random
def check_invariant(heap):
@@ -40,6 +40,24 @@ def test_main():
heappop(heap)
heap.sort()
vereq(heap, data_sorted[-10:])
+ # 4) Test heapify.
+ for size in range(30):
+ heap = [random.random() for dummy in range(size)]
+ heapify(heap)
+ check_invariant(heap)
+ # 5) Less-naive "N-best" algorithm, much faster (if len(data) is big
+ # enough <wink>) than sorting all of data. However, if we had a max
+ # heap instead of a min heap, it would go much faster still via
+ # heapify'ing all of data (linear time), then doing 10 heappops
+ # (10 log-time steps).
+ heap = data[:10]
+ heapify(heap)
+ for item in data[10:]:
+ if item > heap[0]: # this gets rarer and rarer the longer we run
+ heappush(heap, item)
+ heappop(heap)
+ heap.sort()
+ vereq(heap, data_sorted[-10:])
# Make user happy
if verbose:
print "All OK"