From fb4c604cacf24621b8c7ddff8fa3db06e82a4971 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Sat, 7 Aug 2010 23:35:52 +0000 Subject: Document implementation notes for priority queues --- Doc/library/heapq.rst | 63 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) diff --git a/Doc/library/heapq.rst b/Doc/library/heapq.rst index 9a44047..8e6fd2d 100644 --- a/Doc/library/heapq.rst +++ b/Doc/library/heapq.rst @@ -6,6 +6,7 @@ .. moduleauthor:: Kevin O'Connor .. sectionauthor:: Guido van Rossum .. sectionauthor:: François Pinard +.. sectionauthor:: Raymond Hettinger .. versionadded:: 2.3 @@ -151,6 +152,68 @@ values, it is more efficient to use the :func:`sorted` function. Also, when functions. +Priority Queue Implementation Notes +----------------------------------- + +A `priority queue `_ is common use +for a heap, and it presents several implementation challenges: + +* Sort stability: how do you get two tasks with equal priorities to be returned + in the order they were originally added? + +* In the future with Python 3, tuple comparison breaks for (priority, task) + pairs if the priorities are equal and the tasks do not have a default + comparison order. + +* If the priority of a task changes, how do you move it to a new position in + the heap? + +* Or if a pending task needs to be deleted, how do you find it and remove it + from the queue? + +A solution to the first two challenges is to store entries as 3-element list +including the priority, an entry count, and the task. The entry count serves as +a tie-breaker so that two tasks with the same priority are returned in the order +they were added. And since no two entry counts are the same, the tuple +comparison will never attempt to directly compare two tasks. + +The remaining challenges revolve around finding a pending task and making +changes to its priority or removing it entirely. Finding a task can be done +with a dictionary pointing to an entry in the queue. + +Removing the entry or changing its priority is more difficult because it would +break the heap structure invariants. So, a possible solution is to mark an +entry as invalid and optionally add a new entry with the revised priority:: + + pq = [] # the priority queue list + counter = itertools.count(1) # unique sequence count + task_finder = {} # mapping of tasks to entries + INVALID = 0 # mark an entry as deleted + + def add_task(priority, task, count=None): + if count is None: + count = next(counter) + entry = [priority, count, task] + task_finder[task] = entry + heappush(pq, entry) + + def get_top_priority(): + while True: + priority, count, task = heappop(pq) + del task_finder[task] + if count is not INVALID: + return task + + def delete_task(task): + entry = task_finder[task] + entry[1] = INVALID + + def reprioritize(priority, task): + entry = task_finder[task] + add_task(priority, task, entry[1]) + entry[1] = INVALID + + Theory ------ -- cgit v0.12