summaryrefslogtreecommitdiffstats
path: root/Lib/heapq.py
diff options
context:
space:
mode:
authorRaymond Hettinger <python@rcn.com>2004-06-12 08:33:36 (GMT)
committerRaymond Hettinger <python@rcn.com>2004-06-12 08:33:36 (GMT)
commitb25aa36f83a3cd2200f2bc479e594458e27794a3 (patch)
treeafdd8051a7b152562b6a4a006fe7fe814113021e /Lib/heapq.py
parent2e6694086f07d293d1907891f68cec6076d44f73 (diff)
downloadcpython-b25aa36f83a3cd2200f2bc479e594458e27794a3.zip
cpython-b25aa36f83a3cd2200f2bc479e594458e27794a3.tar.gz
cpython-b25aa36f83a3cd2200f2bc479e594458e27794a3.tar.bz2
Improve the memory performance and speed of heapq.nsmallest() by using
an alternate algorithm when the number of selected items is small relative to the full iterable.
Diffstat (limited to 'Lib/heapq.py')
-rw-r--r--Lib/heapq.py23
1 files changed, 23 insertions, 0 deletions
diff --git a/Lib/heapq.py b/Lib/heapq.py
index d1aad98..65f4155 100644
--- a/Lib/heapq.py
+++ b/Lib/heapq.py
@@ -130,6 +130,7 @@ __all__ = ['heappush', 'heappop', 'heapify', 'heapreplace', 'nlargest',
'nsmallest']
from itertools import islice, repeat
+import bisect
def heappush(heap, item):
"""Push item onto heap, maintaining the heap invariant."""
@@ -196,6 +197,28 @@ def nsmallest(iterable, n):
Equivalent to: sorted(iterable)[:n]
"""
+ if hasattr(iterable, '__len__') and n * 10 <= len(iterable):
+ # For smaller values of n, the bisect method is faster than a minheap.
+ # It is also memory efficient, consuming only n elements of space.
+ it = iter(iterable)
+ result = sorted(islice(it, 0, n))
+ if not result:
+ return result
+ insort = bisect.insort
+ pop = result.pop
+ los = result[-1] # los --> Largest of the nsmallest
+ for elem in it:
+ if los <= elem:
+ continue
+ insort(result, elem)
+ pop()
+ los = result[-1]
+ return result
+ # An alternative approach manifests the whole iterable in memory but
+ # saves comparisons by heapifying all at once. Also, saves time
+ # over bisect.insort() which has O(n) data movement time for every
+ # insertion. Finding the n smallest of an m length iterable requires
+ # O(m) + O(n log m) comparisons.
h = list(iterable)
heapify(h)
return map(heappop, repeat(h, min(n, len(h))))