summaryrefslogtreecommitdiffstats
path: root/Lib
diff options
context:
space:
mode:
authorRaymond Hettinger <python@rcn.com>2004-06-12 08:33:36 (GMT)
committerRaymond Hettinger <python@rcn.com>2004-06-12 08:33:36 (GMT)
commitb25aa36f83a3cd2200f2bc479e594458e27794a3 (patch)
treeafdd8051a7b152562b6a4a006fe7fe814113021e /Lib
parent2e6694086f07d293d1907891f68cec6076d44f73 (diff)
downloadcpython-b25aa36f83a3cd2200f2bc479e594458e27794a3.zip
cpython-b25aa36f83a3cd2200f2bc479e594458e27794a3.tar.gz
cpython-b25aa36f83a3cd2200f2bc479e594458e27794a3.tar.bz2
Improve the memory performance and speed of heapq.nsmallest() by using
an alternate algorithm when the number of selected items is small relative to the full iterable.
Diffstat (limited to 'Lib')
-rw-r--r--Lib/heapq.py23
-rw-r--r--Lib/test/test_heapq.py1
2 files changed, 24 insertions, 0 deletions
diff --git a/Lib/heapq.py b/Lib/heapq.py
index d1aad98..65f4155 100644
--- a/Lib/heapq.py
+++ b/Lib/heapq.py
@@ -130,6 +130,7 @@ __all__ = ['heappush', 'heappop', 'heapify', 'heapreplace', 'nlargest',
'nsmallest']
from itertools import islice, repeat
+import bisect
def heappush(heap, item):
"""Push item onto heap, maintaining the heap invariant."""
@@ -196,6 +197,28 @@ def nsmallest(iterable, n):
Equivalent to: sorted(iterable)[:n]
"""
+ if hasattr(iterable, '__len__') and n * 10 <= len(iterable):
+ # For smaller values of n, the bisect method is faster than a minheap.
+ # It is also memory efficient, consuming only n elements of space.
+ it = iter(iterable)
+ result = sorted(islice(it, 0, n))
+ if not result:
+ return result
+ insort = bisect.insort
+ pop = result.pop
+ los = result[-1] # los --> Largest of the nsmallest
+ for elem in it:
+ if los <= elem:
+ continue
+ insort(result, elem)
+ pop()
+ los = result[-1]
+ return result
+ # An alternative approach manifests the whole iterable in memory but
+ # saves comparisons by heapifying all at once. Also, saves time
+ # over bisect.insort() which has O(n) data movement time for every
+ # insertion. Finding the n smallest of an m length iterable requires
+ # O(m) + O(n log m) comparisons.
h = list(iterable)
heapify(h)
return map(heappop, repeat(h, min(n, len(h))))
diff --git a/Lib/test/test_heapq.py b/Lib/test/test_heapq.py
index 944b17d..1cdaabe 100644
--- a/Lib/test/test_heapq.py
+++ b/Lib/test/test_heapq.py
@@ -92,6 +92,7 @@ class TestHeap(unittest.TestCase):
def test_nsmallest(self):
data = [random.randrange(2000) for i in range(1000)]
self.assertEqual(nsmallest(data, 400), sorted(data)[:400])
+ self.assertEqual(nsmallest(data, 50), sorted(data)[:50])
def test_largest(self):
data = [random.randrange(2000) for i in range(1000)]