diff options
author | Raymond Hettinger <python@rcn.com> | 2004-06-12 08:33:36 (GMT) |
---|---|---|
committer | Raymond Hettinger <python@rcn.com> | 2004-06-12 08:33:36 (GMT) |
commit | b25aa36f83a3cd2200f2bc479e594458e27794a3 (patch) | |
tree | afdd8051a7b152562b6a4a006fe7fe814113021e /Lib | |
parent | 2e6694086f07d293d1907891f68cec6076d44f73 (diff) | |
download | cpython-b25aa36f83a3cd2200f2bc479e594458e27794a3.zip cpython-b25aa36f83a3cd2200f2bc479e594458e27794a3.tar.gz cpython-b25aa36f83a3cd2200f2bc479e594458e27794a3.tar.bz2 |
Improve the memory performance and speed of heapq.nsmallest() by using
an alternate algorithm when the number of selected items is small
relative to the full iterable.
Diffstat (limited to 'Lib')
-rw-r--r-- | Lib/heapq.py | 23 | ||||
-rw-r--r-- | Lib/test/test_heapq.py | 1 |
2 files changed, 24 insertions, 0 deletions
diff --git a/Lib/heapq.py b/Lib/heapq.py index d1aad98..65f4155 100644 --- a/Lib/heapq.py +++ b/Lib/heapq.py @@ -130,6 +130,7 @@ __all__ = ['heappush', 'heappop', 'heapify', 'heapreplace', 'nlargest', 'nsmallest'] from itertools import islice, repeat +import bisect def heappush(heap, item): """Push item onto heap, maintaining the heap invariant.""" @@ -196,6 +197,28 @@ def nsmallest(iterable, n): Equivalent to: sorted(iterable)[:n] """ + if hasattr(iterable, '__len__') and n * 10 <= len(iterable): + # For smaller values of n, the bisect method is faster than a minheap. + # It is also memory efficient, consuming only n elements of space. + it = iter(iterable) + result = sorted(islice(it, 0, n)) + if not result: + return result + insort = bisect.insort + pop = result.pop + los = result[-1] # los --> Largest of the nsmallest + for elem in it: + if los <= elem: + continue + insort(result, elem) + pop() + los = result[-1] + return result + # An alternative approach manifests the whole iterable in memory but + # saves comparisons by heapifying all at once. Also, saves time + # over bisect.insort() which has O(n) data movement time for every + # insertion. Finding the n smallest of an m length iterable requires + # O(m) + O(n log m) comparisons. h = list(iterable) heapify(h) return map(heappop, repeat(h, min(n, len(h)))) diff --git a/Lib/test/test_heapq.py b/Lib/test/test_heapq.py index 944b17d..1cdaabe 100644 --- a/Lib/test/test_heapq.py +++ b/Lib/test/test_heapq.py @@ -92,6 +92,7 @@ class TestHeap(unittest.TestCase): def test_nsmallest(self): data = [random.randrange(2000) for i in range(1000)] self.assertEqual(nsmallest(data, 400), sorted(data)[:400]) + self.assertEqual(nsmallest(data, 50), sorted(data)[:50]) def test_largest(self): data = [random.randrange(2000) for i in range(1000)] |