summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorRaymond Hettinger <python@rcn.com>2004-06-13 05:26:33 (GMT)
committerRaymond Hettinger <python@rcn.com>2004-06-13 05:26:33 (GMT)
commit2e3dfaf7070900c459d5879530dbdb3680f7fb9d (patch)
treec6d22113751862d95c53cd2a19fb42aa8d9f5b2c
parentc929766361b4e0ea5bf5dc625e326fb954956f0b (diff)
downloadcpython-2e3dfaf7070900c459d5879530dbdb3680f7fb9d.zip
cpython-2e3dfaf7070900c459d5879530dbdb3680f7fb9d.tar.gz
cpython-2e3dfaf7070900c459d5879530dbdb3680f7fb9d.tar.bz2
Install C version of heapq.nsmallest().
-rw-r--r--Doc/lib/libheapq.tex5
-rw-r--r--Lib/heapq.py2
-rw-r--r--Lib/test/test_heapq.py25
-rw-r--r--Modules/_heapqmodule.c167
4 files changed, 182 insertions, 17 deletions
diff --git a/Doc/lib/libheapq.tex b/Doc/lib/libheapq.tex
index 4585058..5a140ab 100644
--- a/Doc/lib/libheapq.tex
+++ b/Doc/lib/libheapq.tex
@@ -97,11 +97,6 @@ by \var{iterable}. Equivalent to: \code{sorted(iterable)[:n]}
\versionadded{2.4}
\end{funcdesc}
-Though the above functions appear symmetrical, they each have different
-speed and space requirements. In particular, \function{nsmallest()}
-operates on a full copy of the dataset. In contrast, \function{nlargest()}
-only requires storage space for \var{n} elements.
-
Both functions perform best for smaller values of \var{n}. For larger
values, it is more efficient to use the \function{sorted()} function. Also,
when \code{n==1}, it is more efficient to use the builtin \function{min()}
diff --git a/Lib/heapq.py b/Lib/heapq.py
index 65f4155..09f996a 100644
--- a/Lib/heapq.py
+++ b/Lib/heapq.py
@@ -300,7 +300,7 @@ def _siftup(heap, pos):
# If available, use C implementation
try:
- from _heapq import heappush, heappop, heapify, heapreplace
+ from _heapq import heappush, heappop, heapify, heapreplace, nlargest, nsmallest
except ImportError:
pass
diff --git a/Lib/test/test_heapq.py b/Lib/test/test_heapq.py
index 1cdaabe..b6fec9f 100644
--- a/Lib/test/test_heapq.py
+++ b/Lib/test/test_heapq.py
@@ -4,6 +4,7 @@ from heapq import heappush, heappop, heapify, heapreplace, nlargest, nsmallest
import random
import unittest
from test import test_support
+import sys
def heapiter(heap):
@@ -91,16 +92,28 @@ class TestHeap(unittest.TestCase):
def test_nsmallest(self):
data = [random.randrange(2000) for i in range(1000)]
- self.assertEqual(nsmallest(data, 400), sorted(data)[:400])
- self.assertEqual(nsmallest(data, 50), sorted(data)[:50])
+ for i in (0, 1, 2, 10, 100, 400, 999, 1000, 1100):
+ self.assertEqual(nsmallest(data, i), sorted(data)[:i])
def test_largest(self):
data = [random.randrange(2000) for i in range(1000)]
- self.assertEqual(nlargest(data, 400), sorted(data, reverse=True)[:400])
+ for i in (0, 1, 2, 10, 100, 400, 999, 1000, 1100):
+ self.assertEqual(nlargest(data, i), sorted(data, reverse=True)[:i])
-def test_main():
- test_support.run_unittest(TestHeap)
+def test_main(verbose=None):
+ test_classes = [TestHeap]
+ test_support.run_unittest(*test_classes)
+
+ # verify reference counting
+ if verbose and hasattr(sys, "gettotalrefcount"):
+ import gc
+ counts = [None] * 5
+ for i in xrange(len(counts)):
+ test_support.run_unittest(*test_classes)
+ gc.collect()
+ counts[i] = sys.gettotalrefcount()
+ print counts
if __name__ == "__main__":
- test_main()
+ test_main(verbose=True)
diff --git a/Modules/_heapqmodule.c b/Modules/_heapqmodule.c
index 8fe742f..61ee413 100644
--- a/Modules/_heapqmodule.c
+++ b/Modules/_heapqmodule.c
@@ -219,7 +219,7 @@ PyDoc_STRVAR(heapify_doc,
static PyObject *
nlargest(PyObject *self, PyObject *args)
{
- PyObject *heap=NULL, *elem, *rv, *iterable, *sol, *it, *oldelem;
+ PyObject *heap=NULL, *elem, *iterable, *sol, *it, *oldelem;
int i, n;
if (!PyArg_ParseTuple(args, "Oi:nlargest", &iterable, &n))
@@ -246,10 +246,9 @@ nlargest(PyObject *self, PyObject *args)
if (PyList_GET_SIZE(heap) == 0)
goto sortit;
- rv = heapify(self, heap);
- if (rv == NULL)
- goto fail;
- Py_DECREF(rv);
+ for (i=n/2-1 ; i>=0 ; i--)
+ if(_siftup((PyListObject *)heap, i) == -1)
+ goto fail;
sol = PyList_GET_ITEM(heap, 0);
while (1) {
@@ -290,6 +289,162 @@ PyDoc_STRVAR(nlargest_doc,
\n\
Equivalent to: sorted(iterable, reverse=True)[:n]\n");
+static int
+_siftdownmax(PyListObject *heap, int startpos, int pos)
+{
+ PyObject *newitem, *parent;
+ int cmp, parentpos;
+
+ assert(PyList_Check(heap));
+ if (pos >= PyList_GET_SIZE(heap)) {
+ PyErr_SetString(PyExc_IndexError, "index out of range");
+ return -1;
+ }
+
+ newitem = PyList_GET_ITEM(heap, pos);
+ Py_INCREF(newitem);
+ /* Follow the path to the root, moving parents down until finding
+ a place newitem fits. */
+ while (pos > startpos){
+ parentpos = (pos - 1) >> 1;
+ parent = PyList_GET_ITEM(heap, parentpos);
+ cmp = PyObject_RichCompareBool(newitem, parent, Py_LE);
+ if (cmp == -1)
+ return -1;
+ if (cmp == 1)
+ break;
+ Py_INCREF(parent);
+ Py_DECREF(PyList_GET_ITEM(heap, pos));
+ PyList_SET_ITEM(heap, pos, parent);
+ pos = parentpos;
+ }
+ Py_DECREF(PyList_GET_ITEM(heap, pos));
+ PyList_SET_ITEM(heap, pos, newitem);
+ return 0;
+}
+
+static int
+_siftupmax(PyListObject *heap, int pos)
+{
+ int startpos, endpos, childpos, rightpos;
+ int cmp;
+ PyObject *newitem, *tmp;
+
+ assert(PyList_Check(heap));
+ endpos = PyList_GET_SIZE(heap);
+ startpos = pos;
+ if (pos >= endpos) {
+ PyErr_SetString(PyExc_IndexError, "index out of range");
+ return -1;
+ }
+ newitem = PyList_GET_ITEM(heap, pos);
+ Py_INCREF(newitem);
+
+ /* Bubble up the smaller child until hitting a leaf. */
+ childpos = 2*pos + 1; /* leftmost child position */
+ while (childpos < endpos) {
+ /* Set childpos to index of smaller child. */
+ rightpos = childpos + 1;
+ if (rightpos < endpos) {
+ cmp = PyObject_RichCompareBool(
+ PyList_GET_ITEM(heap, childpos),
+ PyList_GET_ITEM(heap, rightpos),
+ Py_LE);
+ if (cmp == -1)
+ return -1;
+ if (cmp == 1)
+ childpos = rightpos;
+ }
+ /* Move the smaller child up. */
+ tmp = PyList_GET_ITEM(heap, childpos);
+ Py_INCREF(tmp);
+ Py_DECREF(PyList_GET_ITEM(heap, pos));
+ PyList_SET_ITEM(heap, pos, tmp);
+ pos = childpos;
+ childpos = 2*pos + 1;
+ }
+
+ /* The leaf at pos is empty now. Put newitem there, and and bubble
+ it up to its final resting place (by sifting its parents down). */
+ Py_DECREF(PyList_GET_ITEM(heap, pos));
+ PyList_SET_ITEM(heap, pos, newitem);
+ return _siftdownmax(heap, startpos, pos);
+}
+
+static PyObject *
+nsmallest(PyObject *self, PyObject *args)
+{
+ PyObject *heap=NULL, *elem, *iterable, *los, *it, *oldelem;
+ int i, n;
+
+ if (!PyArg_ParseTuple(args, "Oi:nsmallest", &iterable, &n))
+ return NULL;
+
+ it = PyObject_GetIter(iterable);
+ if (it == NULL)
+ return NULL;
+
+ heap = PyList_New(0);
+ if (it == NULL)
+ goto fail;
+
+ for (i=0 ; i<n ; i++ ){
+ elem = PyIter_Next(it);
+ if (elem == NULL)
+ goto sortit;
+ if (PyList_Append(heap, elem) == -1) {
+ Py_DECREF(elem);
+ goto fail;
+ }
+ Py_DECREF(elem);
+ }
+ n = PyList_GET_SIZE(heap);
+ if (n == 0)
+ goto sortit;
+
+ for (i=n/2-1 ; i>=0 ; i--)
+ if(_siftupmax((PyListObject *)heap, i) == -1)
+ goto fail;
+
+ los = PyList_GET_ITEM(heap, 0);
+ while (1) {
+ elem = PyIter_Next(it);
+ if (elem == NULL) {
+ if (PyErr_Occurred())
+ goto fail;
+ else
+ goto sortit;
+ }
+ if (PyObject_RichCompareBool(los, elem, Py_LE)) {
+ Py_DECREF(elem);
+ continue;
+ }
+
+ oldelem = PyList_GET_ITEM(heap, 0);
+ PyList_SET_ITEM(heap, 0, elem);
+ Py_DECREF(oldelem);
+ if (_siftupmax((PyListObject *)heap, 0) == -1)
+ goto fail;
+ los = PyList_GET_ITEM(heap, 0);
+ }
+
+sortit:
+ Py_DECREF(it);
+ if (PyList_Sort(heap) == -1)
+ goto fail;
+ return heap;
+
+fail:
+ Py_DECREF(it);
+ Py_XDECREF(heap);
+ return NULL;
+}
+
+PyDoc_STRVAR(nsmallest_doc,
+"Find the n smallest elements in a dataset.\n\
+\n\
+Equivalent to: sorted(iterable)[:n]\n");
+
static PyMethodDef heapq_methods[] = {
{"heappush", (PyCFunction)heappush,
METH_VARARGS, heappush_doc},
@@ -301,6 +456,8 @@ static PyMethodDef heapq_methods[] = {
METH_O, heapify_doc},
{"nlargest", (PyCFunction)nlargest,
METH_VARARGS, nlargest_doc},
+ {"nsmallest", (PyCFunction)nsmallest,
+ METH_VARARGS, nsmallest_doc},
{NULL, NULL} /* sentinel */
};