summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorRaymond Hettinger <python@rcn.com>2003-10-16 03:41:09 (GMT)
committerRaymond Hettinger <python@rcn.com>2003-10-16 03:41:09 (GMT)
commit42b1ba31aff86af6257a0fca455d5569bce9d8fc (patch)
tree0497ccd614d5ed8a4cfbb2bce4362f61faf0aeb1
parent90f7d254a9ae20d6d783138eb8567f39e6ff7e04 (diff)
downloadcpython-42b1ba31aff86af6257a0fca455d5569bce9d8fc.zip
cpython-42b1ba31aff86af6257a0fca455d5569bce9d8fc.tar.gz
cpython-42b1ba31aff86af6257a0fca455d5569bce9d8fc.tar.bz2
* list.sort() now supports three keyword arguments: cmp, key, and reverse.
key provides C support for the decorate-sort-undecorate pattern. reverse provide a stable sort of the list with the comparisions reversed. * Amended the docs to guarantee sort stability.
-rw-r--r--Doc/lib/libstdtypes.tex64
-rw-r--r--Lib/test/test_sort.py146
-rw-r--r--Misc/NEWS9
-rw-r--r--Objects/listobject.c233
4 files changed, 366 insertions, 86 deletions
diff --git a/Doc/lib/libstdtypes.tex b/Doc/lib/libstdtypes.tex
index c3a0305..62f1644 100644
--- a/Doc/lib/libstdtypes.tex
+++ b/Doc/lib/libstdtypes.tex
@@ -970,7 +970,8 @@ The following operations are defined on mutable sequence types (where
{same as \code{del \var{s}[\var{s}.index(\var{x})]}}{(4)}
\lineiii{\var{s}.reverse()}
{reverses the items of \var{s} in place}{(7)}
- \lineiii{\var{s}.sort(\optional{\var{cmpfunc=None}})}
+ \lineiii{\var{s}.sort(\optional{\var{cmp}=None\optional{, \var{key}=None
+ \optional{, \var{reverse}=False}}})}
{sort the items of \var{s} in place}{(7), (8), (9), (10)}
\end{tableiii}
\indexiv{operations on}{mutable}{sequence}{types}
@@ -1021,47 +1022,38 @@ Notes:
list. To remind you that they operate by side effect, they don't return
the sorted or reversed list.
-\item[(8)] The \method{sort()} method takes an optional argument
- specifying a comparison function of two arguments (list items) which
- should return a negative, zero or positive number depending on whether
- the first argument is considered smaller than, equal to, or larger
- than the second argument. Note that this slows the sorting process
- down considerably; for example to sort a list in reverse order it is much
- faster to call \method{sort()} followed by \method{reverse()}
- than to use \method{sort()} with a comparison function that
- reverses the ordering of the elements. Passing \constant{None} as the
- comparison function is semantically equivalent to calling
- \method{sort()} with no comparison function.
- \versionchanged[Support for \code{None} as an equivalent to omitting
- \var{cmpfunc} was added]{2.3}
+\item[(8)] The \method{sort()} method takes optional arguments for
+ controlling the comparisions.
- As an example of using the \var{cmpfunc} argument to the
- \method{sort()} method, consider sorting a list of sequences by the
- second element of that list:
+ \var{cmp} specifies a custom comparison function of two arguments
+ (list items) which should return a negative, zero or positive number
+ depending on whether the first argument is considered smaller than,
+ equal to, or larger than the second argument:
+ \samp{\var{cmp}=\keyword{lambda} \var{x},\var{y}:
+ \function{cmp}(x.lower(), y.lower())}
+
+ \var{key} specifies a function of one argument that is used to
+ extract a comparison key from each list element:
+ \samp{\var{cmp}=\function{str.lower}}
-\begin{verbatim}
-def mycmp(a, b):
- return cmp(a[1], b[1])
+ \var{reverse} is a boolean value. If set to \code{True}, then the
+ list elements are sorted as if each comparison were reversed.
-mylist.sort(mycmp)
-\end{verbatim}
+ In general, the \var{key} and \var{reverse} conversion processes are
+ much faster than specifying an equivalent \var{cmp} function. This is
+ because \var{cmp} is called multiple times for each list element while
+ \var{key} and \{reverse} touch each element only once.
- A more time-efficient approach for reasonably-sized data structures can
- often be used:
+ \versionchanged[Support for \code{None} as an equivalent to omitting
+ \var{cmpfunc} was added]{2.3}
-\begin{verbatim}
-tmplist = [(x[1], x) for x in mylist]
-tmplist.sort()
-mylist = [x for (key, x) in tmplist]
-\end{verbatim}
+ \versionadded[Support for \var{key} and \var{reverse} was added]{2.4}
-\item[(9)] Whether the \method{sort()} method is stable is not defined by
- the language (a sort is stable if it guarantees not to change the
- relative order of elements that compare equal). In the C
- implementation of Python, sorts were stable only by accident through
- Python 2.2. The C implementation of Python 2.3 introduced a stable
- \method{sort()} method, but code that intends to be portable across
- implementations and versions must not rely on stability.
+\item[(9)] Starting with Python 2.3, the \method{sort()} method is
+ guaranteed to be stable. A sort is stable if it guarantees not to
+ change the relative order of elements that compare equal --- this is
+ helpful for sorting in multiple passes (for example, sort by
+ department, then by salary grade).
\item[(10)] While a list is being sorted, the effect of attempting to
mutate, or even inspect, the list is undefined. The C implementation
diff --git a/Lib/test/test_sort.py b/Lib/test/test_sort.py
index 6c35f42..ca6fc801 100644
--- a/Lib/test/test_sort.py
+++ b/Lib/test/test_sort.py
@@ -116,56 +116,112 @@ for n in sizes:
x = [e for e, i in augmented] # a stable sort of s
check("stability", x, s)
-def bug453523():
- global nerrors
- from random import random
- # If this fails, the most likely outcome is a core dump.
- if verbose:
- print "Testing bug 453523 -- list.sort() crasher."
-
- class C:
- def __lt__(self, other):
- if L and random() < 0.75:
- pop()
- else:
- push(3)
- return random() < 0.5
-
- L = [C() for i in range(50)]
- pop = L.pop
- push = L.append
- try:
- L.sort()
- except ValueError:
- pass
- else:
- print " Mutation during list.sort() wasn't caught."
- nerrors += 1
+import unittest
+from test import test_support
+import sys
-bug453523()
+#==============================================================================
-def cmpNone():
- global nerrors
+class TestBugs(unittest.TestCase):
- if verbose:
- print "Testing None as a comparison function."
+ def test_bug453523(self):
+ # bug 453523 -- list.sort() crasher.
+ # If this fails, the most likely outcome is a core dump.
+ # Mutations during a list sort should raise a ValueError.
- L = range(50)
- random.shuffle(L)
- try:
+ class C:
+ def __lt__(self, other):
+ if L and random.random() < 0.75:
+ L.pop()
+ else:
+ L.append(3)
+ return random.random() < 0.5
+
+ L = [C() for i in range(50)]
+ self.assertRaises(ValueError, L.sort)
+
+ def test_cmpNone(self):
+ # Testing None as a comparison function.
+
+ L = range(50)
+ random.shuffle(L)
L.sort(None)
- except TypeError:
- print " Passing None as cmpfunc failed."
- nerrors += 1
- else:
- if L != range(50):
- print " Passing None as cmpfunc failed."
- nerrors += 1
+ self.assertEqual(L, range(50))
+
+#==============================================================================
+
+class TestDecorateSortUndecorate(unittest.TestCase):
+
+ def test_decorated(self):
+ data = 'The quick Brown fox Jumped over The lazy Dog'.split()
+ copy = data[:]
+ random.shuffle(data)
+ data.sort(key=str.lower)
+ copy.sort(cmp=lambda x,y: cmp(x.lower(), y.lower()))
+
+ def test_baddecorator(self):
+ data = 'The quick Brown fox Jumped over The lazy Dog'.split()
+ self.assertRaises(TypeError, data.sort, None, lambda x,y: 0)
+
+ def test_stability(self):
+ data = [(random.randrange(100), i) for i in xrange(200)]
+ copy = data[:]
+ data.sort(key=lambda (x,y): x) # sort on the random first field
+ copy.sort() # sort using both fields
+ self.assertEqual(data, copy) # should get the same result
+
+ def test_cmp_and_key_combination(self):
+ # Verify that the wrapper has been removed
+ def compare(x, y):
+ self.assertEqual(type(x), str)
+ self.assertEqual(type(x), str)
+ return cmp(x, y)
+ data = 'The quick Brown fox Jumped over The lazy Dog'.split()
+ data.sort(cmp=compare, key=str.lower)
+
+ def test_badcmp_with_key(self):
+ # Verify that the wrapper has been removed
+ data = 'The quick Brown fox Jumped over The lazy Dog'.split()
+ self.assertRaises(TypeError, data.sort, "bad", str.lower)
+
+ def test_reverse(self):
+ data = range(100)
+ random.shuffle(data)
+ data.sort(reverse=True)
+ self.assertEqual(data, range(99,-1,-1))
+
+ def test_reverse_stability(self):
+ data = [(random.randrange(100), i) for i in xrange(200)]
+ copy1 = data[:]
+ copy2 = data[:]
+ data.sort(cmp=lambda x,y: cmp(x[0],y[0]), reverse=True)
+ copy1.sort(cmp=lambda x,y: cmp(y[0],x[0]))
+ self.assertEqual(data, copy1)
+ copy2.sort(key=lambda x: x[0], reverse=True)
+ self.assertEqual(data, copy2)
+
+#==============================================================================
+
+def test_main(verbose=None):
+ test_classes = (
+ TestDecorateSortUndecorate,
+ TestBugs,
+ )
+
+ test_support.run_unittest(*test_classes)
+
+ # verify reference counting
+ if verbose and hasattr(sys, "gettotalrefcount"):
+ import gc
+ counts = [None] * 5
+ for i in xrange(len(counts)):
+ test_support.run_unittest(*test_classes)
+ gc.collect()
+ counts[i] = sys.gettotalrefcount()
+ print counts
+
+if __name__ == "__main__":
+ test_main(verbose=True)
-cmpNone()
-if nerrors:
- print "Test failed", nerrors
-elif verbose:
- print "Test passed -- no errors."
diff --git a/Misc/NEWS b/Misc/NEWS
index eb57e68..02f541f 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -12,6 +12,15 @@ What's New in Python 2.4 alpha 1?
Core and builtins
-----------------
+- list.sort() now supports three keyword arguments: cmp, key, and reverse.
+ The key argument can be a function of one argument that extracts a
+ comparison key from the original record: mylist.sort(key=str.lower).
+ The reverse argument is a boolean value and if True will change the
+ sort order as if the comparison arguments were reversed. In addition,
+ the documentation has been amended to provide a guarantee that all sorts
+ starting with Py2.3 are guaranteed to be stable (the relative order of
+ records with equal keys is unchanged).
+
- Added test whether wchar_t is signed or not. A signed wchar_t is not
usable as internal unicode type base for Py_UNICODE since the
unicode implementation assumes an unsigned type.
diff --git a/Objects/listobject.c b/Objects/listobject.c
index 727c9e6..2edbedf 100644
--- a/Objects/listobject.c
+++ b/Objects/listobject.c
@@ -1656,13 +1656,186 @@ merge_compute_minrun(int n)
return n + r;
}
+/* Special wrapper to support stable sorting using the decorate-sort-undecorate
+ pattern. Holds a key which is used for comparisions and the original record
+ which is returned during the undecorate phase. By exposing only the key
+ during comparisons, the underlying sort stability characteristics are left
+ unchanged. Also, if a custom comparison function is used, it will only see
+ the key instead of a full record. */
+
+typedef struct {
+ PyObject_HEAD
+ PyObject *key;
+ PyObject *value;
+} sortwrapperobject;
+
+static PyTypeObject sortwrapper_type;
+
+static PyObject *
+sortwrapper_richcompare(sortwrapperobject *a, sortwrapperobject *b, int op)
+{
+ if (!PyObject_TypeCheck(b, &sortwrapper_type)) {
+ PyErr_SetString(PyExc_TypeError,
+ "expected a sortwrapperobject");
+ return NULL;
+ }
+ return PyObject_RichCompare(a->key, b->key, op);
+}
+
+static void
+sortwrapper_dealloc(sortwrapperobject *so)
+{
+ Py_XDECREF(so->key);
+ Py_XDECREF(so->value);
+ PyObject_Del(so);
+}
+
+PyDoc_STRVAR(sortwrapper_doc, "Object wrapper with a custom sort key.");
+
+static PyTypeObject sortwrapper_type = {
+ PyObject_HEAD_INIT(&PyType_Type)
+ 0, /* ob_size */
+ "sortwrapper", /* tp_name */
+ sizeof(sortwrapperobject), /* tp_basicsize */
+ 0, /* tp_itemsize */
+ /* methods */
+ (destructor)sortwrapper_dealloc, /* tp_dealloc */
+ 0, /* tp_print */
+ 0, /* tp_getattr */
+ 0, /* tp_setattr */
+ 0, /* tp_compare */
+ 0, /* tp_repr */
+ 0, /* tp_as_number */
+ 0, /* tp_as_sequence */
+ 0, /* tp_as_mapping */
+ 0, /* tp_hash */
+ 0, /* tp_call */
+ 0, /* tp_str */
+ PyObject_GenericGetAttr, /* tp_getattro */
+ 0, /* tp_setattro */
+ 0, /* tp_as_buffer */
+ Py_TPFLAGS_DEFAULT |
+ Py_TPFLAGS_HAVE_RICHCOMPARE, /* tp_flags */
+ sortwrapper_doc, /* tp_doc */
+ 0, /* tp_traverse */
+ 0, /* tp_clear */
+ (richcmpfunc)sortwrapper_richcompare, /* tp_richcompare */
+};
+
+/* Returns a new reference to a sortwrapper.
+ Consumes the references to the two underlying objects. */
+
+static PyObject *
+build_sortwrapper(PyObject *key, PyObject *value)
+{
+ sortwrapperobject *so;
+
+ so = PyObject_New(sortwrapperobject, &sortwrapper_type);
+ if (so == NULL)
+ return NULL;
+ so->key = key;
+ so->value = value;
+ return (PyObject *)so;
+}
+
+/* Returns a new reference to the value underlying the wrapper. */
+static PyObject *
+sortwrapper_getvalue(PyObject *so)
+{
+ PyObject *value;
+
+ if (!PyObject_TypeCheck(so, &sortwrapper_type)) {
+ PyErr_SetString(PyExc_TypeError,
+ "expected a sortwrapperobject");
+ return NULL;
+ }
+ value = ((sortwrapperobject *)so)->value;
+ Py_INCREF(value);
+ return value;
+}
+
+/* Wrapper for user specified cmp functions in combination with a
+ specified key function. Makes sure the cmp function is presented
+ with the actual key instead of the sortwrapper */
+
+typedef struct {
+ PyObject_HEAD
+ PyObject *func;
+} cmpwrapperobject;
+
+static void
+cmpwrapper_dealloc(cmpwrapperobject *co)
+{
+ Py_XDECREF(co->func);
+ PyObject_Del(co);
+}
+
+static PyObject *
+cmpwrapper_call(cmpwrapperobject *co, PyObject *args, PyObject *kwds)
+{
+ PyObject *x, *y, *xx, *yy;
+
+ if (!PyArg_UnpackTuple(args, "", 2, 2, &x, &y))
+ return NULL;
+ if (!PyObject_TypeCheck(x, &sortwrapper_type) ||
+ !PyObject_TypeCheck(x, &sortwrapper_type)) {
+ PyErr_SetString(PyExc_TypeError,
+ "expected a sortwrapperobject");
+ return NULL;
+ }
+ xx = ((sortwrapperobject *)x)->key;
+ yy = ((sortwrapperobject *)y)->key;
+ return PyObject_CallFunctionObjArgs(co->func, xx, yy, NULL);
+}
+
+PyDoc_STRVAR(cmpwrapper_doc, "cmp() wrapper for sort with custom keys.");
+
+static PyTypeObject cmpwrapper_type = {
+ PyObject_HEAD_INIT(&PyType_Type)
+ 0, /* ob_size */
+ "cmpwrapper", /* tp_name */
+ sizeof(cmpwrapperobject), /* tp_basicsize */
+ 0, /* tp_itemsize */
+ /* methods */
+ (destructor)cmpwrapper_dealloc, /* tp_dealloc */
+ 0, /* tp_print */
+ 0, /* tp_getattr */
+ 0, /* tp_setattr */
+ 0, /* tp_compare */
+ 0, /* tp_repr */
+ 0, /* tp_as_number */
+ 0, /* tp_as_sequence */
+ 0, /* tp_as_mapping */
+ 0, /* tp_hash */
+ (ternaryfunc)cmpwrapper_call, /* tp_call */
+ 0, /* tp_str */
+ PyObject_GenericGetAttr, /* tp_getattro */
+ 0, /* tp_setattro */
+ 0, /* tp_as_buffer */
+ Py_TPFLAGS_DEFAULT, /* tp_flags */
+ cmpwrapper_doc, /* tp_doc */
+};
+
+static PyObject *
+build_cmpwrapper(PyObject *cmpfunc)
+{
+ cmpwrapperobject *co;
+
+ co = PyObject_New(cmpwrapperobject, &cmpwrapper_type);
+ if (co == NULL)
+ return NULL;
+ Py_INCREF(cmpfunc);
+ co->func = cmpfunc;
+ return (PyObject *)co;
+}
+
/* An adaptive, stable, natural mergesort. See listsort.txt.
* Returns Py_None on success, NULL on error. Even in case of error, the
* list will be some permutation of its input state (nothing is lost or
* duplicated).
*/
static PyObject *
-listsort(PyListObject *self, PyObject *args)
+listsort(PyListObject *self, PyObject *args, PyObject *kwds)
{
MergeState ms;
PyObject **lo, **hi;
@@ -1673,14 +1846,48 @@ listsort(PyListObject *self, PyObject *args)
PyObject **empty_ob_item;
PyObject *compare = NULL;
PyObject *result = NULL; /* guilty until proved innocent */
+ int reverse = 0;
+ PyObject *keyfunc = NULL;
+ int i, n;
+ PyObject *key, *value, *kvpair;
+ static char *kwlist[] = {"cmp", "key", "reverse", 0};
assert(self != NULL);
+ assert (PyList_Check(self));
if (args != NULL) {
- if (!PyArg_UnpackTuple(args, "sort", 0, 1, &compare))
+ if (!PyArg_ParseTupleAndKeywords(args, kwds, "|OOi:sort",
+ kwlist, &compare, &keyfunc, &reverse))
return NULL;
}
if (compare == Py_None)
compare = NULL;
+ if (keyfunc == Py_None)
+ keyfunc = NULL;
+ if (compare != NULL && keyfunc != NULL) {
+ compare = build_cmpwrapper(compare);
+ if (compare == NULL)
+ goto dsu_fail;
+ } else
+ Py_XINCREF(compare);
+
+ if (keyfunc != NULL) {
+ n = PyList_GET_SIZE(self);
+ for (i=0 ; i<n ; i++) {
+ value = PyList_GET_ITEM(self, i);
+ key = PyObject_CallFunctionObjArgs(keyfunc, value, NULL);
+ if (key == NULL)
+ goto dsu_fail;
+ kvpair = build_sortwrapper(key, value);
+ if (kvpair == NULL)
+ goto dsu_fail;
+ PyList_SET_ITEM(self, i, kvpair);
+ }
+ }
+
+ /* Reverse sort stability achieved by initialially reversing the list,
+ applying a stable forward sort, then reversing the final result. */
+ if (reverse && self->ob_size > 1)
+ reverse_slice(self->ob_item, self->ob_item + self->ob_size);
merge_init(&ms, compare);
@@ -1758,6 +1965,21 @@ fail:
self->ob_size = saved_ob_size;
self->ob_item = saved_ob_item;
merge_freemem(&ms);
+
+ if (keyfunc != NULL) {
+ for (i=0 ; i<n ; i++) {
+ kvpair = PyList_GET_ITEM(self, i);
+ value = sortwrapper_getvalue(kvpair);
+ PyList_SET_ITEM(self, i, value);
+ Py_DECREF(kvpair);
+ }
+ }
+
+ if (reverse && self->ob_size > 1)
+ reverse_slice(self->ob_item, self->ob_item + self->ob_size);
+
+dsu_fail:
+ Py_XDECREF(compare);
Py_XINCREF(result);
return result;
}
@@ -1771,7 +1993,7 @@ PyList_Sort(PyObject *v)
PyErr_BadInternalCall();
return -1;
}
- v = listsort((PyListObject *)v, (PyObject *)NULL);
+ v = listsort((PyListObject *)v, (PyObject *)NULL, (PyObject *)NULL);
if (v == NULL)
return -1;
Py_DECREF(v);
@@ -2111,7 +2333,8 @@ PyDoc_STRVAR(count_doc,
PyDoc_STRVAR(reverse_doc,
"L.reverse() -- reverse *IN PLACE*");
PyDoc_STRVAR(sort_doc,
-"L.sort(cmpfunc=None) -- stable sort *IN PLACE*; cmpfunc(x, y) -> -1, 0, 1");
+"L.sort(cmp=None, key=None, reverse=False) -- stable sort *IN PLACE*;\n\
+cmp(x, y) -> -1, 0, 1");
static PyMethodDef list_methods[] = {
{"append", (PyCFunction)listappend, METH_O, append_doc},
@@ -2122,7 +2345,7 @@ static PyMethodDef list_methods[] = {
{"index", (PyCFunction)listindex, METH_VARARGS, index_doc},
{"count", (PyCFunction)listcount, METH_O, count_doc},
{"reverse", (PyCFunction)listreverse, METH_NOARGS, reverse_doc},
- {"sort", (PyCFunction)listsort, METH_VARARGS, sort_doc},
+ {"sort", (PyCFunction)listsort, METH_VARARGS | METH_KEYWORDS, sort_doc},
{NULL, NULL} /* sentinel */
};