* list.sort() now supports three keyword arguments: cmp, key, and reverse.

key provides C support for the decorate-sort-undecorate pattern. reverse provide a stable sort of the list with the comparisions reversed. * Amended the docs to guarantee sort stability.
author: Raymond Hettinger <python@rcn.com> 2003-10-16 03:41:09 (GMT)
committer: Raymond Hettinger <python@rcn.com> 2003-10-16 03:41:09 (GMT)
commit: 42b1ba31aff86af6257a0fca455d5569bce9d8fc (patch)
tree: 0497ccd614d5ed8a4cfbb2bce4362f61faf0aeb1
parent: 90f7d254a9ae20d6d783138eb8567f39e6ff7e04 (diff)
download: cpython-42b1ba31aff86af6257a0fca455d5569bce9d8fc.zip
cpython-42b1ba31aff86af6257a0fca455d5569bce9d8fc.tar.gz
cpython-42b1ba31aff86af6257a0fca455d5569bce9d8fc.tar.bz2
4 files changed, 366 insertions, 86 deletions
diff --git a/Doc/lib/libstdtypes.tex b/Doc/lib/libstdtypes.tex
index c3a0305..62f1644 100644
--- a/Doc/lib/libstdtypes.tex
+++ b/Doc/lib/libstdtypes.tex
@@ -970,7 +970,8 @@ The following operations are defined on mutable sequence types (where
 	{same as \code{del \var{s}[\var{s}.index(\var{x})]}}{(4)}
   \lineiii{\var{s}.reverse()}
 	{reverses the items of \var{s} in place}{(7)}
-  \lineiii{\var{s}.sort(\optional{\var{cmpfunc=None}})}
+  \lineiii{\var{s}.sort(\optional{\var{cmp}=None\optional{, \var{key}=None
+                        \optional{, \var{reverse}=False}}})}
 	{sort the items of \var{s} in place}{(7), (8), (9), (10)}
 \end{tableiii}
 \indexiv{operations on}{mutable}{sequence}{types}
@@ -1021,47 +1022,38 @@ Notes:
   list.  To remind you that they operate by side effect, they don't return
   the sorted or reversed list.
 
-\item[(8)] The \method{sort()} method takes an optional argument
-  specifying a comparison function of two arguments (list items) which
-  should return a negative, zero or positive number depending on whether
-  the first argument is considered smaller than, equal to, or larger
-  than the second argument.  Note that this slows the sorting process
-  down considerably; for example to sort a list in reverse order it is much
-  faster to call \method{sort()} followed by \method{reverse()}
-  than to use \method{sort()} with a comparison function that
-  reverses the ordering of the elements.  Passing \constant{None} as the
-  comparison function is semantically equivalent to calling
-  \method{sort()} with no comparison function.
-  \versionchanged[Support for \code{None} as an equivalent to omitting
-  \var{cmpfunc} was added]{2.3}
+\item[(8)] The \method{sort()} method takes optional arguments for
+  controlling the comparisions.
 
-  As an example of using the \var{cmpfunc} argument to the
-  \method{sort()} method, consider sorting a list of sequences by the
-  second element of that list:
+  \var{cmp} specifies a custom comparison function of two arguments
+     (list items) which should return a negative, zero or positive number
+     depending on whether the first argument is considered smaller than,
+     equal to, or larger than the second argument:
+     \samp{\var{cmp}=\keyword{lambda} \var{x},\var{y}:
+     \function{cmp}(x.lower(), y.lower())}
+     
+  \var{key} specifies a function of one argument that is used to
+     extract a comparison key from each list element:
+     \samp{\var{cmp}=\function{str.lower}}
 
-\begin{verbatim}
-def mycmp(a, b):
-    return cmp(a[1], b[1])
+  \var{reverse} is a boolean value.  If set to \code{True}, then the
+     list elements are sorted as if each comparison were reversed.
 
-mylist.sort(mycmp)
-\end{verbatim}
+  In general, the \var{key} and \var{reverse} conversion processes are
+  much faster than specifying an equivalent \var{cmp} function.  This is
+  because \var{cmp} is called multiple times for each list element while
+  \var{key} and \{reverse} touch each element only once.
 
-  A more time-efficient approach for reasonably-sized data structures can
-  often be used:
+  \versionchanged[Support for \code{None} as an equivalent to omitting
+  \var{cmpfunc} was added]{2.3}
 
-\begin{verbatim}
-tmplist = [(x[1], x) for x in mylist]
-tmplist.sort()
-mylist = [x for (key, x) in tmplist]
-\end{verbatim}
+  \versionadded[Support for \var{key} and \var{reverse} was added]{2.4}
 
-\item[(9)] Whether the \method{sort()} method is stable is not defined by
-  the language (a sort is stable if it guarantees not to change the
-  relative order of elements that compare equal).  In the C
-  implementation of Python, sorts were stable only by accident through
-  Python 2.2.  The C implementation of Python 2.3 introduced a stable
-  \method{sort()} method, but code that intends to be portable across
-  implementations and versions must not rely on stability.
+\item[(9)]  Starting with Python 2.3, the \method{sort()} method is
+  guaranteed to be stable.  A sort is stable if it guarantees not to
+  change the relative order of elements that compare equal --- this is
+  helpful for sorting in multiple passes (for example, sort by
+  department, then by salary grade).
 
 \item[(10)] While a list is being sorted, the effect of attempting to
   mutate, or even inspect, the list is undefined.  The C implementation
diff --git a/Lib/test/test_sort.py b/Lib/test/test_sort.py
index 6c35f42..ca6fc801 100644
--- a/Lib/test/test_sort.py
+++ b/Lib/test/test_sort.py
@@ -116,56 +116,112 @@ for n in sizes:
     x = [e for e, i in augmented] # a stable sort of s
     check("stability", x, s)
 
-def bug453523():
-    global nerrors
-    from random import random
 
-    # If this fails, the most likely outcome is a core dump.
-    if verbose:
-        print "Testing bug 453523 -- list.sort() crasher."
-
-    class C:
-        def __lt__(self, other):
-            if L and random() < 0.75:
-                pop()
-            else:
-                push(3)
-            return random() < 0.5
-
-    L = [C() for i in range(50)]
-    pop = L.pop
-    push = L.append
-    try:
-        L.sort()
-    except ValueError:
-        pass
-    else:
-        print "    Mutation during list.sort() wasn't caught."
-        nerrors += 1
+import unittest
+from test import test_support
+import sys
 
-bug453523()
+#==============================================================================
 
-def cmpNone():
-    global nerrors
+class TestBugs(unittest.TestCase):
 
-    if verbose:
-        print "Testing None as a comparison function."
+    def test_bug453523(self):
+        # bug 453523 -- list.sort() crasher.
+        # If this fails, the most likely outcome is a core dump.
+        # Mutations during a list sort should raise a ValueError.
 
-    L = range(50)
-    random.shuffle(L)
-    try:
+        class C:
+            def __lt__(self, other):
+                if L and random.random() < 0.75:
+                    L.pop()
+                else:
+                    L.append(3)
+                return random.random() < 0.5
+
+        L = [C() for i in range(50)]
+        self.assertRaises(ValueError, L.sort)
+
+    def test_cmpNone(self):
+        # Testing None as a comparison function.
+
+        L = range(50)
+        random.shuffle(L)
         L.sort(None)
-    except TypeError:
-        print "    Passing None as cmpfunc failed."
-        nerrors += 1
-    else:
-        if L != range(50):
-            print "    Passing None as cmpfunc failed."
-            nerrors += 1
+        self.assertEqual(L, range(50))
+
+#==============================================================================
+
+class TestDecorateSortUndecorate(unittest.TestCase):
+
+    def test_decorated(self):
+        data = 'The quick Brown fox Jumped over The lazy Dog'.split()
+        copy = data[:]
+        random.shuffle(data)
+        data.sort(key=str.lower)
+        copy.sort(cmp=lambda x,y: cmp(x.lower(), y.lower()))
+
+    def test_baddecorator(self):
+        data = 'The quick Brown fox Jumped over The lazy Dog'.split()
+        self.assertRaises(TypeError, data.sort, None, lambda x,y: 0)
+
+    def test_stability(self):
+        data = [(random.randrange(100), i) for i in xrange(200)]
+        copy = data[:]
+        data.sort(key=lambda (x,y): x)  # sort on the random first field
+        copy.sort()                     # sort using both fields
+        self.assertEqual(data, copy)    # should get the same result
+
+    def test_cmp_and_key_combination(self):
+        # Verify that the wrapper has been removed
+        def compare(x, y):
+            self.assertEqual(type(x), str)
+            self.assertEqual(type(x), str)
+            return cmp(x, y)
+        data = 'The quick Brown fox Jumped over The lazy Dog'.split()
+        data.sort(cmp=compare, key=str.lower)
+
+    def test_badcmp_with_key(self):
+        # Verify that the wrapper has been removed
+        data = 'The quick Brown fox Jumped over The lazy Dog'.split()
+        self.assertRaises(TypeError, data.sort, "bad", str.lower)
+
+    def test_reverse(self):
+        data = range(100)
+        random.shuffle(data)
+        data.sort(reverse=True)
+        self.assertEqual(data, range(99,-1,-1))
+
+    def test_reverse_stability(self):
+        data = [(random.randrange(100), i) for i in xrange(200)]
+        copy1 = data[:]
+        copy2 = data[:]
+        data.sort(cmp=lambda x,y: cmp(x[0],y[0]), reverse=True)
+        copy1.sort(cmp=lambda x,y: cmp(y[0],x[0]))
+        self.assertEqual(data, copy1)
+        copy2.sort(key=lambda x: x[0], reverse=True)
+        self.assertEqual(data, copy2)
+
+#==============================================================================
+
+def test_main(verbose=None):
+    test_classes = (
+        TestDecorateSortUndecorate,
+        TestBugs,
+    )
+
+    test_support.run_unittest(*test_classes)
+
+    # verify reference counting
+    if verbose and hasattr(sys, "gettotalrefcount"):
+        import gc
+        counts = [None] * 5
+        for i in xrange(len(counts)):
+            test_support.run_unittest(*test_classes)
+            gc.collect()
+            counts[i] = sys.gettotalrefcount()
+        print counts
+
+if __name__ == "__main__":
+    test_main(verbose=True)
 
-cmpNone()
 
-if nerrors:
-    print "Test failed", nerrors
-elif verbose:
-    print "Test passed -- no errors."
diff --git a/Misc/NEWS b/Misc/NEWS
index eb57e68..02f541f 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -12,6 +12,15 @@ What's New in Python 2.4 alpha 1?
 Core and builtins
 -----------------
 
+- list.sort() now supports three keyword arguments:  cmp, key, and reverse.
+  The key argument can be a function of one argument that extracts a
+  comparison key from the original record:  mylist.sort(key=str.lower).
+  The reverse argument is a boolean value and if True will change the
+  sort order as if the comparison arguments were reversed.  In addition,
+  the documentation has been amended to provide a guarantee that all sorts
+  starting with Py2.3 are guaranteed to be stable (the relative order of
+  records with equal keys is unchanged).
+
 - Added test whether wchar_t is signed or not. A signed wchar_t is not
   usable as internal unicode type base for Py_UNICODE since the
   unicode implementation assumes an unsigned type.
diff --git a/Objects/listobject.c b/Objects/listobject.c
index 727c9e6..2edbedf 100644
--- a/Objects/listobject.c
+++ b/Objects/listobject.c
@@ -1656,13 +1656,186 @@ merge_compute_minrun(int n)
 	return n + r;
 }
 
+/* Special wrapper to support stable sorting using the decorate-sort-undecorate
+   pattern.  Holds a key which is used for comparisions and the original record
+   which is returned during the undecorate phase.  By exposing only the key 
+   during comparisons, the underlying sort stability characteristics are left 
+   unchanged.  Also, if a custom comparison function is used, it will only see 
+   the key instead of a full record. */
+
+typedef struct {
+	PyObject_HEAD
+	PyObject *key;
+	PyObject *value;
+} sortwrapperobject;
+
+static PyTypeObject sortwrapper_type;
+
+static PyObject *
+sortwrapper_richcompare(sortwrapperobject *a, sortwrapperobject *b, int op)
+{
+	if (!PyObject_TypeCheck(b, &sortwrapper_type)) {
+		PyErr_SetString(PyExc_TypeError, 
+			"expected a sortwrapperobject");
+		return NULL;
+	}
+	return PyObject_RichCompare(a->key, b->key, op);
+}
+
+static void
+sortwrapper_dealloc(sortwrapperobject *so)
+{
+	Py_XDECREF(so->key);
+	Py_XDECREF(so->value);
+	PyObject_Del(so);
+}
+
+PyDoc_STRVAR(sortwrapper_doc, "Object wrapper with a custom sort key.");
+
+static PyTypeObject sortwrapper_type = {
+	PyObject_HEAD_INIT(&PyType_Type)
+	0,					/* ob_size */
+	"sortwrapper",				/* tp_name */
+	sizeof(sortwrapperobject),		/* tp_basicsize */
+	0,					/* tp_itemsize */
+	/* methods */
+	(destructor)sortwrapper_dealloc,	/* tp_dealloc */
+	0,					/* tp_print */
+	0,					/* tp_getattr */
+	0,					/* tp_setattr */
+	0,					/* tp_compare */
+	0,					/* tp_repr */
+	0,					/* tp_as_number */
+	0,					/* tp_as_sequence */
+	0,					/* tp_as_mapping */
+	0,					/* tp_hash */
+	0,					/* tp_call */
+	0,					/* tp_str */
+	PyObject_GenericGetAttr,		/* tp_getattro */
+	0,					/* tp_setattro */
+	0,					/* tp_as_buffer */
+	Py_TPFLAGS_DEFAULT | 
+	Py_TPFLAGS_HAVE_RICHCOMPARE, 		/* tp_flags */
+	sortwrapper_doc,			/* tp_doc */
+	0,					/* tp_traverse */
+	0,					/* tp_clear */
+	(richcmpfunc)sortwrapper_richcompare,	/* tp_richcompare */
+};
+
+/* Returns a new reference to a sortwrapper.
+   Consumes the references to the two underlying objects. */
+
+static PyObject *
+build_sortwrapper(PyObject *key, PyObject *value)
+{
+	sortwrapperobject *so;
+	
+	so = PyObject_New(sortwrapperobject, &sortwrapper_type);
+	if (so == NULL)
+		return NULL;
+	so->key = key;
+	so->value = value;
+	return (PyObject *)so;
+}
+
+/* Returns a new reference to the value underlying the wrapper. */
+static PyObject *
+sortwrapper_getvalue(PyObject *so)
+{
+	PyObject *value;
+
+	if (!PyObject_TypeCheck(so, &sortwrapper_type)) {
+		PyErr_SetString(PyExc_TypeError, 
+			"expected a sortwrapperobject");
+		return NULL;
+	}
+	value = ((sortwrapperobject *)so)->value;
+	Py_INCREF(value);
+	return value;
+}
+
+/* Wrapper for user specified cmp functions in combination with a
+   specified key function.  Makes sure the cmp function is presented
+   with the actual key instead of the sortwrapper */
+
+typedef struct {
+	PyObject_HEAD
+	PyObject *func;
+} cmpwrapperobject;
+
+static void
+cmpwrapper_dealloc(cmpwrapperobject *co)
+{
+	Py_XDECREF(co->func);
+	PyObject_Del(co);
+}
+
+static PyObject *
+cmpwrapper_call(cmpwrapperobject *co, PyObject *args, PyObject *kwds)
+{
+	PyObject *x, *y, *xx, *yy;
+
+	if (!PyArg_UnpackTuple(args, "", 2, 2, &x, &y))
+		return NULL;
+	if (!PyObject_TypeCheck(x, &sortwrapper_type) ||
+	    !PyObject_TypeCheck(x, &sortwrapper_type)) {
+		PyErr_SetString(PyExc_TypeError, 
+			"expected a sortwrapperobject");
+		return NULL;
+	}
+	xx = ((sortwrapperobject *)x)->key;
+	yy = ((sortwrapperobject *)y)->key;
+	return PyObject_CallFunctionObjArgs(co->func, xx, yy, NULL);
+}
+
+PyDoc_STRVAR(cmpwrapper_doc, "cmp() wrapper for sort with custom keys.");
+
+static PyTypeObject cmpwrapper_type = {
+	PyObject_HEAD_INIT(&PyType_Type)
+	0,					/* ob_size */
+	"cmpwrapper",				/* tp_name */
+	sizeof(cmpwrapperobject),		/* tp_basicsize */
+	0,					/* tp_itemsize */
+	/* methods */
+	(destructor)cmpwrapper_dealloc,		/* tp_dealloc */
+	0,					/* tp_print */
+	0,					/* tp_getattr */
+	0,					/* tp_setattr */
+	0,					/* tp_compare */
+	0,					/* tp_repr */
+	0,					/* tp_as_number */
+	0,					/* tp_as_sequence */
+	0,					/* tp_as_mapping */
+	0,					/* tp_hash */
+	(ternaryfunc)cmpwrapper_call,		/* tp_call */
+	0,					/* tp_str */
+	PyObject_GenericGetAttr,		/* tp_getattro */
+	0,					/* tp_setattro */
+	0,					/* tp_as_buffer */
+	Py_TPFLAGS_DEFAULT,			/* tp_flags */
+	cmpwrapper_doc,				/* tp_doc */
+};
+
+static PyObject *
+build_cmpwrapper(PyObject *cmpfunc)
+{
+	cmpwrapperobject *co;
+	
+	co = PyObject_New(cmpwrapperobject, &cmpwrapper_type);
+	if (co == NULL)
+		return NULL;
+	Py_INCREF(cmpfunc);
+	co->func = cmpfunc;
+	return (PyObject *)co;
+}
+
 /* An adaptive, stable, natural mergesort.  See listsort.txt.
  * Returns Py_None on success, NULL on error.  Even in case of error, the
  * list will be some permutation of its input state (nothing is lost or
  * duplicated).
  */
 static PyObject *
-listsort(PyListObject *self, PyObject *args)
+listsort(PyListObject *self, PyObject *args, PyObject *kwds)
 {
 	MergeState ms;
 	PyObject **lo, **hi;
@@ -1673,14 +1846,48 @@ listsort(PyListObject *self, PyObject *args)
 	PyObject **empty_ob_item;
 	PyObject *compare = NULL;
 	PyObject *result = NULL;	/* guilty until proved innocent */
+	int reverse = 0;
+	PyObject *keyfunc = NULL;
+	int i, n;
+	PyObject *key, *value, *kvpair;
+	static char *kwlist[] = {"cmp", "key", "reverse", 0};
 
 	assert(self != NULL);
+	assert (PyList_Check(self));
 	if (args != NULL) {
-		if (!PyArg_UnpackTuple(args, "sort", 0, 1, &compare))
+		if (!PyArg_ParseTupleAndKeywords(args, kwds, "|OOi:sort",
+			kwlist, &compare, &keyfunc, &reverse))
 			return NULL;
 	}
 	if (compare == Py_None)
 		compare = NULL;
+	if (keyfunc == Py_None)
+		keyfunc = NULL;
+	if (compare != NULL && keyfunc != NULL) {
+		compare = build_cmpwrapper(compare);
+		if (compare == NULL)
+			goto dsu_fail;
+	} else
+		Py_XINCREF(compare);
+
+	if (keyfunc != NULL) {
+		n = PyList_GET_SIZE(self);
+		for (i=0 ; i<n ; i++) {
+			value = PyList_GET_ITEM(self, i);
+			key = PyObject_CallFunctionObjArgs(keyfunc, value, NULL);
+			if (key == NULL)
+				goto dsu_fail;
+			kvpair = build_sortwrapper(key, value);
+			if (kvpair == NULL)
+				goto dsu_fail;
+			PyList_SET_ITEM(self, i, kvpair);
+		}
+	}
+
+	/* Reverse sort stability achieved by initialially reversing the list,
+	applying a stable forward sort, then reversing the final result. */
+	if (reverse && self->ob_size > 1)
+		reverse_slice(self->ob_item, self->ob_item + self->ob_size);
 
 	merge_init(&ms, compare);
 
@@ -1758,6 +1965,21 @@ fail:
 	self->ob_size = saved_ob_size;
 	self->ob_item = saved_ob_item;
 	merge_freemem(&ms);
+
+	if (keyfunc != NULL) {
+		for (i=0 ; i<n ; i++) {
+			kvpair = PyList_GET_ITEM(self, i);
+			value = sortwrapper_getvalue(kvpair);
+			PyList_SET_ITEM(self, i, value);
+			Py_DECREF(kvpair);
+		}
+	}
+
+	if (reverse && self->ob_size > 1)
+		reverse_slice(self->ob_item, self->ob_item + self->ob_size);
+
+dsu_fail:
+	Py_XDECREF(compare);
 	Py_XINCREF(result);
 	return result;
 }
@@ -1771,7 +1993,7 @@ PyList_Sort(PyObject *v)
 		PyErr_BadInternalCall();
 		return -1;
 	}
-	v = listsort((PyListObject *)v, (PyObject *)NULL);
+	v = listsort((PyListObject *)v, (PyObject *)NULL, (PyObject *)NULL);
 	if (v == NULL)
 		return -1;
 	Py_DECREF(v);
@@ -2111,7 +2333,8 @@ PyDoc_STRVAR(count_doc,
 PyDoc_STRVAR(reverse_doc,
 "L.reverse() -- reverse *IN PLACE*");
 PyDoc_STRVAR(sort_doc,
-"L.sort(cmpfunc=None) -- stable sort *IN PLACE*; cmpfunc(x, y) -> -1, 0, 1");
+"L.sort(cmp=None, key=None, reverse=False) -- stable sort *IN PLACE*;\n\
+cmp(x, y) -> -1, 0, 1");
 
 static PyMethodDef list_methods[] = {
 	{"append",	(PyCFunction)listappend,  METH_O, append_doc},
@@ -2122,7 +2345,7 @@ static PyMethodDef list_methods[] = {
 	{"index",	(PyCFunction)listindex,   METH_VARARGS, index_doc},
 	{"count",	(PyCFunction)listcount,   METH_O, count_doc},
 	{"reverse",	(PyCFunction)listreverse, METH_NOARGS, reverse_doc},
-	{"sort",	(PyCFunction)listsort, 	  METH_VARARGS, sort_doc},
+	{"sort",	(PyCFunction)listsort, 	  METH_VARARGS | METH_KEYWORDS, sort_doc},
  	{NULL,		NULL}		/* sentinel */
 };
author	Raymond Hettinger <python@rcn.com>	2003-10-16 03:41:09 (GMT)
committer	Raymond Hettinger <python@rcn.com>	2003-10-16 03:41:09 (GMT)
commit	42b1ba31aff86af6257a0fca455d5569bce9d8fc (patch)
tree	0497ccd614d5ed8a4cfbb2bce4362f61faf0aeb1
parent	90f7d254a9ae20d6d783138eb8567f39e6ff7e04 (diff)
download	cpython-42b1ba31aff86af6257a0fca455d5569bce9d8fc.zip cpython-42b1ba31aff86af6257a0fca455d5569bce9d8fc.tar.gz cpython-42b1ba31aff86af6257a0fca455d5569bce9d8fc.tar.bz2