From 42b1ba31aff86af6257a0fca455d5569bce9d8fc Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Thu, 16 Oct 2003 03:41:09 +0000 Subject: * list.sort() now supports three keyword arguments: cmp, key, and reverse. key provides C support for the decorate-sort-undecorate pattern. reverse provide a stable sort of the list with the comparisions reversed. * Amended the docs to guarantee sort stability. --- Doc/lib/libstdtypes.tex | 64 ++++++------- Lib/test/test_sort.py | 146 ++++++++++++++++++++---------- Misc/NEWS | 9 ++ Objects/listobject.c | 233 ++++++++++++++++++++++++++++++++++++++++++++++-- 4 files changed, 366 insertions(+), 86 deletions(-) diff --git a/Doc/lib/libstdtypes.tex b/Doc/lib/libstdtypes.tex index c3a0305..62f1644 100644 --- a/Doc/lib/libstdtypes.tex +++ b/Doc/lib/libstdtypes.tex @@ -970,7 +970,8 @@ The following operations are defined on mutable sequence types (where {same as \code{del \var{s}[\var{s}.index(\var{x})]}}{(4)} \lineiii{\var{s}.reverse()} {reverses the items of \var{s} in place}{(7)} - \lineiii{\var{s}.sort(\optional{\var{cmpfunc=None}})} + \lineiii{\var{s}.sort(\optional{\var{cmp}=None\optional{, \var{key}=None + \optional{, \var{reverse}=False}}})} {sort the items of \var{s} in place}{(7), (8), (9), (10)} \end{tableiii} \indexiv{operations on}{mutable}{sequence}{types} @@ -1021,47 +1022,38 @@ Notes: list. To remind you that they operate by side effect, they don't return the sorted or reversed list. -\item[(8)] The \method{sort()} method takes an optional argument - specifying a comparison function of two arguments (list items) which - should return a negative, zero or positive number depending on whether - the first argument is considered smaller than, equal to, or larger - than the second argument. Note that this slows the sorting process - down considerably; for example to sort a list in reverse order it is much - faster to call \method{sort()} followed by \method{reverse()} - than to use \method{sort()} with a comparison function that - reverses the ordering of the elements. Passing \constant{None} as the - comparison function is semantically equivalent to calling - \method{sort()} with no comparison function. - \versionchanged[Support for \code{None} as an equivalent to omitting - \var{cmpfunc} was added]{2.3} +\item[(8)] The \method{sort()} method takes optional arguments for + controlling the comparisions. - As an example of using the \var{cmpfunc} argument to the - \method{sort()} method, consider sorting a list of sequences by the - second element of that list: + \var{cmp} specifies a custom comparison function of two arguments + (list items) which should return a negative, zero or positive number + depending on whether the first argument is considered smaller than, + equal to, or larger than the second argument: + \samp{\var{cmp}=\keyword{lambda} \var{x},\var{y}: + \function{cmp}(x.lower(), y.lower())} + + \var{key} specifies a function of one argument that is used to + extract a comparison key from each list element: + \samp{\var{cmp}=\function{str.lower}} -\begin{verbatim} -def mycmp(a, b): - return cmp(a[1], b[1]) + \var{reverse} is a boolean value. If set to \code{True}, then the + list elements are sorted as if each comparison were reversed. -mylist.sort(mycmp) -\end{verbatim} + In general, the \var{key} and \var{reverse} conversion processes are + much faster than specifying an equivalent \var{cmp} function. This is + because \var{cmp} is called multiple times for each list element while + \var{key} and \{reverse} touch each element only once. - A more time-efficient approach for reasonably-sized data structures can - often be used: + \versionchanged[Support for \code{None} as an equivalent to omitting + \var{cmpfunc} was added]{2.3} -\begin{verbatim} -tmplist = [(x[1], x) for x in mylist] -tmplist.sort() -mylist = [x for (key, x) in tmplist] -\end{verbatim} + \versionadded[Support for \var{key} and \var{reverse} was added]{2.4} -\item[(9)] Whether the \method{sort()} method is stable is not defined by - the language (a sort is stable if it guarantees not to change the - relative order of elements that compare equal). In the C - implementation of Python, sorts were stable only by accident through - Python 2.2. The C implementation of Python 2.3 introduced a stable - \method{sort()} method, but code that intends to be portable across - implementations and versions must not rely on stability. +\item[(9)] Starting with Python 2.3, the \method{sort()} method is + guaranteed to be stable. A sort is stable if it guarantees not to + change the relative order of elements that compare equal --- this is + helpful for sorting in multiple passes (for example, sort by + department, then by salary grade). \item[(10)] While a list is being sorted, the effect of attempting to mutate, or even inspect, the list is undefined. The C implementation diff --git a/Lib/test/test_sort.py b/Lib/test/test_sort.py index 6c35f42..ca6fc801 100644 --- a/Lib/test/test_sort.py +++ b/Lib/test/test_sort.py @@ -116,56 +116,112 @@ for n in sizes: x = [e for e, i in augmented] # a stable sort of s check("stability", x, s) -def bug453523(): - global nerrors - from random import random - # If this fails, the most likely outcome is a core dump. - if verbose: - print "Testing bug 453523 -- list.sort() crasher." - - class C: - def __lt__(self, other): - if L and random() < 0.75: - pop() - else: - push(3) - return random() < 0.5 - - L = [C() for i in range(50)] - pop = L.pop - push = L.append - try: - L.sort() - except ValueError: - pass - else: - print " Mutation during list.sort() wasn't caught." - nerrors += 1 +import unittest +from test import test_support +import sys -bug453523() +#============================================================================== -def cmpNone(): - global nerrors +class TestBugs(unittest.TestCase): - if verbose: - print "Testing None as a comparison function." + def test_bug453523(self): + # bug 453523 -- list.sort() crasher. + # If this fails, the most likely outcome is a core dump. + # Mutations during a list sort should raise a ValueError. - L = range(50) - random.shuffle(L) - try: + class C: + def __lt__(self, other): + if L and random.random() < 0.75: + L.pop() + else: + L.append(3) + return random.random() < 0.5 + + L = [C() for i in range(50)] + self.assertRaises(ValueError, L.sort) + + def test_cmpNone(self): + # Testing None as a comparison function. + + L = range(50) + random.shuffle(L) L.sort(None) - except TypeError: - print " Passing None as cmpfunc failed." - nerrors += 1 - else: - if L != range(50): - print " Passing None as cmpfunc failed." - nerrors += 1 + self.assertEqual(L, range(50)) + +#============================================================================== + +class TestDecorateSortUndecorate(unittest.TestCase): + + def test_decorated(self): + data = 'The quick Brown fox Jumped over The lazy Dog'.split() + copy = data[:] + random.shuffle(data) + data.sort(key=str.lower) + copy.sort(cmp=lambda x,y: cmp(x.lower(), y.lower())) + + def test_baddecorator(self): + data = 'The quick Brown fox Jumped over The lazy Dog'.split() + self.assertRaises(TypeError, data.sort, None, lambda x,y: 0) + + def test_stability(self): + data = [(random.randrange(100), i) for i in xrange(200)] + copy = data[:] + data.sort(key=lambda (x,y): x) # sort on the random first field + copy.sort() # sort using both fields + self.assertEqual(data, copy) # should get the same result + + def test_cmp_and_key_combination(self): + # Verify that the wrapper has been removed + def compare(x, y): + self.assertEqual(type(x), str) + self.assertEqual(type(x), str) + return cmp(x, y) + data = 'The quick Brown fox Jumped over The lazy Dog'.split() + data.sort(cmp=compare, key=str.lower) + + def test_badcmp_with_key(self): + # Verify that the wrapper has been removed + data = 'The quick Brown fox Jumped over The lazy Dog'.split() + self.assertRaises(TypeError, data.sort, "bad", str.lower) + + def test_reverse(self): + data = range(100) + random.shuffle(data) + data.sort(reverse=True) + self.assertEqual(data, range(99,-1,-1)) + + def test_reverse_stability(self): + data = [(random.randrange(100), i) for i in xrange(200)] + copy1 = data[:] + copy2 = data[:] + data.sort(cmp=lambda x,y: cmp(x[0],y[0]), reverse=True) + copy1.sort(cmp=lambda x,y: cmp(y[0],x[0])) + self.assertEqual(data, copy1) + copy2.sort(key=lambda x: x[0], reverse=True) + self.assertEqual(data, copy2) + +#============================================================================== + +def test_main(verbose=None): + test_classes = ( + TestDecorateSortUndecorate, + TestBugs, + ) + + test_support.run_unittest(*test_classes) + + # verify reference counting + if verbose and hasattr(sys, "gettotalrefcount"): + import gc + counts = [None] * 5 + for i in xrange(len(counts)): + test_support.run_unittest(*test_classes) + gc.collect() + counts[i] = sys.gettotalrefcount() + print counts + +if __name__ == "__main__": + test_main(verbose=True) -cmpNone() -if nerrors: - print "Test failed", nerrors -elif verbose: - print "Test passed -- no errors." diff --git a/Misc/NEWS b/Misc/NEWS index eb57e68..02f541f 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -12,6 +12,15 @@ What's New in Python 2.4 alpha 1? Core and builtins ----------------- +- list.sort() now supports three keyword arguments: cmp, key, and reverse. + The key argument can be a function of one argument that extracts a + comparison key from the original record: mylist.sort(key=str.lower). + The reverse argument is a boolean value and if True will change the + sort order as if the comparison arguments were reversed. In addition, + the documentation has been amended to provide a guarantee that all sorts + starting with Py2.3 are guaranteed to be stable (the relative order of + records with equal keys is unchanged). + - Added test whether wchar_t is signed or not. A signed wchar_t is not usable as internal unicode type base for Py_UNICODE since the unicode implementation assumes an unsigned type. diff --git a/Objects/listobject.c b/Objects/listobject.c index 727c9e6..2edbedf 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -1656,13 +1656,186 @@ merge_compute_minrun(int n) return n + r; } +/* Special wrapper to support stable sorting using the decorate-sort-undecorate + pattern. Holds a key which is used for comparisions and the original record + which is returned during the undecorate phase. By exposing only the key + during comparisons, the underlying sort stability characteristics are left + unchanged. Also, if a custom comparison function is used, it will only see + the key instead of a full record. */ + +typedef struct { + PyObject_HEAD + PyObject *key; + PyObject *value; +} sortwrapperobject; + +static PyTypeObject sortwrapper_type; + +static PyObject * +sortwrapper_richcompare(sortwrapperobject *a, sortwrapperobject *b, int op) +{ + if (!PyObject_TypeCheck(b, &sortwrapper_type)) { + PyErr_SetString(PyExc_TypeError, + "expected a sortwrapperobject"); + return NULL; + } + return PyObject_RichCompare(a->key, b->key, op); +} + +static void +sortwrapper_dealloc(sortwrapperobject *so) +{ + Py_XDECREF(so->key); + Py_XDECREF(so->value); + PyObject_Del(so); +} + +PyDoc_STRVAR(sortwrapper_doc, "Object wrapper with a custom sort key."); + +static PyTypeObject sortwrapper_type = { + PyObject_HEAD_INIT(&PyType_Type) + 0, /* ob_size */ + "sortwrapper", /* tp_name */ + sizeof(sortwrapperobject), /* tp_basicsize */ + 0, /* tp_itemsize */ + /* methods */ + (destructor)sortwrapper_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_compare */ + 0, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + 0, /* tp_str */ + PyObject_GenericGetAttr, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT | + Py_TPFLAGS_HAVE_RICHCOMPARE, /* tp_flags */ + sortwrapper_doc, /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + (richcmpfunc)sortwrapper_richcompare, /* tp_richcompare */ +}; + +/* Returns a new reference to a sortwrapper. + Consumes the references to the two underlying objects. */ + +static PyObject * +build_sortwrapper(PyObject *key, PyObject *value) +{ + sortwrapperobject *so; + + so = PyObject_New(sortwrapperobject, &sortwrapper_type); + if (so == NULL) + return NULL; + so->key = key; + so->value = value; + return (PyObject *)so; +} + +/* Returns a new reference to the value underlying the wrapper. */ +static PyObject * +sortwrapper_getvalue(PyObject *so) +{ + PyObject *value; + + if (!PyObject_TypeCheck(so, &sortwrapper_type)) { + PyErr_SetString(PyExc_TypeError, + "expected a sortwrapperobject"); + return NULL; + } + value = ((sortwrapperobject *)so)->value; + Py_INCREF(value); + return value; +} + +/* Wrapper for user specified cmp functions in combination with a + specified key function. Makes sure the cmp function is presented + with the actual key instead of the sortwrapper */ + +typedef struct { + PyObject_HEAD + PyObject *func; +} cmpwrapperobject; + +static void +cmpwrapper_dealloc(cmpwrapperobject *co) +{ + Py_XDECREF(co->func); + PyObject_Del(co); +} + +static PyObject * +cmpwrapper_call(cmpwrapperobject *co, PyObject *args, PyObject *kwds) +{ + PyObject *x, *y, *xx, *yy; + + if (!PyArg_UnpackTuple(args, "", 2, 2, &x, &y)) + return NULL; + if (!PyObject_TypeCheck(x, &sortwrapper_type) || + !PyObject_TypeCheck(x, &sortwrapper_type)) { + PyErr_SetString(PyExc_TypeError, + "expected a sortwrapperobject"); + return NULL; + } + xx = ((sortwrapperobject *)x)->key; + yy = ((sortwrapperobject *)y)->key; + return PyObject_CallFunctionObjArgs(co->func, xx, yy, NULL); +} + +PyDoc_STRVAR(cmpwrapper_doc, "cmp() wrapper for sort with custom keys."); + +static PyTypeObject cmpwrapper_type = { + PyObject_HEAD_INIT(&PyType_Type) + 0, /* ob_size */ + "cmpwrapper", /* tp_name */ + sizeof(cmpwrapperobject), /* tp_basicsize */ + 0, /* tp_itemsize */ + /* methods */ + (destructor)cmpwrapper_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_compare */ + 0, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + (ternaryfunc)cmpwrapper_call, /* tp_call */ + 0, /* tp_str */ + PyObject_GenericGetAttr, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT, /* tp_flags */ + cmpwrapper_doc, /* tp_doc */ +}; + +static PyObject * +build_cmpwrapper(PyObject *cmpfunc) +{ + cmpwrapperobject *co; + + co = PyObject_New(cmpwrapperobject, &cmpwrapper_type); + if (co == NULL) + return NULL; + Py_INCREF(cmpfunc); + co->func = cmpfunc; + return (PyObject *)co; +} + /* An adaptive, stable, natural mergesort. See listsort.txt. * Returns Py_None on success, NULL on error. Even in case of error, the * list will be some permutation of its input state (nothing is lost or * duplicated). */ static PyObject * -listsort(PyListObject *self, PyObject *args) +listsort(PyListObject *self, PyObject *args, PyObject *kwds) { MergeState ms; PyObject **lo, **hi; @@ -1673,14 +1846,48 @@ listsort(PyListObject *self, PyObject *args) PyObject **empty_ob_item; PyObject *compare = NULL; PyObject *result = NULL; /* guilty until proved innocent */ + int reverse = 0; + PyObject *keyfunc = NULL; + int i, n; + PyObject *key, *value, *kvpair; + static char *kwlist[] = {"cmp", "key", "reverse", 0}; assert(self != NULL); + assert (PyList_Check(self)); if (args != NULL) { - if (!PyArg_UnpackTuple(args, "sort", 0, 1, &compare)) + if (!PyArg_ParseTupleAndKeywords(args, kwds, "|OOi:sort", + kwlist, &compare, &keyfunc, &reverse)) return NULL; } if (compare == Py_None) compare = NULL; + if (keyfunc == Py_None) + keyfunc = NULL; + if (compare != NULL && keyfunc != NULL) { + compare = build_cmpwrapper(compare); + if (compare == NULL) + goto dsu_fail; + } else + Py_XINCREF(compare); + + if (keyfunc != NULL) { + n = PyList_GET_SIZE(self); + for (i=0 ; iob_size > 1) + reverse_slice(self->ob_item, self->ob_item + self->ob_size); merge_init(&ms, compare); @@ -1758,6 +1965,21 @@ fail: self->ob_size = saved_ob_size; self->ob_item = saved_ob_item; merge_freemem(&ms); + + if (keyfunc != NULL) { + for (i=0 ; iob_size > 1) + reverse_slice(self->ob_item, self->ob_item + self->ob_size); + +dsu_fail: + Py_XDECREF(compare); Py_XINCREF(result); return result; } @@ -1771,7 +1993,7 @@ PyList_Sort(PyObject *v) PyErr_BadInternalCall(); return -1; } - v = listsort((PyListObject *)v, (PyObject *)NULL); + v = listsort((PyListObject *)v, (PyObject *)NULL, (PyObject *)NULL); if (v == NULL) return -1; Py_DECREF(v); @@ -2111,7 +2333,8 @@ PyDoc_STRVAR(count_doc, PyDoc_STRVAR(reverse_doc, "L.reverse() -- reverse *IN PLACE*"); PyDoc_STRVAR(sort_doc, -"L.sort(cmpfunc=None) -- stable sort *IN PLACE*; cmpfunc(x, y) -> -1, 0, 1"); +"L.sort(cmp=None, key=None, reverse=False) -- stable sort *IN PLACE*;\n\ +cmp(x, y) -> -1, 0, 1"); static PyMethodDef list_methods[] = { {"append", (PyCFunction)listappend, METH_O, append_doc}, @@ -2122,7 +2345,7 @@ static PyMethodDef list_methods[] = { {"index", (PyCFunction)listindex, METH_VARARGS, index_doc}, {"count", (PyCFunction)listcount, METH_O, count_doc}, {"reverse", (PyCFunction)listreverse, METH_NOARGS, reverse_doc}, - {"sort", (PyCFunction)listsort, METH_VARARGS, sort_doc}, + {"sort", (PyCFunction)listsort, METH_VARARGS | METH_KEYWORDS, sort_doc}, {NULL, NULL} /* sentinel */ }; -- cgit v0.12