From 7e9fde3466a3436d634d966a64f73d900e274d88 Mon Sep 17 00:00:00 2001 From: dkf Date: Mon, 29 Sep 2008 13:32:55 +0000 Subject: TIP #326 IMPLEMENTATION --- ChangeLog | 6 +++ doc/lsort.n | 60 ++++++++++++++++++++++--- generic/tclCmdIL.c | 128 +++++++++++++++++++++++++++++++++++++++++++++++------ tests/cmdIL.test | 24 ++++++++-- 4 files changed, 196 insertions(+), 22 deletions(-) diff --git a/ChangeLog b/ChangeLog index 2eed027..4a1c197 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,11 @@ 2008-09-29 Donal K. Fellows + TIP #326 IMPLEMENTATION + + * generic/tclCmdIL.c (Tcl_LsortObjCmd): Added -stride option to carry + * doc/lsort.n, tests/cmdIL.test: out sorting of lists where the + elements are grouped. Adapted from [Patch 2082681] + TIP #313 IMPLEMENTATION * generic/tclCmdIL.c (Tcl_LsearchObjCmd): Added -bisect option to diff --git a/doc/lsort.n b/doc/lsort.n index ec80885..6253a5e 100644 --- a/doc/lsort.n +++ b/doc/lsort.n @@ -7,7 +7,7 @@ '\" See the file "license.terms" for information on usage and redistribution '\" of this file, and for a DISCLAIMER OF ALL WARRANTIES. '\" -'\" RCS: @(#) $Id: lsort.n,v 1.30 2008/06/29 22:28:24 dkf Exp $ +'\" RCS: @(#) $Id: lsort.n,v 1.31 2008/09/29 13:33:18 dkf Exp $ '\" .so man.macros .TH lsort n 8.5 Tcl "Tcl Built-In Commands" @@ -81,11 +81,11 @@ the values themselves. \fB\-index\0\fIindexList\fR . If this option is specified, each of the elements of \fIlist\fR must -itself be a proper Tcl sublist. Instead of sorting based on whole -sublists, \fBlsort\fR will extract the \fIindexList\fR'th element from -each sublist -(as if the overall element and the \fIindexList\fR were passed to -\fBlindex\fR) and sort based on the given element. +itself be a proper Tcl sublist (unless \fB-stride\fR is used). +Instead of sorting based on whole sublists, \fBlsort\fR will extract +the \fIindexList\fR'th element from each sublist (as if the overall +element and the \fIindexList\fR were passed to \fBlindex\fR) and sort +based on the given element. For example, .RS .CS @@ -115,6 +115,33 @@ This option is much more efficient than using \fB\-command\fR to achieve the same effect. .RE .TP 20 +\fB\-stride\0\fIstrideLength\fR +. +If this option is specified, the list is treated as consisting of +groups of \fIstrideLength\fR elements and the groups are sorted by +either their first element or, if the \fB\-index\fR option is used, +by the element within each group given by the first index passed to +\fB\-index\fR (which is then ignored by \fB\-index\fR). Elements +always remain in the same position within their group. +.RS +.PP +The list length must be an integer multiple of \fIstrideLength\fR, which +in turn must be at least 2. +.PP +For example, +.CS +lsort \-stride 2 {carrot 10 apple 50 banana 25} +.CE +returns +.QW "apple 50 banana 25 carrot 10" , +and +.CS +lsort \-stride 2 \-index 1 \-integer {carrot 10 apple 50 banana 25} +.CE +returns +.QW "carrot 10 banana 25 apple 50" . +.RE +.TP 20 \fB\-nocase\fR . Causes comparisons to be handled in a case-insensitive manner. Has no @@ -180,6 +207,24 @@ Sorting using indices: {e 1} {d 2} { c 3} {b 4} {a 5} .CE .PP +.VS 8.6 +Sorting a dictionary: +.CS +% set d [dict create c d a b h i f g c e] +c e a b h i f g +% \fBlsort\fR -stride 2 $d +a b c e f g h i +.CE +.PP +Sorting using striding and multiple indices: +.CS +% # Note the first index value is relative to the group +% \fBlsort\fR \-stride 3 \-index {0 1} \e + {{Bob Smith} 25 Audi {Jane Doe} 40 Ford} +{{Jane Doe} 40 Ford {Bob Smith} 25 Audi} +.CE +.VE 8.6 +.PP Stripping duplicate values using sorting: .CS % \fBlsort\fR -unique {a b c a b c a b c} @@ -207,3 +252,6 @@ list(n), lappend(n), lindex(n), linsert(n), llength(n), lsearch(n), lset(n), lrange(n), lreplace(n) .SH KEYWORDS element, list, order, sort +'\" Local Variables: +'\" mode: nroff +'\" End: diff --git a/generic/tclCmdIL.c b/generic/tclCmdIL.c index a66eb0a..4041512 100644 --- a/generic/tclCmdIL.c +++ b/generic/tclCmdIL.c @@ -16,7 +16,7 @@ * See the file "license.terms" for information on usage and redistribution of * this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclCmdIL.c,v 1.156 2008/09/29 12:25:20 dkf Exp $ + * RCS: @(#) $Id: tclCmdIL.c,v 1.157 2008/09/29 13:33:16 dkf Exp $ */ #include "tclInt.h" @@ -3518,18 +3518,20 @@ Tcl_LsortObjCmd( Tcl_Obj *const objv[]) /* Argument values. */ { int i, j, index, unique, indices, length, nocase = 0, sortMode, indexc; + int group, groupSize, groupOffset, idx; Tcl_Obj *resultPtr, *cmdPtr, **listObjPtrs, *listObj, *indexPtr; SortElement *elementArray, *elementPtr; SortInfo sortInfo; /* Information about this sort that needs to * be passed to the comparison function. */ static const char *switches[] = { "-ascii", "-command", "-decreasing", "-dictionary", "-increasing", - "-index", "-indices", "-integer", "-nocase", "-real", "-unique", NULL + "-index", "-indices", "-integer", "-nocase", "-real", "-stride", + "-unique", NULL }; enum Lsort_Switches { LSORT_ASCII, LSORT_COMMAND, LSORT_DECREASING, LSORT_DICTIONARY, LSORT_INCREASING, LSORT_INDEX, LSORT_INDICES, LSORT_INTEGER, - LSORT_NOCASE, LSORT_REAL, LSORT_UNIQUE + LSORT_NOCASE, LSORT_REAL, LSORT_STRIDE, LSORT_UNIQUE }; /* @@ -3558,6 +3560,9 @@ Tcl_LsortObjCmd( cmdPtr = NULL; unique = 0; indices = 0; + group = 0; + groupSize = 1; + groupOffset = 0; for (i = 1; i < objc-1; i++) { if (Tcl_GetIndexFromObj(interp, objv[i], switches, "option", 0, &index) != TCL_OK) { @@ -3658,6 +3663,33 @@ Tcl_LsortObjCmd( case LSORT_INDICES: indices = 1; break; + case LSORT_STRIDE: + if (i == (objc-2)) { + if (sortInfo.indexc > 1) { + ckfree((char *) sortInfo.indexv); + } + Tcl_AppendResult(interp, + "\"-stride\" option must be followed by stride length", + NULL); + return TCL_ERROR; + } + if (Tcl_GetIntFromObj(interp, objv[i+1], &groupSize) != TCL_OK) { + if (sortInfo.indexc > 1) { + ckfree((char *) sortInfo.indexv); + } + return TCL_ERROR; + } + if (groupSize < 2) { + if (sortInfo.indexc > 1) { + ckfree((char *) sortInfo.indexv); + } + Tcl_AppendResult(interp, "stride length must be at least 2", + NULL); + return TCL_ERROR; + } + group = 1; + i++; + break; } } if (nocase && (sortInfo.sortMode == SORTMODE_ASCII)) { @@ -3712,6 +3744,55 @@ Tcl_LsortObjCmd( if (sortInfo.resultCode != TCL_OK || length <= 0) { goto done; } + + /* + * Check for sanity when grouping elements of the overall list together + * because of the -stride option. [TIP #326] + */ + + if (group) { + if (length % groupSize) { + Tcl_AppendResult(interp, + "list size must be a multiple of the stride length", + NULL); + sortInfo.resultCode = TCL_ERROR; + goto done; + } + length = length / groupSize; + if (sortInfo.indexc > 0) { + /* + * Use the first value in the list supplied to -index as the + * offset of the element within each group by which to sort. + */ + + groupOffset = sortInfo.indexv[0]; + if (groupOffset <= SORTIDX_END) { + groupOffset = (groupOffset - SORTIDX_END) + groupSize - 1; + } + if (groupOffset < 0 || groupOffset >= groupSize) { + Tcl_AppendResult(interp, "when used with \"-stride\", the " + "leading \"-index\" value must be within the group", + NULL); + sortInfo.resultCode = TCL_ERROR; + goto done; + } + if (sortInfo.indexc == 1) { + sortInfo.indexc = 0; + sortInfo.indexv = NULL; + } else { + int *new_indexv; + + sortInfo.indexc--; + new_indexv = (int *) ckalloc(sizeof(int) * sortInfo.indexc); + for (i = 0; i < sortInfo.indexc; i++) { + new_indexv[i] = sortInfo.indexv[i+1]; + } + ckfree((char *) sortInfo.indexv); + sortInfo.indexv = new_indexv; + } + } + } + sortInfo.numElements = length; indexc = sortInfo.indexc; @@ -3743,16 +3824,17 @@ Tcl_LsortObjCmd( elementArray = (SortElement *) ckalloc( length * sizeof(SortElement)); for (i=0; i < length; i++){ + idx = groupSize * i + groupOffset; if (indexc) { /* * If this is an indexed sort, retrieve the corresponding element */ - indexPtr = SelectObjFromSublist(listObjPtrs[i], &sortInfo); + indexPtr = SelectObjFromSublist(listObjPtrs[idx], &sortInfo); if (sortInfo.resultCode != TCL_OK) { goto done1; } } else { - indexPtr = listObjPtrs[i]; + indexPtr = listObjPtrs[idx]; } /* @@ -3763,6 +3845,7 @@ Tcl_LsortObjCmd( elementArray[i].index.strValuePtr = TclGetString(indexPtr); } else if (sortMode == SORTMODE_INTEGER) { long a; + if (TclGetLongFromObj(sortInfo.interp, indexPtr, &a) != TCL_OK) { sortInfo.resultCode = TCL_ERROR; goto done1; @@ -3770,6 +3853,7 @@ Tcl_LsortObjCmd( elementArray[i].index.intValue = a; } else if (sortInfo.sortMode == SORTMODE_REAL) { double a; + if (Tcl_GetDoubleFromObj(sortInfo.interp, indexPtr, &a) != TCL_OK) { sortInfo.resultCode = TCL_ERROR; goto done1; @@ -3784,7 +3868,11 @@ Tcl_LsortObjCmd( * the objPtr itself, or its index in the original list. */ - elementArray[i].objPtr = (indices ? INT2PTR(i) : listObjPtrs[i]); + if (indices || group) { + elementArray[i].objPtr = INT2PTR(idx); + } else { + elementArray[i].objPtr = listObjPtrs[idx]; + } /* * Merge this element in the pre-existing sublists (and merge together @@ -3812,7 +3900,6 @@ Tcl_LsortObjCmd( elementPtr = MergeLists(subList[j], elementPtr, &sortInfo); } - /* * Now store the sorted elements in the result list. */ @@ -3822,17 +3909,32 @@ Tcl_LsortObjCmd( Tcl_Obj **newArray, *objPtr; int i; - resultPtr = Tcl_NewListObj(sortInfo.numElements, NULL); - listRepPtr = (List *) resultPtr->internalRep.twoPtrValue.ptr1; + resultPtr = Tcl_NewListObj(sortInfo.numElements * groupSize, NULL); + listRepPtr = resultPtr->internalRep.twoPtrValue.ptr1; newArray = &listRepPtr->elements; - if (indices) { - for (i = 0; elementPtr != NULL ; elementPtr = elementPtr->nextPtr){ + if (group) { + for (i=0; elementPtr!=NULL ; elementPtr=elementPtr->nextPtr) { + idx = PTR2INT(elementPtr->objPtr); + for (j = 0; j < groupSize; j++) { + if (indices) { + objPtr = Tcl_NewIntObj(idx + j - groupOffset); + newArray[i++] = objPtr; + Tcl_IncrRefCount(objPtr); + } else { + objPtr = listObjPtrs[idx + j - groupOffset]; + newArray[i++] = objPtr; + Tcl_IncrRefCount(objPtr); + } + } + } + } else if (indices) { + for (i=0; elementPtr != NULL ; elementPtr = elementPtr->nextPtr) { objPtr = Tcl_NewIntObj(PTR2INT(elementPtr->objPtr)); newArray[i++] = objPtr; Tcl_IncrRefCount(objPtr); } } else { - for (i = 0; elementPtr != NULL ; elementPtr = elementPtr->nextPtr){ + for (i=0; elementPtr != NULL ; elementPtr = elementPtr->nextPtr) { objPtr = elementPtr->objPtr; newArray[i++] = objPtr; Tcl_IncrRefCount(objPtr); @@ -3843,7 +3945,7 @@ Tcl_LsortObjCmd( } done1: - ckfree((char *)elementArray); + ckfree((char *) elementArray); done: if (sortInfo.sortMode == SORTMODE_COMMAND) { diff --git a/tests/cmdIL.test b/tests/cmdIL.test index 01fc09e..17b0d9e 100644 --- a/tests/cmdIL.test +++ b/tests/cmdIL.test @@ -8,7 +8,7 @@ # See the file "license.terms" for information on usage and redistribution of # this file, and for a DISCLAIMER OF ALL WARRANTIES. # -# RCS: @(#) $Id: cmdIL.test,v 1.41 2008/09/26 19:12:42 dgp Exp $ +# RCS: @(#) $Id: cmdIL.test,v 1.42 2008/09/29 13:33:17 dkf Exp $ if {[lsearch [namespace children] ::tcltest] == -1} { package require tcltest 2 @@ -24,7 +24,7 @@ test cmdIL-1.1 {Tcl_LsortObjCmd procedure} -returnCodes error -body { } -result {wrong # args: should be "lsort ?-option value ...? list"} test cmdIL-1.2 {Tcl_LsortObjCmd procedure} -returnCodes error -body { lsort -foo {1 3 2 5} -} -result {bad option "-foo": must be -ascii, -command, -decreasing, -dictionary, -increasing, -index, -indices, -integer, -nocase, -real, or -unique} +} -result {bad option "-foo": must be -ascii, -command, -decreasing, -dictionary, -increasing, -index, -indices, -integer, -nocase, -real, -stride, or -unique} test cmdIL-1.3 {Tcl_LsortObjCmd procedure, default options} { lsort {d e c b a \{ d35 d300} } {a b c d d300 d35 e \{} @@ -122,6 +122,24 @@ test cmdIL-1.29 {Tcl_LsortObjCmd procedure, loss of list rep during sorting} { set l {1 2 3} string length [lsort -command {apply {args {string length $::l}}} $l] } 5 +test cmdIL-1.30 {Tcl_LsortObjCmd procedure, -stride option} { + lsort -stride 2 {f e d c b a} +} {b a d c f e} +test cmdIL-1.31 {Tcl_LsortObjCmd procedure, -stride option} { + lsort -stride 3 {f e d c b a} +} {c b a f e d} +test cmdIL-1.32 {lsort -stride errors} -returnCodes error -body { + lsort -stride foo bar +} -result {expected integer but got "foo"} +test cmdIL-1.33 {lsort -stride errors} -returnCodes error -body { + lsort -stride 1 bar +} -result {stride length must be at least 2} +test cmdIL-1.34 {lsort -stride errors} -returnCodes error -body { + lsort -stride 2 {a b c} +} -result {list size must be a multiple of the stride length} +test cmdIL-1.35 {lsort -stride errors} -returnCodes error -body { + lsort -stride 2 -index 3 {a b c d} +} -result {when used with "-stride", the leading "-index" value must be within the group} # Can't think of any good tests for the MergeSort and MergeLists procedures, # except a bunch of random lists to sort. @@ -149,7 +167,7 @@ test cmdIL-2.1 {MergeSort and MergeLists procedures} -setup { set old $el } } - set result + string trim $result } -cleanup { rename rand "" } -result {} -- cgit v0.12