From da82965bf2c1b65b3e1bb8e1f82e944317f0a047 Mon Sep 17 00:00:00 2001 From: dkf Date: Mon, 29 Sep 2008 12:25:18 +0000 Subject: TIP #313 IMPLEMENTATION --- ChangeLog | 7 +++++++ doc/lsearch.n | 16 +++++++++++++-- generic/tclCmdIL.c | 58 ++++++++++++++++++++++++++++++++++++------------------ tests/lsearch.test | 52 ++++++++++++++++++++++++++++++++++++++++++++---- 4 files changed, 108 insertions(+), 25 deletions(-) diff --git a/ChangeLog b/ChangeLog index e3c731c..2eed027 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,12 @@ 2008-09-29 Donal K. Fellows + TIP #313 IMPLEMENTATION + + * generic/tclCmdIL.c (Tcl_LsearchObjCmd): Added -bisect option to + * doc/lsearch.n, tests/lsearch.test: allow the finding of the + place to insert an element in a sorted list when that element is + not already there. [Patch 1894241] + TIP #318 IMPLEMENTATION * generic/tclCmdMZ.c (StringTrimCmd,StringTrimLCmd,StringTrimRCmd): diff --git a/doc/lsearch.n b/doc/lsearch.n index b5f950d..cd4363d 100644 --- a/doc/lsearch.n +++ b/doc/lsearch.n @@ -7,10 +7,10 @@ '\" See the file "license.terms" for information on usage and redistribution '\" of this file, and for a DISCLAIMER OF ALL WARRANTIES. '\" -'\" RCS: @(#) $Id: lsearch.n,v 1.35 2008/06/29 22:28:24 dkf Exp $ +'\" RCS: @(#) $Id: lsearch.n,v 1.36 2008/09/29 12:25:22 dkf Exp $ '\" .so man.macros -.TH lsearch n 8.5 Tcl "Tcl Built-In Commands" +.TH lsearch n 8.6 Tcl "Tcl Built-In Commands" .BS '\" Note: do not modify the .SH NAME line immediately below! .SH NAME @@ -134,6 +134,17 @@ meaningful when used with \fB\-sorted\fR. . The list elements are sorted in increasing order. This option is only meaningful when used with \fB\-sorted\fR. +.TP +\fB\-bisect\fR +.VS 8.6 +Inexact search when the list elements are in sorted order. For an increasing +list the last index where the element is less than or equal to the pattern +is returned. For a decreasing list the last index where the element is greater +than or equal to the pattern is returned. If the pattern is before the first +element or the list is empty, -1 is returned. +This option implies \fB\-sorted\fR and cannot be used with either \fB\-all\fR +or \fB\-not\fR. +.VE 8.6 .SS "NESTED LIST OPTIONS" .PP These options are used to search lists of lists. They may be used @@ -199,6 +210,7 @@ foreach(n), list(n), lappend(n), lindex(n), linsert(n), llength(n), lset(n), lsort(n), lrange(n), lreplace(n), string(n) .SH KEYWORDS +binary search, linear search, list, match, pattern, regular expression, search, string '\" Local Variables: '\" mode: nroff diff --git a/generic/tclCmdIL.c b/generic/tclCmdIL.c index af3c3bb..a66eb0a 100644 --- a/generic/tclCmdIL.c +++ b/generic/tclCmdIL.c @@ -16,7 +16,7 @@ * See the file "license.terms" for information on usage and redistribution of * this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclCmdIL.c,v 1.155 2008/09/27 19:34:59 dkf Exp $ + * RCS: @(#) $Id: tclCmdIL.c,v 1.156 2008/09/29 12:25:20 dkf Exp $ */ #include "tclInt.h" @@ -2749,7 +2749,7 @@ Tcl_LsearchObjCmd( Tcl_Obj *const objv[]) /* Argument values. */ { char *bytes, *patternBytes; - int i, match, mode, index, result, listc, length, elemLen; + int i, match, index, result, listc, length, elemLen, bisect; int dataType, isIncreasing, lower, upper, patInt, objInt, offset; int allMatches, inlineReturn, negatedMatch, returnSubindices, noCase; double patDouble, objDouble; @@ -2758,18 +2758,18 @@ Tcl_LsearchObjCmd( SortStrCmpFn_t strCmpFn = strcmp; Tcl_RegExp regexp = NULL; static const char *options[] = { - "-all", "-ascii", "-decreasing", "-dictionary", + "-all", "-ascii", "-bisect", "-decreasing", "-dictionary", "-exact", "-glob", "-increasing", "-index", "-inline", "-integer", "-nocase", "-not", "-real", "-regexp", "-sorted", "-start", "-subindices", NULL }; enum options { - LSEARCH_ALL, LSEARCH_ASCII, LSEARCH_DECREASING, LSEARCH_DICTIONARY, - LSEARCH_EXACT, LSEARCH_GLOB, LSEARCH_INCREASING, LSEARCH_INDEX, - LSEARCH_INLINE, LSEARCH_INTEGER, LSEARCH_NOCASE, LSEARCH_NOT, - LSEARCH_REAL, LSEARCH_REGEXP, LSEARCH_SORTED, LSEARCH_START, - LSEARCH_SUBINDICES + LSEARCH_ALL, LSEARCH_ASCII, LSEARCH_BISECT, LSEARCH_DECREASING, + LSEARCH_DICTIONARY, LSEARCH_EXACT, LSEARCH_GLOB, LSEARCH_INCREASING, + LSEARCH_INDEX, LSEARCH_INLINE, LSEARCH_INTEGER, LSEARCH_NOCASE, + LSEARCH_NOT, LSEARCH_REAL, LSEARCH_REGEXP, LSEARCH_SORTED, + LSEARCH_START, LSEARCH_SUBINDICES }; enum datatypes { ASCII, DICTIONARY, INTEGER, REAL @@ -2777,6 +2777,7 @@ Tcl_LsearchObjCmd( enum modes { EXACT, GLOB, REGEXP, SORTED }; + enum modes mode; mode = GLOB; dataType = ASCII; @@ -2785,6 +2786,7 @@ Tcl_LsearchObjCmd( inlineReturn = 0; returnSubindices = 0; negatedMatch = 0; + bisect = 0; listPtr = NULL; startPtr = NULL; offset = 0; @@ -2820,6 +2822,10 @@ Tcl_LsearchObjCmd( case LSEARCH_ASCII: /* -ascii */ dataType = ASCII; break; + case LSEARCH_BISECT: /* -bisect */ + mode = SORTED; + bisect = 1; + break; case LSEARCH_DECREASING: /* -decreasing */ isIncreasing = 0; sortInfo.isIncreasing = 0; @@ -2971,7 +2977,13 @@ Tcl_LsearchObjCmd( return TCL_ERROR; } - if ((enum modes) mode == REGEXP) { + if (bisect && (allMatches || negatedMatch)) { + Tcl_AppendResult(interp, + "-bisect is not compatible with -all or -not", NULL); + return TCL_ERROR; + } + + if (mode == REGEXP) { /* * We can shimmer regexp/list if listv[i] == pattern, so get the * regexp rep before the list rep. First time round, omit the interp @@ -3057,7 +3069,7 @@ Tcl_LsearchObjCmd( patObj = objv[objc - 1]; patternBytes = NULL; - if ((enum modes) mode == EXACT || (enum modes) mode == SORTED) { + if (mode == EXACT || mode == SORTED) { switch ((enum datatypes) dataType) { case ASCII: case DICTIONARY: @@ -3108,7 +3120,7 @@ Tcl_LsearchObjCmd( index = -1; match = 0; - if ((enum modes) mode == SORTED && !allMatches && !negatedMatch) { + if (mode == SORTED && !allMatches && !negatedMatch) { /* * If the data is sorted, we can do a more intelligent search. Note * that there is no point in being smart when -all was specified; in @@ -3186,10 +3198,16 @@ Tcl_LsearchObjCmd( * variation means that a search always makes log n * comparisons (normal binary search might "get lucky" with an * early comparison). + * + * In bisect mode though, we want the last of equals. */ index = i; - upper = i; + if (bisect) { + lower = i; + } else { + upper = i; + } } else if (match > 0) { if (isIncreasing) { lower = i; @@ -3204,7 +3222,9 @@ Tcl_LsearchObjCmd( } } } - + if (bisect && index < 0) { + index = lower; + } } else { /* * We need to do a linear search, because (at least one) of: @@ -3232,8 +3252,8 @@ Tcl_LsearchObjCmd( } else { itemPtr = listv[i]; } - - switch ((enum modes) mode) { + + switch (mode) { case SORTED: case EXACT: switch ((enum datatypes) dataType) { @@ -3849,16 +3869,16 @@ Tcl_LsortObjCmd( * The unified list of SortElement structures. * * Side effects: - * If infoPtr->unique is set then infoPtr->numElements may be updated. + * If infoPtr->unique is set then infoPtr->numElements may be updated. * Possibly others, if a user-defined comparison command does something - * weird. + * weird. * * Note: - * If infoPtr->unique is set, the merge assumes that there are no + * If infoPtr->unique is set, the merge assumes that there are no * "repeated" elements in each of the left and right lists. In that case, * if any element of the left list is equivalent to one in the right list * it is omitted from the merged list. - * This simplified mechanism works because of the special way + * This simplified mechanism works because of the special way * our MergeSort creates the sublists to be merged and will fail to * eliminate all repeats in the general case where they are already * present in either the left or right list. A general code would need to diff --git a/tests/lsearch.test b/tests/lsearch.test index 93e2117..634adda 100644 --- a/tests/lsearch.test +++ b/tests/lsearch.test @@ -11,7 +11,7 @@ # See the file "license.terms" for information on usage and redistribution # of this file, and for a DISCLAIMER OF ALL WARRANTIES. # -# RCS: @(#) $Id: lsearch.test,v 1.21 2008/07/13 23:15:22 nijtmans Exp $ +# RCS: @(#) $Id: lsearch.test,v 1.22 2008/09/29 12:25:21 dkf Exp $ if {[lsearch [namespace children] ::tcltest] == -1} { package require tcltest @@ -61,7 +61,7 @@ test lsearch-2.9 {search modes} { } 1 test lsearch-2.10 {search modes} { list [catch {lsearch -glib {b.x bx xy bcx} b.x} msg] $msg -} {1 {bad option "-glib": must be -all, -ascii, -decreasing, -dictionary, -exact, -glob, -increasing, -index, -inline, -integer, -nocase, -not, -real, -regexp, -sorted, -start, or -subindices}} +} {1 {bad option "-glib": must be -all, -ascii, -bisect, -decreasing, -dictionary, -exact, -glob, -increasing, -index, -inline, -integer, -nocase, -not, -real, -regexp, -sorted, -start, or -subindices}} test lsearch-2.11 {search modes with -nocase} { lsearch -exact -nocase {a b c A B C} A } 0 @@ -89,10 +89,10 @@ test lsearch-3.2 {lsearch errors} { } {1 {wrong # args: should be "lsearch ?-option value ...? list pattern"}} test lsearch-3.3 {lsearch errors} { list [catch {lsearch a b c} msg] $msg -} {1 {bad option "a": must be -all, -ascii, -decreasing, -dictionary, -exact, -glob, -increasing, -index, -inline, -integer, -nocase, -not, -real, -regexp, -sorted, -start, or -subindices}} +} {1 {bad option "a": must be -all, -ascii, -bisect, -decreasing, -dictionary, -exact, -glob, -increasing, -index, -inline, -integer, -nocase, -not, -real, -regexp, -sorted, -start, or -subindices}} test lsearch-3.4 {lsearch errors} { list [catch {lsearch a b c d} msg] $msg -} {1 {bad option "a": must be -all, -ascii, -decreasing, -dictionary, -exact, -glob, -increasing, -index, -inline, -integer, -nocase, -not, -real, -regexp, -sorted, -start, or -subindices}} +} {1 {bad option "a": must be -all, -ascii, -bisect, -decreasing, -dictionary, -exact, -glob, -increasing, -index, -inline, -integer, -nocase, -not, -real, -regexp, -sorted, -start, or -subindices}} test lsearch-3.5 {lsearch errors} { list [catch {lsearch "\{" b} msg] $msg } {1 {unmatched open brace in list}} @@ -472,6 +472,46 @@ test lsearch-21.2 {lsearch shimmering crash} { lsearch -exact -real $x $x } 0 +test lsearch-22.1 {lsearch -bisect} -setup { + set res {} +} -body { + foreach i {0 1 5 6 7 8 15 16} { + lappend res [lsearch -bisect -integer {1 4 5 7 9 15} $i] + } + return $res +} -result {-1 0 2 2 3 3 5 5} +test lsearch-22.2 {lsearch -bisect, last of equals} -setup { + set res {} +} -body { + foreach i {0 1 2 3} { + lappend res [lsearch -bisect -integer {0 0 1 1 1 2 2 2 3 3 3} $i] + } + return $res +} -result {1 4 7 10} +test lsearch-22.3 {lsearch -bisect decreasing order} -setup { + set res {} +} -body { + foreach i {0 1 5 6 7 8 15 16} { + lappend res [lsearch -bisect -integer -decreasing {15 9 7 5 4 1} $i] + } + return $res +} -result {5 5 3 2 2 1 0 -1} +test lsearch-22.4 {lsearch -bisect, last of equals, decreasing} -setup { + set res {} +} -body { + foreach i {0 1 2 3} { + lappend res [lsearch -bisect -integer -decreasing \ + {3 3 3 2 2 2 1 1 1 0 0} $i] + } + return $res +} -result {10 8 5 2} +test lsearch-22.5 {lsearch -bisect, all equal} { + lsearch -bisect -integer {5 5 5 5} 5 +} {3} +test lsearch-22.6 {lsearch -sorted, all equal} { + lsearch -sorted -integer {5 5 5 5} 5 +} {0} + # cleanup catch {unset res} catch {unset increasingIntegers} @@ -484,3 +524,7 @@ catch {unset increasingDictionary} catch {unset decreasingDictionary} ::tcltest::cleanupTests return + +# Local Variables: +# mode: tcl +# End: -- cgit v0.12