summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authordkf <donal.k.fellows@manchester.ac.uk>2008-09-29 12:25:18 (GMT)
committerdkf <donal.k.fellows@manchester.ac.uk>2008-09-29 12:25:18 (GMT)
commitda82965bf2c1b65b3e1bb8e1f82e944317f0a047 (patch)
tree12a60c58fdc32b0745cb3420d12a25e183777e35
parent883a79a9d4ece9f76dbbcd5a9b7e8c6e5bd43b57 (diff)
downloadtcl-da82965bf2c1b65b3e1bb8e1f82e944317f0a047.zip
tcl-da82965bf2c1b65b3e1bb8e1f82e944317f0a047.tar.gz
tcl-da82965bf2c1b65b3e1bb8e1f82e944317f0a047.tar.bz2
TIP #313 IMPLEMENTATION
-rw-r--r--ChangeLog7
-rw-r--r--doc/lsearch.n16
-rw-r--r--generic/tclCmdIL.c58
-rw-r--r--tests/lsearch.test52
4 files changed, 108 insertions, 25 deletions
diff --git a/ChangeLog b/ChangeLog
index e3c731c..2eed027 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,12 @@
2008-09-29 Donal K. Fellows <dkf@users.sf.net>
+ TIP #313 IMPLEMENTATION
+
+ * generic/tclCmdIL.c (Tcl_LsearchObjCmd): Added -bisect option to
+ * doc/lsearch.n, tests/lsearch.test: allow the finding of the
+ place to insert an element in a sorted list when that element is
+ not already there. [Patch 1894241]
+
TIP #318 IMPLEMENTATION
* generic/tclCmdMZ.c (StringTrimCmd,StringTrimLCmd,StringTrimRCmd):
diff --git a/doc/lsearch.n b/doc/lsearch.n
index b5f950d..cd4363d 100644
--- a/doc/lsearch.n
+++ b/doc/lsearch.n
@@ -7,10 +7,10 @@
'\" See the file "license.terms" for information on usage and redistribution
'\" of this file, and for a DISCLAIMER OF ALL WARRANTIES.
'\"
-'\" RCS: @(#) $Id: lsearch.n,v 1.35 2008/06/29 22:28:24 dkf Exp $
+'\" RCS: @(#) $Id: lsearch.n,v 1.36 2008/09/29 12:25:22 dkf Exp $
'\"
.so man.macros
-.TH lsearch n 8.5 Tcl "Tcl Built-In Commands"
+.TH lsearch n 8.6 Tcl "Tcl Built-In Commands"
.BS
'\" Note: do not modify the .SH NAME line immediately below!
.SH NAME
@@ -134,6 +134,17 @@ meaningful when used with \fB\-sorted\fR.
.
The list elements are sorted in increasing order. This option is only
meaningful when used with \fB\-sorted\fR.
+.TP
+\fB\-bisect\fR
+.VS 8.6
+Inexact search when the list elements are in sorted order. For an increasing
+list the last index where the element is less than or equal to the pattern
+is returned. For a decreasing list the last index where the element is greater
+than or equal to the pattern is returned. If the pattern is before the first
+element or the list is empty, -1 is returned.
+This option implies \fB\-sorted\fR and cannot be used with either \fB\-all\fR
+or \fB\-not\fR.
+.VE 8.6
.SS "NESTED LIST OPTIONS"
.PP
These options are used to search lists of lists. They may be used
@@ -199,6 +210,7 @@ foreach(n), list(n), lappend(n), lindex(n), linsert(n), llength(n),
lset(n), lsort(n), lrange(n), lreplace(n),
string(n)
.SH KEYWORDS
+binary search, linear search,
list, match, pattern, regular expression, search, string
'\" Local Variables:
'\" mode: nroff
diff --git a/generic/tclCmdIL.c b/generic/tclCmdIL.c
index af3c3bb..a66eb0a 100644
--- a/generic/tclCmdIL.c
+++ b/generic/tclCmdIL.c
@@ -16,7 +16,7 @@
* See the file "license.terms" for information on usage and redistribution of
* this file, and for a DISCLAIMER OF ALL WARRANTIES.
*
- * RCS: @(#) $Id: tclCmdIL.c,v 1.155 2008/09/27 19:34:59 dkf Exp $
+ * RCS: @(#) $Id: tclCmdIL.c,v 1.156 2008/09/29 12:25:20 dkf Exp $
*/
#include "tclInt.h"
@@ -2749,7 +2749,7 @@ Tcl_LsearchObjCmd(
Tcl_Obj *const objv[]) /* Argument values. */
{
char *bytes, *patternBytes;
- int i, match, mode, index, result, listc, length, elemLen;
+ int i, match, index, result, listc, length, elemLen, bisect;
int dataType, isIncreasing, lower, upper, patInt, objInt, offset;
int allMatches, inlineReturn, negatedMatch, returnSubindices, noCase;
double patDouble, objDouble;
@@ -2758,18 +2758,18 @@ Tcl_LsearchObjCmd(
SortStrCmpFn_t strCmpFn = strcmp;
Tcl_RegExp regexp = NULL;
static const char *options[] = {
- "-all", "-ascii", "-decreasing", "-dictionary",
+ "-all", "-ascii", "-bisect", "-decreasing", "-dictionary",
"-exact", "-glob", "-increasing", "-index",
"-inline", "-integer", "-nocase", "-not",
"-real", "-regexp", "-sorted", "-start",
"-subindices", NULL
};
enum options {
- LSEARCH_ALL, LSEARCH_ASCII, LSEARCH_DECREASING, LSEARCH_DICTIONARY,
- LSEARCH_EXACT, LSEARCH_GLOB, LSEARCH_INCREASING, LSEARCH_INDEX,
- LSEARCH_INLINE, LSEARCH_INTEGER, LSEARCH_NOCASE, LSEARCH_NOT,
- LSEARCH_REAL, LSEARCH_REGEXP, LSEARCH_SORTED, LSEARCH_START,
- LSEARCH_SUBINDICES
+ LSEARCH_ALL, LSEARCH_ASCII, LSEARCH_BISECT, LSEARCH_DECREASING,
+ LSEARCH_DICTIONARY, LSEARCH_EXACT, LSEARCH_GLOB, LSEARCH_INCREASING,
+ LSEARCH_INDEX, LSEARCH_INLINE, LSEARCH_INTEGER, LSEARCH_NOCASE,
+ LSEARCH_NOT, LSEARCH_REAL, LSEARCH_REGEXP, LSEARCH_SORTED,
+ LSEARCH_START, LSEARCH_SUBINDICES
};
enum datatypes {
ASCII, DICTIONARY, INTEGER, REAL
@@ -2777,6 +2777,7 @@ Tcl_LsearchObjCmd(
enum modes {
EXACT, GLOB, REGEXP, SORTED
};
+ enum modes mode;
mode = GLOB;
dataType = ASCII;
@@ -2785,6 +2786,7 @@ Tcl_LsearchObjCmd(
inlineReturn = 0;
returnSubindices = 0;
negatedMatch = 0;
+ bisect = 0;
listPtr = NULL;
startPtr = NULL;
offset = 0;
@@ -2820,6 +2822,10 @@ Tcl_LsearchObjCmd(
case LSEARCH_ASCII: /* -ascii */
dataType = ASCII;
break;
+ case LSEARCH_BISECT: /* -bisect */
+ mode = SORTED;
+ bisect = 1;
+ break;
case LSEARCH_DECREASING: /* -decreasing */
isIncreasing = 0;
sortInfo.isIncreasing = 0;
@@ -2971,7 +2977,13 @@ Tcl_LsearchObjCmd(
return TCL_ERROR;
}
- if ((enum modes) mode == REGEXP) {
+ if (bisect && (allMatches || negatedMatch)) {
+ Tcl_AppendResult(interp,
+ "-bisect is not compatible with -all or -not", NULL);
+ return TCL_ERROR;
+ }
+
+ if (mode == REGEXP) {
/*
* We can shimmer regexp/list if listv[i] == pattern, so get the
* regexp rep before the list rep. First time round, omit the interp
@@ -3057,7 +3069,7 @@ Tcl_LsearchObjCmd(
patObj = objv[objc - 1];
patternBytes = NULL;
- if ((enum modes) mode == EXACT || (enum modes) mode == SORTED) {
+ if (mode == EXACT || mode == SORTED) {
switch ((enum datatypes) dataType) {
case ASCII:
case DICTIONARY:
@@ -3108,7 +3120,7 @@ Tcl_LsearchObjCmd(
index = -1;
match = 0;
- if ((enum modes) mode == SORTED && !allMatches && !negatedMatch) {
+ if (mode == SORTED && !allMatches && !negatedMatch) {
/*
* If the data is sorted, we can do a more intelligent search. Note
* that there is no point in being smart when -all was specified; in
@@ -3186,10 +3198,16 @@ Tcl_LsearchObjCmd(
* variation means that a search always makes log n
* comparisons (normal binary search might "get lucky" with an
* early comparison).
+ *
+ * In bisect mode though, we want the last of equals.
*/
index = i;
- upper = i;
+ if (bisect) {
+ lower = i;
+ } else {
+ upper = i;
+ }
} else if (match > 0) {
if (isIncreasing) {
lower = i;
@@ -3204,7 +3222,9 @@ Tcl_LsearchObjCmd(
}
}
}
-
+ if (bisect && index < 0) {
+ index = lower;
+ }
} else {
/*
* We need to do a linear search, because (at least one) of:
@@ -3232,8 +3252,8 @@ Tcl_LsearchObjCmd(
} else {
itemPtr = listv[i];
}
-
- switch ((enum modes) mode) {
+
+ switch (mode) {
case SORTED:
case EXACT:
switch ((enum datatypes) dataType) {
@@ -3849,16 +3869,16 @@ Tcl_LsortObjCmd(
* The unified list of SortElement structures.
*
* Side effects:
- * If infoPtr->unique is set then infoPtr->numElements may be updated.
+ * If infoPtr->unique is set then infoPtr->numElements may be updated.
* Possibly others, if a user-defined comparison command does something
- * weird.
+ * weird.
*
* Note:
- * If infoPtr->unique is set, the merge assumes that there are no
+ * If infoPtr->unique is set, the merge assumes that there are no
* "repeated" elements in each of the left and right lists. In that case,
* if any element of the left list is equivalent to one in the right list
* it is omitted from the merged list.
- * This simplified mechanism works because of the special way
+ * This simplified mechanism works because of the special way
* our MergeSort creates the sublists to be merged and will fail to
* eliminate all repeats in the general case where they are already
* present in either the left or right list. A general code would need to
diff --git a/tests/lsearch.test b/tests/lsearch.test
index 93e2117..634adda 100644
--- a/tests/lsearch.test
+++ b/tests/lsearch.test
@@ -11,7 +11,7 @@
# See the file "license.terms" for information on usage and redistribution
# of this file, and for a DISCLAIMER OF ALL WARRANTIES.
#
-# RCS: @(#) $Id: lsearch.test,v 1.21 2008/07/13 23:15:22 nijtmans Exp $
+# RCS: @(#) $Id: lsearch.test,v 1.22 2008/09/29 12:25:21 dkf Exp $
if {[lsearch [namespace children] ::tcltest] == -1} {
package require tcltest
@@ -61,7 +61,7 @@ test lsearch-2.9 {search modes} {
} 1
test lsearch-2.10 {search modes} {
list [catch {lsearch -glib {b.x bx xy bcx} b.x} msg] $msg
-} {1 {bad option "-glib": must be -all, -ascii, -decreasing, -dictionary, -exact, -glob, -increasing, -index, -inline, -integer, -nocase, -not, -real, -regexp, -sorted, -start, or -subindices}}
+} {1 {bad option "-glib": must be -all, -ascii, -bisect, -decreasing, -dictionary, -exact, -glob, -increasing, -index, -inline, -integer, -nocase, -not, -real, -regexp, -sorted, -start, or -subindices}}
test lsearch-2.11 {search modes with -nocase} {
lsearch -exact -nocase {a b c A B C} A
} 0
@@ -89,10 +89,10 @@ test lsearch-3.2 {lsearch errors} {
} {1 {wrong # args: should be "lsearch ?-option value ...? list pattern"}}
test lsearch-3.3 {lsearch errors} {
list [catch {lsearch a b c} msg] $msg
-} {1 {bad option "a": must be -all, -ascii, -decreasing, -dictionary, -exact, -glob, -increasing, -index, -inline, -integer, -nocase, -not, -real, -regexp, -sorted, -start, or -subindices}}
+} {1 {bad option "a": must be -all, -ascii, -bisect, -decreasing, -dictionary, -exact, -glob, -increasing, -index, -inline, -integer, -nocase, -not, -real, -regexp, -sorted, -start, or -subindices}}
test lsearch-3.4 {lsearch errors} {
list [catch {lsearch a b c d} msg] $msg
-} {1 {bad option "a": must be -all, -ascii, -decreasing, -dictionary, -exact, -glob, -increasing, -index, -inline, -integer, -nocase, -not, -real, -regexp, -sorted, -start, or -subindices}}
+} {1 {bad option "a": must be -all, -ascii, -bisect, -decreasing, -dictionary, -exact, -glob, -increasing, -index, -inline, -integer, -nocase, -not, -real, -regexp, -sorted, -start, or -subindices}}
test lsearch-3.5 {lsearch errors} {
list [catch {lsearch "\{" b} msg] $msg
} {1 {unmatched open brace in list}}
@@ -472,6 +472,46 @@ test lsearch-21.2 {lsearch shimmering crash} {
lsearch -exact -real $x $x
} 0
+test lsearch-22.1 {lsearch -bisect} -setup {
+ set res {}
+} -body {
+ foreach i {0 1 5 6 7 8 15 16} {
+ lappend res [lsearch -bisect -integer {1 4 5 7 9 15} $i]
+ }
+ return $res
+} -result {-1 0 2 2 3 3 5 5}
+test lsearch-22.2 {lsearch -bisect, last of equals} -setup {
+ set res {}
+} -body {
+ foreach i {0 1 2 3} {
+ lappend res [lsearch -bisect -integer {0 0 1 1 1 2 2 2 3 3 3} $i]
+ }
+ return $res
+} -result {1 4 7 10}
+test lsearch-22.3 {lsearch -bisect decreasing order} -setup {
+ set res {}
+} -body {
+ foreach i {0 1 5 6 7 8 15 16} {
+ lappend res [lsearch -bisect -integer -decreasing {15 9 7 5 4 1} $i]
+ }
+ return $res
+} -result {5 5 3 2 2 1 0 -1}
+test lsearch-22.4 {lsearch -bisect, last of equals, decreasing} -setup {
+ set res {}
+} -body {
+ foreach i {0 1 2 3} {
+ lappend res [lsearch -bisect -integer -decreasing \
+ {3 3 3 2 2 2 1 1 1 0 0} $i]
+ }
+ return $res
+} -result {10 8 5 2}
+test lsearch-22.5 {lsearch -bisect, all equal} {
+ lsearch -bisect -integer {5 5 5 5} 5
+} {3}
+test lsearch-22.6 {lsearch -sorted, all equal} {
+ lsearch -sorted -integer {5 5 5 5} 5
+} {0}
+
# cleanup
catch {unset res}
catch {unset increasingIntegers}
@@ -484,3 +524,7 @@ catch {unset increasingDictionary}
catch {unset decreasingDictionary}
::tcltest::cleanupTests
return
+
+# Local Variables:
+# mode: tcl
+# End: