summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorericm <ericm>2000-05-09 17:50:38 (GMT)
committerericm <ericm>2000-05-09 17:50:38 (GMT)
commita406bb38849430633c8cfb33dceac73e8a7903c4 (patch)
tree4f7c1988a264961da2e766d9c6fbc70cddb7627c
parent427c904742d9d5aec8068fce38a28be9ae65af08 (diff)
downloadtcl-a406bb38849430633c8cfb33dceac73e8a7903c4.zip
tcl-a406bb38849430633c8cfb33dceac73e8a7903c4.tar.gz
tcl-a406bb38849430633c8cfb33dceac73e8a7903c4.tar.bz2
* tests/lsearch.test:
* doc/lsearch.n: * generic/tclCmdIL.c (Tcl_LsearchObjCmd): Extended [lsearch] to support sorted list searching and typed list searching. [RFE: 4098].
-rw-r--r--doc/lsearch.n52
-rw-r--r--generic/tclCmdIL.c249
-rw-r--r--tests/lsearch.test192
3 files changed, 448 insertions, 45 deletions
diff --git a/doc/lsearch.n b/doc/lsearch.n
index 39eddac..2e2024e 100644
--- a/doc/lsearch.n
+++ b/doc/lsearch.n
@@ -5,7 +5,7 @@
'\" See the file "license.terms" for information on usage and redistribution
'\" of this file, and for a DISCLAIMER OF ALL WARRANTIES.
'\"
-'\" RCS: @(#) $Id: lsearch.n,v 1.3 1999/06/24 21:15:13 jpeek Exp $
+'\" RCS: @(#) $Id: lsearch.n,v 1.4 2000/05/09 17:50:38 ericm Exp $
'\"
.so man.macros
.TH lsearch n 7.0 Tcl "Tcl Built-In Commands"
@@ -14,33 +14,69 @@
.SH NAME
lsearch \- See if a list contains a particular element
.SH SYNOPSIS
-\fBlsearch \fR?\fImode\fR? \fIlist pattern\fR
+\fBlsearch \fR?\fIoptions\fR? \fIlist pattern\fR
.BE
.SH DESCRIPTION
.PP
This command searches the elements of \fIlist\fR to see if one
-of them matches \fIpattern\fR.
-If so, the command returns the index of the first matching
-element.
-If not, the command returns \fB\-1\fR.
-The \fImode\fR argument indicates how the elements of the list are to
+of them matches \fIpattern\fR. If so, the command returns the index
+of the first matching element. If not, the command returns \fB\-1\fR.
+The \fIoption\fR arguments indicates how the elements of the list are to
be matched against \fIpattern\fR and it must have one of the following
values:
.TP
+\fB\-ascii\fR
+The list elements are to be examined as ASCII strings. This option is only
+meaningful when used with \fB\-exact\fR or \fB\-sorted\fR.
+.TP
+\fB\-decreasing\fR
+The list elements are sorted in decreasing order. This option is only
+meaningful when used with \fB\-sorted\fR.
+.TP
+\fB\-dictionary\fR
+The list elements are to be compared using dictionary-style
+comparisons. This option is only meaningful when used with
+\fB\-exact\fR or \fB\-sorted\fR.
+.TP
\fB\-exact\fR
The list element must contain exactly the same string as \fIpattern\fR.
.TP
+\fB\-increasing\fR
+The list elements are sorted in increasing order. This option is only
+meaningful when used with \fB\-sorted\fR.
+.TP
+\fB\-integer\fR
+The list elements are to be compared as integers. This option is only
+meaningful when used with \fB\-exact\fR or \fB\-sorted\fR.
+.TP
\fB\-glob\fR
\fIPattern\fR is a glob-style pattern which is matched against each list
element using the same rules as the \fBstring match\fR command.
.TP
+\fB\-real\fR
+The list elements are to be compared as floating-point values. This
+option is only meaningful when used with \fB\-exact\fR or \fB\-sorted\fR.
+.TP
\fB\-regexp\fR
\fIPattern\fR is treated as a regular expression and matched against
each list element using the rules described in the \fBre_syntax\fR
reference page.
+.TP
+\fB\-sorted\fR
+The list elements are in sorted order. If this option is specified,
+\fBlsearch\fR will use a more efficient searching algorithm to search
+\fIlist\fR. If no other options are specified, \fIlist\fR is assumed
+to be sorted in increasing order, and to contain ASCII strings. This
+option cannot be used with \fB\-glob\fR or \fB\-regexp\fR.
.PP
-If \fImode\fR is omitted then it defaults to \fB\-glob\fR.
+If \fIoption\fR is omitted then it defaults to \fB\-glob\fR. If more
+than one of \fB\-exact\fR, \fB\-glob\fR, \fB\-regexp\fR, and
+\fB\-sorted\fR is specified, whichever option is specified last takes
+precendence. If more than one of \fB\-ascii\fR, \fB\-dictionary\fR,
+\fB\-integer\fR and \fB\-real\fR is specified, the option specified
+last takes precendence. If more than one of \fB\-increasing\fR and
+\fB\-decreasing\fR is specified, the option specified last takes precedence.
.SH KEYWORDS
list, match, pattern, regular expression, search, string
diff --git a/generic/tclCmdIL.c b/generic/tclCmdIL.c
index 03aa7e4..d9a29b6 100644
--- a/generic/tclCmdIL.c
+++ b/generic/tclCmdIL.c
@@ -14,7 +14,7 @@
* See the file "license.terms" for information on usage and redistribution
* of this file, and for a DISCLAIMER OF ALL WARRANTIES.
*
- * RCS: @(#) $Id: tclCmdIL.c,v 1.25 2000/05/08 22:21:15 hobbs Exp $
+ * RCS: @(#) $Id: tclCmdIL.c,v 1.26 2000/05/09 17:50:38 ericm Exp $
*/
#include "tclInt.h"
@@ -2399,23 +2399,73 @@ Tcl_LsearchObjCmd(clientData, interp, objc, objv)
{
char *bytes, *patternBytes;
int i, match, mode, index, result, listc, length, elemLen;
+ int dataType, isIncreasing, lower, upper, patInt, objInt;
+ double patDouble, objDouble;
Tcl_Obj *patObj, **listv;
static char *options[] = {
- "-exact", "-glob", "-regexp", NULL
+ "-ascii", "-decreasing", "-dictionary", "-exact", "-increasing",
+ "-integer", "-glob", "-real", "-regexp", "-sorted", NULL
};
enum options {
- LSEARCH_EXACT, LSEARCH_GLOB, LSEARCH_REGEXP
+ LSEARCH_ASCII, LSEARCH_DECREASING, LSEARCH_DICTIONARY, LSEARCH_EXACT,
+ LSEARCH_INCREASING, LSEARCH_INTEGER, LSEARCH_GLOB,
+ LSEARCH_REAL, LSEARCH_REGEXP, LSEARCH_SORTED
};
- mode = LSEARCH_GLOB;
- if (objc == 4) {
- if (Tcl_GetIndexFromObj(interp, objv[1], options, "search mode", 0,
- &mode) != TCL_OK) {
+ enum datatypes {
+ ASCII, DICTIONARY, INTEGER, REAL
+ };
+
+ enum modes {
+ EXACT, GLOB, REGEXP, SORTED
+ };
+
+ mode = GLOB;
+ dataType = ASCII;
+ isIncreasing = 1;
+
+ if (objc < 3) {
+ Tcl_WrongNumArgs(interp, 1, objv, "?options? list pattern");
+ return TCL_ERROR;
+ }
+
+ for (i = 1; i < objc-2; i++) {
+ if (Tcl_GetIndexFromObj(interp, objv[i], options, "option", 0, &index)
+ != TCL_OK) {
return TCL_ERROR;
}
- } else if (objc != 3) {
- Tcl_WrongNumArgs(interp, 1, objv, "?mode? list pattern");
- return TCL_ERROR;
+ switch ((enum options) index) {
+ case LSEARCH_ASCII: /* -ascii */
+ dataType = ASCII;
+ break;
+ case LSEARCH_DECREASING: /* -decreasing */
+ isIncreasing = 0;
+ break;
+ case LSEARCH_DICTIONARY: /* -dictionary */
+ dataType = DICTIONARY;
+ break;
+ case LSEARCH_EXACT: /* -increasing */
+ mode = EXACT;
+ break;
+ case LSEARCH_INCREASING: /* -increasing */
+ isIncreasing = 1;
+ break;
+ case LSEARCH_INTEGER: /* -integer */
+ dataType = INTEGER;
+ break;
+ case LSEARCH_GLOB: /* -glob */
+ mode = GLOB;
+ break;
+ case LSEARCH_REAL: /* -real */
+ dataType = REAL;
+ break;
+ case LSEARCH_REGEXP: /* -regexp */
+ mode = REGEXP;
+ break;
+ case LSEARCH_SORTED: /* -sorted */
+ mode = SORTED;
+ break;
+ }
}
/*
@@ -2429,36 +2479,173 @@ Tcl_LsearchObjCmd(clientData, interp, objc, objv)
}
patObj = objv[objc - 1];
- patternBytes = Tcl_GetStringFromObj(patObj, &length);
+ patternBytes = NULL;
+ if ((enum modes) mode == EXACT || (enum modes) mode == SORTED) {
+ switch ((enum datatypes) dataType) {
+ case ASCII:
+ case DICTIONARY:
+ patternBytes = Tcl_GetStringFromObj(patObj, &length);
+ break;
+ case INTEGER:
+ result = Tcl_GetIntFromObj(interp, patObj, &patInt);
+ if (result != TCL_OK) {
+ return result;
+ }
+ break;
+ case REAL:
+ result = Tcl_GetDoubleFromObj(interp, patObj, &patDouble);
+ if (result != TCL_OK) {
+ return result;
+ }
+ break;
+ }
+ } else {
+ patternBytes = Tcl_GetStringFromObj(patObj, &length);
+ }
+ /*
+ * Set default index value to -1, indicating failure; if we find the
+ * item in the course of our search, index will be set to the correct
+ * value.
+ */
index = -1;
- for (i = 0; i < listc; i++) {
- match = 0;
- switch ((enum options) mode) {
- case LSEARCH_EXACT: {
- bytes = Tcl_GetStringFromObj(listv[i], &elemLen);
- if (length == elemLen) {
- match = (memcmp(bytes, patternBytes,
- (size_t) length) == 0);
+ match = 0;
+ if ((enum modes) mode == SORTED) {
+ /* If the data is sorted, we can do a more intelligent search */
+ lower = -1;
+ upper = listc;
+ while (lower + 1 != upper) {
+ i = (lower + upper)/2;
+ switch ((enum datatypes) dataType) {
+ case ASCII: {
+ bytes = Tcl_GetString(listv[i]);
+ match = strcmp(patternBytes, bytes);
+ break;
+ }
+ case DICTIONARY: {
+ bytes = Tcl_GetString(listv[i]);
+ match = DictionaryCompare(patternBytes, bytes);
+ break;
+ }
+ case INTEGER: {
+ result = Tcl_GetIntFromObj(interp, listv[i], &objInt);
+ if (result != TCL_OK) {
+ return result;
+ }
+ if (patInt == objInt) {
+ match = 0;
+ } else if (patInt < objInt) {
+ match = -1;
+ } else {
+ match = 1;
+ }
+ break;
+ }
+ case REAL: {
+ result = Tcl_GetDoubleFromObj(interp, listv[i],
+ &objDouble);
+ if (result != TCL_OK) {
+ return result;
+ }
+ if (patDouble == objDouble) {
+ match = 0;
+ } else if (patDouble < objDouble) {
+ match = -1;
+ } else {
+ match = 1;
+ }
+ break;
}
- break;
}
- case LSEARCH_GLOB: {
- match = Tcl_StringMatch(Tcl_GetString(listv[i]), patternBytes);
- break;
+ if (match == 0) {
+ /*
+ * Normally, binary search is written to stop when it
+ * finds a match. If there are duplicates of an element in
+ * the list, our first match might not be the first occurance.
+ * Consider: 0 0 0 1 1 1 2 2 2
+ * To maintain consistancy with standard lsearch semantics,
+ * we must find the leftmost occurance of the pattern in the
+ * list. Thus we don't just stop searching here. This
+ * variation means that a search always makes log n
+ * comparisons (normal binary search might "get lucky" with
+ * an early comparison).
+ */
+ index = i;
+ upper = i;
+ } else if (match > 0) {
+ if (isIncreasing) {
+ lower = i;
+ } else {
+ upper = i;
+ }
+ } else {
+ if (isIncreasing) {
+ upper = i;
+ } else {
+ lower = i;
+ }
}
- case LSEARCH_REGEXP: {
- match = Tcl_RegExpMatchObj(interp, listv[i], patObj);
- if (match < 0) {
- return TCL_ERROR;
+ }
+ } else {
+ for (i = 0; i < listc; i++) {
+ match = 0;
+ switch ((enum modes) mode) {
+ case SORTED:
+ case EXACT: {
+ switch ((enum datatypes) dataType) {
+ case ASCII: {
+ bytes = Tcl_GetStringFromObj(listv[i], &elemLen);
+ if (length == elemLen) {
+ match = (memcmp(bytes, patternBytes,
+ (size_t) length) == 0);
+ }
+ break;
+ }
+ case DICTIONARY: {
+ bytes = Tcl_GetString(listv[i]);
+ match =
+ (DictionaryCompare(bytes, patternBytes) == 0);
+ break;
+ }
+ case INTEGER: {
+ result = Tcl_GetIntFromObj(interp, listv[i],
+ &objInt);
+ if (result != TCL_OK) {
+ return result;
+ }
+ match = (objInt == patInt);
+ break;
+ }
+ case REAL: {
+ result = Tcl_GetDoubleFromObj(interp, listv[i],
+ &objDouble);
+ if (result != TCL_OK) {
+ return result;
+ }
+ match = (objDouble == patDouble);
+ break;
+ }
+ }
+ break;
+ }
+ case GLOB: {
+ match = Tcl_StringMatch(Tcl_GetString(listv[i]),
+ patternBytes);
+ break;
+ }
+ case REGEXP: {
+ match = Tcl_RegExpMatchObj(interp, listv[i], patObj);
+ if (match < 0) {
+ return TCL_ERROR;
+ }
+ break;
}
+ }
+ if (match != 0) {
+ index = i;
break;
}
}
- if (match != 0) {
- index = i;
- break;
- }
}
Tcl_SetIntObj(Tcl_GetObjResult(interp), index);
return TCL_OK;
diff --git a/tests/lsearch.test b/tests/lsearch.test
index a5ba1b7..275a569 100644
--- a/tests/lsearch.test
+++ b/tests/lsearch.test
@@ -11,7 +11,7 @@
# See the file "license.terms" for information on usage and redistribution
# of this file, and for a DISCLAIMER OF ALL WARRANTIES.
#
-# RCS: @(#) $Id: lsearch.test,v 1.5 2000/04/10 17:19:01 ericm Exp $
+# RCS: @(#) $Id: lsearch.test,v 1.6 2000/05/09 17:50:39 ericm Exp $
if {[lsearch [namespace children] ::tcltest] == -1} {
package require tcltest
@@ -61,20 +61,20 @@ test lsearch-2.9 {search modes} {
} 1
test lsearch-2.10 {search modes} {
list [catch {lsearch -glib {b.x bx xy bcx} b.x} msg] $msg
-} {1 {bad search mode "-glib": must be -exact, -glob, or -regexp}}
+} {1 {bad option "-glib": must be -ascii, -decreasing, -dictionary, -exact, -increasing, -integer, -glob, -real, -regexp, or -sorted}}
test lsearch-3.1 {lsearch errors} {
list [catch lsearch msg] $msg
-} {1 {wrong # args: should be "lsearch ?mode? list pattern"}}
+} {1 {wrong # args: should be "lsearch ?options? list pattern"}}
test lsearch-3.2 {lsearch errors} {
list [catch {lsearch a} msg] $msg
-} {1 {wrong # args: should be "lsearch ?mode? list pattern"}}
+} {1 {wrong # args: should be "lsearch ?options? list pattern"}}
test lsearch-3.3 {lsearch errors} {
list [catch {lsearch a b c} msg] $msg
-} {1 {bad search mode "a": must be -exact, -glob, or -regexp}}
+} {1 {bad option "a": must be -ascii, -decreasing, -dictionary, -exact, -increasing, -integer, -glob, -real, -regexp, or -sorted}}
test lsearch-3.4 {lsearch errors} {
list [catch {lsearch a b c d} msg] $msg
-} {1 {wrong # args: should be "lsearch ?mode? list pattern"}}
+} {1 {bad option "a": must be -ascii, -decreasing, -dictionary, -exact, -increasing, -integer, -glob, -real, -regexp, or -sorted}}
test lsearch-3.5 {lsearch errors} {
list [catch {lsearch "\{" b} msg] $msg
} {1 {unmatched open brace in list}}
@@ -89,6 +89,186 @@ test lsearch-4.2 {binary data} {
lsearch -exact [list foo one\000two bar] $x
} 1
+# Make a sorted list
+set l {}
+set l2 {}
+for {set i 0} {$i < 100} {incr i} {
+ lappend l $i
+ lappend l2 [expr {double($i)/2}]
+}
+set increasingIntegers [lsort -integer $l]
+set decreasingIntegers [lsort -decreasing -integer $l]
+set increasingDoubles [lsort -real $l2]
+set decreasingDoubles [lsort -decreasing -real $l2]
+set increasingStrings [lsort {48 6a 18b 22a 21aa 35 36}]
+set decreasingStrings [lsort -decreasing {48 6a 18b 22a 21aa 35 36}]
+set increasingDictionary [lsort -dictionary {48 6a 18b 22a 21aa 35 36}]
+set decreasingDictionary [lsort -dictionary -decreasing $increasingDictionary]
+
+set l {}
+for {set i 0} {$i < 10} {incr i} {
+ lappend l $i $i $i $i $i
+}
+set repeatingIncreasingIntegers [lsort -integer $l]
+set repeatingDecreasingIntegers [lsort -integer -decreasing $l]
+
+test lsearch-5.1 {binary search} {
+ set res {}
+ for {set i 0} {$i < 100} {incr i} {
+ lappend res [lsearch -integer -sorted $increasingIntegers $i]
+ }
+ set res
+} $increasingIntegers
+test lsearch-5.2 {binary search} {
+ set res {}
+ for {set i 0} {$i < 100} {incr i} {
+ lappend res [lsearch -integer -decreasing -sorted \
+ $decreasingIntegers $i]
+ }
+ set res
+} $decreasingIntegers
+test lsearch-5.3 {binary search finds leftmost occurances} {
+ set res {}
+ for {set i 0} {$i < 10} {incr i} {
+ lappend res [lsearch -integer -sorted $repeatingIncreasingIntegers $i]
+ }
+ set res
+} [list 0 5 10 15 20 25 30 35 40 45]
+test lsearch-5.4 {binary search -decreasing finds leftmost occurances} {
+ set res {}
+ for {set i 9} {$i >= 0} {incr i -1} {
+ lappend res [lsearch -sorted -integer -decreasing \
+ $repeatingDecreasingIntegers $i]
+ }
+ set res
+} [list 0 5 10 15 20 25 30 35 40 45]
+
+test lsearch-6.1 {integer search} {
+ set res {}
+ for {set i 0} {$i < 100} {incr i} {
+ lappend res [lsearch -exact -integer $increasingIntegers $i]
+ }
+ set res
+} [lrange $increasingIntegers 0 99]
+test lsearch-6.2 {decreasing integer search} {
+ set res {}
+ for {set i 0} {$i < 100} {incr i} {
+ lappend res [lsearch -exact -integer -decreasing \
+ $decreasingIntegers $i]
+ }
+ set res
+} [lrange $decreasingIntegers 0 99]
+test lsearch-6.3 {sorted integer search} {
+ set res {}
+ for {set i 0} {$i < 100} {incr i} {
+ lappend res [lsearch -sorted -integer $increasingIntegers $i]
+ }
+ set res
+} [lrange $increasingIntegers 0 99]
+test lsearch-6.4 {sorted decreasing integer search} {
+ set res {}
+ for {set i 0} {$i < 100} {incr i} {
+ lappend res [lsearch -integer -sorted -decreasing \
+ $decreasingIntegers $i]
+ }
+ set res
+} [lrange $decreasingIntegers 0 99]
+
+test lsearch-7.1 {double search} {
+ set res {}
+ for {set i 0} {$i < 100} {incr i} {
+ lappend res [lsearch -exact -real $increasingDoubles \
+ [expr {double($i)/2}]]
+ }
+ set res
+} [lrange $increasingIntegers 0 99]
+test lsearch-7.2 {decreasing double search} {
+ set res {}
+ for {set i 0} {$i < 100} {incr i} {
+ lappend res [lsearch -exact -real -decreasing \
+ $decreasingDoubles [expr {double($i)/2}]]
+ }
+ set res
+} [lrange $decreasingIntegers 0 99]
+test lsearch-7.3 {sorted double search} {
+ set res {}
+ for {set i 0} {$i < 100} {incr i} {
+ lappend res [lsearch -sorted -real \
+ $increasingDoubles [expr {double($i)/2}]]
+ }
+ set res
+} [lrange $increasingIntegers 0 99]
+test lsearch-7.4 {sorted decreasing double search} {
+ set res {}
+ for {set i 0} {$i < 100} {incr i} {
+ lappend res [lsearch -sorted -real -decreasing \
+ $decreasingDoubles [expr {double($i)/2}]]
+ }
+ set res
+} [lrange $decreasingIntegers 0 99]
+
+test lsearch-8.1 {dictionary search} {
+ set res {}
+ foreach val {6a 18b 21aa 22a 35 36 48} {
+ lappend res [lsearch -exact -dictionary $increasingDictionary $val]
+ }
+ set res
+} [list 0 1 2 3 4 5 6]
+test lsearch-8.2 {decreasing dictionary search} {
+ set res {}
+ foreach val {6a 18b 21aa 22a 35 36 48} {
+ lappend res [lsearch -exact -dictionary $decreasingDictionary $val]
+ }
+ set res
+} [list 6 5 4 3 2 1 0]
+test lsearch-8.3 {sorted dictionary search} {
+ set res {}
+ foreach val {6a 18b 21aa 22a 35 36 48} {
+ lappend res [lsearch -sorted -dictionary $increasingDictionary $val]
+ }
+ set res
+} [list 0 1 2 3 4 5 6]
+test lsearch-8.4 {decreasing sorted dictionary search} {
+ set res {}
+ foreach val {6a 18b 21aa 22a 35 36 48} {
+ lappend res [lsearch -decreasing -sorted -dictionary \
+ $decreasingDictionary $val]
+ }
+ set res
+} [list 6 5 4 3 2 1 0]
+
+test lsearch-9.1 {ascii search} {
+ set res {}
+ foreach val {18b 21aa 22a 35 36 48 6a} {
+ lappend res [lsearch -exact -ascii $increasingStrings $val]
+ }
+ set res
+} [list 0 1 2 3 4 5 6]
+test lsearch-9.2 {decreasing ascii search} {
+ set res {}
+ foreach val {18b 21aa 22a 35 36 48 6a} {
+ lappend res [lsearch -exact -ascii $decreasingStrings $val]
+ }
+ set res
+} [list 6 5 4 3 2 1 0]
+test lsearch-9.3 {sorted ascii search} {
+ set res {}
+ foreach val {18b 21aa 22a 35 36 48 6a} {
+ lappend res [lsearch -sorted -ascii $increasingStrings $val]
+ }
+ set res
+} [list 0 1 2 3 4 5 6]
+test lsearch-9.4 {decreasing sorted ascii search} {
+ set res {}
+ foreach val {18b 21aa 22a 35 36 48 6a} {
+ lappend res [lsearch -decreasing -sorted -ascii \
+ $decreasingStrings $val]
+ }
+ set res
+} [list 6 5 4 3 2 1 0]
+
+
+
# cleanup
::tcltest::cleanupTests
return