From a406bb38849430633c8cfb33dceac73e8a7903c4 Mon Sep 17 00:00:00 2001 From: ericm Date: Tue, 9 May 2000 17:50:38 +0000 Subject: * tests/lsearch.test: * doc/lsearch.n: * generic/tclCmdIL.c (Tcl_LsearchObjCmd): Extended [lsearch] to support sorted list searching and typed list searching. [RFE: 4098]. --- doc/lsearch.n | 52 +++++++++-- generic/tclCmdIL.c | 249 ++++++++++++++++++++++++++++++++++++++++++++++------- tests/lsearch.test | 192 +++++++++++++++++++++++++++++++++++++++-- 3 files changed, 448 insertions(+), 45 deletions(-) diff --git a/doc/lsearch.n b/doc/lsearch.n index 39eddac..2e2024e 100644 --- a/doc/lsearch.n +++ b/doc/lsearch.n @@ -5,7 +5,7 @@ '\" See the file "license.terms" for information on usage and redistribution '\" of this file, and for a DISCLAIMER OF ALL WARRANTIES. '\" -'\" RCS: @(#) $Id: lsearch.n,v 1.3 1999/06/24 21:15:13 jpeek Exp $ +'\" RCS: @(#) $Id: lsearch.n,v 1.4 2000/05/09 17:50:38 ericm Exp $ '\" .so man.macros .TH lsearch n 7.0 Tcl "Tcl Built-In Commands" @@ -14,33 +14,69 @@ .SH NAME lsearch \- See if a list contains a particular element .SH SYNOPSIS -\fBlsearch \fR?\fImode\fR? \fIlist pattern\fR +\fBlsearch \fR?\fIoptions\fR? \fIlist pattern\fR .BE .SH DESCRIPTION .PP This command searches the elements of \fIlist\fR to see if one -of them matches \fIpattern\fR. -If so, the command returns the index of the first matching -element. -If not, the command returns \fB\-1\fR. -The \fImode\fR argument indicates how the elements of the list are to +of them matches \fIpattern\fR. If so, the command returns the index +of the first matching element. If not, the command returns \fB\-1\fR. +The \fIoption\fR arguments indicates how the elements of the list are to be matched against \fIpattern\fR and it must have one of the following values: .TP +\fB\-ascii\fR +The list elements are to be examined as ASCII strings. This option is only +meaningful when used with \fB\-exact\fR or \fB\-sorted\fR. +.TP +\fB\-decreasing\fR +The list elements are sorted in decreasing order. This option is only +meaningful when used with \fB\-sorted\fR. +.TP +\fB\-dictionary\fR +The list elements are to be compared using dictionary-style +comparisons. This option is only meaningful when used with +\fB\-exact\fR or \fB\-sorted\fR. +.TP \fB\-exact\fR The list element must contain exactly the same string as \fIpattern\fR. .TP +\fB\-increasing\fR +The list elements are sorted in increasing order. This option is only +meaningful when used with \fB\-sorted\fR. +.TP +\fB\-integer\fR +The list elements are to be compared as integers. This option is only +meaningful when used with \fB\-exact\fR or \fB\-sorted\fR. +.TP \fB\-glob\fR \fIPattern\fR is a glob-style pattern which is matched against each list element using the same rules as the \fBstring match\fR command. .TP +\fB\-real\fR +The list elements are to be compared as floating-point values. This +option is only meaningful when used with \fB\-exact\fR or \fB\-sorted\fR. +.TP \fB\-regexp\fR \fIPattern\fR is treated as a regular expression and matched against each list element using the rules described in the \fBre_syntax\fR reference page. +.TP +\fB\-sorted\fR +The list elements are in sorted order. If this option is specified, +\fBlsearch\fR will use a more efficient searching algorithm to search +\fIlist\fR. If no other options are specified, \fIlist\fR is assumed +to be sorted in increasing order, and to contain ASCII strings. This +option cannot be used with \fB\-glob\fR or \fB\-regexp\fR. .PP -If \fImode\fR is omitted then it defaults to \fB\-glob\fR. +If \fIoption\fR is omitted then it defaults to \fB\-glob\fR. If more +than one of \fB\-exact\fR, \fB\-glob\fR, \fB\-regexp\fR, and +\fB\-sorted\fR is specified, whichever option is specified last takes +precendence. If more than one of \fB\-ascii\fR, \fB\-dictionary\fR, +\fB\-integer\fR and \fB\-real\fR is specified, the option specified +last takes precendence. If more than one of \fB\-increasing\fR and +\fB\-decreasing\fR is specified, the option specified last takes precedence. .SH KEYWORDS list, match, pattern, regular expression, search, string diff --git a/generic/tclCmdIL.c b/generic/tclCmdIL.c index 03aa7e4..d9a29b6 100644 --- a/generic/tclCmdIL.c +++ b/generic/tclCmdIL.c @@ -14,7 +14,7 @@ * See the file "license.terms" for information on usage and redistribution * of this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclCmdIL.c,v 1.25 2000/05/08 22:21:15 hobbs Exp $ + * RCS: @(#) $Id: tclCmdIL.c,v 1.26 2000/05/09 17:50:38 ericm Exp $ */ #include "tclInt.h" @@ -2399,23 +2399,73 @@ Tcl_LsearchObjCmd(clientData, interp, objc, objv) { char *bytes, *patternBytes; int i, match, mode, index, result, listc, length, elemLen; + int dataType, isIncreasing, lower, upper, patInt, objInt; + double patDouble, objDouble; Tcl_Obj *patObj, **listv; static char *options[] = { - "-exact", "-glob", "-regexp", NULL + "-ascii", "-decreasing", "-dictionary", "-exact", "-increasing", + "-integer", "-glob", "-real", "-regexp", "-sorted", NULL }; enum options { - LSEARCH_EXACT, LSEARCH_GLOB, LSEARCH_REGEXP + LSEARCH_ASCII, LSEARCH_DECREASING, LSEARCH_DICTIONARY, LSEARCH_EXACT, + LSEARCH_INCREASING, LSEARCH_INTEGER, LSEARCH_GLOB, + LSEARCH_REAL, LSEARCH_REGEXP, LSEARCH_SORTED }; - mode = LSEARCH_GLOB; - if (objc == 4) { - if (Tcl_GetIndexFromObj(interp, objv[1], options, "search mode", 0, - &mode) != TCL_OK) { + enum datatypes { + ASCII, DICTIONARY, INTEGER, REAL + }; + + enum modes { + EXACT, GLOB, REGEXP, SORTED + }; + + mode = GLOB; + dataType = ASCII; + isIncreasing = 1; + + if (objc < 3) { + Tcl_WrongNumArgs(interp, 1, objv, "?options? list pattern"); + return TCL_ERROR; + } + + for (i = 1; i < objc-2; i++) { + if (Tcl_GetIndexFromObj(interp, objv[i], options, "option", 0, &index) + != TCL_OK) { return TCL_ERROR; } - } else if (objc != 3) { - Tcl_WrongNumArgs(interp, 1, objv, "?mode? list pattern"); - return TCL_ERROR; + switch ((enum options) index) { + case LSEARCH_ASCII: /* -ascii */ + dataType = ASCII; + break; + case LSEARCH_DECREASING: /* -decreasing */ + isIncreasing = 0; + break; + case LSEARCH_DICTIONARY: /* -dictionary */ + dataType = DICTIONARY; + break; + case LSEARCH_EXACT: /* -increasing */ + mode = EXACT; + break; + case LSEARCH_INCREASING: /* -increasing */ + isIncreasing = 1; + break; + case LSEARCH_INTEGER: /* -integer */ + dataType = INTEGER; + break; + case LSEARCH_GLOB: /* -glob */ + mode = GLOB; + break; + case LSEARCH_REAL: /* -real */ + dataType = REAL; + break; + case LSEARCH_REGEXP: /* -regexp */ + mode = REGEXP; + break; + case LSEARCH_SORTED: /* -sorted */ + mode = SORTED; + break; + } } /* @@ -2429,36 +2479,173 @@ Tcl_LsearchObjCmd(clientData, interp, objc, objv) } patObj = objv[objc - 1]; - patternBytes = Tcl_GetStringFromObj(patObj, &length); + patternBytes = NULL; + if ((enum modes) mode == EXACT || (enum modes) mode == SORTED) { + switch ((enum datatypes) dataType) { + case ASCII: + case DICTIONARY: + patternBytes = Tcl_GetStringFromObj(patObj, &length); + break; + case INTEGER: + result = Tcl_GetIntFromObj(interp, patObj, &patInt); + if (result != TCL_OK) { + return result; + } + break; + case REAL: + result = Tcl_GetDoubleFromObj(interp, patObj, &patDouble); + if (result != TCL_OK) { + return result; + } + break; + } + } else { + patternBytes = Tcl_GetStringFromObj(patObj, &length); + } + /* + * Set default index value to -1, indicating failure; if we find the + * item in the course of our search, index will be set to the correct + * value. + */ index = -1; - for (i = 0; i < listc; i++) { - match = 0; - switch ((enum options) mode) { - case LSEARCH_EXACT: { - bytes = Tcl_GetStringFromObj(listv[i], &elemLen); - if (length == elemLen) { - match = (memcmp(bytes, patternBytes, - (size_t) length) == 0); + match = 0; + if ((enum modes) mode == SORTED) { + /* If the data is sorted, we can do a more intelligent search */ + lower = -1; + upper = listc; + while (lower + 1 != upper) { + i = (lower + upper)/2; + switch ((enum datatypes) dataType) { + case ASCII: { + bytes = Tcl_GetString(listv[i]); + match = strcmp(patternBytes, bytes); + break; + } + case DICTIONARY: { + bytes = Tcl_GetString(listv[i]); + match = DictionaryCompare(patternBytes, bytes); + break; + } + case INTEGER: { + result = Tcl_GetIntFromObj(interp, listv[i], &objInt); + if (result != TCL_OK) { + return result; + } + if (patInt == objInt) { + match = 0; + } else if (patInt < objInt) { + match = -1; + } else { + match = 1; + } + break; + } + case REAL: { + result = Tcl_GetDoubleFromObj(interp, listv[i], + &objDouble); + if (result != TCL_OK) { + return result; + } + if (patDouble == objDouble) { + match = 0; + } else if (patDouble < objDouble) { + match = -1; + } else { + match = 1; + } + break; } - break; } - case LSEARCH_GLOB: { - match = Tcl_StringMatch(Tcl_GetString(listv[i]), patternBytes); - break; + if (match == 0) { + /* + * Normally, binary search is written to stop when it + * finds a match. If there are duplicates of an element in + * the list, our first match might not be the first occurance. + * Consider: 0 0 0 1 1 1 2 2 2 + * To maintain consistancy with standard lsearch semantics, + * we must find the leftmost occurance of the pattern in the + * list. Thus we don't just stop searching here. This + * variation means that a search always makes log n + * comparisons (normal binary search might "get lucky" with + * an early comparison). + */ + index = i; + upper = i; + } else if (match > 0) { + if (isIncreasing) { + lower = i; + } else { + upper = i; + } + } else { + if (isIncreasing) { + upper = i; + } else { + lower = i; + } } - case LSEARCH_REGEXP: { - match = Tcl_RegExpMatchObj(interp, listv[i], patObj); - if (match < 0) { - return TCL_ERROR; + } + } else { + for (i = 0; i < listc; i++) { + match = 0; + switch ((enum modes) mode) { + case SORTED: + case EXACT: { + switch ((enum datatypes) dataType) { + case ASCII: { + bytes = Tcl_GetStringFromObj(listv[i], &elemLen); + if (length == elemLen) { + match = (memcmp(bytes, patternBytes, + (size_t) length) == 0); + } + break; + } + case DICTIONARY: { + bytes = Tcl_GetString(listv[i]); + match = + (DictionaryCompare(bytes, patternBytes) == 0); + break; + } + case INTEGER: { + result = Tcl_GetIntFromObj(interp, listv[i], + &objInt); + if (result != TCL_OK) { + return result; + } + match = (objInt == patInt); + break; + } + case REAL: { + result = Tcl_GetDoubleFromObj(interp, listv[i], + &objDouble); + if (result != TCL_OK) { + return result; + } + match = (objDouble == patDouble); + break; + } + } + break; + } + case GLOB: { + match = Tcl_StringMatch(Tcl_GetString(listv[i]), + patternBytes); + break; + } + case REGEXP: { + match = Tcl_RegExpMatchObj(interp, listv[i], patObj); + if (match < 0) { + return TCL_ERROR; + } + break; } + } + if (match != 0) { + index = i; break; } } - if (match != 0) { - index = i; - break; - } } Tcl_SetIntObj(Tcl_GetObjResult(interp), index); return TCL_OK; diff --git a/tests/lsearch.test b/tests/lsearch.test index a5ba1b7..275a569 100644 --- a/tests/lsearch.test +++ b/tests/lsearch.test @@ -11,7 +11,7 @@ # See the file "license.terms" for information on usage and redistribution # of this file, and for a DISCLAIMER OF ALL WARRANTIES. # -# RCS: @(#) $Id: lsearch.test,v 1.5 2000/04/10 17:19:01 ericm Exp $ +# RCS: @(#) $Id: lsearch.test,v 1.6 2000/05/09 17:50:39 ericm Exp $ if {[lsearch [namespace children] ::tcltest] == -1} { package require tcltest @@ -61,20 +61,20 @@ test lsearch-2.9 {search modes} { } 1 test lsearch-2.10 {search modes} { list [catch {lsearch -glib {b.x bx xy bcx} b.x} msg] $msg -} {1 {bad search mode "-glib": must be -exact, -glob, or -regexp}} +} {1 {bad option "-glib": must be -ascii, -decreasing, -dictionary, -exact, -increasing, -integer, -glob, -real, -regexp, or -sorted}} test lsearch-3.1 {lsearch errors} { list [catch lsearch msg] $msg -} {1 {wrong # args: should be "lsearch ?mode? list pattern"}} +} {1 {wrong # args: should be "lsearch ?options? list pattern"}} test lsearch-3.2 {lsearch errors} { list [catch {lsearch a} msg] $msg -} {1 {wrong # args: should be "lsearch ?mode? list pattern"}} +} {1 {wrong # args: should be "lsearch ?options? list pattern"}} test lsearch-3.3 {lsearch errors} { list [catch {lsearch a b c} msg] $msg -} {1 {bad search mode "a": must be -exact, -glob, or -regexp}} +} {1 {bad option "a": must be -ascii, -decreasing, -dictionary, -exact, -increasing, -integer, -glob, -real, -regexp, or -sorted}} test lsearch-3.4 {lsearch errors} { list [catch {lsearch a b c d} msg] $msg -} {1 {wrong # args: should be "lsearch ?mode? list pattern"}} +} {1 {bad option "a": must be -ascii, -decreasing, -dictionary, -exact, -increasing, -integer, -glob, -real, -regexp, or -sorted}} test lsearch-3.5 {lsearch errors} { list [catch {lsearch "\{" b} msg] $msg } {1 {unmatched open brace in list}} @@ -89,6 +89,186 @@ test lsearch-4.2 {binary data} { lsearch -exact [list foo one\000two bar] $x } 1 +# Make a sorted list +set l {} +set l2 {} +for {set i 0} {$i < 100} {incr i} { + lappend l $i + lappend l2 [expr {double($i)/2}] +} +set increasingIntegers [lsort -integer $l] +set decreasingIntegers [lsort -decreasing -integer $l] +set increasingDoubles [lsort -real $l2] +set decreasingDoubles [lsort -decreasing -real $l2] +set increasingStrings [lsort {48 6a 18b 22a 21aa 35 36}] +set decreasingStrings [lsort -decreasing {48 6a 18b 22a 21aa 35 36}] +set increasingDictionary [lsort -dictionary {48 6a 18b 22a 21aa 35 36}] +set decreasingDictionary [lsort -dictionary -decreasing $increasingDictionary] + +set l {} +for {set i 0} {$i < 10} {incr i} { + lappend l $i $i $i $i $i +} +set repeatingIncreasingIntegers [lsort -integer $l] +set repeatingDecreasingIntegers [lsort -integer -decreasing $l] + +test lsearch-5.1 {binary search} { + set res {} + for {set i 0} {$i < 100} {incr i} { + lappend res [lsearch -integer -sorted $increasingIntegers $i] + } + set res +} $increasingIntegers +test lsearch-5.2 {binary search} { + set res {} + for {set i 0} {$i < 100} {incr i} { + lappend res [lsearch -integer -decreasing -sorted \ + $decreasingIntegers $i] + } + set res +} $decreasingIntegers +test lsearch-5.3 {binary search finds leftmost occurances} { + set res {} + for {set i 0} {$i < 10} {incr i} { + lappend res [lsearch -integer -sorted $repeatingIncreasingIntegers $i] + } + set res +} [list 0 5 10 15 20 25 30 35 40 45] +test lsearch-5.4 {binary search -decreasing finds leftmost occurances} { + set res {} + for {set i 9} {$i >= 0} {incr i -1} { + lappend res [lsearch -sorted -integer -decreasing \ + $repeatingDecreasingIntegers $i] + } + set res +} [list 0 5 10 15 20 25 30 35 40 45] + +test lsearch-6.1 {integer search} { + set res {} + for {set i 0} {$i < 100} {incr i} { + lappend res [lsearch -exact -integer $increasingIntegers $i] + } + set res +} [lrange $increasingIntegers 0 99] +test lsearch-6.2 {decreasing integer search} { + set res {} + for {set i 0} {$i < 100} {incr i} { + lappend res [lsearch -exact -integer -decreasing \ + $decreasingIntegers $i] + } + set res +} [lrange $decreasingIntegers 0 99] +test lsearch-6.3 {sorted integer search} { + set res {} + for {set i 0} {$i < 100} {incr i} { + lappend res [lsearch -sorted -integer $increasingIntegers $i] + } + set res +} [lrange $increasingIntegers 0 99] +test lsearch-6.4 {sorted decreasing integer search} { + set res {} + for {set i 0} {$i < 100} {incr i} { + lappend res [lsearch -integer -sorted -decreasing \ + $decreasingIntegers $i] + } + set res +} [lrange $decreasingIntegers 0 99] + +test lsearch-7.1 {double search} { + set res {} + for {set i 0} {$i < 100} {incr i} { + lappend res [lsearch -exact -real $increasingDoubles \ + [expr {double($i)/2}]] + } + set res +} [lrange $increasingIntegers 0 99] +test lsearch-7.2 {decreasing double search} { + set res {} + for {set i 0} {$i < 100} {incr i} { + lappend res [lsearch -exact -real -decreasing \ + $decreasingDoubles [expr {double($i)/2}]] + } + set res +} [lrange $decreasingIntegers 0 99] +test lsearch-7.3 {sorted double search} { + set res {} + for {set i 0} {$i < 100} {incr i} { + lappend res [lsearch -sorted -real \ + $increasingDoubles [expr {double($i)/2}]] + } + set res +} [lrange $increasingIntegers 0 99] +test lsearch-7.4 {sorted decreasing double search} { + set res {} + for {set i 0} {$i < 100} {incr i} { + lappend res [lsearch -sorted -real -decreasing \ + $decreasingDoubles [expr {double($i)/2}]] + } + set res +} [lrange $decreasingIntegers 0 99] + +test lsearch-8.1 {dictionary search} { + set res {} + foreach val {6a 18b 21aa 22a 35 36 48} { + lappend res [lsearch -exact -dictionary $increasingDictionary $val] + } + set res +} [list 0 1 2 3 4 5 6] +test lsearch-8.2 {decreasing dictionary search} { + set res {} + foreach val {6a 18b 21aa 22a 35 36 48} { + lappend res [lsearch -exact -dictionary $decreasingDictionary $val] + } + set res +} [list 6 5 4 3 2 1 0] +test lsearch-8.3 {sorted dictionary search} { + set res {} + foreach val {6a 18b 21aa 22a 35 36 48} { + lappend res [lsearch -sorted -dictionary $increasingDictionary $val] + } + set res +} [list 0 1 2 3 4 5 6] +test lsearch-8.4 {decreasing sorted dictionary search} { + set res {} + foreach val {6a 18b 21aa 22a 35 36 48} { + lappend res [lsearch -decreasing -sorted -dictionary \ + $decreasingDictionary $val] + } + set res +} [list 6 5 4 3 2 1 0] + +test lsearch-9.1 {ascii search} { + set res {} + foreach val {18b 21aa 22a 35 36 48 6a} { + lappend res [lsearch -exact -ascii $increasingStrings $val] + } + set res +} [list 0 1 2 3 4 5 6] +test lsearch-9.2 {decreasing ascii search} { + set res {} + foreach val {18b 21aa 22a 35 36 48 6a} { + lappend res [lsearch -exact -ascii $decreasingStrings $val] + } + set res +} [list 6 5 4 3 2 1 0] +test lsearch-9.3 {sorted ascii search} { + set res {} + foreach val {18b 21aa 22a 35 36 48 6a} { + lappend res [lsearch -sorted -ascii $increasingStrings $val] + } + set res +} [list 0 1 2 3 4 5 6] +test lsearch-9.4 {decreasing sorted ascii search} { + set res {} + foreach val {18b 21aa 22a 35 36 48 6a} { + lappend res [lsearch -decreasing -sorted -ascii \ + $decreasingStrings $val] + } + set res +} [list 6 5 4 3 2 1 0] + + + # cleanup ::tcltest::cleanupTests return -- cgit v0.12