Implementation of TIP#241 from Joe Mistachkin

Also compilation of [switch -glob -nocase] from Donal Fellows
author: dkf <donal.k.fellows@manchester.ac.uk> 2005-06-01 11:00:24 (GMT)
committer: dkf <donal.k.fellows@manchester.ac.uk> 2005-06-01 11:00:24 (GMT)
commit: 854f85bb1700aa6f106cc6a443cb0eb2e917f2de (patch)
tree: d3bbe95a2b84f8455477e5d9e709e78633b6d7bd /generic
parent: 8f397c357860e5098e4eeea5140ed0d3c724075d (diff)
download: tcl-854f85bb1700aa6f106cc6a443cb0eb2e917f2de.zip
tcl-854f85bb1700aa6f106cc6a443cb0eb2e917f2de.tar.gz
tcl-854f85bb1700aa6f106cc6a443cb0eb2e917f2de.tar.bz2
3 files changed, 109 insertions, 53 deletions
diff --git a/generic/tclCmdIL.c b/generic/tclCmdIL.c
index b68f7ba..883ed45 100644
--- a/generic/tclCmdIL.c
+++ b/generic/tclCmdIL.c
@@ -16,7 +16,7 @@
  * See the file "license.terms" for information on usage and redistribution
  * of this file, and for a DISCLAIMER OF ALL WARRANTIES.
  *
- * RCS: @(#) $Id: tclCmdIL.c,v 1.75 2005/05/30 00:04:46 dkf Exp $
+ * RCS: @(#) $Id: tclCmdIL.c,v 1.76 2005/06/01 11:00:34 dkf Exp $
  */
 
 #include "tclInt.h"
@@ -36,6 +36,15 @@ typedef struct SortElement {
 } SortElement;
 
 /*
+ * These function pointer types are used with the "lsearch" and "lsort"
+ * commands to facilitate the "-nocase" option.
+ */
+
+typedef int (*SortStrCmpFn_t) _ANSI_ARGS_((const char *, const char *));
+typedef int (*SortMemCmpFn_t) _ANSI_ARGS_((const void *, const void *,
+			    size_t));
+
+/*
  * The "lsort" command needs to pass certain information down to the
  * function that compares two list elements, and the comparison function
  * needs to pass success or failure information back up to the top-level
@@ -47,6 +56,8 @@ typedef struct SortInfo {
     int isIncreasing;		/* Nonzero means sort in increasing order. */
     int sortMode;		/* The sort mode.  One of SORTMODE_*
 				 * values defined below */
+    SortStrCmpFn_t strCmpFn;     /* Basic string compare command (used with
+				 * ASCII mode). */
     Tcl_Obj *compareCmdPtr;     /* The Tcl comparison command when sortMode
 				 * is SORTMODE_COMMAND.  Pre-initialized to
 				 * hold base of command.*/
@@ -3201,7 +3212,7 @@ Tcl_LsearchObjCmd(clientData, interp, objc, objv)
     char *bytes, *patternBytes;
     int i, match, mode, index, result, listc, length, elemLen;
     int dataType, isIncreasing, lower, upper, patInt, objInt;
-    int offset, allMatches, inlineReturn, negatedMatch, returnSubindices;
+    int offset, allMatches, inlineReturn, negatedMatch, returnSubindices, noCase;
     double patDouble, objDouble;
     SortInfo sortInfo;
     Tcl_Obj *patObj, **listv, *listPtr, *startPtr, *itemPtr;
@@ -3209,15 +3220,17 @@ Tcl_LsearchObjCmd(clientData, interp, objc, objv)
     static CONST char *options[] = {
 	"-all",	    "-ascii",   "-decreasing", "-dictionary",
 	"-exact",   "-glob",    "-increasing", "-index",
-	"-inline",  "-integer", "-not",        "-real",
-	"-regexp",  "-sorted",  "-start",      "-subindices",
+	"-inline",  "-integer", "-nocase",     "-not",
+	"-real",    "-regexp",  "-sorted",     "-start",
+	"-subindices",
 	NULL
     };
     enum options {
 	LSEARCH_ALL, LSEARCH_ASCII, LSEARCH_DECREASING, LSEARCH_DICTIONARY,
 	LSEARCH_EXACT, LSEARCH_GLOB, LSEARCH_INCREASING, LSEARCH_INDEX,
-	LSEARCH_INLINE, LSEARCH_INTEGER, LSEARCH_NOT, LSEARCH_REAL,
-	LSEARCH_REGEXP, LSEARCH_SORTED, LSEARCH_START, LSEARCH_SUBINDICES
+	LSEARCH_INLINE, LSEARCH_INTEGER, LSEARCH_NOCASE, LSEARCH_NOT,
+	LSEARCH_REAL, LSEARCH_REGEXP, LSEARCH_SORTED, LSEARCH_START,
+	LSEARCH_SUBINDICES
     };
     enum datatypes {
 	ASCII, DICTIONARY, INTEGER, REAL
@@ -3225,6 +3238,7 @@ Tcl_LsearchObjCmd(clientData, interp, objc, objv)
     enum modes {
 	EXACT, GLOB, REGEXP, SORTED
     };
+    SortStrCmpFn_t strCmpFn = strcmp;
 
     mode = GLOB;
     dataType = ASCII;
@@ -3236,6 +3250,7 @@ Tcl_LsearchObjCmd(clientData, interp, objc, objv)
     listPtr = NULL;
     startPtr = NULL;
     offset = 0;
+    noCase = 0;
     sortInfo.compareCmdPtr = NULL;
     sortInfo.isIncreasing = 0;
     sortInfo.sortMode = 0;
@@ -3288,6 +3303,10 @@ Tcl_LsearchObjCmd(clientData, interp, objc, objv)
 	case LSEARCH_INTEGER:		/* -integer */
 	    dataType = INTEGER;
 	    break;
+	case LSEARCH_NOCASE:		/* -nocase */
+	    strCmpFn = strcasecmp;
+	    noCase = 1;
+	    break;
 	case LSEARCH_NOT:		/* -not */
 	    negatedMatch = 1;
 	    break;
@@ -3422,7 +3441,8 @@ Tcl_LsearchObjCmd(clientData, interp, objc, objv)
 	 * regexp rep before the list rep.
 	 */
 	regexp = Tcl_GetRegExpFromObj(interp, objv[objc - 1],
-		TCL_REG_ADVANCED | TCL_REG_NOSUB);
+		TCL_REG_ADVANCED | TCL_REG_NOSUB |
+		(noCase ? TCL_REG_NOCASE : 0));
 	if (regexp == NULL) {
 	    if (startPtr != NULL) {
 		Tcl_DecrRefCount(startPtr);
@@ -3531,7 +3551,7 @@ Tcl_LsearchObjCmd(clientData, interp, objc, objv)
 	    switch ((enum datatypes) dataType) {
 	    case ASCII:
 		bytes = TclGetString(itemPtr);
-		match = strcmp(patternBytes, bytes);
+		match = strCmpFn(patternBytes, bytes);
 		break;
 	    case DICTIONARY:
 		bytes = TclGetString(itemPtr);
@@ -3629,8 +3649,16 @@ Tcl_LsearchObjCmd(clientData, interp, objc, objv)
 		case ASCII:
 		    bytes = Tcl_GetStringFromObj(itemPtr, &elemLen);
 		    if (length == elemLen) {
-			match = (memcmp(bytes, patternBytes,
-				(size_t) length) == 0);
+			/*
+			 * This split allows for more optimal
+			 * compilation of memcmp
+			 */
+			if (noCase) {
+			    match = (strcasecmp(bytes, patternBytes) == 0);
+			} else {
+			    match = (memcmp(bytes, patternBytes,
+				    (size_t) length) == 0);
+			}
 		    }
 		    break;
 		case DICTIONARY:
@@ -3669,7 +3697,8 @@ Tcl_LsearchObjCmd(clientData, interp, objc, objv)
 		break;
 
 	    case GLOB:
-		match = Tcl_StringMatch(TclGetString(itemPtr), patternBytes);
+		match = Tcl_StringCaseMatch(TclGetString(itemPtr),
+			patternBytes, noCase);
 		break;
 	    case REGEXP:
 		match = Tcl_RegExpExecObj(interp, regexp, itemPtr, 0, 0, 0);
@@ -3871,12 +3900,13 @@ Tcl_LsortObjCmd(clientData, interp, objc, objv)
 					 * comparison function */
     static CONST char *switches[] = {
 	"-ascii", "-command", "-decreasing", "-dictionary", "-increasing",
-	"-index", "-indices", "-integer", "-real", "-unique", (char *) NULL
+	"-index", "-indices", "-integer", "-nocase", "-real", "-unique",
+	(char *) NULL
     };
     enum Lsort_Switches {
 	LSORT_ASCII, LSORT_COMMAND, LSORT_DECREASING, LSORT_DICTIONARY,
 	LSORT_INCREASING, LSORT_INDEX, LSORT_INDICES, LSORT_INTEGER,
-	LSORT_REAL, LSORT_UNIQUE
+	LSORT_NOCASE, LSORT_REAL, LSORT_UNIQUE
     };
 
     if (objc < 2) {
@@ -3890,6 +3920,7 @@ Tcl_LsortObjCmd(clientData, interp, objc, objv)
 
     sortInfo.isIncreasing = 1;
     sortInfo.sortMode = SORTMODE_ASCII;
+    sortInfo.strCmpFn = strcmp;
     sortInfo.indexv = NULL;
     sortInfo.indexc = 0;
     sortInfo.interp = interp;
@@ -3988,6 +4019,9 @@ Tcl_LsortObjCmd(clientData, interp, objc, objv)
 	case LSORT_INTEGER:
 	    sortInfo.sortMode = SORTMODE_INTEGER;
 	    break;
+	case LSORT_NOCASE:
+	    sortInfo.strCmpFn = strcasecmp;
+	    break;
 	case LSORT_REAL:
 	    sortInfo.sortMode = SORTMODE_REAL;
 	    break;
@@ -4262,7 +4296,7 @@ SortCompare(objPtr1, objPtr2, infoPtr)
     }
 
     if (infoPtr->sortMode == SORTMODE_ASCII) {
-	order = strcmp(TclGetString(objPtr1), TclGetString(objPtr2));
+	order = infoPtr->strCmpFn(TclGetString(objPtr1), TclGetString(objPtr2));
     } else if (infoPtr->sortMode == SORTMODE_DICTIONARY) {
 	order = DictionaryCompare(
 		TclGetString(objPtr1), TclGetString(objPtr2));
diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c
index b507fe5..99182f3 100644
--- a/generic/tclCmdMZ.c
+++ b/generic/tclCmdMZ.c
@@ -15,7 +15,7 @@
  * See the file "license.terms" for information on usage and redistribution
  * of this file, and for a DISCLAIMER OF ALL WARRANTIES.
  *
- * RCS: @(#) $Id: tclCmdMZ.c,v 1.122 2005/05/25 19:27:10 hobbs Exp $
+ * RCS: @(#) $Id: tclCmdMZ.c,v 1.123 2005/06/01 11:00:34 dkf Exp $
  */
 
 #include "tclInt.h"
@@ -2520,7 +2520,7 @@ Tcl_SwitchObjCmd(dummy, interp, objc, objv)
     int objc;			/* Number of arguments. */
     Tcl_Obj *CONST objv[];	/* Argument objects. */
 {
-    int i, j, index, mode, result, splitObjs, numMatchesSaved;
+    int i, j, index, mode, result, splitObjs, numMatchesSaved, noCase;
     char *pattern;
     Tcl_Obj *stringObj, *indexVarObj, *matchVarObj;
     Tcl_Obj *CONST *savedObjv = objv;
@@ -2531,17 +2531,21 @@ Tcl_SwitchObjCmd(dummy, interp, objc, objv)
      * parser as well.
      */
     static CONST char *options[] = {
-	"-exact", "-glob", "-indexvar", "-matchvar", "-regexp", "--", 
-	NULL
+	"-exact", "-glob", "-indexvar", "-matchvar", "-nocase", "-regexp",
+	"--", NULL
     };
     enum options {
-	OPT_EXACT, OPT_GLOB, OPT_INDEXV, OPT_MATCHV, OPT_REGEXP, OPT_LAST
+	OPT_EXACT, OPT_GLOB, OPT_INDEXV, OPT_MATCHV, OPT_NOCASE, OPT_REGEXP,
+	OPT_LAST
     };
+    typedef int (*strCmpFn_t) _ANSI_ARGS_((const char *, const char *));
+    strCmpFn_t strCmpFn = strcmp;
 
     mode = OPT_EXACT;
     indexVarObj = NULL;
     matchVarObj = NULL;
     numMatchesSaved = 0;
+    noCase = 0;
     for (i = 1; i < objc; i++) {
 	if (TclGetString(objv[i])[0] != '-') {
 	    break;
@@ -2580,6 +2584,9 @@ Tcl_SwitchObjCmd(dummy, interp, objc, objv)
 	    }
 	    matchVarObj = objv[i];
 	    numMatchesSaved = -1;
+	} else if (index == OPT_NOCASE) {
+	    strCmpFn = strcasecmp;
+	    noCase = 1;
 	} else {
 	    mode = index;
 	}
@@ -2718,18 +2725,19 @@ Tcl_SwitchObjCmd(dummy, interp, objc, objv)
 	} else {
 	    switch (mode) {
 	    case OPT_EXACT:
-		if (strcmp(TclGetString(stringObj), pattern) == 0) {
+		if (strCmpFn(TclGetString(stringObj), pattern) == 0) {
 		    goto matchFound;
 		}
 		break;
 	    case OPT_GLOB:
-		if (Tcl_StringMatch(TclGetString(stringObj), pattern)) {
+		if (Tcl_StringCaseMatch(TclGetString(stringObj), pattern,
+			noCase)) {
 		    goto matchFound;
 		}
 		break;
 	    case OPT_REGEXP:
 		regExpr = Tcl_GetRegExpFromObj(interp, objv[i],
-			TCL_REG_ADVANCED);
+			TCL_REG_ADVANCED | (noCase ? TCL_REG_NOCASE : 0));
 		if (regExpr == NULL) {
 		    return TCL_ERROR;
 		} else {
diff --git a/generic/tclCompCmds.c b/generic/tclCompCmds.c
index c412329..9598d84 100644
--- a/generic/tclCompCmds.c
+++ b/generic/tclCompCmds.c
@@ -11,7 +11,7 @@
  * See the file "license.terms" for information on usage and redistribution
  * of this file, and for a DISCLAIMER OF ALL WARRANTIES.
  *
- * RCS: @(#) $Id: tclCompCmds.c,v 1.69 2005/06/01 10:02:19 dkf Exp $
+ * RCS: @(#) $Id: tclCompCmds.c,v 1.70 2005/06/01 11:00:35 dkf Exp $
  */
 
 #include "tclInt.h"
@@ -2723,6 +2723,7 @@ TclCompileSwitchCmd(interp, parsePtr, envPtr)
 				 * to the current (or next) real body. */
 
     int savedStackDepth = envPtr->currStackDepth;
+    int noCase;
     int i;
 
     /*
@@ -2753,50 +2754,55 @@ TclCompileSwitchCmd(interp, parsePtr, envPtr)
      * interpreted code falls apart if it encounters them, so we punt
      * if we *might* encounter them as that is the easiest way of
      * emulating the behaviour).
-     *
-     * Note that this parsing would probably be better done with a
-     * loop, but it works for now...
      */
 
-    if (tokenPtr->type != TCL_TOKEN_SIMPLE_WORD) {
-	return TCL_ERROR;
-    } else {
-	register int size = tokenPtr[1].size;
+    noCase = 0;
+    mode = Switch_Exact;
+    for (; numWords>=3 ; tokenPtr+=2,numWords--) {
+	register unsigned size = tokenPtr[1].size;
 	register CONST char *chrs = tokenPtr[1].start;
 
 	/*
-	 * Assume that -e and -g are unique prefixes of -exact and -glob
+	 * We only process literal options, and we assume that -e, -g
+	 * and -n are unique prefixes of -exact, -glob and -nocase
+	 * respectively (true at time of writing).
 	 */
-	if (size < 2) {
+	if (tokenPtr->type != TCL_TOKEN_SIMPLE_WORD || size < 2) {
 	    return TCL_ERROR;
 	}
-	if ((size <= 6) && (numWords >= 4)
-		&& !strncmp(chrs, "-exact", (unsigned) TclMin(size, 6))) {
+
+	if ((size <= 6) && !memcmp(chrs, "-exact", size)) {
 	    mode = Switch_Exact;
-	    tokenPtr += 2;
-	    numWords--;
-	} else if ((size <= 5) && (numWords >= 4)
-		&& !strncmp(chrs, "-glob", (unsigned) TclMin(size, 5))) {
+	    continue;
+	} else if ((size <= 5) && !memcmp(chrs, "-glob", size)) {
 	    mode = Switch_Glob;
-	    tokenPtr += 2;
-	    numWords--;
-	} else if ((size == 2) && (numWords >= 3) && !strncmp(chrs, "--", 2)) {
-	    /*
-	     * If no control flag present, use exact matching (the default).
-	     *
-	     * We end up re-checking this word, but that's the way things are.
-	     */
-	    mode = Switch_Exact;
-	} else {
-	    return TCL_ERROR;
+	    continue;
+	} else if ((size <= 7) && !memcmp(chrs, "-nocase", size)) {
+	    noCase = 1;
+	    continue;
+	} else if ((size == 2) && !memcmp(chrs, "--", 2)) {
+	    break;
 	}
+
+	/*
+	 * The switch command has many flags we cannot compile at all
+	 * (e.g. all the RE-related ones) which we must have
+	 * encountered. Either that or we have run off the end. The
+	 * action here is the same: punt to interpreted version.
+	 */
+	return TCL_ERROR;
     }
-    if ((tokenPtr->type != TCL_TOKEN_SIMPLE_WORD) || (tokenPtr[1].size != 2)
-	    || strncmp(tokenPtr[1].start, "--", 2)) {
+    if (numWords < 3) {
 	return TCL_ERROR;
     }
     tokenPtr += 2;
     numWords--;
+    if (noCase && (mode == Switch_Exact)) {
+	/*
+	 * Can't compile this case!
+	 */
+	return TCL_ERROR;
+    }
 
     /*
      * The value to test against is going to always get pushed on the
@@ -2902,6 +2908,14 @@ TclCompileSwitchCmd(interp, parsePtr, envPtr)
 	    return TCL_ERROR;
 	}
     } else if (numWords % 2 || numWords == 0) {
+	/*
+	 * Odd number of words (>1) available, or no words at all
+	 * available. Both are error cases, so punt and let the
+	 * interpreted-version generate the error message. Note that
+	 * the second case probably should get caught earlier, but
+	 * it's easy to check here again anyway because it'd cause a
+	 * nasty crash otherwise.
+	 */
 	return TCL_ERROR;
     } else {
 	bodyToken = (Tcl_Token **) ckalloc(sizeof(Tcl_Token *) * numWords);
@@ -2962,7 +2976,7 @@ TclCompileSwitchCmd(interp, parsePtr, envPtr)
 	int nextArmFixupIndex = -1;
 	envPtr->currStackDepth = savedStackDepth + 1;
 	if (i!=numWords-2 || bodyToken[numWords-2]->size != 7 ||
-		strncmp(bodyToken[numWords-2]->start, "default", 7)) {
+		memcmp(bodyToken[numWords-2]->start, "default", 7)) {
 	    /*
 	     * Generate the test for the arm. This code is slightly
 	     * inefficient, but much simpler than the first version.
@@ -2975,7 +2989,7 @@ TclCompileSwitchCmd(interp, parsePtr, envPtr)
 		TclEmitOpcode(INST_STR_EQ, envPtr);
 		break;
 	    case Switch_Glob:
-		TclEmitInstInt1(INST_STR_MATCH, /*nocase*/0, envPtr);
+		TclEmitInstInt1(INST_STR_MATCH, noCase, envPtr);
 		break;
 	    default:
 		Tcl_Panic("unknown switch mode: %d",mode);
author	dkf <donal.k.fellows@manchester.ac.uk>	2005-06-01 11:00:24 (GMT)
committer	dkf <donal.k.fellows@manchester.ac.uk>	2005-06-01 11:00:24 (GMT)
commit	854f85bb1700aa6f106cc6a443cb0eb2e917f2de (patch)
tree	d3bbe95a2b84f8455477e5d9e709e78633b6d7bd /generic
parent	8f397c357860e5098e4eeea5140ed0d3c724075d (diff)
download	tcl-854f85bb1700aa6f106cc6a443cb0eb2e917f2de.zip tcl-854f85bb1700aa6f106cc6a443cb0eb2e917f2de.tar.gz tcl-854f85bb1700aa6f106cc6a443cb0eb2e917f2de.tar.bz2