diff options
author | dkf <donal.k.fellows@manchester.ac.uk> | 2005-06-01 11:00:24 (GMT) |
---|---|---|
committer | dkf <donal.k.fellows@manchester.ac.uk> | 2005-06-01 11:00:24 (GMT) |
commit | 854f85bb1700aa6f106cc6a443cb0eb2e917f2de (patch) | |
tree | d3bbe95a2b84f8455477e5d9e709e78633b6d7bd /generic | |
parent | 8f397c357860e5098e4eeea5140ed0d3c724075d (diff) | |
download | tcl-854f85bb1700aa6f106cc6a443cb0eb2e917f2de.zip tcl-854f85bb1700aa6f106cc6a443cb0eb2e917f2de.tar.gz tcl-854f85bb1700aa6f106cc6a443cb0eb2e917f2de.tar.bz2 |
Implementation of TIP#241 from Joe Mistachkin
Also compilation of [switch -glob -nocase] from Donal Fellows
Diffstat (limited to 'generic')
-rw-r--r-- | generic/tclCmdIL.c | 62 | ||||
-rw-r--r-- | generic/tclCmdMZ.c | 24 | ||||
-rw-r--r-- | generic/tclCompCmds.c | 76 |
3 files changed, 109 insertions, 53 deletions
diff --git a/generic/tclCmdIL.c b/generic/tclCmdIL.c index b68f7ba..883ed45 100644 --- a/generic/tclCmdIL.c +++ b/generic/tclCmdIL.c @@ -16,7 +16,7 @@ * See the file "license.terms" for information on usage and redistribution * of this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclCmdIL.c,v 1.75 2005/05/30 00:04:46 dkf Exp $ + * RCS: @(#) $Id: tclCmdIL.c,v 1.76 2005/06/01 11:00:34 dkf Exp $ */ #include "tclInt.h" @@ -36,6 +36,15 @@ typedef struct SortElement { } SortElement; /* + * These function pointer types are used with the "lsearch" and "lsort" + * commands to facilitate the "-nocase" option. + */ + +typedef int (*SortStrCmpFn_t) _ANSI_ARGS_((const char *, const char *)); +typedef int (*SortMemCmpFn_t) _ANSI_ARGS_((const void *, const void *, + size_t)); + +/* * The "lsort" command needs to pass certain information down to the * function that compares two list elements, and the comparison function * needs to pass success or failure information back up to the top-level @@ -47,6 +56,8 @@ typedef struct SortInfo { int isIncreasing; /* Nonzero means sort in increasing order. */ int sortMode; /* The sort mode. One of SORTMODE_* * values defined below */ + SortStrCmpFn_t strCmpFn; /* Basic string compare command (used with + * ASCII mode). */ Tcl_Obj *compareCmdPtr; /* The Tcl comparison command when sortMode * is SORTMODE_COMMAND. Pre-initialized to * hold base of command.*/ @@ -3201,7 +3212,7 @@ Tcl_LsearchObjCmd(clientData, interp, objc, objv) char *bytes, *patternBytes; int i, match, mode, index, result, listc, length, elemLen; int dataType, isIncreasing, lower, upper, patInt, objInt; - int offset, allMatches, inlineReturn, negatedMatch, returnSubindices; + int offset, allMatches, inlineReturn, negatedMatch, returnSubindices, noCase; double patDouble, objDouble; SortInfo sortInfo; Tcl_Obj *patObj, **listv, *listPtr, *startPtr, *itemPtr; @@ -3209,15 +3220,17 @@ Tcl_LsearchObjCmd(clientData, interp, objc, objv) static CONST char *options[] = { "-all", "-ascii", "-decreasing", "-dictionary", "-exact", "-glob", "-increasing", "-index", - "-inline", "-integer", "-not", "-real", - "-regexp", "-sorted", "-start", "-subindices", + "-inline", "-integer", "-nocase", "-not", + "-real", "-regexp", "-sorted", "-start", + "-subindices", NULL }; enum options { LSEARCH_ALL, LSEARCH_ASCII, LSEARCH_DECREASING, LSEARCH_DICTIONARY, LSEARCH_EXACT, LSEARCH_GLOB, LSEARCH_INCREASING, LSEARCH_INDEX, - LSEARCH_INLINE, LSEARCH_INTEGER, LSEARCH_NOT, LSEARCH_REAL, - LSEARCH_REGEXP, LSEARCH_SORTED, LSEARCH_START, LSEARCH_SUBINDICES + LSEARCH_INLINE, LSEARCH_INTEGER, LSEARCH_NOCASE, LSEARCH_NOT, + LSEARCH_REAL, LSEARCH_REGEXP, LSEARCH_SORTED, LSEARCH_START, + LSEARCH_SUBINDICES }; enum datatypes { ASCII, DICTIONARY, INTEGER, REAL @@ -3225,6 +3238,7 @@ Tcl_LsearchObjCmd(clientData, interp, objc, objv) enum modes { EXACT, GLOB, REGEXP, SORTED }; + SortStrCmpFn_t strCmpFn = strcmp; mode = GLOB; dataType = ASCII; @@ -3236,6 +3250,7 @@ Tcl_LsearchObjCmd(clientData, interp, objc, objv) listPtr = NULL; startPtr = NULL; offset = 0; + noCase = 0; sortInfo.compareCmdPtr = NULL; sortInfo.isIncreasing = 0; sortInfo.sortMode = 0; @@ -3288,6 +3303,10 @@ Tcl_LsearchObjCmd(clientData, interp, objc, objv) case LSEARCH_INTEGER: /* -integer */ dataType = INTEGER; break; + case LSEARCH_NOCASE: /* -nocase */ + strCmpFn = strcasecmp; + noCase = 1; + break; case LSEARCH_NOT: /* -not */ negatedMatch = 1; break; @@ -3422,7 +3441,8 @@ Tcl_LsearchObjCmd(clientData, interp, objc, objv) * regexp rep before the list rep. */ regexp = Tcl_GetRegExpFromObj(interp, objv[objc - 1], - TCL_REG_ADVANCED | TCL_REG_NOSUB); + TCL_REG_ADVANCED | TCL_REG_NOSUB | + (noCase ? TCL_REG_NOCASE : 0)); if (regexp == NULL) { if (startPtr != NULL) { Tcl_DecrRefCount(startPtr); @@ -3531,7 +3551,7 @@ Tcl_LsearchObjCmd(clientData, interp, objc, objv) switch ((enum datatypes) dataType) { case ASCII: bytes = TclGetString(itemPtr); - match = strcmp(patternBytes, bytes); + match = strCmpFn(patternBytes, bytes); break; case DICTIONARY: bytes = TclGetString(itemPtr); @@ -3629,8 +3649,16 @@ Tcl_LsearchObjCmd(clientData, interp, objc, objv) case ASCII: bytes = Tcl_GetStringFromObj(itemPtr, &elemLen); if (length == elemLen) { - match = (memcmp(bytes, patternBytes, - (size_t) length) == 0); + /* + * This split allows for more optimal + * compilation of memcmp + */ + if (noCase) { + match = (strcasecmp(bytes, patternBytes) == 0); + } else { + match = (memcmp(bytes, patternBytes, + (size_t) length) == 0); + } } break; case DICTIONARY: @@ -3669,7 +3697,8 @@ Tcl_LsearchObjCmd(clientData, interp, objc, objv) break; case GLOB: - match = Tcl_StringMatch(TclGetString(itemPtr), patternBytes); + match = Tcl_StringCaseMatch(TclGetString(itemPtr), + patternBytes, noCase); break; case REGEXP: match = Tcl_RegExpExecObj(interp, regexp, itemPtr, 0, 0, 0); @@ -3871,12 +3900,13 @@ Tcl_LsortObjCmd(clientData, interp, objc, objv) * comparison function */ static CONST char *switches[] = { "-ascii", "-command", "-decreasing", "-dictionary", "-increasing", - "-index", "-indices", "-integer", "-real", "-unique", (char *) NULL + "-index", "-indices", "-integer", "-nocase", "-real", "-unique", + (char *) NULL }; enum Lsort_Switches { LSORT_ASCII, LSORT_COMMAND, LSORT_DECREASING, LSORT_DICTIONARY, LSORT_INCREASING, LSORT_INDEX, LSORT_INDICES, LSORT_INTEGER, - LSORT_REAL, LSORT_UNIQUE + LSORT_NOCASE, LSORT_REAL, LSORT_UNIQUE }; if (objc < 2) { @@ -3890,6 +3920,7 @@ Tcl_LsortObjCmd(clientData, interp, objc, objv) sortInfo.isIncreasing = 1; sortInfo.sortMode = SORTMODE_ASCII; + sortInfo.strCmpFn = strcmp; sortInfo.indexv = NULL; sortInfo.indexc = 0; sortInfo.interp = interp; @@ -3988,6 +4019,9 @@ Tcl_LsortObjCmd(clientData, interp, objc, objv) case LSORT_INTEGER: sortInfo.sortMode = SORTMODE_INTEGER; break; + case LSORT_NOCASE: + sortInfo.strCmpFn = strcasecmp; + break; case LSORT_REAL: sortInfo.sortMode = SORTMODE_REAL; break; @@ -4262,7 +4296,7 @@ SortCompare(objPtr1, objPtr2, infoPtr) } if (infoPtr->sortMode == SORTMODE_ASCII) { - order = strcmp(TclGetString(objPtr1), TclGetString(objPtr2)); + order = infoPtr->strCmpFn(TclGetString(objPtr1), TclGetString(objPtr2)); } else if (infoPtr->sortMode == SORTMODE_DICTIONARY) { order = DictionaryCompare( TclGetString(objPtr1), TclGetString(objPtr2)); diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c index b507fe5..99182f3 100644 --- a/generic/tclCmdMZ.c +++ b/generic/tclCmdMZ.c @@ -15,7 +15,7 @@ * See the file "license.terms" for information on usage and redistribution * of this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclCmdMZ.c,v 1.122 2005/05/25 19:27:10 hobbs Exp $ + * RCS: @(#) $Id: tclCmdMZ.c,v 1.123 2005/06/01 11:00:34 dkf Exp $ */ #include "tclInt.h" @@ -2520,7 +2520,7 @@ Tcl_SwitchObjCmd(dummy, interp, objc, objv) int objc; /* Number of arguments. */ Tcl_Obj *CONST objv[]; /* Argument objects. */ { - int i, j, index, mode, result, splitObjs, numMatchesSaved; + int i, j, index, mode, result, splitObjs, numMatchesSaved, noCase; char *pattern; Tcl_Obj *stringObj, *indexVarObj, *matchVarObj; Tcl_Obj *CONST *savedObjv = objv; @@ -2531,17 +2531,21 @@ Tcl_SwitchObjCmd(dummy, interp, objc, objv) * parser as well. */ static CONST char *options[] = { - "-exact", "-glob", "-indexvar", "-matchvar", "-regexp", "--", - NULL + "-exact", "-glob", "-indexvar", "-matchvar", "-nocase", "-regexp", + "--", NULL }; enum options { - OPT_EXACT, OPT_GLOB, OPT_INDEXV, OPT_MATCHV, OPT_REGEXP, OPT_LAST + OPT_EXACT, OPT_GLOB, OPT_INDEXV, OPT_MATCHV, OPT_NOCASE, OPT_REGEXP, + OPT_LAST }; + typedef int (*strCmpFn_t) _ANSI_ARGS_((const char *, const char *)); + strCmpFn_t strCmpFn = strcmp; mode = OPT_EXACT; indexVarObj = NULL; matchVarObj = NULL; numMatchesSaved = 0; + noCase = 0; for (i = 1; i < objc; i++) { if (TclGetString(objv[i])[0] != '-') { break; @@ -2580,6 +2584,9 @@ Tcl_SwitchObjCmd(dummy, interp, objc, objv) } matchVarObj = objv[i]; numMatchesSaved = -1; + } else if (index == OPT_NOCASE) { + strCmpFn = strcasecmp; + noCase = 1; } else { mode = index; } @@ -2718,18 +2725,19 @@ Tcl_SwitchObjCmd(dummy, interp, objc, objv) } else { switch (mode) { case OPT_EXACT: - if (strcmp(TclGetString(stringObj), pattern) == 0) { + if (strCmpFn(TclGetString(stringObj), pattern) == 0) { goto matchFound; } break; case OPT_GLOB: - if (Tcl_StringMatch(TclGetString(stringObj), pattern)) { + if (Tcl_StringCaseMatch(TclGetString(stringObj), pattern, + noCase)) { goto matchFound; } break; case OPT_REGEXP: regExpr = Tcl_GetRegExpFromObj(interp, objv[i], - TCL_REG_ADVANCED); + TCL_REG_ADVANCED | (noCase ? TCL_REG_NOCASE : 0)); if (regExpr == NULL) { return TCL_ERROR; } else { diff --git a/generic/tclCompCmds.c b/generic/tclCompCmds.c index c412329..9598d84 100644 --- a/generic/tclCompCmds.c +++ b/generic/tclCompCmds.c @@ -11,7 +11,7 @@ * See the file "license.terms" for information on usage and redistribution * of this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclCompCmds.c,v 1.69 2005/06/01 10:02:19 dkf Exp $ + * RCS: @(#) $Id: tclCompCmds.c,v 1.70 2005/06/01 11:00:35 dkf Exp $ */ #include "tclInt.h" @@ -2723,6 +2723,7 @@ TclCompileSwitchCmd(interp, parsePtr, envPtr) * to the current (or next) real body. */ int savedStackDepth = envPtr->currStackDepth; + int noCase; int i; /* @@ -2753,50 +2754,55 @@ TclCompileSwitchCmd(interp, parsePtr, envPtr) * interpreted code falls apart if it encounters them, so we punt * if we *might* encounter them as that is the easiest way of * emulating the behaviour). - * - * Note that this parsing would probably be better done with a - * loop, but it works for now... */ - if (tokenPtr->type != TCL_TOKEN_SIMPLE_WORD) { - return TCL_ERROR; - } else { - register int size = tokenPtr[1].size; + noCase = 0; + mode = Switch_Exact; + for (; numWords>=3 ; tokenPtr+=2,numWords--) { + register unsigned size = tokenPtr[1].size; register CONST char *chrs = tokenPtr[1].start; /* - * Assume that -e and -g are unique prefixes of -exact and -glob + * We only process literal options, and we assume that -e, -g + * and -n are unique prefixes of -exact, -glob and -nocase + * respectively (true at time of writing). */ - if (size < 2) { + if (tokenPtr->type != TCL_TOKEN_SIMPLE_WORD || size < 2) { return TCL_ERROR; } - if ((size <= 6) && (numWords >= 4) - && !strncmp(chrs, "-exact", (unsigned) TclMin(size, 6))) { + + if ((size <= 6) && !memcmp(chrs, "-exact", size)) { mode = Switch_Exact; - tokenPtr += 2; - numWords--; - } else if ((size <= 5) && (numWords >= 4) - && !strncmp(chrs, "-glob", (unsigned) TclMin(size, 5))) { + continue; + } else if ((size <= 5) && !memcmp(chrs, "-glob", size)) { mode = Switch_Glob; - tokenPtr += 2; - numWords--; - } else if ((size == 2) && (numWords >= 3) && !strncmp(chrs, "--", 2)) { - /* - * If no control flag present, use exact matching (the default). - * - * We end up re-checking this word, but that's the way things are. - */ - mode = Switch_Exact; - } else { - return TCL_ERROR; + continue; + } else if ((size <= 7) && !memcmp(chrs, "-nocase", size)) { + noCase = 1; + continue; + } else if ((size == 2) && !memcmp(chrs, "--", 2)) { + break; } + + /* + * The switch command has many flags we cannot compile at all + * (e.g. all the RE-related ones) which we must have + * encountered. Either that or we have run off the end. The + * action here is the same: punt to interpreted version. + */ + return TCL_ERROR; } - if ((tokenPtr->type != TCL_TOKEN_SIMPLE_WORD) || (tokenPtr[1].size != 2) - || strncmp(tokenPtr[1].start, "--", 2)) { + if (numWords < 3) { return TCL_ERROR; } tokenPtr += 2; numWords--; + if (noCase && (mode == Switch_Exact)) { + /* + * Can't compile this case! + */ + return TCL_ERROR; + } /* * The value to test against is going to always get pushed on the @@ -2902,6 +2908,14 @@ TclCompileSwitchCmd(interp, parsePtr, envPtr) return TCL_ERROR; } } else if (numWords % 2 || numWords == 0) { + /* + * Odd number of words (>1) available, or no words at all + * available. Both are error cases, so punt and let the + * interpreted-version generate the error message. Note that + * the second case probably should get caught earlier, but + * it's easy to check here again anyway because it'd cause a + * nasty crash otherwise. + */ return TCL_ERROR; } else { bodyToken = (Tcl_Token **) ckalloc(sizeof(Tcl_Token *) * numWords); @@ -2962,7 +2976,7 @@ TclCompileSwitchCmd(interp, parsePtr, envPtr) int nextArmFixupIndex = -1; envPtr->currStackDepth = savedStackDepth + 1; if (i!=numWords-2 || bodyToken[numWords-2]->size != 7 || - strncmp(bodyToken[numWords-2]->start, "default", 7)) { + memcmp(bodyToken[numWords-2]->start, "default", 7)) { /* * Generate the test for the arm. This code is slightly * inefficient, but much simpler than the first version. @@ -2975,7 +2989,7 @@ TclCompileSwitchCmd(interp, parsePtr, envPtr) TclEmitOpcode(INST_STR_EQ, envPtr); break; case Switch_Glob: - TclEmitInstInt1(INST_STR_MATCH, /*nocase*/0, envPtr); + TclEmitInstInt1(INST_STR_MATCH, noCase, envPtr); break; default: Tcl_Panic("unknown switch mode: %d",mode); |