diff options
Diffstat (limited to 'generic/tclCmdMZ.c')
| -rw-r--r-- | generic/tclCmdMZ.c | 5697 |
1 files changed, 4170 insertions, 1527 deletions
diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c index 4dc272f..00c9f2f 100644 --- a/generic/tclCmdMZ.c +++ b/generic/tclCmdMZ.c @@ -1,56 +1,78 @@ -/* +/* * tclCmdMZ.c -- * - * This file contains the top-level command routines for most of - * the Tcl built-in commands whose names begin with the letters - * M to Z. It contains only commands in the generic core (i.e. - * those that don't depend much upon UNIX facilities). + * This file contains the top-level command routines for most of the Tcl + * built-in commands whose names begin with the letters M to Z. It + * contains only commands in the generic core (i.e. those that don't + * depend much upon UNIX facilities). * * Copyright (c) 1987-1993 The Regents of the University of California. * Copyright (c) 1994-1997 Sun Microsystems, Inc. + * Copyright (c) 1998-2000 Scriptics Corporation. + * Copyright (c) 2002 ActiveState Corporation. + * Copyright (c) 2003-2009 Donal K. Fellows. * - * See the file "license.terms" for information on usage and redistribution - * of this file, and for a DISCLAIMER OF ALL WARRANTIES. - * - * SCCS: @(#) tclCmdMZ.c 1.104 97/10/31 13:06:19 + * See the file "license.terms" for information on usage and redistribution of + * this file, and for a DISCLAIMER OF ALL WARRANTIES. */ #include "tclInt.h" -#include "tclPort.h" -#include "tclCompile.h" - -/* - * Structure used to hold information about variable traces: - */ - -typedef struct { - int flags; /* Operations for which Tcl command is - * to be invoked. */ - char *errMsg; /* Error message returned from Tcl command, - * or NULL. Malloc'ed. */ - int length; /* Number of non-NULL chars. in command. */ - char command[4]; /* Space for Tcl command to invoke. Actual - * size will be as large as necessary to - * hold command. This field must be the - * last in the structure, so that it can - * be larger than 4 bytes. */ -} TraceVarInfo; +#include "tclRegexp.h" +#include "tclStringTrim.h" + +static inline Tcl_Obj * During(Tcl_Interp *interp, int resultCode, + Tcl_Obj *oldOptions, Tcl_Obj *errorInfo); +static int SwitchPostProc(ClientData data[], Tcl_Interp *interp, + int result); +static int TryPostBody(ClientData data[], Tcl_Interp *interp, + int result); +static int TryPostFinal(ClientData data[], Tcl_Interp *interp, + int result); +static int TryPostHandler(ClientData data[], Tcl_Interp *interp, + int result); +static int UniCharIsAscii(int character); +static int UniCharIsHexDigit(int character); /* - * Forward declarations for procedures defined in this file: + * Default set of characters to trim in [string trim] and friends. This is a + * UTF-8 literal string containing all Unicode space characters [TIP #413] */ -static char * TraceVarProc _ANSI_ARGS_((ClientData clientData, - Tcl_Interp *interp, char *name1, char *name2, - int flags)); +const char tclDefaultTrimSet[] = + "\x09\x0a\x0b\x0c\x0d " /* ASCII */ + "\xc0\x80" /* nul (U+0000) */ + "\xc2\x85" /* next line (U+0085) */ + "\xc2\xa0" /* non-breaking space (U+00a0) */ + "\xe1\x9a\x80" /* ogham space mark (U+1680) */ + "\xe1\xa0\x8e" /* mongolian vowel separator (U+180e) */ + "\xe2\x80\x80" /* en quad (U+2000) */ + "\xe2\x80\x81" /* em quad (U+2001) */ + "\xe2\x80\x82" /* en space (U+2002) */ + "\xe2\x80\x83" /* em space (U+2003) */ + "\xe2\x80\x84" /* three-per-em space (U+2004) */ + "\xe2\x80\x85" /* four-per-em space (U+2005) */ + "\xe2\x80\x86" /* six-per-em space (U+2006) */ + "\xe2\x80\x87" /* figure space (U+2007) */ + "\xe2\x80\x88" /* punctuation space (U+2008) */ + "\xe2\x80\x89" /* thin space (U+2009) */ + "\xe2\x80\x8a" /* hair space (U+200a) */ + "\xe2\x80\x8b" /* zero width space (U+200b) */ + "\xe2\x80\xa8" /* line separator (U+2028) */ + "\xe2\x80\xa9" /* paragraph separator (U+2029) */ + "\xe2\x80\xaf" /* narrow no-break space (U+202f) */ + "\xe2\x81\x9f" /* medium mathematical space (U+205f) */ + "\xe2\x81\xa0" /* word joiner (U+2060) */ + "\xe3\x80\x80" /* ideographic space (U+3000) */ + "\xef\xbb\xbf" /* zero width no-break space (U+feff) */ +; /* *---------------------------------------------------------------------- * - * Tcl_PwdCmd -- + * Tcl_PwdObjCmd -- * - * This procedure is invoked to process the "pwd" Tcl command. - * See the user documentation for details on what it does. + * This procedure is invoked to process the "pwd" Tcl command. See the + * user documentation for details on what it does. * * Results: * A standard Tcl result. @@ -61,37 +83,36 @@ static char * TraceVarProc _ANSI_ARGS_((ClientData clientData, *---------------------------------------------------------------------- */ - /* ARGSUSED */ int -Tcl_PwdCmd(dummy, interp, argc, argv) - ClientData dummy; /* Not used. */ - Tcl_Interp *interp; /* Current interpreter. */ - int argc; /* Number of arguments. */ - char **argv; /* Argument strings. */ +Tcl_PwdObjCmd( + ClientData dummy, /* Not used. */ + Tcl_Interp *interp, /* Current interpreter. */ + int objc, /* Number of arguments. */ + Tcl_Obj *const objv[]) /* Argument objects. */ { - char *dirName; + Tcl_Obj *retVal; - if (argc != 1) { - Tcl_AppendResult(interp, "wrong # args: should be \"", - argv[0], "\"", (char *) NULL); + if (objc != 1) { + Tcl_WrongNumArgs(interp, 1, objv, NULL); return TCL_ERROR; } - dirName = TclGetCwd(interp); - if (dirName == NULL) { + retVal = Tcl_FSGetCwd(interp); + if (retVal == NULL) { return TCL_ERROR; } - Tcl_SetResult(interp, dirName, TCL_VOLATILE); + Tcl_SetObjResult(interp, retVal); + Tcl_DecrRefCount(retVal); return TCL_OK; } /* *---------------------------------------------------------------------- * - * Tcl_RegexpCmd -- + * Tcl_RegexpObjCmd -- * - * This procedure is invoked to process the "regexp" Tcl command. - * See the user documentation for details on what it does. + * This procedure is invoked to process the "regexp" Tcl command. See + * the user documentation for details on what it does. * * Results: * A standard Tcl result. @@ -102,153 +123,354 @@ Tcl_PwdCmd(dummy, interp, argc, argv) *---------------------------------------------------------------------- */ - /* ARGSUSED */ int -Tcl_RegexpCmd(dummy, interp, argc, argv) - ClientData dummy; /* Not used. */ - Tcl_Interp *interp; /* Current interpreter. */ - int argc; /* Number of arguments. */ - char **argv; /* Argument strings. */ +Tcl_RegexpObjCmd( + ClientData dummy, /* Not used. */ + Tcl_Interp *interp, /* Current interpreter. */ + int objc, /* Number of arguments. */ + Tcl_Obj *const objv[]) /* Argument objects. */ { - int noCase = 0; - int indices = 0; + int i, indices, match, about, offset, all, doinline, numMatchesSaved; + int cflags, eflags, stringLength, matchLength; Tcl_RegExp regExpr; - char **argPtr, *string, *pattern, *start, *end; - int match = 0; /* Initialization needed only to - * prevent compiler warning. */ - int i; - Tcl_DString stringDString, patternDString; - - if (argc < 3) { - wrongNumArgs: - Tcl_AppendResult(interp, "wrong # args: should be \"", argv[0], - " ?switches? exp string ?matchVar? ?subMatchVar ", - "subMatchVar ...?\"", (char *) NULL); - return TCL_ERROR; - } - argPtr = argv+1; - argc--; - while ((argc > 0) && (argPtr[0][0] == '-')) { - if (strcmp(argPtr[0], "-indices") == 0) { + Tcl_Obj *objPtr, *startIndex = NULL, *resultPtr = NULL; + Tcl_RegExpInfo info; + static const char *const options[] = { + "-all", "-about", "-indices", "-inline", + "-expanded", "-line", "-linestop", "-lineanchor", + "-nocase", "-start", "--", NULL + }; + enum options { + REGEXP_ALL, REGEXP_ABOUT, REGEXP_INDICES, REGEXP_INLINE, + REGEXP_EXPANDED,REGEXP_LINE, REGEXP_LINESTOP,REGEXP_LINEANCHOR, + REGEXP_NOCASE, REGEXP_START, REGEXP_LAST + }; + + indices = 0; + about = 0; + cflags = TCL_REG_ADVANCED; + offset = 0; + all = 0; + doinline = 0; + + for (i = 1; i < objc; i++) { + const char *name; + int index; + + name = TclGetString(objv[i]); + if (name[0] != '-') { + break; + } + if (Tcl_GetIndexFromObj(interp, objv[i], options, "switch", TCL_EXACT, + &index) != TCL_OK) { + goto optionError; + } + switch ((enum options) index) { + case REGEXP_ALL: + all = 1; + break; + case REGEXP_INDICES: indices = 1; - } else if (strcmp(argPtr[0], "-nocase") == 0) { - noCase = 1; - } else if (strcmp(argPtr[0], "--") == 0) { - argPtr++; - argc--; break; - } else { - Tcl_AppendResult(interp, "bad switch \"", argPtr[0], - "\": must be -indices, -nocase, or --", (char *) NULL); - return TCL_ERROR; + case REGEXP_INLINE: + doinline = 1; + break; + case REGEXP_NOCASE: + cflags |= TCL_REG_NOCASE; + break; + case REGEXP_ABOUT: + about = 1; + break; + case REGEXP_EXPANDED: + cflags |= TCL_REG_EXPANDED; + break; + case REGEXP_LINE: + cflags |= TCL_REG_NEWLINE; + break; + case REGEXP_LINESTOP: + cflags |= TCL_REG_NLSTOP; + break; + case REGEXP_LINEANCHOR: + cflags |= TCL_REG_NLANCH; + break; + case REGEXP_START: { + int temp; + if (++i >= objc) { + goto endOfForLoop; + } + if (TclGetIntForIndexM(interp, objv[i], 0, &temp) != TCL_OK) { + goto optionError; + } + if (startIndex) { + Tcl_DecrRefCount(startIndex); + } + startIndex = objv[i]; + Tcl_IncrRefCount(startIndex); + break; + } + case REGEXP_LAST: + i++; + goto endOfForLoop; } - argPtr++; - argc--; } - if (argc < 2) { - goto wrongNumArgs; + + endOfForLoop: + if ((objc - i) < (2 - about)) { + Tcl_WrongNumArgs(interp, 1, objv, + "?-switch ...? exp string ?matchVar? ?subMatchVar ...?"); + goto optionError; } + objc -= i; + objv += i; /* - * Convert the string and pattern to lower case, if desired, and - * perform the matching operation. + * Check if the user requested -inline, but specified match variables; a + * no-no. */ - if (noCase) { - register char *p; + if (doinline && ((objc - 2) != 0)) { + Tcl_SetObjResult(interp, Tcl_NewStringObj( + "regexp match variables not allowed when using -inline", -1)); + goto optionError; + } - Tcl_DStringInit(&patternDString); - Tcl_DStringAppend(&patternDString, argPtr[0], -1); - pattern = Tcl_DStringValue(&patternDString); - for (p = pattern; *p != 0; p++) { - if (isupper(UCHAR(*p))) { - *p = (char)tolower(UCHAR(*p)); - } - } - Tcl_DStringInit(&stringDString); - Tcl_DStringAppend(&stringDString, argPtr[1], -1); - string = Tcl_DStringValue(&stringDString); - for (p = string; *p != 0; p++) { - if (isupper(UCHAR(*p))) { - *p = (char)tolower(UCHAR(*p)); + /* + * Handle the odd about case separately. + */ + + if (about) { + regExpr = Tcl_GetRegExpFromObj(interp, objv[0], cflags); + if ((regExpr == NULL) || (TclRegAbout(interp, regExpr) < 0)) { + optionError: + if (startIndex) { + Tcl_DecrRefCount(startIndex); } + return TCL_ERROR; } - } else { - pattern = argPtr[0]; - string = argPtr[1]; - } - regExpr = Tcl_RegExpCompile(interp, pattern); - if (regExpr != NULL) { - match = Tcl_RegExpExec(interp, regExpr, string, string); + return TCL_OK; } - if (noCase) { - Tcl_DStringFree(&stringDString); - Tcl_DStringFree(&patternDString); + + /* + * Get the length of the string that we are matching against so we can do + * the termination test for -all matches. Do this before getting the + * regexp to avoid shimmering problems. + */ + + objPtr = objv[1]; + stringLength = Tcl_GetCharLength(objPtr); + + if (startIndex) { + TclGetIntForIndexM(NULL, startIndex, stringLength, &offset); + Tcl_DecrRefCount(startIndex); + if (offset < 0) { + offset = 0; + } } + + regExpr = Tcl_GetRegExpFromObj(interp, objv[0], cflags); if (regExpr == NULL) { return TCL_ERROR; } - if (match < 0) { - return TCL_ERROR; - } - if (!match) { - Tcl_SetResult(interp, "0", TCL_STATIC); - return TCL_OK; + + objc -= 2; + objv += 2; + + if (doinline) { + /* + * Save all the subexpressions, as we will return them as a list + */ + + numMatchesSaved = -1; + } else { + /* + * Save only enough subexpressions for matches we want to keep, expect + * in the case of -all, where we need to keep at least one to know + * where to move the offset. + */ + + numMatchesSaved = (objc == 0) ? all : objc; } /* - * If additional variable names have been specified, return - * index information in those variables. + * The following loop is to handle multiple matches within the same source + * string; each iteration handles one match. If "-all" hasn't been + * specified then the loop body only gets executed once. We terminate the + * loop when the starting offset is past the end of the string. */ - argc -= 2; - for (i = 0; i < argc; i++) { - char *result, info[50]; + while (1) { + /* + * Pass either 0 or TCL_REG_NOTBOL in the eflags. Passing + * TCL_REG_NOTBOL indicates that the character at offset should not be + * considered the start of the line. If for example the pattern {^} is + * passed and -start is positive, then the pattern will not match the + * start of the string unless the previous character is a newline. + */ - Tcl_RegExpRange(regExpr, i, &start, &end); - if (start == NULL) { - if (indices) { - result = Tcl_SetVar(interp, argPtr[i+2], "-1 -1", 0); - } else { - result = Tcl_SetVar(interp, argPtr[i+2], "", 0); - } + if (offset == 0) { + eflags = 0; + } else if (offset > stringLength) { + eflags = TCL_REG_NOTBOL; + } else if (Tcl_GetUniChar(objPtr, offset-1) == (Tcl_UniChar)'\n') { + eflags = 0; } else { + eflags = TCL_REG_NOTBOL; + } + + match = Tcl_RegExpExecObj(interp, regExpr, objPtr, offset, + numMatchesSaved, eflags); + if (match < 0) { + return TCL_ERROR; + } + + if (match == 0) { + /* + * We want to set the value of the intepreter result only when + * this is the first time through the loop. + */ + + if (all <= 1) { + /* + * If inlining, the interpreter's object result remains an + * empty list, otherwise set it to an integer object w/ value + * 0. + */ + + if (!doinline) { + Tcl_SetObjResult(interp, Tcl_NewIntObj(0)); + } + return TCL_OK; + } + break; + } + + /* + * If additional variable names have been specified, return index + * information in those variables. + */ + + Tcl_RegExpGetInfo(regExpr, &info); + if (doinline) { + /* + * It's the number of substitutions, plus one for the matchVar at + * index 0 + */ + + objc = info.nsubs + 1; + if (all <= 1) { + resultPtr = Tcl_NewObj(); + } + } + for (i = 0; i < objc; i++) { + Tcl_Obj *newPtr; + if (indices) { - sprintf(info, "%d %d", (int)(start - string), - (int)(end - string - 1)); - result = Tcl_SetVar(interp, argPtr[i+2], info, 0); - } else { - char savedChar, *first, *last; + int start, end; + Tcl_Obj *objs[2]; - first = argPtr[1] + (start - string); - last = argPtr[1] + (end - string); - if (first == last) { /* don't modify argument */ - result = Tcl_SetVar(interp, argPtr[i+2], "", 0); + /* + * Only adjust the match area if there was a match for that + * area. (Scriptics Bug 4391/SF Bug #219232) + */ + + if (i <= info.nsubs && info.matches[i].start >= 0) { + start = offset + info.matches[i].start; + end = offset + info.matches[i].end; + + /* + * Adjust index so it refers to the last character in the + * match instead of the first character after the match. + */ + + if (end >= offset) { + end--; + } + } else { + start = -1; + end = -1; + } + + objs[0] = Tcl_NewLongObj(start); + objs[1] = Tcl_NewLongObj(end); + + newPtr = Tcl_NewListObj(2, objs); + } else { + if (i <= info.nsubs) { + newPtr = Tcl_GetRange(objPtr, + offset + info.matches[i].start, + offset + info.matches[i].end - 1); } else { - savedChar = *last; - *last = 0; - result = Tcl_SetVar(interp, argPtr[i+2], first, 0); - *last = savedChar; + newPtr = Tcl_NewObj(); + } + } + if (doinline) { + if (Tcl_ListObjAppendElement(interp, resultPtr, newPtr) + != TCL_OK) { + Tcl_DecrRefCount(newPtr); + Tcl_DecrRefCount(resultPtr); + return TCL_ERROR; + } + } else { + if (Tcl_ObjSetVar2(interp, objv[i], NULL, newPtr, + TCL_LEAVE_ERR_MSG) == NULL) { + return TCL_ERROR; } } } - if (result == NULL) { - Tcl_AppendResult(interp, "couldn't set variable \"", - argPtr[i+2], "\"", (char *) NULL); - return TCL_ERROR; + + if (all == 0) { + break; } + + /* + * Adjust the offset to the character just after the last one in the + * matchVar and increment all to count how many times we are making a + * match. We always increment the offset by at least one to prevent + * endless looping (as in the case: regexp -all {a*} a). Otherwise, + * when we match the NULL string at the end of the input string, we + * will loop indefinately (because the length of the match is 0, so + * offset never changes). + */ + + matchLength = (info.matches[0].end - info.matches[0].start); + + offset += info.matches[0].end; + + /* + * A match of length zero could happen for {^} {$} or {.*} and in + * these cases we always want to bump the index up one. + */ + + if (matchLength == 0) { + offset++; + } + all++; + if (offset >= stringLength) { + break; + } + } + + /* + * Set the interpreter's object result to an integer object with value 1 + * if -all wasn't specified, otherwise it's all-1 (the number of times + * through the while - 1). + */ + + if (doinline) { + Tcl_SetObjResult(interp, resultPtr); + } else { + Tcl_SetObjResult(interp, Tcl_NewIntObj(all ? all-1 : 1)); } - Tcl_SetResult(interp, "1", TCL_STATIC); return TCL_OK; } /* *---------------------------------------------------------------------- * - * Tcl_RegsubCmd -- + * Tcl_RegsubObjCmd -- * - * This procedure is invoked to process the "regsub" Tcl command. - * See the user documentation for details on what it does. + * This procedure is invoked to process the "regsub" Tcl command. See the + * user documentation for details on what it does. * * Results: * A standard Tcl result. @@ -259,134 +481,286 @@ Tcl_RegexpCmd(dummy, interp, argc, argv) *---------------------------------------------------------------------- */ - /* ARGSUSED */ int -Tcl_RegsubCmd(dummy, interp, argc, argv) - ClientData dummy; /* Not used. */ - Tcl_Interp *interp; /* Current interpreter. */ - int argc; /* Number of arguments. */ - char **argv; /* Argument strings. */ +Tcl_RegsubObjCmd( + ClientData dummy, /* Not used. */ + Tcl_Interp *interp, /* Current interpreter. */ + int objc, /* Number of arguments. */ + Tcl_Obj *const objv[]) /* Argument objects. */ { - int noCase = 0, all = 0; + int idx, result, cflags, all, wlen, wsublen, numMatches, offset; + int start, end, subStart, subEnd, match; Tcl_RegExp regExpr; - char *string, *pattern, *p, *firstChar, **argPtr; - int match, code, numMatches; - char *start, *end, *subStart, *subEnd; - register char *src, c; - Tcl_DString stringDString, patternDString, resultDString; - - if (argc < 5) { - wrongNumArgs: - Tcl_AppendResult(interp, "wrong # args: should be \"", argv[0], - " ?switches? exp string subSpec varName\"", (char *) NULL); - return TCL_ERROR; - } - argPtr = argv+1; - argc--; - while (argPtr[0][0] == '-') { - if (strcmp(argPtr[0], "-nocase") == 0) { - noCase = 1; - } else if (strcmp(argPtr[0], "-all") == 0) { + Tcl_RegExpInfo info; + Tcl_Obj *resultPtr, *subPtr, *objPtr, *startIndex = NULL; + Tcl_UniChar ch, *wsrc, *wfirstChar, *wstring, *wsubspec, *wend; + + static const char *const options[] = { + "-all", "-nocase", "-expanded", + "-line", "-linestop", "-lineanchor", "-start", + "--", NULL + }; + enum options { + REGSUB_ALL, REGSUB_NOCASE, REGSUB_EXPANDED, + REGSUB_LINE, REGSUB_LINESTOP, REGSUB_LINEANCHOR, REGSUB_START, + REGSUB_LAST + }; + + cflags = TCL_REG_ADVANCED; + all = 0; + offset = 0; + resultPtr = NULL; + + for (idx = 1; idx < objc; idx++) { + const char *name; + int index; + + name = TclGetString(objv[idx]); + if (name[0] != '-') { + break; + } + if (Tcl_GetIndexFromObj(interp, objv[idx], options, "switch", + TCL_EXACT, &index) != TCL_OK) { + goto optionError; + } + switch ((enum options) index) { + case REGSUB_ALL: all = 1; - } else if (strcmp(argPtr[0], "--") == 0) { - argPtr++; - argc--; break; - } else { - Tcl_AppendResult(interp, "bad switch \"", argPtr[0], - "\": must be -all, -nocase, or --", (char *) NULL); - return TCL_ERROR; + case REGSUB_NOCASE: + cflags |= TCL_REG_NOCASE; + break; + case REGSUB_EXPANDED: + cflags |= TCL_REG_EXPANDED; + break; + case REGSUB_LINE: + cflags |= TCL_REG_NEWLINE; + break; + case REGSUB_LINESTOP: + cflags |= TCL_REG_NLSTOP; + break; + case REGSUB_LINEANCHOR: + cflags |= TCL_REG_NLANCH; + break; + case REGSUB_START: { + int temp; + if (++idx >= objc) { + goto endOfForLoop; + } + if (TclGetIntForIndexM(interp, objv[idx], 0, &temp) != TCL_OK) { + goto optionError; + } + if (startIndex) { + Tcl_DecrRefCount(startIndex); + } + startIndex = objv[idx]; + Tcl_IncrRefCount(startIndex); + break; + } + case REGSUB_LAST: + idx++; + goto endOfForLoop; } - argPtr++; - argc--; } - if (argc != 4) { - goto wrongNumArgs; + + endOfForLoop: + if (objc-idx < 3 || objc-idx > 4) { + Tcl_WrongNumArgs(interp, 1, objv, + "?-switch ...? exp string subSpec ?varName?"); + optionError: + if (startIndex) { + Tcl_DecrRefCount(startIndex); + } + return TCL_ERROR; } - /* - * Convert the string and pattern to lower case, if desired. - */ + objc -= idx; + objv += idx; - if (noCase) { - Tcl_DStringInit(&patternDString); - Tcl_DStringAppend(&patternDString, argPtr[0], -1); - pattern = Tcl_DStringValue(&patternDString); - for (p = pattern; *p != 0; p++) { - if (isupper(UCHAR(*p))) { - *p = (char)tolower(UCHAR(*p)); - } + if (startIndex) { + int stringLength = Tcl_GetCharLength(objv[1]); + + TclGetIntForIndexM(NULL, startIndex, stringLength, &offset); + Tcl_DecrRefCount(startIndex); + if (offset < 0) { + offset = 0; } - Tcl_DStringInit(&stringDString); - Tcl_DStringAppend(&stringDString, argPtr[1], -1); - string = Tcl_DStringValue(&stringDString); - for (p = string; *p != 0; p++) { - if (isupper(UCHAR(*p))) { - *p = (char)tolower(UCHAR(*p)); + } + + if (all && (offset == 0) + && (strpbrk(TclGetString(objv[2]), "&\\") == NULL) + && (strpbrk(TclGetString(objv[0]), "*+?{}()[].\\|^$") == NULL)) { + /* + * This is a simple one pair string map situation. We make use of a + * slightly modified version of the one pair STR_MAP code. + */ + + int slen, nocase; + int (*strCmpFn)(const Tcl_UniChar*,const Tcl_UniChar*,unsigned long); + Tcl_UniChar *p, wsrclc; + + numMatches = 0; + nocase = (cflags & TCL_REG_NOCASE); + strCmpFn = nocase ? Tcl_UniCharNcasecmp : Tcl_UniCharNcmp; + + wsrc = Tcl_GetUnicodeFromObj(objv[0], &slen); + wstring = Tcl_GetUnicodeFromObj(objv[1], &wlen); + wsubspec = Tcl_GetUnicodeFromObj(objv[2], &wsublen); + wend = wstring + wlen - (slen ? slen - 1 : 0); + result = TCL_OK; + + if (slen == 0) { + /* + * regsub behavior for "" matches between each character. 'string + * map' skips the "" case. + */ + + if (wstring < wend) { + resultPtr = Tcl_NewUnicodeObj(wstring, 0); + Tcl_IncrRefCount(resultPtr); + for (; wstring < wend; wstring++) { + Tcl_AppendUnicodeToObj(resultPtr, wsubspec, wsublen); + Tcl_AppendUnicodeToObj(resultPtr, wstring, 1); + numMatches++; + } + wlen = 0; + } + } else { + wsrclc = Tcl_UniCharToLower(*wsrc); + for (p = wfirstChar = wstring; wstring < wend; wstring++) { + if ((*wstring == *wsrc || + (nocase && Tcl_UniCharToLower(*wstring)==wsrclc)) && + (slen==1 || (strCmpFn(wstring, wsrc, + (unsigned long) slen) == 0))) { + if (numMatches == 0) { + resultPtr = Tcl_NewUnicodeObj(wstring, 0); + Tcl_IncrRefCount(resultPtr); + } + if (p != wstring) { + Tcl_AppendUnicodeToObj(resultPtr, p, wstring - p); + p = wstring + slen; + } else { + p += slen; + } + wstring = p - 1; + + Tcl_AppendUnicodeToObj(resultPtr, wsubspec, wsublen); + numMatches++; + } + } + if (numMatches) { + wlen = wfirstChar + wlen - p; + wstring = p; } } - } else { - pattern = argPtr[0]; - string = argPtr[1]; + objPtr = NULL; + subPtr = NULL; + goto regsubDone; } - Tcl_DStringInit(&resultDString); - regExpr = Tcl_RegExpCompile(interp, pattern); + + regExpr = Tcl_GetRegExpFromObj(interp, objv[0], cflags); if (regExpr == NULL) { - code = TCL_ERROR; - goto done; + return TCL_ERROR; + } + + /* + * Make sure to avoid problems where the objects are shared. This can + * cause RegExpObj <> UnicodeObj shimmering that causes data corruption. + * [Bug #461322] + */ + + if (objv[1] == objv[0]) { + objPtr = Tcl_DuplicateObj(objv[1]); + } else { + objPtr = objv[1]; + } + wstring = Tcl_GetUnicodeFromObj(objPtr, &wlen); + if (objv[2] == objv[0]) { + subPtr = Tcl_DuplicateObj(objv[2]); + } else { + subPtr = objv[2]; } + wsubspec = Tcl_GetUnicodeFromObj(subPtr, &wsublen); + + result = TCL_OK; /* - * The following loop is to handle multiple matches within the - * same source string; each iteration handles one match and its - * corresponding substitution. If "-all" hasn't been specified - * then the loop body only gets executed once. + * The following loop is to handle multiple matches within the same source + * string; each iteration handles one match and its corresponding + * substitution. If "-all" hasn't been specified then the loop body only + * gets executed once. We must use 'offset <= wlen' in particular for the + * case where the regexp pattern can match the empty string - this is + * useful when doing, say, 'regsub -- ^ $str ...' when $str might be + * empty. */ numMatches = 0; - for (p = string; *p != 0; ) { - match = Tcl_RegExpExec(interp, regExpr, p, string); + for ( ; offset <= wlen; ) { + + /* + * The flags argument is set if string is part of a larger string, so + * that "^" won't match. + */ + + match = Tcl_RegExpExecObj(interp, regExpr, objPtr, offset, + 10 /* matches */, ((offset > 0 && + (wstring[offset-1] != (Tcl_UniChar)'\n')) + ? TCL_REG_NOTBOL : 0)); + if (match < 0) { - code = TCL_ERROR; + result = TCL_ERROR; goto done; } - if (!match) { + if (match == 0) { break; } - numMatches += 1; + if (numMatches == 0) { + resultPtr = Tcl_NewUnicodeObj(wstring, 0); + Tcl_IncrRefCount(resultPtr); + if (offset > 0) { + /* + * Copy the initial portion of the string in if an offset was + * specified. + */ + + Tcl_AppendUnicodeToObj(resultPtr, wstring, offset); + } + } + numMatches++; /* * Copy the portion of the source string before the match to the * result variable. */ - Tcl_RegExpRange(regExpr, 0, &start, &end); - Tcl_DStringAppend(&resultDString, argPtr[1] + (p - string), start - p); - + Tcl_RegExpGetInfo(regExpr, &info); + start = info.matches[0].start; + end = info.matches[0].end; + Tcl_AppendUnicodeToObj(resultPtr, wstring + offset, start); + /* * Append the subSpec argument to the variable, making appropriate - * substitutions. This code is a bit hairy because of the backslash + * substitutions. This code is a bit hairy because of the backslash * conventions and because the code saves up ranges of characters in * subSpec to reduce the number of calls to Tcl_SetVar. */ - - for (src = firstChar = argPtr[2], c = *src; c != 0; src++, c = *src) { - int index; - - if (c == '&') { - index = 0; - } else if (c == '\\') { - c = src[1]; - if ((c >= '0') && (c <= '9')) { - index = c - '0'; - } else if ((c == '\\') || (c == '&')) { - *src = c; - src[1] = 0; - Tcl_DStringAppend(&resultDString, firstChar, -1); - *src = '\\'; - src[1] = c; - firstChar = src+2; - src++; + + wsrc = wfirstChar = wsubspec; + wend = wsubspec + wsublen; + for (ch = *wsrc; wsrc != wend; wsrc++, ch = *wsrc) { + if (ch == '&') { + idx = 0; + } else if (ch == '\\') { + ch = wsrc[1]; + if ((ch >= '0') && (ch <= '9')) { + idx = ch - '0'; + } else if ((ch == '\\') || (ch == '&')) { + *wsrc = ch; + Tcl_AppendUnicodeToObj(resultPtr, wfirstChar, + wsrc - wfirstChar + 1); + *wsrc = '\\'; + wfirstChar = wsrc + 2; + wsrc++; continue; } else { continue; @@ -394,42 +768,54 @@ Tcl_RegsubCmd(dummy, interp, argc, argv) } else { continue; } - if (firstChar != src) { - c = *src; - *src = 0; - Tcl_DStringAppend(&resultDString, firstChar, -1); - *src = c; + + if (wfirstChar != wsrc) { + Tcl_AppendUnicodeToObj(resultPtr, wfirstChar, + wsrc - wfirstChar); } - Tcl_RegExpRange(regExpr, index, &subStart, &subEnd); - if ((subStart != NULL) && (subEnd != NULL)) { - char *first, *last, saved; - - first = argPtr[1] + (subStart - string); - last = argPtr[1] + (subEnd - string); - saved = *last; - *last = 0; - Tcl_DStringAppend(&resultDString, first, -1); - *last = saved; + + if (idx <= info.nsubs) { + subStart = info.matches[idx].start; + subEnd = info.matches[idx].end; + if ((subStart >= 0) && (subEnd >= 0)) { + Tcl_AppendUnicodeToObj(resultPtr, + wstring + offset + subStart, subEnd - subStart); + } } - if (*src == '\\') { - src++; + + if (*wsrc == '\\') { + wsrc++; } - firstChar = src+1; + wfirstChar = wsrc + 1; } - if (firstChar != src) { - Tcl_DStringAppend(&resultDString, firstChar, -1); + + if (wfirstChar != wsrc) { + Tcl_AppendUnicodeToObj(resultPtr, wfirstChar, wsrc - wfirstChar); } - if (end == p) { + if (end == 0) { /* - * Always consume at least one character of the input string - * in order to prevent infinite loops. + * Always consume at least one character of the input string in + * order to prevent infinite loops. */ - Tcl_DStringAppend(&resultDString, argPtr[1] + (p - string), 1); - p = end + 1; + if (offset < wlen) { + Tcl_AppendUnicodeToObj(resultPtr, wstring + offset, 1); + } + offset++; } else { - p = end; + offset += end; + if (start == end) { + /* + * We matched an empty string, which means we must go forward + * one more step so we don't match again at the same spot. + */ + + if (offset < wlen) { + Tcl_AppendUnicodeToObj(resultPtr, wstring + offset, 1); + } + offset++; + } } if (!all) { break; @@ -441,30 +827,49 @@ Tcl_RegsubCmd(dummy, interp, argc, argv) * result variable. */ - if ((*p != 0) || (numMatches == 0)) { - Tcl_DStringAppend(&resultDString, argPtr[1] + (p - string), -1); + regsubDone: + if (numMatches == 0) { + /* + * On zero matches, just ignore the offset, since it shouldn't matter + * to us in this case, and the user may have skewed it. + */ + + resultPtr = objv[1]; + Tcl_IncrRefCount(resultPtr); + } else if (offset < wlen) { + Tcl_AppendUnicodeToObj(resultPtr, wstring + offset, wlen - offset); } - if (Tcl_SetVar(interp, argPtr[3], Tcl_DStringValue(&resultDString), 0) - == NULL) { - Tcl_AppendResult(interp, - "couldn't set variable \"", argPtr[3], "\"", - (char *) NULL); - code = TCL_ERROR; + if (objc == 4) { + if (Tcl_ObjSetVar2(interp, objv[3], NULL, resultPtr, + TCL_LEAVE_ERR_MSG) == NULL) { + result = TCL_ERROR; + } else { + /* + * Set the interpreter's object result to an integer object + * holding the number of matches. + */ + + Tcl_SetObjResult(interp, Tcl_NewIntObj(numMatches)); + } } else { - char buf[40]; - - TclFormatInt(buf, numMatches); - Tcl_SetResult(interp, buf, TCL_VOLATILE); - code = TCL_OK; + /* + * No varname supplied, so just return the modified string. + */ + + Tcl_SetObjResult(interp, resultPtr); } - done: - if (noCase) { - Tcl_DStringFree(&stringDString); - Tcl_DStringFree(&patternDString); + done: + if (objPtr && (objv[1] == objv[0])) { + Tcl_DecrRefCount(objPtr); } - Tcl_DStringFree(&resultDString); - return code; + if (subPtr && (objv[2] == objv[0])) { + Tcl_DecrRefCount(subPtr); + } + if (resultPtr) { + Tcl_DecrRefCount(resultPtr); + } + return result; } /* @@ -472,8 +877,8 @@ Tcl_RegsubCmd(dummy, interp, argc, argv) * * Tcl_RenameObjCmd -- * - * This procedure is invoked to process the "rename" Tcl command. - * See the user documentation for details on what it does. + * This procedure is invoked to process the "rename" Tcl command. See the + * user documentation for details on what it does. * * Results: * A standard Tcl object result. @@ -484,23 +889,22 @@ Tcl_RegsubCmd(dummy, interp, argc, argv) *---------------------------------------------------------------------- */ - /* ARGSUSED */ int -Tcl_RenameObjCmd(dummy, interp, objc, objv) - ClientData dummy; /* Arbitrary value passed to the command. */ - Tcl_Interp *interp; /* Current interpreter. */ - int objc; /* Number of arguments. */ - Tcl_Obj *CONST objv[]; /* Argument objects. */ +Tcl_RenameObjCmd( + ClientData dummy, /* Arbitrary value passed to the command. */ + Tcl_Interp *interp, /* Current interpreter. */ + int objc, /* Number of arguments. */ + Tcl_Obj *const objv[]) /* Argument objects. */ { - char *oldName, *newName; - + const char *oldName, *newName; + if (objc != 3) { Tcl_WrongNumArgs(interp, 1, objv, "oldName newName"); return TCL_ERROR; } - oldName = Tcl_GetStringFromObj(objv[1], (int *) NULL); - newName = Tcl_GetStringFromObj(objv[2], (int *) NULL); + oldName = TclGetString(objv[1]); + newName = TclGetString(objv[2]); return TclRenameCommand(interp, oldName, newName); } @@ -521,96 +925,355 @@ Tcl_RenameObjCmd(dummy, interp, objc, objv) *---------------------------------------------------------------------- */ - /* ARGSUSED */ int -Tcl_ReturnObjCmd(dummy, interp, objc, objv) - ClientData dummy; /* Not used. */ - Tcl_Interp *interp; /* Current interpreter. */ - int objc; /* Number of arguments. */ - Tcl_Obj *CONST objv[]; /* Argument objects. */ +Tcl_ReturnObjCmd( + ClientData dummy, /* Not used. */ + Tcl_Interp *interp, /* Current interpreter. */ + int objc, /* Number of arguments. */ + Tcl_Obj *const objv[]) /* Argument objects. */ { - Interp *iPtr = (Interp *) interp; - int optionLen, argLen, code, result; - - if (iPtr->errorInfo != NULL) { - ckfree(iPtr->errorInfo); - iPtr->errorInfo = NULL; - } - if (iPtr->errorCode != NULL) { - ckfree(iPtr->errorCode); - iPtr->errorCode = NULL; - } - code = TCL_OK; - - /* - * THIS FAILS IF AN OBJECT CONTAINS AN EMBEDDED NULL. - */ - - for (objv++, objc--; objc > 1; objv += 2, objc -= 2) { - char *option = Tcl_GetStringFromObj(objv[0], &optionLen); - char *arg = Tcl_GetStringFromObj(objv[1], &argLen); - - if (strcmp(option, "-code") == 0) { - register int c = arg[0]; - if ((c == 'o') && (strcmp(arg, "ok") == 0)) { - code = TCL_OK; - } else if ((c == 'e') && (strcmp(arg, "error") == 0)) { - code = TCL_ERROR; - } else if ((c == 'r') && (strcmp(arg, "return") == 0)) { - code = TCL_RETURN; - } else if ((c == 'b') && (strcmp(arg, "break") == 0)) { - code = TCL_BREAK; - } else if ((c == 'c') && (strcmp(arg, "continue") == 0)) { - code = TCL_CONTINUE; + int code, level; + Tcl_Obj *returnOpts; + + /* + * General syntax: [return ?-option value ...? ?result?] + * An even number of words means an explicit result argument is present. + */ + + int explicitResult = (0 == (objc % 2)); + int numOptionWords = objc - 1 - explicitResult; + + if (TCL_ERROR == TclMergeReturnOptions(interp, numOptionWords, objv+1, + &returnOpts, &code, &level)) { + return TCL_ERROR; + } + + code = TclProcessReturn(interp, code, level, returnOpts); + if (explicitResult) { + Tcl_SetObjResult(interp, objv[objc-1]); + } + return code; +} + +/* + *---------------------------------------------------------------------- + * + * Tcl_SourceObjCmd -- + * + * This procedure is invoked to process the "source" Tcl command. See the + * user documentation for details on what it does. + * + * Results: + * A standard Tcl object result. + * + * Side effects: + * See the user documentation. + * + *---------------------------------------------------------------------- + */ + +int +Tcl_SourceObjCmd( + ClientData dummy, /* Not used. */ + Tcl_Interp *interp, /* Current interpreter. */ + int objc, /* Number of arguments. */ + Tcl_Obj *const objv[]) /* Argument objects. */ +{ + return Tcl_NRCallObjProc(interp, TclNRSourceObjCmd, dummy, objc, objv); +} + +int +TclNRSourceObjCmd( + ClientData dummy, /* Not used. */ + Tcl_Interp *interp, /* Current interpreter. */ + int objc, /* Number of arguments. */ + Tcl_Obj *const objv[]) /* Argument objects. */ +{ + const char *encodingName = NULL; + Tcl_Obj *fileName; + + if (objc != 2 && objc !=4) { + Tcl_WrongNumArgs(interp, 1, objv, "?-encoding name? fileName"); + return TCL_ERROR; + } + + fileName = objv[objc-1]; + + if (objc == 4) { + static const char *const options[] = { + "-encoding", NULL + }; + int index; + + if (TCL_ERROR == Tcl_GetIndexFromObj(interp, objv[1], options, + "option", TCL_EXACT, &index)) { + return TCL_ERROR; + } + encodingName = TclGetString(objv[2]); + } + + return TclNREvalFile(interp, fileName, encodingName); +} + +/* + *---------------------------------------------------------------------- + * + * Tcl_SplitObjCmd -- + * + * This procedure is invoked to process the "split" Tcl command. See the + * user documentation for details on what it does. + * + * Results: + * A standard Tcl result. + * + * Side effects: + * See the user documentation. + * + *---------------------------------------------------------------------- + */ + +int +Tcl_SplitObjCmd( + ClientData dummy, /* Not used. */ + Tcl_Interp *interp, /* Current interpreter. */ + int objc, /* Number of arguments. */ + Tcl_Obj *const objv[]) /* Argument objects. */ +{ + Tcl_UniChar ch; + int len; + const char *splitChars; + const char *stringPtr; + const char *end; + int splitCharLen, stringLen; + Tcl_Obj *listPtr, *objPtr; + + if (objc == 2) { + splitChars = " \n\t\r"; + splitCharLen = 4; + } else if (objc == 3) { + splitChars = TclGetStringFromObj(objv[2], &splitCharLen); + } else { + Tcl_WrongNumArgs(interp, 1, objv, "string ?splitChars?"); + return TCL_ERROR; + } + + stringPtr = TclGetStringFromObj(objv[1], &stringLen); + end = stringPtr + stringLen; + listPtr = Tcl_NewObj(); + + if (stringLen == 0) { + /* + * Do nothing. + */ + } else if (splitCharLen == 0) { + Tcl_HashTable charReuseTable; + Tcl_HashEntry *hPtr; + int isNew; + + /* + * Handle the special case of splitting on every character. + * + * Uses a hash table to ensure that each kind of character has only + * one Tcl_Obj instance (multiply-referenced) in the final list. This + * is a *major* win when splitting on a long string (especially in the + * megabyte range!) - DKF + */ + + Tcl_InitHashTable(&charReuseTable, TCL_ONE_WORD_KEYS); + + for ( ; stringPtr < end; stringPtr += len) { + len = TclUtfToUniChar(stringPtr, &ch); + + /* + * Assume Tcl_UniChar is an integral type... + */ + + hPtr = Tcl_CreateHashEntry(&charReuseTable, INT2PTR((int) ch), + &isNew); + if (isNew) { + TclNewStringObj(objPtr, stringPtr, len); + + /* + * Don't need to fiddle with refcount... + */ + + Tcl_SetHashValue(hPtr, objPtr); } else { - result = Tcl_GetIntFromObj((Tcl_Interp *) NULL, objv[1], - &code); - if (result != TCL_OK) { - Tcl_ResetResult(interp); - Tcl_AppendStringsToObj(Tcl_GetObjResult(interp), - "bad completion code \"", - Tcl_GetStringFromObj(objv[1], (int *) NULL), - "\": must be ok, error, return, break, ", - "continue, or an integer", (char *) NULL); - return result; + objPtr = Tcl_GetHashValue(hPtr); + } + Tcl_ListObjAppendElement(NULL, listPtr, objPtr); + } + Tcl_DeleteHashTable(&charReuseTable); + + } else if (splitCharLen == 1) { + char *p; + + /* + * Handle the special case of splitting on a single character. This is + * only true for the one-char ASCII case, as one unicode char is > 1 + * byte in length. + */ + + while (*stringPtr && (p=strchr(stringPtr,(int)*splitChars)) != NULL) { + objPtr = Tcl_NewStringObj(stringPtr, p - stringPtr); + Tcl_ListObjAppendElement(NULL, listPtr, objPtr); + stringPtr = p + 1; + } + TclNewStringObj(objPtr, stringPtr, end - stringPtr); + Tcl_ListObjAppendElement(NULL, listPtr, objPtr); + } else { + const char *element, *p, *splitEnd; + int splitLen; + Tcl_UniChar splitChar; + + /* + * Normal case: split on any of a given set of characters. Discard + * instances of the split characters. + */ + + splitEnd = splitChars + splitCharLen; + + for (element = stringPtr; stringPtr < end; stringPtr += len) { + len = TclUtfToUniChar(stringPtr, &ch); + for (p = splitChars; p < splitEnd; p += splitLen) { + splitLen = TclUtfToUniChar(p, &splitChar); + if (ch == splitChar) { + TclNewStringObj(objPtr, element, stringPtr - element); + Tcl_ListObjAppendElement(NULL, listPtr, objPtr); + element = stringPtr + len; + break; } } - } else if (strcmp(option, "-errorinfo") == 0) { - iPtr->errorInfo = - (char *) ckalloc((unsigned) (strlen(arg) + 1)); - strcpy(iPtr->errorInfo, arg); - } else if (strcmp(option, "-errorcode") == 0) { - iPtr->errorCode = - (char *) ckalloc((unsigned) (strlen(arg) + 1)); - strcpy(iPtr->errorCode, arg); - } else { - Tcl_AppendStringsToObj(Tcl_GetObjResult(interp), - "bad option \"", option, - "\": must be -code, -errorcode, or -errorinfo", - (char *) NULL); - return TCL_ERROR; } + + TclNewStringObj(objPtr, element, stringPtr - element); + Tcl_ListObjAppendElement(NULL, listPtr, objPtr); } - - if (objc == 1) { + Tcl_SetObjResult(interp, listPtr); + return TCL_OK; +} + +/* + *---------------------------------------------------------------------- + * + * StringFirstCmd -- + * + * This procedure is invoked to process the "string first" Tcl command. + * See the user documentation for details on what it does. Note that this + * command only functions correctly on properly formed Tcl UTF strings. + * + * Results: + * A standard Tcl result. + * + * Side effects: + * See the user documentation. + * + *---------------------------------------------------------------------- + */ + +static int +StringFirstCmd( + ClientData dummy, /* Not used. */ + Tcl_Interp *interp, /* Current interpreter. */ + int objc, /* Number of arguments. */ + Tcl_Obj *const objv[]) /* Argument objects. */ +{ + Tcl_UniChar *needleStr, *haystackStr; + int match, start, needleLen, haystackLen; + + if (objc < 3 || objc > 4) { + Tcl_WrongNumArgs(interp, 1, objv, + "needleString haystackString ?startIndex?"); + return TCL_ERROR; + } + + /* + * We are searching haystackStr for the sequence needleStr. + */ + + match = -1; + start = 0; + haystackLen = -1; + + needleStr = Tcl_GetUnicodeFromObj(objv[1], &needleLen); + haystackStr = Tcl_GetUnicodeFromObj(objv[2], &haystackLen); + + if (objc == 4) { + /* + * If a startIndex is specified, we will need to fast forward to that + * point in the string before we think about a match. + */ + + if (TclGetIntForIndexM(interp, objv[3], haystackLen-1, + &start) != TCL_OK){ + return TCL_ERROR; + } + /* - * Set the interpreter's object result. An inline version of - * Tcl_SetObjResult. + * Reread to prevent shimmering problems. */ - Tcl_SetObjResult(interp, objv[0]); + needleStr = Tcl_GetUnicodeFromObj(objv[1], &needleLen); + haystackStr = Tcl_GetUnicodeFromObj(objv[2], &haystackLen); + + if (start >= haystackLen) { + goto str_first_done; + } else if (start > 0) { + haystackStr += start; + haystackLen -= start; + } else if (start < 0) { + /* + * Invalid start index mapped to string start; Bug #423581 + */ + + start = 0; + } + } + + /* + * If the length of the needle is more than the length of the haystack, it + * cannot be contained in there so we can avoid searching. [Bug 2960021] + */ + + if (needleLen > 0 && needleLen <= haystackLen) { + register Tcl_UniChar *p, *end; + + end = haystackStr + haystackLen - needleLen + 1; + for (p = haystackStr; p < end; p++) { + /* + * Scan forward to find the first character. + */ + + if ((*p == *needleStr) && (TclUniCharNcmp(needleStr, p, + (unsigned long) needleLen) == 0)) { + match = p - haystackStr; + break; + } + } } - iPtr->returnCode = code; - return TCL_RETURN; + + /* + * Compute the character index of the matching string by counting the + * number of characters before the match. + */ + + if ((match != -1) && (objc == 4)) { + match += start; + } + + str_first_done: + Tcl_SetObjResult(interp, Tcl_NewIntObj(match)); + return TCL_OK; } /* *---------------------------------------------------------------------- * - * Tcl_ScanCmd -- + * StringLastCmd -- * - * This procedure is invoked to process the "scan" Tcl command. - * See the user documentation for details on what it does. + * This procedure is invoked to process the "string last" Tcl command. + * See the user documentation for details on what it does. Note that this + * command only functions correctly on properly formed Tcl UTF strings. * * Results: * A standard Tcl result. @@ -621,303 +1284,825 @@ Tcl_ReturnObjCmd(dummy, interp, objc, objv) *---------------------------------------------------------------------- */ - /* ARGSUSED */ -int -Tcl_ScanCmd(dummy, interp, argc, argv) - ClientData dummy; /* Not used. */ - Tcl_Interp *interp; /* Current interpreter. */ - int argc; /* Number of arguments. */ - char **argv; /* Argument strings. */ +static int +StringLastCmd( + ClientData dummy, /* Not used. */ + Tcl_Interp *interp, /* Current interpreter. */ + int objc, /* Number of arguments. */ + Tcl_Obj *const objv[]) /* Argument objects. */ { -# define MAX_FIELDS 20 - typedef struct { - char fmt; /* Format for field. */ - int size; /* How many bytes to allow for - * field. */ - char *location; /* Where field will be stored. */ - } Field; - Field fields[MAX_FIELDS]; /* Info about all the fields in the - * format string. */ - register Field *curField; - int numFields = 0; /* Number of fields actually - * specified. */ - int suppress; /* Current field is assignment- - * suppressed. */ - int totalSize = 0; /* Number of bytes needed to store - * all results combined. */ - char *results; /* Where scanned output goes. - * Malloced; NULL means not allocated - * yet. */ - int numScanned; /* sscanf's result. */ - register char *fmt; - int i, widthSpecified, length, code; - char buf[40]; + Tcl_UniChar *needleStr, *haystackStr, *p; + int match, start, needleLen, haystackLen; + + if (objc < 3 || objc > 4) { + Tcl_WrongNumArgs(interp, 1, objv, + "needleString haystackString ?startIndex?"); + return TCL_ERROR; + } + + /* + * We are searching haystackString for the sequence needleString. + */ + + match = -1; + start = 0; + haystackLen = -1; + + needleStr = Tcl_GetUnicodeFromObj(objv[1], &needleLen); + haystackStr = Tcl_GetUnicodeFromObj(objv[2], &haystackLen); + + if (objc == 4) { + /* + * If a startIndex is specified, we will need to restrict the string + * range to that char index in the string + */ + + if (TclGetIntForIndexM(interp, objv[3], haystackLen-1, + &start) != TCL_OK){ + return TCL_ERROR; + } + + /* + * Reread to prevent shimmering problems. + */ + + needleStr = Tcl_GetUnicodeFromObj(objv[1], &needleLen); + haystackStr = Tcl_GetUnicodeFromObj(objv[2], &haystackLen); + + if (start < 0) { + goto str_last_done; + } else if (start < haystackLen) { + p = haystackStr + start + 1 - needleLen; + } else { + p = haystackStr + haystackLen - needleLen; + } + } else { + p = haystackStr + haystackLen - needleLen; + } /* - * The variables below are used to hold a copy of the format - * string, so that we can replace format specifiers like "%f" - * and "%F" with specifiers like "%lf" + * If the length of the needle is more than the length of the haystack, it + * cannot be contained in there so we can avoid searching. [Bug 2960021] */ -# define STATIC_SIZE 5 - char copyBuf[STATIC_SIZE], *fmtCopy; - register char *dst; + if (needleLen > 0 && needleLen <= haystackLen) { + for (; p >= haystackStr; p--) { + /* + * Scan backwards to find the first character. + */ + + if ((*p == *needleStr) && !memcmp(needleStr, p, + sizeof(Tcl_UniChar) * (size_t)needleLen)) { + match = p - haystackStr; + break; + } + } + } + + str_last_done: + Tcl_SetObjResult(interp, Tcl_NewIntObj(match)); + return TCL_OK; +} + +/* + *---------------------------------------------------------------------- + * + * StringIndexCmd -- + * + * This procedure is invoked to process the "string index" Tcl command. + * See the user documentation for details on what it does. Note that this + * command only functions correctly on properly formed Tcl UTF strings. + * + * Results: + * A standard Tcl result. + * + * Side effects: + * See the user documentation. + * + *---------------------------------------------------------------------- + */ + +static int +StringIndexCmd( + ClientData dummy, /* Not used. */ + Tcl_Interp *interp, /* Current interpreter. */ + int objc, /* Number of arguments. */ + Tcl_Obj *const objv[]) /* Argument objects. */ +{ + int length, index; - if (argc < 3) { - Tcl_AppendResult(interp, "wrong # args: should be \"", argv[0], - " string format ?varName varName ...?\"", (char *) NULL); + if (objc != 3) { + Tcl_WrongNumArgs(interp, 1, objv, "string charIndex"); return TCL_ERROR; } /* - * This procedure operates in four stages: - * 1. Scan the format string, collecting information about each field. - * 2. Allocate an array to hold all of the scanned fields. - * 3. Call sscanf to do all the dirty work, and have it store the - * parsed fields in the array. - * 4. Pick off the fields from the array and assign them to variables. + * Get the char length to calulate what 'end' means. */ - code = TCL_OK; - results = NULL; - length = strlen(argv[2]) * 2 + 1; - if (length < STATIC_SIZE) { - fmtCopy = copyBuf; - } else { - fmtCopy = (char *) ckalloc((unsigned) length); + length = Tcl_GetCharLength(objv[1]); + if (TclGetIntForIndexM(interp, objv[2], length-1, &index) != TCL_OK) { + return TCL_ERROR; } - dst = fmtCopy; - for (fmt = argv[2]; *fmt != 0; fmt++) { - *dst = *fmt; - dst++; - if (*fmt != '%') { - continue; + + if ((index >= 0) && (index < length)) { + Tcl_UniChar ch = Tcl_GetUniChar(objv[1], index); + + /* + * If we have a ByteArray object, we're careful to generate a new + * bytearray for a result. + */ + + if (TclIsPureByteArray(objv[1])) { + unsigned char uch = (unsigned char) ch; + + Tcl_SetObjResult(interp, Tcl_NewByteArrayObj(&uch, 1)); + } else { + char buf[TCL_UTF_MAX]; + + length = Tcl_UniCharToUtf(ch, buf); + Tcl_SetObjResult(interp, Tcl_NewStringObj(buf, length)); } - fmt++; - if (*fmt == '%') { - *dst = *fmt; - dst++; - continue; + } + return TCL_OK; +} + +/* + *---------------------------------------------------------------------- + * + * StringIsCmd -- + * + * This procedure is invoked to process the "string is" Tcl command. See + * the user documentation for details on what it does. Note that this + * command only functions correctly on properly formed Tcl UTF strings. + * + * Results: + * A standard Tcl result. + * + * Side effects: + * See the user documentation. + * + *---------------------------------------------------------------------- + */ + +static int +StringIsCmd( + ClientData dummy, /* Not used. */ + Tcl_Interp *interp, /* Current interpreter. */ + int objc, /* Number of arguments. */ + Tcl_Obj *const objv[]) /* Argument objects. */ +{ + const char *string1, *end, *stop; + Tcl_UniChar ch; + int (*chcomp)(int) = NULL; /* The UniChar comparison function. */ + int i, failat = 0, result = 1, strict = 0, index, length1, length2; + Tcl_Obj *objPtr, *failVarObj = NULL; + Tcl_WideInt w; + + static const char *const isClasses[] = { + "alnum", "alpha", "ascii", "control", + "boolean", "digit", "double", "entier", + "false", "graph", "integer", "list", + "lower", "print", "punct", "space", + "true", "upper", "wideinteger", "wordchar", + "xdigit", NULL + }; + enum isClasses { + STR_IS_ALNUM, STR_IS_ALPHA, STR_IS_ASCII, STR_IS_CONTROL, + STR_IS_BOOL, STR_IS_DIGIT, STR_IS_DOUBLE, STR_IS_ENTIER, + STR_IS_FALSE, STR_IS_GRAPH, STR_IS_INT, STR_IS_LIST, + STR_IS_LOWER, STR_IS_PRINT, STR_IS_PUNCT, STR_IS_SPACE, + STR_IS_TRUE, STR_IS_UPPER, STR_IS_WIDE, STR_IS_WORD, + STR_IS_XDIGIT + }; + static const char *const isOptions[] = { + "-strict", "-failindex", NULL + }; + enum isOptions { + OPT_STRICT, OPT_FAILIDX + }; + + if (objc < 3 || objc > 6) { + Tcl_WrongNumArgs(interp, 1, objv, + "class ?-strict? ?-failindex var? str"); + return TCL_ERROR; + } + if (Tcl_GetIndexFromObj(interp, objv[1], isClasses, "class", 0, + &index) != TCL_OK) { + return TCL_ERROR; + } + + if (objc != 3) { + for (i = 2; i < objc-1; i++) { + int idx2; + + if (Tcl_GetIndexFromObj(interp, objv[i], isOptions, "option", 0, + &idx2) != TCL_OK) { + return TCL_ERROR; + } + switch ((enum isOptions) idx2) { + case OPT_STRICT: + strict = 1; + break; + case OPT_FAILIDX: + if (i+1 >= objc-1) { + Tcl_WrongNumArgs(interp, 2, objv, + "?-strict? ?-failindex var? str"); + return TCL_ERROR; + } + failVarObj = objv[++i]; + break; + } } - if (*fmt == '*') { - suppress = 1; - *dst = *fmt; - dst++; - fmt++; - } else { - suppress = 0; + } + + /* + * We get the objPtr so that we can short-cut for some classes by checking + * the object type (int and double), but we need the string otherwise, + * because we don't want any conversion of type occuring (as, for example, + * Tcl_Get*FromObj would do). + */ + + objPtr = objv[objc-1]; + + /* + * When entering here, result == 1 and failat == 0. + */ + + switch ((enum isClasses) index) { + case STR_IS_ALNUM: + chcomp = Tcl_UniCharIsAlnum; + break; + case STR_IS_ALPHA: + chcomp = Tcl_UniCharIsAlpha; + break; + case STR_IS_ASCII: + chcomp = UniCharIsAscii; + break; + case STR_IS_BOOL: + case STR_IS_TRUE: + case STR_IS_FALSE: + if ((objPtr->typePtr != &tclBooleanType) + && (TCL_OK != TclSetBooleanFromAny(NULL, objPtr))) { + if (strict) { + result = 0; + } else { + string1 = TclGetStringFromObj(objPtr, &length1); + result = length1 == 0; + } + } else if (((index == STR_IS_TRUE) && + objPtr->internalRep.longValue == 0) + || ((index == STR_IS_FALSE) && + objPtr->internalRep.longValue != 0)) { + result = 0; } - widthSpecified = 0; - while (isdigit(UCHAR(*fmt))) { - widthSpecified = 1; - *dst = *fmt; - dst++; - fmt++; + break; + case STR_IS_CONTROL: + chcomp = Tcl_UniCharIsControl; + break; + case STR_IS_DIGIT: + chcomp = Tcl_UniCharIsDigit; + break; + case STR_IS_DOUBLE: { + /* TODO */ + if ((objPtr->typePtr == &tclDoubleType) || + (objPtr->typePtr == &tclIntType) || +#ifndef TCL_WIDE_INT_IS_LONG + (objPtr->typePtr == &tclWideIntType) || +#endif + (objPtr->typePtr == &tclBignumType)) { + break; } - if ((*fmt == 'l') || (*fmt == 'h') || (*fmt == 'L')) { - fmt++; + string1 = TclGetStringFromObj(objPtr, &length1); + if (length1 == 0) { + if (strict) { + result = 0; + } + goto str_is_done; } - *dst = *fmt; - dst++; - if (suppress) { - continue; + end = string1 + length1; + if (TclParseNumber(NULL, objPtr, NULL, NULL, -1, + (const char **) &stop, 0) != TCL_OK) { + result = 0; + failat = 0; + } else { + failat = stop - string1; + if (stop < end) { + result = 0; + TclFreeIntRep(objPtr); + } } - if (numFields == MAX_FIELDS) { - Tcl_SetResult(interp, "too many fields to scan", TCL_STATIC); - code = TCL_ERROR; - goto done; + break; + } + case STR_IS_GRAPH: + chcomp = Tcl_UniCharIsGraph; + break; + case STR_IS_INT: + if (TCL_OK == TclGetIntFromObj(NULL, objPtr, &i)) { + break; } - curField = &fields[numFields]; - numFields++; - switch (*fmt) { - case 'd': - case 'i': - case 'o': - case 'x': - curField->fmt = 'd'; - curField->size = sizeof(int); - break; + goto failedIntParse; + case STR_IS_ENTIER: + if ((objPtr->typePtr == &tclIntType) || +#ifndef TCL_WIDE_INT_IS_LONG + (objPtr->typePtr == &tclWideIntType) || +#endif + (objPtr->typePtr == &tclBignumType)) { + break; + } + string1 = TclGetStringFromObj(objPtr, &length1); + if (length1 == 0) { + if (strict) { + result = 0; + } + goto str_is_done; + } + end = string1 + length1; + if (TclParseNumber(NULL, objPtr, NULL, NULL, -1, + (const char **) &stop, TCL_PARSE_INTEGER_ONLY) == TCL_OK) { + if (stop == end) { + /* + * Entire string parses as an integer. + */ - case 'u': - curField->fmt = 'u'; - curField->size = sizeof(int); break; + } else { + /* + * Some prefix parsed as an integer, but not the whole string, + * so return failure index as the point where parsing stopped. + * Clear out the internal rep, since keeping it would leave + * *objPtr in an inconsistent state. + */ + + result = 0; + failat = stop - string1; + TclFreeIntRep(objPtr); + } + } else { + /* + * No prefix is a valid integer. Fail at beginning. + */ - case 's': - curField->fmt = 's'; - curField->size = strlen(argv[1]) + 1; - break; + result = 0; + failat = 0; + } + break; + case STR_IS_WIDE: + if (TCL_OK == Tcl_GetWideIntFromObj(NULL, objPtr, &w)) { + break; + } - case 'c': - if (widthSpecified) { - Tcl_SetResult(interp, - "field width may not be specified in %c conversion", - TCL_STATIC); - code = TCL_ERROR; - goto done; - } - curField->fmt = 'c'; - curField->size = sizeof(int); - break; + failedIntParse: + string1 = TclGetStringFromObj(objPtr, &length1); + if (length1 == 0) { + if (strict) { + result = 0; + } + goto str_is_done; + } + result = 0; + if (failVarObj == NULL) { + /* + * Don't bother computing the failure point if we're not going to + * return it. + */ - case 'e': - case 'f': - case 'g': - dst[-1] = 'l'; - dst[0] = 'f'; - dst++; - curField->fmt = 'f'; - curField->size = sizeof(double); - break; + break; + } + end = string1 + length1; + if (TclParseNumber(NULL, objPtr, NULL, NULL, -1, + (const char **) &stop, TCL_PARSE_INTEGER_ONLY) == TCL_OK) { + if (stop == end) { + /* + * Entire string parses as an integer, but rejected by + * Tcl_Get(Wide)IntFromObj() so we must have overflowed the + * target type, and our convention is to return failure at + * index -1 in that situation. + */ + + failat = -1; + } else { + /* + * Some prefix parsed as an integer, but not the whole string, + * so return failure index as the point where parsing stopped. + * Clear out the internal rep, since keeping it would leave + * *objPtr in an inconsistent state. + */ + + failat = stop - string1; + TclFreeIntRep(objPtr); + } + } else { + /* + * No prefix is a valid integer. Fail at beginning. + */ - case '[': - curField->fmt = 's'; - curField->size = strlen(argv[1]) + 1; - do { - fmt++; - if (*fmt == 0) { - Tcl_SetResult(interp, - "unmatched [ in format string", TCL_STATIC); - code = TCL_ERROR; - goto done; - } - *dst = *fmt; - dst++; - } while (*fmt != ']'); - break; + failat = 0; + } + break; + case STR_IS_LIST: + /* + * We ignore the strictness here, since empty strings are always + * well-formed lists. + */ + + if (TCL_OK == TclListObjLength(NULL, objPtr, &length2)) { + break; + } + + if (failVarObj != NULL) { + /* + * Need to figure out where the list parsing failed, which is + * fairly expensive. This is adapted from the core of + * SetListFromAny(). + */ + + const char *elemStart, *nextElem; + int lenRemain, elemSize; + register const char *p; + + string1 = TclGetStringFromObj(objPtr, &length1); + end = string1 + length1; + failat = -1; + for (p=string1, lenRemain=length1; lenRemain > 0; + p=nextElem, lenRemain=end-nextElem) { + if (TCL_ERROR == TclFindElement(NULL, p, lenRemain, + &elemStart, &nextElem, &elemSize, NULL)) { + Tcl_Obj *tmpStr; - default: - { - char buf[50]; + /* + * This is the simplest way of getting the number of + * characters parsed. Note that this is not the same as + * the number of bytes when parsing strings with non-ASCII + * characters in them. + * + * Skip leading spaces first. This is only really an issue + * if it is the first "element" that has the failure. + */ - sprintf(buf, "bad scan conversion character \"%c\"", *fmt); - Tcl_SetResult(interp, buf, TCL_VOLATILE); - code = TCL_ERROR; - goto done; + while (TclIsSpaceProc(*p)) { + p++; + } + TclNewStringObj(tmpStr, string1, p-string1); + failat = Tcl_GetCharLength(tmpStr); + TclDecrRefCount(tmpStr); + break; } + } } - curField->size = TCL_ALIGN(curField->size); - totalSize += curField->size; + result = 0; + break; + case STR_IS_LOWER: + chcomp = Tcl_UniCharIsLower; + break; + case STR_IS_PRINT: + chcomp = Tcl_UniCharIsPrint; + break; + case STR_IS_PUNCT: + chcomp = Tcl_UniCharIsPunct; + break; + case STR_IS_SPACE: + chcomp = Tcl_UniCharIsSpace; + break; + case STR_IS_UPPER: + chcomp = Tcl_UniCharIsUpper; + break; + case STR_IS_WORD: + chcomp = Tcl_UniCharIsWordChar; + break; + case STR_IS_XDIGIT: + chcomp = UniCharIsHexDigit; + break; } - *dst = 0; - if (numFields != (argc-3)) { - Tcl_SetResult(interp, - "different numbers of variable names and field specifiers", - TCL_STATIC); - code = TCL_ERROR; - goto done; + if (chcomp != NULL) { + string1 = TclGetStringFromObj(objPtr, &length1); + if (length1 == 0) { + if (strict) { + result = 0; + } + goto str_is_done; + } + end = string1 + length1; + for (; string1 < end; string1 += length2, failat++) { + length2 = TclUtfToUniChar(string1, &ch); + if (!chcomp(ch)) { + result = 0; + break; + } + } } /* - * Step 2: + * Only set the failVarObj when we will return 0 and we have indicated a + * valid fail index (>= 0). */ - results = (char *) ckalloc((unsigned) totalSize); - for (i = 0, totalSize = 0, curField = fields; - i < numFields; i++, curField++) { - curField->location = results + totalSize; - totalSize += curField->size; + str_is_done: + if ((result == 0) && (failVarObj != NULL) && + Tcl_ObjSetVar2(interp, failVarObj, NULL, Tcl_NewIntObj(failat), + TCL_LEAVE_ERR_MSG) == NULL) { + return TCL_ERROR; + } + Tcl_SetObjResult(interp, Tcl_NewBooleanObj(result)); + return TCL_OK; +} + +static int +UniCharIsAscii( + int character) +{ + return (character >= 0) && (character < 0x80); +} + +static int +UniCharIsHexDigit( + int character) +{ + return (character >= 0) && (character < 0x80) && isxdigit(character); +} + +/* + *---------------------------------------------------------------------- + * + * StringMapCmd -- + * + * This procedure is invoked to process the "string map" Tcl command. See + * the user documentation for details on what it does. Note that this + * command only functions correctly on properly formed Tcl UTF strings. + * + * Results: + * A standard Tcl result. + * + * Side effects: + * See the user documentation. + * + *---------------------------------------------------------------------- + */ + +static int +StringMapCmd( + ClientData dummy, /* Not used. */ + Tcl_Interp *interp, /* Current interpreter. */ + int objc, /* Number of arguments. */ + Tcl_Obj *const objv[]) /* Argument objects. */ +{ + int length1, length2, mapElemc, index; + int nocase = 0, mapWithDict = 0, copySource = 0; + Tcl_Obj **mapElemv, *sourceObj, *resultPtr; + Tcl_UniChar *ustring1, *ustring2, *p, *end; + int (*strCmpFn)(const Tcl_UniChar*, const Tcl_UniChar*, unsigned long); + + if (objc < 3 || objc > 4) { + Tcl_WrongNumArgs(interp, 1, objv, "?-nocase? charMap string"); + return TCL_ERROR; + } + + if (objc == 4) { + const char *string = TclGetStringFromObj(objv[1], &length2); + + if ((length2 > 1) && + strncmp(string, "-nocase", (size_t) length2) == 0) { + nocase = 1; + } else { + Tcl_SetObjResult(interp, Tcl_ObjPrintf( + "bad option \"%s\": must be -nocase", string)); + Tcl_SetErrorCode(interp, "TCL", "LOOKUP", "INDEX", "option", + string, NULL); + return TCL_ERROR; + } } /* - * Fill in the remaining fields with NULL; the only purpose of - * this is to keep some memory analyzers, like Purify, from - * complaining. + * This test is tricky, but has to be that way or you get other strange + * inconsistencies (see test string-10.20 for illustration why!) */ - for ( ; i < MAX_FIELDS; i++, curField++) { - curField->location = NULL; + if (objv[objc-2]->typePtr == &tclDictType && objv[objc-2]->bytes == NULL){ + int i, done; + Tcl_DictSearch search; + + /* + * We know the type exactly, so all dict operations will succeed for + * sure. This shortens this code quite a bit. + */ + + Tcl_DictObjSize(interp, objv[objc-2], &mapElemc); + if (mapElemc == 0) { + /* + * Empty charMap, just return whatever string was given. + */ + + Tcl_SetObjResult(interp, objv[objc-1]); + return TCL_OK; + } + + mapElemc *= 2; + mapWithDict = 1; + + /* + * Copy the dictionary out into an array; that's the easiest way to + * adapt this code... + */ + + mapElemv = TclStackAlloc(interp, sizeof(Tcl_Obj *) * mapElemc); + Tcl_DictObjFirst(interp, objv[objc-2], &search, mapElemv+0, + mapElemv+1, &done); + for (i=2 ; i<mapElemc ; i+=2) { + Tcl_DictObjNext(&search, mapElemv+i, mapElemv+i+1, &done); + } + Tcl_DictObjDone(&search); + } else { + if (TclListObjGetElements(interp, objv[objc-2], &mapElemc, + &mapElemv) != TCL_OK) { + return TCL_ERROR; + } + if (mapElemc == 0) { + /* + * empty charMap, just return whatever string was given. + */ + + Tcl_SetObjResult(interp, objv[objc-1]); + return TCL_OK; + } else if (mapElemc & 1) { + /* + * The charMap must be an even number of key/value items. + */ + + Tcl_SetObjResult(interp, + Tcl_NewStringObj("char map list unbalanced", -1)); + Tcl_SetErrorCode(interp, "TCL", "OPERATION", "MAP", + "UNBALANCED", NULL); + return TCL_ERROR; + } } /* - * Step 3: + * Take a copy of the source string object if it is the same as the map + * string to cut out nasty sharing crashes. [Bug 1018562] */ - numScanned = sscanf(argv[1], fmtCopy, - fields[0].location, fields[1].location, fields[2].location, - fields[3].location, fields[4].location, fields[5].location, - fields[6].location, fields[7].location, fields[8].location, - fields[9].location, fields[10].location, fields[11].location, - fields[12].location, fields[13].location, fields[14].location, - fields[15].location, fields[16].location, fields[17].location, - fields[18].location, fields[19].location); + if (objv[objc-2] == objv[objc-1]) { + sourceObj = Tcl_DuplicateObj(objv[objc-1]); + copySource = 1; + } else { + sourceObj = objv[objc-1]; + } + ustring1 = Tcl_GetUnicodeFromObj(sourceObj, &length1); + if (length1 == 0) { + /* + * Empty input string, just stop now. + */ + + goto done; + } + end = ustring1 + length1; + + strCmpFn = (nocase ? Tcl_UniCharNcasecmp : Tcl_UniCharNcmp); /* - * Step 4: + * Force result to be Unicode */ - if (numScanned < numFields) { - numFields = numScanned; - } - for (i = 0, curField = fields; i < numFields; i++, curField++) { - switch (curField->fmt) { - char string[TCL_DOUBLE_SPACE]; - - case 'd': - TclFormatInt(string, *((int *) curField->location)); - if (Tcl_SetVar(interp, argv[i+3], string, 0) == NULL) { - storeError: - Tcl_AppendResult(interp, - "couldn't set variable \"", argv[i+3], "\"", - (char *) NULL); - code = TCL_ERROR; - goto done; - } - break; + resultPtr = Tcl_NewUnicodeObj(ustring1, 0); - case 'u': - sprintf(string, "%u", *((int *) curField->location)); - if (Tcl_SetVar(interp, argv[i+3], string, 0) == NULL) { - goto storeError; - } - break; + if (mapElemc == 2) { + /* + * Special case for one map pair which avoids the extra for loop and + * extra calls to get Unicode data. The algorithm is otherwise + * identical to the multi-pair case. This will be >30% faster on + * larger strings. + */ - case 'c': - TclFormatInt(string, *((char *) curField->location) & 0xff); - if (Tcl_SetVar(interp, argv[i+3], string, 0) == NULL) { - goto storeError; - } - break; + int mapLen; + Tcl_UniChar *mapString, u2lc; + + ustring2 = Tcl_GetUnicodeFromObj(mapElemv[0], &length2); + p = ustring1; + if ((length2 > length1) || (length2 == 0)) { + /* + * Match string is either longer than input or empty. + */ + + ustring1 = end; + } else { + mapString = Tcl_GetUnicodeFromObj(mapElemv[1], &mapLen); + u2lc = (nocase ? Tcl_UniCharToLower(*ustring2) : 0); + for (; ustring1 < end; ustring1++) { + if (((*ustring1 == *ustring2) || + (nocase&&Tcl_UniCharToLower(*ustring1)==u2lc)) && + (length2==1 || strCmpFn(ustring1, ustring2, + (unsigned long) length2) == 0)) { + if (p != ustring1) { + Tcl_AppendUnicodeToObj(resultPtr, p, ustring1-p); + p = ustring1 + length2; + } else { + p += length2; + } + ustring1 = p - 1; - case 's': - if (Tcl_SetVar(interp, argv[i+3], curField->location, 0) - == NULL) { - goto storeError; + Tcl_AppendUnicodeToObj(resultPtr, mapString, mapLen); } - break; + } + } + } else { + Tcl_UniChar **mapStrings, *u2lc = NULL; + int *mapLens; + + /* + * Precompute pointers to the unicode string and length. This saves us + * repeated function calls later, significantly speeding up the + * algorithm. We only need the lowercase first char in the nocase + * case. + */ - case 'f': - Tcl_PrintDouble((Tcl_Interp *) NULL, - *((double *) curField->location), string); - if (Tcl_SetVar(interp, argv[i+3], string, 0) == NULL) { - goto storeError; + mapStrings = TclStackAlloc(interp, mapElemc*2*sizeof(Tcl_UniChar *)); + mapLens = TclStackAlloc(interp, mapElemc * 2 * sizeof(int)); + if (nocase) { + u2lc = TclStackAlloc(interp, mapElemc * sizeof(Tcl_UniChar)); + } + for (index = 0; index < mapElemc; index++) { + mapStrings[index] = Tcl_GetUnicodeFromObj(mapElemv[index], + mapLens+index); + if (nocase && ((index % 2) == 0)) { + u2lc[index/2] = Tcl_UniCharToLower(*mapStrings[index]); + } + } + for (p = ustring1; ustring1 < end; ustring1++) { + for (index = 0; index < mapElemc; index += 2) { + /* + * Get the key string to match on. + */ + + ustring2 = mapStrings[index]; + length2 = mapLens[index]; + if ((length2 > 0) && ((*ustring1 == *ustring2) || (nocase && + (Tcl_UniCharToLower(*ustring1) == u2lc[index/2]))) && + /* Restrict max compare length. */ + (end-ustring1 >= length2) && ((length2 == 1) || + !strCmpFn(ustring2, ustring1, (unsigned) length2))) { + if (p != ustring1) { + /* + * Put the skipped chars onto the result first. + */ + + Tcl_AppendUnicodeToObj(resultPtr, p, ustring1-p); + p = ustring1 + length2; + } else { + p += length2; + } + + /* + * Adjust len to be full length of matched string. + */ + + ustring1 = p - 1; + + /* + * Append the map value to the unicode string. + */ + + Tcl_AppendUnicodeToObj(resultPtr, + mapStrings[index+1], mapLens[index+1]); + break; } - break; + } + } + if (nocase) { + TclStackFree(interp, u2lc); } + TclStackFree(interp, mapLens); + TclStackFree(interp, mapStrings); + } + if (p != ustring1) { + /* + * Put the rest of the unmapped chars onto result. + */ + + Tcl_AppendUnicodeToObj(resultPtr, p, ustring1 - p); } - TclFormatInt(buf, numScanned); - Tcl_SetResult(interp, buf, TCL_VOLATILE); - done: - if (results != NULL) { - ckfree(results); + Tcl_SetObjResult(interp, resultPtr); + done: + if (mapWithDict) { + TclStackFree(interp, mapElemv); } - if (fmtCopy != copyBuf) { - ckfree(fmtCopy); + if (copySource) { + Tcl_DecrRefCount(sourceObj); } - return code; + return TCL_OK; } /* *---------------------------------------------------------------------- * - * Tcl_SourceObjCmd -- + * StringMatchCmd -- * - * This procedure is invoked to process the "source" Tcl command. - * See the user documentation for details on what it does. + * This procedure is invoked to process the "string match" Tcl command. + * See the user documentation for details on what it does. Note that this + * command only functions correctly on properly formed Tcl UTF strings. * * Results: - * A standard Tcl object result. + * A standard Tcl result. * * Side effects: * See the user documentation. @@ -925,38 +2110,104 @@ Tcl_ScanCmd(dummy, interp, argc, argv) *---------------------------------------------------------------------- */ - /* ARGSUSED */ -int -Tcl_SourceObjCmd(dummy, interp, objc, objv) - ClientData dummy; /* Not used. */ - Tcl_Interp *interp; /* Current interpreter. */ - int objc; /* Number of arguments. */ - Tcl_Obj *CONST objv[]; /* Argument objects. */ +static int +StringMatchCmd( + ClientData dummy, /* Not used. */ + Tcl_Interp *interp, /* Current interpreter. */ + int objc, /* Number of arguments. */ + Tcl_Obj *const objv[]) /* Argument objects. */ { - char *bytes; - int result; - - if (objc != 2) { - Tcl_WrongNumArgs(interp, 1, objv, "fileName"); + int nocase = 0; + + if (objc < 3 || objc > 4) { + Tcl_WrongNumArgs(interp, 1, objv, "?-nocase? pattern string"); + return TCL_ERROR; + } + + if (objc == 4) { + int length; + const char *string = TclGetStringFromObj(objv[1], &length); + + if ((length > 1) && + strncmp(string, "-nocase", (size_t) length) == 0) { + nocase = TCL_MATCH_NOCASE; + } else { + Tcl_SetObjResult(interp, Tcl_ObjPrintf( + "bad option \"%s\": must be -nocase", string)); + Tcl_SetErrorCode(interp, "TCL", "LOOKUP", "INDEX", "option", + string, NULL); + return TCL_ERROR; + } + } + Tcl_SetObjResult(interp, Tcl_NewBooleanObj( + TclStringMatchObj(objv[objc-1], objv[objc-2], nocase))); + return TCL_OK; +} + +/* + *---------------------------------------------------------------------- + * + * StringRangeCmd -- + * + * This procedure is invoked to process the "string range" Tcl command. + * See the user documentation for details on what it does. Note that this + * command only functions correctly on properly formed Tcl UTF strings. + * + * Results: + * A standard Tcl result. + * + * Side effects: + * See the user documentation. + * + *---------------------------------------------------------------------- + */ + +static int +StringRangeCmd( + ClientData dummy, /* Not used. */ + Tcl_Interp *interp, /* Current interpreter. */ + int objc, /* Number of arguments. */ + Tcl_Obj *const objv[]) /* Argument objects. */ +{ + int length, first, last; + + if (objc != 4) { + Tcl_WrongNumArgs(interp, 1, objv, "string first last"); return TCL_ERROR; } /* - * THIS FAILS IF THE OBJECT'S STRING REP CONTAINS A NULL. + * Get the length in actual characters; Then reduce it by one because + * 'end' refers to the last character, not one past it. */ - bytes = Tcl_GetStringFromObj(objv[1], (int *) NULL); - result = Tcl_EvalFile(interp, bytes); - return result; + length = Tcl_GetCharLength(objv[1]) - 1; + + if (TclGetIntForIndexM(interp, objv[2], length, &first) != TCL_OK || + TclGetIntForIndexM(interp, objv[3], length, &last) != TCL_OK) { + return TCL_ERROR; + } + + if (first < 0) { + first = 0; + } + if (last >= length) { + last = length; + } + if (last >= first) { + Tcl_SetObjResult(interp, Tcl_GetRange(objv[1], first, last)); + } + return TCL_OK; } /* *---------------------------------------------------------------------- * - * Tcl_SplitObjCmd -- + * StringReptCmd -- * - * This procedure is invoked to process the "split" Tcl command. - * See the user documentation for details on what it does. + * This procedure is invoked to process the "string repeat" Tcl command. + * See the user documentation for details on what it does. Note that this + * command only functions correctly on properly formed Tcl UTF strings. * * Results: * A standard Tcl result. @@ -967,75 +2218,108 @@ Tcl_SourceObjCmd(dummy, interp, objc, objv) *---------------------------------------------------------------------- */ - /* ARGSUSED */ -int -Tcl_SplitObjCmd(dummy, interp, objc, objv) - ClientData dummy; /* Not used. */ - Tcl_Interp *interp; /* Current interpreter. */ - int objc; /* Number of arguments. */ - Tcl_Obj *CONST objv[]; /* Argument objects. */ +static int +StringReptCmd( + ClientData dummy, /* Not used. */ + Tcl_Interp *interp, /* Current interpreter. */ + int objc, /* Number of arguments. */ + Tcl_Obj *const objv[]) /* Argument objects. */ { - register char *p, *p2; - char *splitChars, *string, *elementStart; - int splitCharLen, stringLen, i, j; - Tcl_Obj *listPtr; + const char *string1; + char *string2; + int count, index, length1, length2; + Tcl_Obj *resultPtr; - if (objc == 2) { - splitChars = " \n\t\r"; - splitCharLen = 4; - } else if (objc == 3) { - splitChars = Tcl_GetStringFromObj(objv[2], &splitCharLen); - } else { - Tcl_WrongNumArgs(interp, 1, objv, "string ?splitChars?"); + if (objc != 3) { + Tcl_WrongNumArgs(interp, 1, objv, "string count"); + return TCL_ERROR; + } + + if (TclGetIntFromObj(interp, objv[2], &count) != TCL_OK) { return TCL_ERROR; } - string = Tcl_GetStringFromObj(objv[1], &stringLen); - listPtr = Tcl_NewListObj(0, (Tcl_Obj **) NULL); - /* - * Handle the special case of splitting on every character. + * Check for cases that allow us to skip copying stuff. */ - if (splitCharLen == 0) { - for (i = 0, p = string; i < stringLen; i++, p++) { - Tcl_ListObjAppendElement(interp, listPtr, - Tcl_NewStringObj(p, 1)); - } - } else { + if (count == 1) { + Tcl_SetObjResult(interp, objv[1]); + goto done; + } else if (count < 1) { + goto done; + } + string1 = TclGetStringFromObj(objv[1], &length1); + if (length1 <= 0) { + goto done; + } + + /* + * Only build up a string that has data. Instead of building it up with + * repeated appends, we just allocate the necessary space once and copy + * the string value in. + * + * We have to worry about overflow [Bugs 714106, 2561746]. + * At this point we know 1 <= length1 <= INT_MAX and 2 <= count <= INT_MAX. + * We need to keep 2 <= length2 <= INT_MAX. + */ + + if (count > INT_MAX/length1) { + Tcl_SetObjResult(interp, Tcl_ObjPrintf( + "result exceeds max size for a Tcl value (%d bytes)", + INT_MAX)); + Tcl_SetErrorCode(interp, "TCL", "MEMORY", NULL); + return TCL_ERROR; + } + length2 = length1 * count; + + /* + * Include space for the NUL. + */ + + string2 = attemptckalloc((unsigned) length2 + 1); + if (string2 == NULL) { /* - * Normal case: split on any of a given set of characters. - * Discard instances of the split characters. + * Alloc failed. Note that in this case we try to do an error message + * since this is a case that's most likely when the alloc is large and + * that's easy to do with this API. Note that if we fail allocating a + * short string, this will likely keel over too (and fatally). */ - for (i = 0, p = elementStart = string; i < stringLen; i++, p++) { - for (j = 0, p2 = splitChars; j < splitCharLen; j++, p2++) { - if (*p2 == *p) { - Tcl_ListObjAppendElement(interp, listPtr, - Tcl_NewStringObj(elementStart, (p-elementStart))); - elementStart = p+1; - break; - } - } - } - if (p != string) { - int remainingChars = stringLen - (elementStart-string); - Tcl_ListObjAppendElement(interp, listPtr, - Tcl_NewStringObj(elementStart, remainingChars)); - } + Tcl_SetObjResult(interp, Tcl_ObjPrintf( + "string size overflow, out of memory allocating %u bytes", + length2 + 1)); + Tcl_SetErrorCode(interp, "TCL", "MEMORY", NULL); + return TCL_ERROR; + } + for (index = 0; index < count; index++) { + memcpy(string2 + (length1 * index), string1, (size_t) length1); } + string2[length2] = '\0'; - Tcl_SetObjResult(interp, listPtr); + /* + * We have to directly assign this instead of using Tcl_SetStringObj (and + * indirectly TclInitStringRep) because that makes another copy of the + * data. + */ + + TclNewObj(resultPtr); + resultPtr->bytes = string2; + resultPtr->length = length2; + Tcl_SetObjResult(interp, resultPtr); + + done: return TCL_OK; } /* *---------------------------------------------------------------------- * - * Tcl_StringObjCmd -- + * StringRplcCmd -- * - * This procedure is invoked to process the "string" Tcl command. - * See the user documentation for details on what it does. + * This procedure is invoked to process the "string replace" Tcl command. + * See the user documentation for details on what it does. Note that this + * command only functions correctly on properly formed Tcl UTF strings. * * Results: * A standard Tcl result. @@ -1046,383 +2330,674 @@ Tcl_SplitObjCmd(dummy, interp, objc, objv) *---------------------------------------------------------------------- */ - /* ARGSUSED */ -int -Tcl_StringObjCmd(dummy, interp, objc, objv) - ClientData dummy; /* Not used. */ - Tcl_Interp *interp; /* Current interpreter. */ - int objc; /* Number of arguments. */ - Tcl_Obj *CONST objv[]; /* Argument objects. */ +static int +StringRplcCmd( + ClientData dummy, /* Not used. */ + Tcl_Interp *interp, /* Current interpreter. */ + int objc, /* Number of arguments. */ + Tcl_Obj *const objv[]) /* Argument objects. */ { - int index, left, right; - Tcl_Obj *resultPtr; - char *string1, *string2; - int length1, length2; - static char *options[] = { - "compare", "first", "index", "last", - "length", "match", "range", "tolower", - "toupper", "trim", "trimleft", "trimright", - "wordend", "wordstart", NULL - }; - enum options { - STR_COMPARE, STR_FIRST, STR_INDEX, STR_LAST, - STR_LENGTH, STR_MATCH, STR_RANGE, STR_TOLOWER, - STR_TOUPPER, STR_TRIM, STR_TRIMLEFT, STR_TRIMRIGHT, - STR_WORDEND, STR_WORDSTART - }; - - if (objc < 2) { - Tcl_WrongNumArgs(interp, 1, objv, "option arg ?arg ...?"); + Tcl_UniChar *ustring; + int first, last, length; + + if (objc < 4 || objc > 5) { + Tcl_WrongNumArgs(interp, 1, objv, "string first last ?string?"); return TCL_ERROR; } - - if (Tcl_GetIndexFromObj(interp, objv[1], options, "option", 0, - &index) != TCL_OK) { + + ustring = Tcl_GetUnicodeFromObj(objv[1], &length); + length--; + + if (TclGetIntForIndexM(interp, objv[2], length, &first) != TCL_OK || + TclGetIntForIndexM(interp, objv[3], length, &last) != TCL_OK){ return TCL_ERROR; } - resultPtr = Tcl_GetObjResult(interp); - switch ((enum options) index) { - case STR_COMPARE: { - int match, length; + if ((last < first) || (last < 0) || (first > length)) { + Tcl_SetObjResult(interp, objv[1]); + } else { + Tcl_Obj *resultPtr; - if (objc != 4) { - Tcl_WrongNumArgs(interp, 2, objv, "string1 string2"); - return TCL_ERROR; - } + ustring = Tcl_GetUnicodeFromObj(objv[1], &length); + length--; - string1 = Tcl_GetStringFromObj(objv[2], &length1); - string2 = Tcl_GetStringFromObj(objv[3], &length2); + if (first < 0) { + first = 0; + } - length = (length1 < length2) ? length1 : length2; - match = memcmp(string1, string2, (unsigned) length); - if (match == 0) { - match = length1 - length2; - } - Tcl_SetIntObj(resultPtr, (match > 0) ? 1 : (match < 0) ? -1 : 0); - break; + resultPtr = Tcl_NewUnicodeObj(ustring, first); + if (objc == 5) { + Tcl_AppendObjToObj(resultPtr, objv[4]); + } + if (last < length) { + Tcl_AppendUnicodeToObj(resultPtr, ustring + last + 1, + length - last); } - case STR_FIRST: { - register char *p, *end; - int match; + Tcl_SetObjResult(interp, resultPtr); + } + return TCL_OK; +} + +/* + *---------------------------------------------------------------------- + * + * StringRevCmd -- + * + * This procedure is invoked to process the "string reverse" Tcl command. + * See the user documentation for details on what it does. Note that this + * command only functions correctly on properly formed Tcl UTF strings. + * + * Results: + * A standard Tcl result. + * + * Side effects: + * See the user documentation. + * + *---------------------------------------------------------------------- + */ - if (objc != 4) { - badFirstLastArgs: - Tcl_WrongNumArgs(interp, 2, objv, "string1 string2"); - return TCL_ERROR; - } +static int +StringRevCmd( + ClientData dummy, /* Not used. */ + Tcl_Interp *interp, /* Current interpreter. */ + int objc, /* Number of arguments. */ + Tcl_Obj *const objv[]) /* Argument objects. */ +{ + if (objc != 2) { + Tcl_WrongNumArgs(interp, 1, objv, "string"); + return TCL_ERROR; + } - match = -1; - string1 = Tcl_GetStringFromObj(objv[2], &length1); - string2 = Tcl_GetStringFromObj(objv[3], &length2); - if (length1 > 0) { - end = string2 + length2 - length1 + 1; - for (p = string2; p < end; p++) { - /* - * Scan forward to find the first character. - */ - - p = memchr(p, *string1, (unsigned) (end - p)); - if (p == NULL) { - break; - } - if (memcmp(string1, p, (unsigned) length1) == 0) { - match = p - string2; - break; - } - } - } - Tcl_SetIntObj(resultPtr, match); - break; - } - case STR_INDEX: { - int index; + Tcl_SetObjResult(interp, TclStringObjReverse(objv[1])); + return TCL_OK; +} + +/* + *---------------------------------------------------------------------- + * + * StringStartCmd -- + * + * This procedure is invoked to process the "string wordstart" Tcl + * command. See the user documentation for details on what it does. Note + * that this command only functions correctly on properly formed Tcl UTF + * strings. + * + * Results: + * A standard Tcl result. + * + * Side effects: + * See the user documentation. + * + *---------------------------------------------------------------------- + */ - if (objc != 4) { - Tcl_WrongNumArgs(interp, 2, objv, "string charIndex"); - return TCL_ERROR; - } +static int +StringStartCmd( + ClientData dummy, /* Not used. */ + Tcl_Interp *interp, /* Current interpreter. */ + int objc, /* Number of arguments. */ + Tcl_Obj *const objv[]) /* Argument objects. */ +{ + Tcl_UniChar ch; + const char *p, *string; + int cur, index, length, numChars; - string1 = Tcl_GetStringFromObj(objv[2], &length1); - if (Tcl_GetIntFromObj(interp, objv[3], &index) != TCL_OK) { - return TCL_ERROR; - } - if ((index >= 0) && (index < length1)) { - Tcl_SetStringObj(resultPtr, string1 + index, 1); + if (objc != 3) { + Tcl_WrongNumArgs(interp, 1, objv, "string index"); + return TCL_ERROR; + } + + string = TclGetStringFromObj(objv[1], &length); + numChars = Tcl_NumUtfChars(string, length); + if (TclGetIntForIndexM(interp, objv[2], numChars-1, &index) != TCL_OK) { + return TCL_ERROR; + } + string = TclGetStringFromObj(objv[1], &length); + if (index >= numChars) { + index = numChars - 1; + } + cur = 0; + if (index > 0) { + p = Tcl_UtfAtIndex(string, index); + for (cur = index; cur >= 0; cur--) { + TclUtfToUniChar(p, &ch); + if (!Tcl_UniCharIsWordChar(ch)) { + break; } - break; + p = Tcl_UtfPrev(p, string); + } + if (cur != index) { + cur += 1; } - case STR_LAST: { - register char *p; - int match; + } + Tcl_SetObjResult(interp, Tcl_NewIntObj(cur)); + return TCL_OK; +} + +/* + *---------------------------------------------------------------------- + * + * StringEndCmd -- + * + * This procedure is invoked to process the "string wordend" Tcl command. + * See the user documentation for details on what it does. Note that this + * command only functions correctly on properly formed Tcl UTF strings. + * + * Results: + * A standard Tcl result. + * + * Side effects: + * See the user documentation. + * + *---------------------------------------------------------------------- + */ - if (objc != 4) { - goto badFirstLastArgs; - } +static int +StringEndCmd( + ClientData dummy, /* Not used. */ + Tcl_Interp *interp, /* Current interpreter. */ + int objc, /* Number of arguments. */ + Tcl_Obj *const objv[]) /* Argument objects. */ +{ + Tcl_UniChar ch; + const char *p, *end, *string; + int cur, index, length, numChars; - match = -1; - string1 = Tcl_GetStringFromObj(objv[2], &length1); - string2 = Tcl_GetStringFromObj(objv[3], &length2); - if (length1 > 0) { - for (p = string2 + length2 - length1; p >= string2; p--) { - /* - * Scan backwards to find the first character. - */ - - while ((p != string2) && (*p != *string1)) { - p--; - } - if (memcmp(string1, p, (unsigned) length1) == 0) { - match = p - string2; - break; - } - } + if (objc != 3) { + Tcl_WrongNumArgs(interp, 1, objv, "string index"); + return TCL_ERROR; + } + + string = TclGetStringFromObj(objv[1], &length); + numChars = Tcl_NumUtfChars(string, length); + if (TclGetIntForIndexM(interp, objv[2], numChars-1, &index) != TCL_OK) { + return TCL_ERROR; + } + string = TclGetStringFromObj(objv[1], &length); + if (index < 0) { + index = 0; + } + if (index < numChars) { + p = Tcl_UtfAtIndex(string, index); + end = string+length; + for (cur = index; p < end; cur++) { + p += TclUtfToUniChar(p, &ch); + if (!Tcl_UniCharIsWordChar(ch)) { + break; } - Tcl_SetIntObj(resultPtr, match); - break; } - case STR_LENGTH: { - if (objc != 3) { - Tcl_WrongNumArgs(interp, 2, objv, "string"); - return TCL_ERROR; - } - - (void) Tcl_GetStringFromObj(objv[2], &length1); - Tcl_SetIntObj(resultPtr, length1); - break; + if (cur == index) { + cur++; } - case STR_MATCH: { - if (objc != 4) { - Tcl_WrongNumArgs(interp, 2, objv, "pattern string"); - return TCL_ERROR; - } + } else { + cur = numChars; + } + Tcl_SetObjResult(interp, Tcl_NewIntObj(cur)); + return TCL_OK; +} + +/* + *---------------------------------------------------------------------- + * + * StringEqualCmd -- + * + * This procedure is invoked to process the "string equal" Tcl command. + * See the user documentation for details on what it does. Note that this + * command only functions correctly on properly formed Tcl UTF strings. + * + * Results: + * A standard Tcl result. + * + * Side effects: + * See the user documentation. + * + *---------------------------------------------------------------------- + */ - string1 = Tcl_GetStringFromObj(objv[2], &length1); - string2 = Tcl_GetStringFromObj(objv[3], &length2); - Tcl_SetBooleanObj(resultPtr, Tcl_StringMatch(string2, string1)); - break; - } - case STR_RANGE: { - int first, last; +static int +StringEqualCmd( + ClientData dummy, /* Not used. */ + Tcl_Interp *interp, /* Current interpreter. */ + int objc, /* Number of arguments. */ + Tcl_Obj *const objv[]) /* Argument objects. */ +{ + /* + * Remember to keep code here in some sync with the byte-compiled versions + * in tclExecute.c (INST_STR_EQ, INST_STR_NEQ and INST_STR_CMP as well as + * the expr string comparison in INST_EQ/INST_NEQ/INST_LT/...). + */ - if (objc != 5) { - Tcl_WrongNumArgs(interp, 2, objv, "string first last"); - return TCL_ERROR; - } + const char *string1, *string2; + int length1, length2, i, match, length, nocase = 0, reqlength = -1; + typedef int (*strCmpFn_t)(const char *, const char *, unsigned int); + strCmpFn_t strCmpFn; - string1 = Tcl_GetStringFromObj(objv[2], &length1); - if (TclGetIntForIndex(interp, objv[3], length1 - 1, - &first) != TCL_OK) { - return TCL_ERROR; + if (objc < 3 || objc > 6) { + str_cmp_args: + Tcl_WrongNumArgs(interp, 1, objv, + "?-nocase? ?-length int? string1 string2"); + return TCL_ERROR; + } + + for (i = 1; i < objc-2; i++) { + string2 = TclGetStringFromObj(objv[i], &length2); + if ((length2 > 1) && !strncmp(string2, "-nocase", (size_t)length2)) { + nocase = 1; + } else if ((length2 > 1) + && !strncmp(string2, "-length", (size_t)length2)) { + if (i+1 >= objc-2) { + goto str_cmp_args; } - if (TclGetIntForIndex(interp, objv[4], length1 - 1, - &last) != TCL_OK) { + i++; + if (TclGetIntFromObj(interp, objv[i], &reqlength) != TCL_OK) { return TCL_ERROR; } - if (first < 0) { - first = 0; - } - if (last >= length1 - 1) { - last = length1 - 1; - } - if (last >= first) { - Tcl_SetStringObj(resultPtr, string1 + first, last - first + 1); - } - break; + } else { + Tcl_SetObjResult(interp, Tcl_ObjPrintf( + "bad option \"%s\": must be -nocase or -length", + string2)); + Tcl_SetErrorCode(interp, "TCL", "LOOKUP", "INDEX", "option", + string2, NULL); + return TCL_ERROR; } - case STR_TOLOWER: { - register char *p, *end; + } - if (objc != 3) { - Tcl_WrongNumArgs(interp, 2, objv, "string"); - return TCL_ERROR; - } + /* + * From now on, we only access the two objects at the end of the argument + * array. + */ - string1 = Tcl_GetStringFromObj(objv[2], &length1); + objv += objc-2; + if ((reqlength == 0) || (objv[0] == objv[1])) { + /* + * Always match at 0 chars of if it is the same obj. + */ + + Tcl_SetObjResult(interp, Tcl_NewBooleanObj(1)); + return TCL_OK; + } + + if (!nocase && TclIsPureByteArray(objv[0]) && + TclIsPureByteArray(objv[1])) { + /* + * Use binary versions of comparisons since that won't cause undue + * type conversions and it is much faster. Only do this if we're + * case-sensitive (which is all that really makes sense with byte + * arrays anyway, and we have no memcasecmp() for some reason... :^) + */ + + string1 = (char *) Tcl_GetByteArrayFromObj(objv[0], &length1); + string2 = (char *) Tcl_GetByteArrayFromObj(objv[1], &length2); + strCmpFn = (strCmpFn_t) memcmp; + } else if ((objv[0]->typePtr == &tclStringType) + && (objv[1]->typePtr == &tclStringType)) { + /* + * Do a unicode-specific comparison if both of the args are of String + * type. In benchmark testing this proved the most efficient check + * between the unicode and string comparison operations. + */ + + string1 = (char *) Tcl_GetUnicodeFromObj(objv[0], &length1); + string2 = (char *) Tcl_GetUnicodeFromObj(objv[1], &length2); + strCmpFn = (strCmpFn_t) + (nocase ? Tcl_UniCharNcasecmp : Tcl_UniCharNcmp); + } else { + /* + * As a catch-all we will work with UTF-8. We cannot use memcmp() as + * that is unsafe with any string containing NUL (\xC0\x80 in Tcl's + * utf rep). We can use the more efficient TclpUtfNcmp2 if we are + * case-sensitive and no specific length was requested. + */ + + string1 = (char *) TclGetStringFromObj(objv[0], &length1); + string2 = (char *) TclGetStringFromObj(objv[1], &length2); + if ((reqlength < 0) && !nocase) { + strCmpFn = (strCmpFn_t) TclpUtfNcmp2; + } else { + length1 = Tcl_NumUtfChars(string1, length1); + length2 = Tcl_NumUtfChars(string2, length2); + strCmpFn = (strCmpFn_t) (nocase ? Tcl_UtfNcasecmp : Tcl_UtfNcmp); + } + } + + if ((reqlength < 0) && (length1 != length2)) { + match = 1; /* This will be reversed below. */ + } else { + length = (length1 < length2) ? length1 : length2; + if (reqlength > 0 && reqlength < length) { + length = reqlength; + } else if (reqlength < 0) { /* - * Since I know resultPtr is not a shared object, I can reach - * in and diddle the bytes in its string rep to convert them in - * place to lower case. + * The requested length is negative, so we ignore it by setting it + * to length + 1 so we correct the match var. */ - Tcl_SetStringObj(resultPtr, string1, length1); - string1 = Tcl_GetStringFromObj(resultPtr, &length1); - end = string1 + length1; - for (p = string1; p < end; p++) { - if (isupper(UCHAR(*p))) { - *p = (char) tolower(UCHAR(*p)); - } - } - break; + reqlength = length + 1; + } + + match = strCmpFn(string1, string2, (unsigned) length); + if ((match == 0) && (reqlength > length)) { + match = length1 - length2; } - case STR_TOUPPER: { - register char *p, *end; + } - if (objc != 3) { - Tcl_WrongNumArgs(interp, 2, objv, "string"); + Tcl_SetObjResult(interp, Tcl_NewBooleanObj(match ? 0 : 1)); + return TCL_OK; +} + +/* + *---------------------------------------------------------------------- + * + * StringCmpCmd -- + * + * This procedure is invoked to process the "string compare" Tcl command. + * See the user documentation for details on what it does. Note that this + * command only functions correctly on properly formed Tcl UTF strings. + * + * Results: + * A standard Tcl result. + * + * Side effects: + * See the user documentation. + * + *---------------------------------------------------------------------- + */ + +static int +StringCmpCmd( + ClientData dummy, /* Not used. */ + Tcl_Interp *interp, /* Current interpreter. */ + int objc, /* Number of arguments. */ + Tcl_Obj *const objv[]) /* Argument objects. */ +{ + /* + * Remember to keep code here in some sync with the byte-compiled versions + * in tclExecute.c (INST_STR_EQ, INST_STR_NEQ and INST_STR_CMP as well as + * the expr string comparison in INST_EQ/INST_NEQ/INST_LT/...). + */ + + const char *string1, *string2; + int length1, length2, i, match, length, nocase = 0, reqlength = -1; + typedef int (*strCmpFn_t)(const char *, const char *, unsigned int); + strCmpFn_t strCmpFn; + + if (objc < 3 || objc > 6) { + str_cmp_args: + Tcl_WrongNumArgs(interp, 1, objv, + "?-nocase? ?-length int? string1 string2"); + return TCL_ERROR; + } + + for (i = 1; i < objc-2; i++) { + string2 = TclGetStringFromObj(objv[i], &length2); + if ((length2 > 1) && !strncmp(string2, "-nocase", (size_t)length2)) { + nocase = 1; + } else if ((length2 > 1) + && !strncmp(string2, "-length", (size_t)length2)) { + if (i+1 >= objc-2) { + goto str_cmp_args; + } + i++; + if (TclGetIntFromObj(interp, objv[i], &reqlength) != TCL_OK) { return TCL_ERROR; } + } else { + Tcl_SetObjResult(interp, Tcl_ObjPrintf( + "bad option \"%s\": must be -nocase or -length", + string2)); + Tcl_SetErrorCode(interp, "TCL", "LOOKUP", "INDEX", "option", + string2, NULL); + return TCL_ERROR; + } + } + + /* + * From now on, we only access the two objects at the end of the argument + * array. + */ - string1 = Tcl_GetStringFromObj(objv[2], &length1); + objv += objc-2; - /* - * Since I know resultPtr is not a shared object, I can reach - * in and diddle the bytes in its string rep to convert them in - * place to upper case. - */ + if ((reqlength == 0) || (objv[0] == objv[1])) { + /* + * Always match at 0 chars of if it is the same obj. + */ - Tcl_SetStringObj(resultPtr, string1, length1); - string1 = Tcl_GetStringFromObj(resultPtr, &length1); - end = string1 + length1; - for (p = string1; p < end; p++) { - if (islower(UCHAR(*p))) { - *p = (char) toupper(UCHAR(*p)); - } - } - break; + Tcl_SetObjResult(interp, Tcl_NewBooleanObj(0)); + return TCL_OK; + } + + if (!nocase && TclIsPureByteArray(objv[0]) && + TclIsPureByteArray(objv[1])) { + /* + * Use binary versions of comparisons since that won't cause undue + * type conversions and it is much faster. Only do this if we're + * case-sensitive (which is all that really makes sense with byte + * arrays anyway, and we have no memcasecmp() for some reason... :^) + */ + + string1 = (char *) Tcl_GetByteArrayFromObj(objv[0], &length1); + string2 = (char *) Tcl_GetByteArrayFromObj(objv[1], &length2); + strCmpFn = (strCmpFn_t) memcmp; + } else if ((objv[0]->typePtr == &tclStringType) + && (objv[1]->typePtr == &tclStringType)) { + /* + * Do a unicode-specific comparison if both of the args are of String + * type. In benchmark testing this proved the most efficient check + * between the unicode and string comparison operations. + */ + + string1 = (char *) Tcl_GetUnicodeFromObj(objv[0], &length1); + string2 = (char *) Tcl_GetUnicodeFromObj(objv[1], &length2); + strCmpFn = (strCmpFn_t) + (nocase ? Tcl_UniCharNcasecmp : Tcl_UniCharNcmp); + } else { + /* + * As a catch-all we will work with UTF-8. We cannot use memcmp() as + * that is unsafe with any string containing NUL (\xC0\x80 in Tcl's + * utf rep). We can use the more efficient TclpUtfNcmp2 if we are + * case-sensitive and no specific length was requested. + */ + + string1 = (char *) TclGetStringFromObj(objv[0], &length1); + string2 = (char *) TclGetStringFromObj(objv[1], &length2); + if ((reqlength < 0) && !nocase) { + strCmpFn = (strCmpFn_t) TclpUtfNcmp2; + } else { + length1 = Tcl_NumUtfChars(string1, length1); + length2 = Tcl_NumUtfChars(string2, length2); + strCmpFn = (strCmpFn_t) (nocase ? Tcl_UtfNcasecmp : Tcl_UtfNcmp); } - case STR_TRIM: { - char ch; - register char *p, *end; - char *check, *checkEnd; + } - left = 1; - right = 1; + length = (length1 < length2) ? length1 : length2; + if (reqlength > 0 && reqlength < length) { + length = reqlength; + } else if (reqlength < 0) { + /* + * The requested length is negative, so we ignore it by setting it to + * length + 1 so we correct the match var. + */ - trim: - if (objc == 4) { - string2 = Tcl_GetStringFromObj(objv[3], &length2); - } else if (objc == 3) { - string2 = " \t\n\r"; - length2 = strlen(string2); - } else { - Tcl_WrongNumArgs(interp, 2, objv, "string ?chars?"); - return TCL_ERROR; - } - string1 = Tcl_GetStringFromObj(objv[2], &length1); - checkEnd = string2 + length2; - - if (left) { - end = string1 + length1; - for (p = string1; p < end; p++) { - ch = *p; - for (check = string2; ; check++) { - if (check >= checkEnd) { - p = end; - break; - } - if (ch == *check) { - length1--; - string1++; - break; - } - } - } - } - if (right) { - end = string1; - for (p = string1 + length1; p > end; ) { - p--; - ch = *p; - for (check = string2; ; check++) { - if (check >= checkEnd) { - p = end; - break; - } - if (ch == *check) { - length1--; - break; - } - } - } - } - Tcl_SetStringObj(resultPtr, string1, length1); - break; + reqlength = length + 1; + } + + match = strCmpFn(string1, string2, (unsigned) length); + if ((match == 0) && (reqlength > length)) { + match = length1 - length2; + } + + Tcl_SetObjResult(interp, + Tcl_NewIntObj((match > 0) ? 1 : (match < 0) ? -1 : 0)); + return TCL_OK; +} + +/* + *---------------------------------------------------------------------- + * + * StringBytesCmd -- + * + * This procedure is invoked to process the "string bytelength" Tcl + * command. See the user documentation for details on what it does. Note + * that this command only functions correctly on properly formed Tcl UTF + * strings. + * + * Results: + * A standard Tcl result. + * + * Side effects: + * See the user documentation. + * + *---------------------------------------------------------------------- + */ + +static int +StringBytesCmd( + ClientData dummy, /* Not used. */ + Tcl_Interp *interp, /* Current interpreter. */ + int objc, /* Number of arguments. */ + Tcl_Obj *const objv[]) /* Argument objects. */ +{ + int length; + + if (objc != 2) { + Tcl_WrongNumArgs(interp, 1, objv, "string"); + return TCL_ERROR; + } + + (void) TclGetStringFromObj(objv[1], &length); + Tcl_SetObjResult(interp, Tcl_NewIntObj(length)); + return TCL_OK; +} + +/* + *---------------------------------------------------------------------- + * + * StringLenCmd -- + * + * This procedure is invoked to process the "string length" Tcl command. + * See the user documentation for details on what it does. Note that this + * command only functions correctly on properly formed Tcl UTF strings. + * + * Results: + * A standard Tcl result. + * + * Side effects: + * See the user documentation. + * + *---------------------------------------------------------------------- + */ + +static int +StringLenCmd( + ClientData dummy, /* Not used. */ + Tcl_Interp *interp, /* Current interpreter. */ + int objc, /* Number of arguments. */ + Tcl_Obj *const objv[]) /* Argument objects. */ +{ + if (objc != 2) { + Tcl_WrongNumArgs(interp, 1, objv, "string"); + return TCL_ERROR; + } + + Tcl_SetObjResult(interp, Tcl_NewIntObj(Tcl_GetCharLength(objv[1]))); + return TCL_OK; +} + +/* + *---------------------------------------------------------------------- + * + * StringLowerCmd -- + * + * This procedure is invoked to process the "string tolower" Tcl command. + * See the user documentation for details on what it does. Note that this + * command only functions correctly on properly formed Tcl UTF strings. + * + * Results: + * A standard Tcl result. + * + * Side effects: + * See the user documentation. + * + *---------------------------------------------------------------------- + */ + +static int +StringLowerCmd( + ClientData dummy, /* Not used. */ + Tcl_Interp *interp, /* Current interpreter. */ + int objc, /* Number of arguments. */ + Tcl_Obj *const objv[]) /* Argument objects. */ +{ + int length1, length2; + const char *string1; + char *string2; + + if (objc < 2 || objc > 4) { + Tcl_WrongNumArgs(interp, 1, objv, "string ?first? ?last?"); + return TCL_ERROR; + } + + string1 = TclGetStringFromObj(objv[1], &length1); + + if (objc == 2) { + Tcl_Obj *resultPtr = Tcl_NewStringObj(string1, length1); + + length1 = Tcl_UtfToLower(TclGetString(resultPtr)); + Tcl_SetObjLength(resultPtr, length1); + Tcl_SetObjResult(interp, resultPtr); + } else { + int first, last; + const char *start, *end; + Tcl_Obj *resultPtr; + + length1 = Tcl_NumUtfChars(string1, length1) - 1; + if (TclGetIntForIndexM(interp,objv[2],length1, &first) != TCL_OK) { + return TCL_ERROR; } - case STR_TRIMLEFT: { - left = 1; - right = 0; - goto trim; - } - case STR_TRIMRIGHT: { - left = 0; - right = 1; - goto trim; - } - case STR_WORDEND: { - int cur, c; - - if (objc != 4) { - Tcl_WrongNumArgs(interp, 2, objv, "string index"); - return TCL_ERROR; - } + if (first < 0) { + first = 0; + } + last = first; - string1 = Tcl_GetStringFromObj(objv[2], &length1); - if (Tcl_GetIntFromObj(interp, objv[3], &index) != TCL_OK) { - return TCL_ERROR; - } - if (index < 0) { - index = 0; - } - cur = length1; - if (index < length1) { - for (cur = index; cur < length1; cur++) { - c = UCHAR(string1[cur]); - if (!isalnum(c) && (c != '_')) { - break; - } - } - if (cur == index) { - cur = index + 1; - } - } - Tcl_SetIntObj(resultPtr, cur); - break; + if ((objc == 4) && (TclGetIntForIndexM(interp, objv[3], length1, + &last) != TCL_OK)) { + return TCL_ERROR; } - case STR_WORDSTART: { - int cur, c; - - if (objc != 4) { - Tcl_WrongNumArgs(interp, 2, objv, "string index"); - return TCL_ERROR; - } - string1 = Tcl_GetStringFromObj(objv[2], &length1); - if (Tcl_GetIntFromObj(interp, objv[3], &index) != TCL_OK) { - return TCL_ERROR; - } - if (index >= length1) { - index = length1 - 1; - } - cur = 0; - if (index > 0) { - for (cur = index; cur >= 0; cur--) { - c = UCHAR(string1[cur]); - if (!isalnum(c) && (c != '_')) { - break; - } - } - if (cur != index) { - cur += 1; - } - } - Tcl_SetIntObj(resultPtr, cur); - break; + if (last >= length1) { + last = length1; + } + if (last < first) { + Tcl_SetObjResult(interp, objv[1]); + return TCL_OK; } + + string1 = TclGetStringFromObj(objv[1], &length1); + start = Tcl_UtfAtIndex(string1, first); + end = Tcl_UtfAtIndex(start, last - first + 1); + resultPtr = Tcl_NewStringObj(string1, end - string1); + string2 = TclGetString(resultPtr) + (start - string1); + + length2 = Tcl_UtfToLower(string2); + Tcl_SetObjLength(resultPtr, length2 + (start - string1)); + + Tcl_AppendToObj(resultPtr, end, -1); + Tcl_SetObjResult(interp, resultPtr); } + return TCL_OK; } /* *---------------------------------------------------------------------- * - * Tcl_SubstCmd -- + * StringUpperCmd -- * - * This procedure is invoked to process the "subst" Tcl command. - * See the user documentation for details on what it does. This - * command is an almost direct copy of an implementation by - * Andrew Payne. + * This procedure is invoked to process the "string toupper" Tcl command. + * See the user documentation for details on what it does. Note that this + * command only functions correctly on properly formed Tcl UTF strings. * * Results: * A standard Tcl result. @@ -1433,130 +3008,445 @@ Tcl_StringObjCmd(dummy, interp, objc, objv) *---------------------------------------------------------------------- */ - /* ARGSUSED */ -int -Tcl_SubstCmd(dummy, interp, argc, argv) - ClientData dummy; /* Not used. */ - Tcl_Interp *interp; /* Current interpreter. */ - int argc; /* Number of arguments. */ - char **argv; /* Argument strings. */ +static int +StringUpperCmd( + ClientData dummy, /* Not used. */ + Tcl_Interp *interp, /* Current interpreter. */ + int objc, /* Number of arguments. */ + Tcl_Obj *const objv[]) /* Argument objects. */ { - Interp *iPtr = (Interp *) interp; - Tcl_DString result; - char *p, *old, *value; - int code, count, doVars, doCmds, doBackslashes, i; - size_t length; - char c; + int length1, length2; + const char *string1; + char *string2; - /* - * Parse command-line options. - */ + if (objc < 2 || objc > 4) { + Tcl_WrongNumArgs(interp, 1, objv, "string ?first? ?last?"); + return TCL_ERROR; + } - doVars = doCmds = doBackslashes = 1; - for (i = 1; i < (argc-1); i++) { - p = argv[i]; - if (*p != '-') { - break; + string1 = TclGetStringFromObj(objv[1], &length1); + + if (objc == 2) { + Tcl_Obj *resultPtr = Tcl_NewStringObj(string1, length1); + + length1 = Tcl_UtfToUpper(TclGetString(resultPtr)); + Tcl_SetObjLength(resultPtr, length1); + Tcl_SetObjResult(interp, resultPtr); + } else { + int first, last; + const char *start, *end; + Tcl_Obj *resultPtr; + + length1 = Tcl_NumUtfChars(string1, length1) - 1; + if (TclGetIntForIndexM(interp,objv[2],length1, &first) != TCL_OK) { + return TCL_ERROR; + } + if (first < 0) { + first = 0; } - length = strlen(p); - if (length < 4) { - badSwitch: - Tcl_AppendResult(interp, "bad switch \"", p, - "\": must be -nobackslashes, -nocommands, ", - "or -novariables", (char *) NULL); + last = first; + + if ((objc == 4) && (TclGetIntForIndexM(interp, objv[3], length1, + &last) != TCL_OK)) { return TCL_ERROR; } - if ((p[3] == 'b') && (strncmp(p, "-nobackslashes", length) == 0)) { - doBackslashes = 0; - } else if ((p[3] == 'c') && (strncmp(p, "-nocommands", length) == 0)) { - doCmds = 0; - } else if ((p[3] == 'v') && (strncmp(p, "-novariables", length) == 0)) { - doVars = 0; - } else { - goto badSwitch; + + if (last >= length1) { + last = length1; } + if (last < first) { + Tcl_SetObjResult(interp, objv[1]); + return TCL_OK; + } + + string1 = TclGetStringFromObj(objv[1], &length1); + start = Tcl_UtfAtIndex(string1, first); + end = Tcl_UtfAtIndex(start, last - first + 1); + resultPtr = Tcl_NewStringObj(string1, end - string1); + string2 = TclGetString(resultPtr) + (start - string1); + + length2 = Tcl_UtfToUpper(string2); + Tcl_SetObjLength(resultPtr, length2 + (start - string1)); + + Tcl_AppendToObj(resultPtr, end, -1); + Tcl_SetObjResult(interp, resultPtr); } - if (i != (argc-1)) { - Tcl_AppendResult(interp, "wrong # args: should be \"", argv[0], - " ?-nobackslashes? ?-nocommands? ?-novariables? string\"", - (char *) NULL); + + return TCL_OK; +} + +/* + *---------------------------------------------------------------------- + * + * StringTitleCmd -- + * + * This procedure is invoked to process the "string totitle" Tcl command. + * See the user documentation for details on what it does. Note that this + * command only functions correctly on properly formed Tcl UTF strings. + * + * Results: + * A standard Tcl result. + * + * Side effects: + * See the user documentation. + * + *---------------------------------------------------------------------- + */ + +static int +StringTitleCmd( + ClientData dummy, /* Not used. */ + Tcl_Interp *interp, /* Current interpreter. */ + int objc, /* Number of arguments. */ + Tcl_Obj *const objv[]) /* Argument objects. */ +{ + int length1, length2; + const char *string1; + char *string2; + + if (objc < 2 || objc > 4) { + Tcl_WrongNumArgs(interp, 1, objv, "string ?first? ?last?"); return TCL_ERROR; } - /* - * Scan through the string one character at a time, performing - * command, variable, and backslash substitutions. - */ + string1 = TclGetStringFromObj(objv[1], &length1); - Tcl_DStringInit(&result); - old = p = argv[i]; - while (*p != 0) { - switch (*p) { - case '\\': - if (doBackslashes) { - if (p != old) { - Tcl_DStringAppend(&result, old, p-old); - } - c = Tcl_Backslash(p, &count); - Tcl_DStringAppend(&result, &c, 1); - p += count; - old = p; - } else { - p++; - } - break; + if (objc == 2) { + Tcl_Obj *resultPtr = Tcl_NewStringObj(string1, length1); - case '$': - if (doVars) { - if (p != old) { - Tcl_DStringAppend(&result, old, p-old); - } - value = Tcl_ParseVar(interp, p, &p); - if (value == NULL) { - Tcl_DStringFree(&result); - return TCL_ERROR; - } - Tcl_DStringAppend(&result, value, -1); - old = p; - } else { - p++; - } - break; + length1 = Tcl_UtfToTitle(TclGetString(resultPtr)); + Tcl_SetObjLength(resultPtr, length1); + Tcl_SetObjResult(interp, resultPtr); + } else { + int first, last; + const char *start, *end; + Tcl_Obj *resultPtr; - case '[': - if (doCmds) { - if (p != old) { - Tcl_DStringAppend(&result, old, p-old); - } - iPtr->evalFlags = TCL_BRACKET_TERM; - code = Tcl_Eval(interp, p+1); - if (code == TCL_ERROR) { - Tcl_DStringFree(&result); - return code; - } - old = p = (p+1 + iPtr->termOffset+1); - Tcl_DStringAppend(&result, iPtr->result, -1); - Tcl_ResetResult(interp); - } else { - p++; - } - break; + length1 = Tcl_NumUtfChars(string1, length1) - 1; + if (TclGetIntForIndexM(interp,objv[2],length1, &first) != TCL_OK) { + return TCL_ERROR; + } + if (first < 0) { + first = 0; + } + last = first; - default: - p++; - break; + if ((objc == 4) && (TclGetIntForIndexM(interp, objv[3], length1, + &last) != TCL_OK)) { + return TCL_ERROR; + } + + if (last >= length1) { + last = length1; + } + if (last < first) { + Tcl_SetObjResult(interp, objv[1]); + return TCL_OK; } + + string1 = TclGetStringFromObj(objv[1], &length1); + start = Tcl_UtfAtIndex(string1, first); + end = Tcl_UtfAtIndex(start, last - first + 1); + resultPtr = Tcl_NewStringObj(string1, end - string1); + string2 = TclGetString(resultPtr) + (start - string1); + + length2 = Tcl_UtfToTitle(string2); + Tcl_SetObjLength(resultPtr, length2 + (start - string1)); + + Tcl_AppendToObj(resultPtr, end, -1); + Tcl_SetObjResult(interp, resultPtr); + } + + return TCL_OK; +} + +/* + *---------------------------------------------------------------------- + * + * StringTrimCmd -- + * + * This procedure is invoked to process the "string trim" Tcl command. + * See the user documentation for details on what it does. Note that this + * command only functions correctly on properly formed Tcl UTF strings. + * + * Results: + * A standard Tcl result. + * + * Side effects: + * See the user documentation. + * + *---------------------------------------------------------------------- + */ + +static int +StringTrimCmd( + ClientData dummy, /* Not used. */ + Tcl_Interp *interp, /* Current interpreter. */ + int objc, /* Number of arguments. */ + Tcl_Obj *const objv[]) /* Argument objects. */ +{ + const char *string1, *string2; + int triml, trimr, length1, length2; + + if (objc == 3) { + string2 = TclGetStringFromObj(objv[2], &length2); + } else if (objc == 2) { + string2 = tclDefaultTrimSet; + length2 = strlen(tclDefaultTrimSet); + } else { + Tcl_WrongNumArgs(interp, 1, objv, "string ?chars?"); + return TCL_ERROR; + } + string1 = TclGetStringFromObj(objv[1], &length1); + + triml = TclTrimLeft(string1, length1, string2, length2); + trimr = TclTrimRight(string1 + triml, length1 - triml, string2, length2); + + Tcl_SetObjResult(interp, + Tcl_NewStringObj(string1 + triml, length1 - triml - trimr)); + return TCL_OK; +} + +/* + *---------------------------------------------------------------------- + * + * StringTrimLCmd -- + * + * This procedure is invoked to process the "string trimleft" Tcl + * command. See the user documentation for details on what it does. Note + * that this command only functions correctly on properly formed Tcl UTF + * strings. + * + * Results: + * A standard Tcl result. + * + * Side effects: + * See the user documentation. + * + *---------------------------------------------------------------------- + */ + +static int +StringTrimLCmd( + ClientData dummy, /* Not used. */ + Tcl_Interp *interp, /* Current interpreter. */ + int objc, /* Number of arguments. */ + Tcl_Obj *const objv[]) /* Argument objects. */ +{ + const char *string1, *string2; + int trim, length1, length2; + + if (objc == 3) { + string2 = TclGetStringFromObj(objv[2], &length2); + } else if (objc == 2) { + string2 = tclDefaultTrimSet; + length2 = strlen(tclDefaultTrimSet); + } else { + Tcl_WrongNumArgs(interp, 1, objv, "string ?chars?"); + return TCL_ERROR; } - if (p != old) { - Tcl_DStringAppend(&result, old, p-old); + string1 = TclGetStringFromObj(objv[1], &length1); + + trim = TclTrimLeft(string1, length1, string2, length2); + + Tcl_SetObjResult(interp, Tcl_NewStringObj(string1+trim, length1-trim)); + return TCL_OK; +} + +/* + *---------------------------------------------------------------------- + * + * StringTrimRCmd -- + * + * This procedure is invoked to process the "string trimright" Tcl + * command. See the user documentation for details on what it does. Note + * that this command only functions correctly on properly formed Tcl UTF + * strings. + * + * Results: + * A standard Tcl result. + * + * Side effects: + * See the user documentation. + * + *---------------------------------------------------------------------- + */ + +static int +StringTrimRCmd( + ClientData dummy, /* Not used. */ + Tcl_Interp *interp, /* Current interpreter. */ + int objc, /* Number of arguments. */ + Tcl_Obj *const objv[]) /* Argument objects. */ +{ + const char *string1, *string2; + int trim, length1, length2; + + if (objc == 3) { + string2 = TclGetStringFromObj(objv[2], &length2); + } else if (objc == 2) { + string2 = tclDefaultTrimSet; + length2 = strlen(tclDefaultTrimSet); + } else { + Tcl_WrongNumArgs(interp, 1, objv, "string ?chars?"); + return TCL_ERROR; } - Tcl_DStringResult(interp, &result); + string1 = TclGetStringFromObj(objv[1], &length1); + + trim = TclTrimRight(string1, length1, string2, length2); + + Tcl_SetObjResult(interp, Tcl_NewStringObj(string1, length1-trim)); return TCL_OK; } /* *---------------------------------------------------------------------- * + * TclInitStringCmd -- + * + * This procedure creates the "string" Tcl command. See the user + * documentation for details on what it does. Note that this command only + * functions correctly on properly formed Tcl UTF strings. + * + * Also note that the primary methods here (equal, compare, match, ...) + * have bytecode equivalents. You will find the code for those in + * tclExecute.c. The code here will only be used in the non-bc case (like + * in an 'eval'). + * + * Results: + * A standard Tcl result. + * + * Side effects: + * See the user documentation. + * + *---------------------------------------------------------------------- + */ + +Tcl_Command +TclInitStringCmd( + Tcl_Interp *interp) /* Current interpreter. */ +{ + static const EnsembleImplMap stringImplMap[] = { + {"bytelength", StringBytesCmd, TclCompileBasic1ArgCmd, NULL, NULL, 0}, + {"compare", StringCmpCmd, TclCompileStringCmpCmd, NULL, NULL, 0}, + {"equal", StringEqualCmd, TclCompileStringEqualCmd, NULL, NULL, 0}, + {"first", StringFirstCmd, TclCompileStringFirstCmd, NULL, NULL, 0}, + {"index", StringIndexCmd, TclCompileStringIndexCmd, NULL, NULL, 0}, + {"is", StringIsCmd, TclCompileStringIsCmd, NULL, NULL, 0}, + {"last", StringLastCmd, TclCompileStringLastCmd, NULL, NULL, 0}, + {"length", StringLenCmd, TclCompileStringLenCmd, NULL, NULL, 0}, + {"map", StringMapCmd, TclCompileStringMapCmd, NULL, NULL, 0}, + {"match", StringMatchCmd, TclCompileStringMatchCmd, NULL, NULL, 0}, + {"range", StringRangeCmd, TclCompileStringRangeCmd, NULL, NULL, 0}, + {"repeat", StringReptCmd, TclCompileBasic2ArgCmd, NULL, NULL, 0}, + {"replace", StringRplcCmd, TclCompileStringReplaceCmd, NULL, NULL, 0}, + {"reverse", StringRevCmd, TclCompileBasic1ArgCmd, NULL, NULL, 0}, + {"tolower", StringLowerCmd, TclCompileStringToLowerCmd, NULL, NULL, 0}, + {"toupper", StringUpperCmd, TclCompileStringToUpperCmd, NULL, NULL, 0}, + {"totitle", StringTitleCmd, TclCompileStringToTitleCmd, NULL, NULL, 0}, + {"trim", StringTrimCmd, TclCompileStringTrimCmd, NULL, NULL, 0}, + {"trimleft", StringTrimLCmd, TclCompileStringTrimLCmd, NULL, NULL, 0}, + {"trimright", StringTrimRCmd, TclCompileStringTrimRCmd, NULL, NULL, 0}, + {"wordend", StringEndCmd, TclCompileBasic2ArgCmd, NULL, NULL, 0}, + {"wordstart", StringStartCmd, TclCompileBasic2ArgCmd, NULL, NULL, 0}, + {NULL, NULL, NULL, NULL, NULL, 0} + }; + + return TclMakeEnsemble(interp, "string", stringImplMap); +} + +/* + *---------------------------------------------------------------------- + * + * Tcl_SubstObjCmd -- + * + * This procedure is invoked to process the "subst" Tcl command. See the + * user documentation for details on what it does. This command relies on + * Tcl_SubstObj() for its implementation. + * + * Results: + * A standard Tcl result. + * + * Side effects: + * See the user documentation. + * + *---------------------------------------------------------------------- + */ + +int +TclSubstOptions( + Tcl_Interp *interp, + int numOpts, + Tcl_Obj *const opts[], + int *flagPtr) +{ + static const char *const substOptions[] = { + "-nobackslashes", "-nocommands", "-novariables", NULL + }; + enum { + SUBST_NOBACKSLASHES, SUBST_NOCOMMANDS, SUBST_NOVARS + }; + int i, flags = TCL_SUBST_ALL; + + for (i = 0; i < numOpts; i++) { + int optionIndex; + + if (Tcl_GetIndexFromObj(interp, opts[i], substOptions, "switch", 0, + &optionIndex) != TCL_OK) { + return TCL_ERROR; + } + switch (optionIndex) { + case SUBST_NOBACKSLASHES: + flags &= ~TCL_SUBST_BACKSLASHES; + break; + case SUBST_NOCOMMANDS: + flags &= ~TCL_SUBST_COMMANDS; + break; + case SUBST_NOVARS: + flags &= ~TCL_SUBST_VARIABLES; + break; + default: + Tcl_Panic("Tcl_SubstObjCmd: bad option index to SubstOptions"); + } + } + *flagPtr = flags; + return TCL_OK; +} + +int +Tcl_SubstObjCmd( + ClientData dummy, /* Not used. */ + Tcl_Interp *interp, /* Current interpreter. */ + int objc, /* Number of arguments. */ + Tcl_Obj *const objv[]) /* Argument objects. */ +{ + return Tcl_NRCallObjProc(interp, TclNRSubstObjCmd, dummy, objc, objv); +} + +int +TclNRSubstObjCmd( + ClientData dummy, /* Not used. */ + Tcl_Interp *interp, /* Current interpreter. */ + int objc, /* Number of arguments. */ + Tcl_Obj *const objv[]) /* Argument objects. */ +{ + int flags; + + if (objc < 2) { + Tcl_WrongNumArgs(interp, 1, objv, + "?-nobackslashes? ?-nocommands? ?-novariables? string"); + return TCL_ERROR; + } + + if (TclSubstOptions(interp, objc-2, objv+1, &flags) != TCL_OK) { + return TCL_ERROR; + } + return Tcl_NRSubstObj(interp, objv[objc-1], flags); +} + +/* + *---------------------------------------------------------------------- + * * Tcl_SwitchObjCmd -- * * This object-based procedure is invoked to process the "switch" Tcl @@ -1571,185 +3461,582 @@ Tcl_SubstCmd(dummy, interp, argc, argv) *---------------------------------------------------------------------- */ - /* ARGSUSED */ int -Tcl_SwitchObjCmd(dummy, interp, objc, objv) - ClientData dummy; /* Not used. */ - Tcl_Interp *interp; /* Current interpreter. */ - int objc; /* Number of arguments. */ - Tcl_Obj *CONST objv[]; /* Argument objects. */ +Tcl_SwitchObjCmd( + ClientData dummy, /* Not used. */ + Tcl_Interp *interp, /* Current interpreter. */ + int objc, /* Number of arguments. */ + Tcl_Obj *const objv[]) /* Argument objects. */ { -#define EXACT 0 -#define GLOB 1 -#define REGEXP 2 - int switchObjc, index; - Tcl_Obj *CONST *switchObjv; - Tcl_Obj *patternObj, *bodyObj; - char *string, *pattern, *body; - int splitObjs, length, patternLen, i, code, mode, matched, bodyIdx; - static char *switches[] = - {"-exact", "-glob", "-regexp", "--", (char *) NULL}; - - switchObjc = objc-1; - switchObjv = objv+1; - mode = EXACT; - - while (switchObjc > 0) { - string = Tcl_GetStringFromObj(switchObjv[0], &length); - if (*string != '-') { + return Tcl_NRCallObjProc(interp, TclNRSwitchObjCmd, dummy, objc, objv); +} +int +TclNRSwitchObjCmd( + ClientData dummy, /* Not used. */ + Tcl_Interp *interp, /* Current interpreter. */ + int objc, /* Number of arguments. */ + Tcl_Obj *const objv[]) /* Argument objects. */ +{ + int i,j, index, mode, foundmode, splitObjs, numMatchesSaved; + int noCase, patternLength; + const char *pattern; + Tcl_Obj *stringObj, *indexVarObj, *matchVarObj; + Tcl_Obj *const *savedObjv = objv; + Tcl_RegExp regExpr = NULL; + Interp *iPtr = (Interp *) interp; + int pc = 0; + int bidx = 0; /* Index of body argument. */ + Tcl_Obj *blist = NULL; /* List obj which is the body */ + CmdFrame *ctxPtr; /* Copy of the topmost cmdframe, to allow us + * to mess with the line information */ + + /* + * If you add options that make -e and -g not unique prefixes of -exact or + * -glob, you *must* fix TclCompileSwitchCmd's option parser as well. + */ + + static const char *const options[] = { + "-exact", "-glob", "-indexvar", "-matchvar", "-nocase", "-regexp", + "--", NULL + }; + enum options { + OPT_EXACT, OPT_GLOB, OPT_INDEXV, OPT_MATCHV, OPT_NOCASE, OPT_REGEXP, + OPT_LAST + }; + typedef int (*strCmpFn_t)(const char *, const char *); + strCmpFn_t strCmpFn = strcmp; + + mode = OPT_EXACT; + foundmode = 0; + indexVarObj = NULL; + matchVarObj = NULL; + numMatchesSaved = 0; + noCase = 0; + for (i = 1; i < objc-2; i++) { + if (TclGetString(objv[i])[0] != '-') { break; } - if (Tcl_GetIndexFromObj(interp, switchObjv[0], switches, - "option", 0, &index) != TCL_OK) { + if (Tcl_GetIndexFromObj(interp, objv[i], options, "option", 0, + &index) != TCL_OK) { return TCL_ERROR; } - switch (index) { - case 0: /* -exact */ - mode = EXACT; - break; - case 1: /* -glob */ - mode = GLOB; - break; - case 2: /* -regexp */ - mode = REGEXP; - break; - case 3: /* -- */ - switchObjc--; - switchObjv++; - goto doneWithSwitches; + switch ((enum options) index) { + /* + * General options. + */ + + case OPT_LAST: + i++; + goto finishedOptions; + case OPT_NOCASE: + strCmpFn = TclUtfCasecmp; + noCase = 1; + break; + + /* + * Handle the different switch mode options. + */ + + default: + if (foundmode) { + /* + * Mode already set via -exact, -glob, or -regexp. + */ + + Tcl_SetObjResult(interp, Tcl_ObjPrintf( + "bad option \"%s\": %s option already found", + TclGetString(objv[i]), options[mode])); + Tcl_SetErrorCode(interp, "TCL", "OPERATION", "SWITCH", + "DOUBLEOPT", NULL); + return TCL_ERROR; + } + foundmode = 1; + mode = index; + break; + + /* + * Check for TIP#75 options specifying the variables to write + * regexp information into. + */ + + case OPT_INDEXV: + i++; + if (i >= objc-2) { + Tcl_SetObjResult(interp, Tcl_ObjPrintf( + "missing variable name argument to %s option", + "-indexvar")); + Tcl_SetErrorCode(interp, "TCL", "OPERATION", "SWITCH", + "NOVAR", NULL); + return TCL_ERROR; + } + indexVarObj = objv[i]; + numMatchesSaved = -1; + break; + case OPT_MATCHV: + i++; + if (i >= objc-2) { + Tcl_SetObjResult(interp, Tcl_ObjPrintf( + "missing variable name argument to %s option", + "-matchvar")); + Tcl_SetErrorCode(interp, "TCL", "OPERATION", "SWITCH", + "NOVAR", NULL); + return TCL_ERROR; + } + matchVarObj = objv[i]; + numMatchesSaved = -1; + break; } - switchObjc--; - switchObjv++; } - doneWithSwitches: - if (switchObjc < 2) { + finishedOptions: + if (objc - i < 2) { Tcl_WrongNumArgs(interp, 1, objv, - "?switches? string pattern body ... ?default body?"); + "?-switch ...? string ?pattern body ...? ?default body?"); + return TCL_ERROR; + } + if (indexVarObj != NULL && mode != OPT_REGEXP) { + Tcl_SetObjResult(interp, Tcl_ObjPrintf( + "%s option requires -regexp option", "-indexvar")); + Tcl_SetErrorCode(interp, "TCL", "OPERATION", "SWITCH", + "MODERESTRICTION", NULL); + return TCL_ERROR; + } + if (matchVarObj != NULL && mode != OPT_REGEXP) { + Tcl_SetObjResult(interp, Tcl_ObjPrintf( + "%s option requires -regexp option", "-matchvar")); + Tcl_SetErrorCode(interp, "TCL", "OPERATION", "SWITCH", + "MODERESTRICTION", NULL); return TCL_ERROR; } - - string = Tcl_GetStringFromObj(switchObjv[0], &length); - switchObjc--; - switchObjv++; + + stringObj = objv[i]; + objc -= i + 1; + objv += i + 1; + bidx = i + 1; /* First after the match string. */ /* - * If all of the pattern/command pairs are lumped into a single - * argument, split them out again. + * If all of the pattern/command pairs are lumped into a single argument, + * split them out again. + * + * TIP #280: Determine the lines the words in the list start at, based on + * the same data for the list word itself. The cmdFramePtr line + * information is manipulated directly. */ splitObjs = 0; - if (switchObjc == 1) { - code = Tcl_ListObjLength(interp, switchObjv[0], &switchObjc); - if (code != TCL_OK) { - return code; + if (objc == 1) { + Tcl_Obj **listv; + + blist = objv[0]; + if (TclListObjGetElements(interp, objv[0], &objc, &listv) != TCL_OK){ + return TCL_ERROR; + } + + /* + * Ensure that the list is non-empty. + */ + + if (objc < 1) { + Tcl_WrongNumArgs(interp, 1, savedObjv, + "?-switch ...? string {?pattern body ...? ?default body?}"); + return TCL_ERROR; } + objv = listv; splitObjs = 1; } - for (i = 0; i < switchObjc; i += 2) { - if (i == (switchObjc-1)) { - Tcl_ResetResult(interp); - Tcl_AppendToObj(Tcl_GetObjResult(interp), - "extra switch pattern with no body", -1); - code = TCL_ERROR; - goto done; - } + /* + * Complain if there is an odd number of words in the list of patterns and + * bodies. + */ + + if (objc % 2) { + Tcl_ResetResult(interp); + Tcl_SetObjResult(interp, Tcl_NewStringObj( + "extra switch pattern with no body", -1)); + Tcl_SetErrorCode(interp, "TCL", "OPERATION", "SWITCH", "BADARM", + NULL); /* - * See if the pattern matches the string. + * Check if this can be due to a badly placed comment in the switch + * block. + * + * The following is an heuristic to detect the infamous "comment in + * switch" error: just check if a pattern begins with '#'. */ if (splitObjs) { - code = Tcl_ListObjIndex(interp, switchObjv[0], i, &patternObj); - if (code != TCL_OK) { - return code; + for (i=0 ; i<objc ; i+=2) { + if (TclGetString(objv[i])[0] == '#') { + Tcl_AppendToObj(Tcl_GetObjResult(interp), + ", this may be due to a comment incorrectly" + " placed outside of a switch body - see the" + " \"switch\" documentation", -1); + Tcl_SetErrorCode(interp, "TCL", "OPERATION", "SWITCH", + "BADARM", "COMMENT?", NULL); + break; + } } - pattern = Tcl_GetStringFromObj(patternObj, &patternLen); - } else { - pattern = Tcl_GetStringFromObj(switchObjv[i], &patternLen); } - matched = 0; - if ((*pattern == 'd') && (i == switchObjc-2) + return TCL_ERROR; + } + + /* + * Complain if the last body is a continuation. Note that this check + * assumes that the list is non-empty! + */ + + if (strcmp(TclGetString(objv[objc-1]), "-") == 0) { + Tcl_SetObjResult(interp, Tcl_ObjPrintf( + "no body specified for pattern \"%s\"", + TclGetString(objv[objc-2]))); + Tcl_SetErrorCode(interp, "TCL", "OPERATION", "SWITCH", "BADARM", + "FALLTHROUGH", NULL); + return TCL_ERROR; + } + + for (i = 0; i < objc; i += 2) { + /* + * See if the pattern matches the string. + */ + + pattern = TclGetStringFromObj(objv[i], &patternLength); + + if ((i == objc - 2) && (*pattern == 'd') && (strcmp(pattern, "default") == 0)) { - matched = 1; - } else { + Tcl_Obj *emptyObj = NULL; + /* - * THIS FAILS IF AN OBJECT'S STRING REP HAS A NULL. + * If either indexVarObj or matchVarObj are non-NULL, we're in + * REGEXP mode but have reached the default clause anyway. TIP#75 + * specifies that we set the variables to empty lists (== empty + * objects) in that case. */ - switch (mode) { - case EXACT: - matched = (strcmp(string, pattern) == 0); - break; - case GLOB: - matched = Tcl_StringMatch(string, pattern); - break; - case REGEXP: - matched = Tcl_RegExpMatch(interp, string, pattern); - if (matched < 0) { - code = TCL_ERROR; - goto done; - } - break; + + if (indexVarObj != NULL) { + TclNewObj(emptyObj); + if (Tcl_ObjSetVar2(interp, indexVarObj, NULL, emptyObj, + TCL_LEAVE_ERR_MSG) == NULL) { + return TCL_ERROR; + } + } + if (matchVarObj != NULL) { + if (emptyObj == NULL) { + TclNewObj(emptyObj); + } + if (Tcl_ObjSetVar2(interp, matchVarObj, NULL, emptyObj, + TCL_LEAVE_ERR_MSG) == NULL) { + return TCL_ERROR; + } } + goto matchFound; } - if (!matched) { - continue; + + switch (mode) { + case OPT_EXACT: + if (strCmpFn(TclGetString(stringObj), pattern) == 0) { + goto matchFound; + } + break; + case OPT_GLOB: + if (Tcl_StringCaseMatch(TclGetString(stringObj),pattern,noCase)) { + goto matchFound; + } + break; + case OPT_REGEXP: + regExpr = Tcl_GetRegExpFromObj(interp, objv[i], + TCL_REG_ADVANCED | (noCase ? TCL_REG_NOCASE : 0)); + if (regExpr == NULL) { + return TCL_ERROR; + } else { + int matched = Tcl_RegExpExecObj(interp, regExpr, stringObj, 0, + numMatchesSaved, 0); + + if (matched < 0) { + return TCL_ERROR; + } else if (matched) { + goto matchFoundRegexp; + } + } + break; + } + } + return TCL_OK; + + matchFoundRegexp: + /* + * We are operating in REGEXP mode and we need to store information about + * what we matched in some user-nominated arrays. So build the lists of + * values and indices to write here. [TIP#75] + */ + + if (numMatchesSaved) { + Tcl_RegExpInfo info; + Tcl_Obj *matchesObj, *indicesObj = NULL; + + Tcl_RegExpGetInfo(regExpr, &info); + if (matchVarObj != NULL) { + TclNewObj(matchesObj); + } else { + matchesObj = NULL; + } + if (indexVarObj != NULL) { + TclNewObj(indicesObj); } - /* - * We've got a match. Find a body to execute, skipping bodies - * that are "-". - */ + for (j=0 ; j<=info.nsubs ; j++) { + if (indexVarObj != NULL) { + Tcl_Obj *rangeObjAry[2]; - for (bodyIdx = i+1; ; bodyIdx += 2) { - if (bodyIdx >= switchObjc) { - Tcl_AppendStringsToObj(Tcl_GetObjResult(interp), - "no body specified for pattern \"", pattern, - "\"", (char *) NULL); - code = TCL_ERROR; - goto done; - } - - if (splitObjs) { - code = Tcl_ListObjIndex(interp, switchObjv[0], bodyIdx, - &bodyObj); - if (code != TCL_OK) { - return code; + if (info.matches[j].end > 0) { + rangeObjAry[0] = Tcl_NewLongObj(info.matches[j].start); + rangeObjAry[1] = Tcl_NewLongObj(info.matches[j].end-1); + } else { + rangeObjAry[0] = rangeObjAry[1] = Tcl_NewIntObj(-1); } - } else { - bodyObj = switchObjv[bodyIdx]; + + /* + * Never fails; the object is always clean at this point. + */ + + Tcl_ListObjAppendElement(NULL, indicesObj, + Tcl_NewListObj(2, rangeObjAry)); + } + + if (matchVarObj != NULL) { + Tcl_Obj *substringObj; + + substringObj = Tcl_GetRange(stringObj, + info.matches[j].start, info.matches[j].end-1); + + /* + * Never fails; the object is always clean at this point. + */ + + Tcl_ListObjAppendElement(NULL, matchesObj, substringObj); + } + } + + if (indexVarObj != NULL) { + if (Tcl_ObjSetVar2(interp, indexVarObj, NULL, indicesObj, + TCL_LEAVE_ERR_MSG) == NULL) { + /* + * Careful! Check to see if we have allocated the list of + * matched strings; if so (but there was an error assigning + * the indices list) we have a potential memory leak because + * the match list has not been written to a variable. Except + * that we'll clean that up right now. + */ + + if (matchesObj != NULL) { + Tcl_DecrRefCount(matchesObj); + } + return TCL_ERROR; + } + } + if (matchVarObj != NULL) { + if (Tcl_ObjSetVar2(interp, matchVarObj, NULL, matchesObj, + TCL_LEAVE_ERR_MSG) == NULL) { + /* + * Unlike above, if indicesObj is non-NULL at this point, it + * will have been written to a variable already and will hence + * not be leaked. + */ + + return TCL_ERROR; } + } + } + + /* + * We've got a match. Find a body to execute, skipping bodies that are + * "-". + */ + + matchFound: + ctxPtr = TclStackAlloc(interp, sizeof(CmdFrame)); + *ctxPtr = *iPtr->cmdFramePtr; + + if (splitObjs) { + /* + * We have to perform the GetSrc and other type dependent handling of + * the frame here because we are munging with the line numbers, + * something the other commands like if, etc. are not doing. Them are + * fine with simply passing the CmdFrame through and having the + * special handling done in 'info frame', or the bc compiler + */ + + if (ctxPtr->type == TCL_LOCATION_BC) { /* - * THIS FAILS IF AN OBJECT'S STRING REP HAS A NULL. + * Type BC => ctxPtr->data.eval.path is not used. + * ctxPtr->data.tebc.codePtr is used instead. */ - body = Tcl_GetStringFromObj(bodyObj, &length); - if ((length != 1) || (body[0] != '-')) { - break; + + TclGetSrcInfoForPc(ctxPtr); + pc = 1; + + /* + * The line information in the cmdFrame is now a copy we do not + * own. + */ + } + + if (ctxPtr->type == TCL_LOCATION_SOURCE && ctxPtr->line[bidx] >= 0) { + int bline = ctxPtr->line[bidx]; + + ctxPtr->line = ckalloc(objc * sizeof(int)); + ctxPtr->nline = objc; + TclListLines(blist, bline, objc, ctxPtr->line, objv); + } else { + /* + * This is either a dynamic code word, when all elements are + * relative to themselves, or something else less expected and + * where we have no information. The result is the same in both + * cases; tell the code to come that it doesn't know where it is, + * which triggers reversion to the old behavior. + */ + + int k; + + ctxPtr->line = ckalloc(objc * sizeof(int)); + ctxPtr->nline = objc; + for (k=0; k < objc; k++) { + ctxPtr->line[k] = -1; } } - code = Tcl_EvalObj(interp, bodyObj); - if (code == TCL_ERROR) { - char msg[100]; - sprintf(msg, "\n (\"%.50s\" arm line %d)", pattern, - interp->errorLine); - Tcl_AddObjErrorInfo(interp, msg, -1); + } + + for (j = i + 1; ; j += 2) { + if (j >= objc) { + /* + * This shouldn't happen since we've checked that the last body is + * not a continuation... + */ + + Tcl_Panic("fall-out when searching for body to match pattern"); + } + if (strcmp(TclGetString(objv[j]), "-") != 0) { + break; } - goto done; } /* - * Nothing matched: return nothing. + * TIP #280: Make invoking context available to switch branch. */ - code = TCL_OK; + Tcl_NRAddCallback(interp, SwitchPostProc, INT2PTR(splitObjs), ctxPtr, + INT2PTR(pc), (ClientData) pattern); + return TclNREvalObjEx(interp, objv[j], 0, ctxPtr, splitObjs ? j : bidx+j); +} - done: - return code; -#undef EXACT -#undef GLOB -#undef REGEXP +static int +SwitchPostProc( + ClientData data[], /* Data passed from Tcl_NRAddCallback above */ + Tcl_Interp *interp, /* Tcl interpreter */ + int result) /* Result to return*/ +{ + /* Unpack the preserved data */ + + int splitObjs = PTR2INT(data[0]); + CmdFrame *ctxPtr = data[1]; + int pc = PTR2INT(data[2]); + const char *pattern = data[3]; + int patternLength = strlen(pattern); + + /* + * Clean up TIP 280 context information + */ + + if (splitObjs) { + ckfree(ctxPtr->line); + if (pc && (ctxPtr->type == TCL_LOCATION_SOURCE)) { + /* + * Death of SrcInfo reference. + */ + + Tcl_DecrRefCount(ctxPtr->data.eval.path); + } + } + + /* + * Generate an error message if necessary. + */ + + if (result == TCL_ERROR) { + int limit = 50; + int overflow = (patternLength > limit); + + Tcl_AppendObjToErrorInfo(interp, Tcl_ObjPrintf( + "\n (\"%.*s%s\" arm line %d)", + (overflow ? limit : patternLength), pattern, + (overflow ? "..." : ""), Tcl_GetErrorLine(interp))); + } + TclStackFree(interp, ctxPtr); + return result; +} + +/* + *---------------------------------------------------------------------- + * + * Tcl_ThrowObjCmd -- + * + * This procedure is invoked to process the "throw" Tcl command. See the + * user documentation for details on what it does. + * + * Results: + * A standard Tcl result. + * + * Side effects: + * See the user documentation. + * + *---------------------------------------------------------------------- + */ + + /* ARGSUSED */ +int +Tcl_ThrowObjCmd( + ClientData dummy, /* Not used. */ + Tcl_Interp *interp, /* Current interpreter. */ + int objc, /* Number of arguments. */ + Tcl_Obj *const objv[]) /* Argument objects. */ +{ + Tcl_Obj *options; + int len; + + if (objc != 3) { + Tcl_WrongNumArgs(interp, 1, objv, "type message"); + return TCL_ERROR; + } + + /* + * The type must be a list of at least length 1. + */ + + if (Tcl_ListObjLength(interp, objv[1], &len) != TCL_OK) { + return TCL_ERROR; + } else if (len < 1) { + Tcl_SetObjResult(interp, Tcl_NewStringObj( + "type must be non-empty list", -1)); + Tcl_SetErrorCode(interp, "TCL", "OPERATION", "THROW", "BADEXCEPTION", + NULL); + return TCL_ERROR; + } + + /* + * Now prepare the result options dictionary. We use the list API as it is + * slightly more convenient. + */ + + TclNewLiteralStringObj(options, "-code error -level 0 -errorcode"); + Tcl_ListObjAppendElement(NULL, options, objv[1]); + + /* + * We're ready to go. Fire things into the low-level result machinery. + */ + + Tcl_SetObjResult(interp, objv[2]); + return Tcl_SetReturnOptions(interp, options); } /* @@ -1758,7 +4045,7 @@ Tcl_SwitchObjCmd(dummy, interp, objc, objv) * Tcl_TimeObjCmd -- * * This object-based procedure is invoked to process the "time" Tcl - * command. See the user documentation for details on what it does. + * command. See the user documentation for details on what it does. * * Results: * A standard Tcl object result. @@ -1769,25 +4056,28 @@ Tcl_SwitchObjCmd(dummy, interp, objc, objv) *---------------------------------------------------------------------- */ - /* ARGSUSED */ int -Tcl_TimeObjCmd(dummy, interp, objc, objv) - ClientData dummy; /* Not used. */ - Tcl_Interp *interp; /* Current interpreter. */ - int objc; /* Number of arguments. */ - Tcl_Obj *CONST objv[]; /* Argument objects. */ +Tcl_TimeObjCmd( + ClientData dummy, /* Not used. */ + Tcl_Interp *interp, /* Current interpreter. */ + int objc, /* Number of arguments. */ + Tcl_Obj *const objv[]) /* Argument objects. */ { register Tcl_Obj *objPtr; + Tcl_Obj *objs[4]; register int i, result; int count; double totalMicroSec; +#ifndef TCL_WIDE_CLICKS Tcl_Time start, stop; - char buf[100]; +#else + Tcl_WideInt start, stop; +#endif if (objc == 2) { count = 1; } else if (objc == 3) { - result = Tcl_GetIntFromObj(interp, objv[2], &count); + result = TclGetIntFromObj(interp, objv[2], &count); if (result != TCL_OK) { return result; } @@ -1795,37 +4085,62 @@ Tcl_TimeObjCmd(dummy, interp, objc, objv) Tcl_WrongNumArgs(interp, 1, objv, "command ?count?"); return TCL_ERROR; } - + objPtr = objv[1]; i = count; - TclpGetTime(&start); +#ifndef TCL_WIDE_CLICKS + Tcl_GetTime(&start); +#else + start = TclpGetWideClicks(); +#endif while (i-- > 0) { - result = Tcl_EvalObj(interp, objPtr); + result = Tcl_EvalObjEx(interp, objPtr, 0); if (result != TCL_OK) { return result; } } - TclpGetTime(&stop); - - totalMicroSec = - (stop.sec - start.sec)*1000000 + (stop.usec - start.usec); - sprintf(buf, "%.0f microseconds per iteration", - ((count <= 0) ? 0 : totalMicroSec/count)); - Tcl_ResetResult(interp); - Tcl_AppendToObj(Tcl_GetObjResult(interp), buf, -1); +#ifndef TCL_WIDE_CLICKS + Tcl_GetTime(&stop); + totalMicroSec = ((double) (stop.sec - start.sec)) * 1.0e6 + + (stop.usec - start.usec); +#else + stop = TclpGetWideClicks(); + totalMicroSec = ((double) TclpWideClicksToNanoseconds(stop - start))/1.0e3; +#endif + + if (count <= 1) { + /* + * Use int obj since we know time is not fractional. [Bug 1202178] + */ + + objs[0] = Tcl_NewIntObj((count <= 0) ? 0 : (int) totalMicroSec); + } else { + objs[0] = Tcl_NewDoubleObj(totalMicroSec/count); + } + + /* + * Construct the result as a list because many programs have always parsed + * as such (extracting the first element, typically). + */ + + TclNewLiteralStringObj(objs[1], "microseconds"); + TclNewLiteralStringObj(objs[2], "per"); + TclNewLiteralStringObj(objs[3], "iteration"); + Tcl_SetObjResult(interp, Tcl_NewListObj(4, objs)); + return TCL_OK; } /* *---------------------------------------------------------------------- * - * Tcl_TraceCmd -- + * Tcl_TryObjCmd, TclNRTryObjCmd -- * - * This procedure is invoked to process the "trace" Tcl command. - * See the user documentation for details on what it does. + * This procedure is invoked to process the "try" Tcl command. See the + * user documentation (or TIP #329) for details on what it does. * * Results: - * A standard Tcl result. + * A standard Tcl object result. * * Side effects: * See the user documentation. @@ -1833,288 +4148,557 @@ Tcl_TimeObjCmd(dummy, interp, objc, objv) *---------------------------------------------------------------------- */ - /* ARGSUSED */ int -Tcl_TraceCmd(dummy, interp, argc, argv) - ClientData dummy; /* Not used. */ - Tcl_Interp *interp; /* Current interpreter. */ - int argc; /* Number of arguments. */ - char **argv; /* Argument strings. */ +Tcl_TryObjCmd( + ClientData dummy, /* Not used. */ + Tcl_Interp *interp, /* Current interpreter. */ + int objc, /* Number of arguments. */ + Tcl_Obj *const objv[]) /* Argument objects. */ { - int c; - size_t length; + return Tcl_NRCallObjProc(interp, TclNRTryObjCmd, dummy, objc, objv); +} - if (argc < 2) { - Tcl_AppendResult(interp, "too few args: should be \"", - argv[0], " option [arg arg ...]\"", (char *) NULL); +int +TclNRTryObjCmd( + ClientData clientData, /* Not used. */ + Tcl_Interp *interp, /* Current interpreter. */ + int objc, /* Number of arguments. */ + Tcl_Obj *const objv[]) /* Argument objects. */ +{ + Tcl_Obj *bodyObj, *handlersObj, *finallyObj = NULL; + int i, bodyShared, haveHandlers, dummy, code; + static const char *const handlerNames[] = { + "finally", "on", "trap", NULL + }; + enum Handlers { + TryFinally, TryOn, TryTrap + }; + + /* + * Parse the arguments. The handlers are passed to subsequent callbacks as + * a Tcl_Obj list of the 5-tuples like (type, returnCode, errorCodePrefix, + * bindVariables, script), and the finally script is just passed as it is. + */ + + if (objc < 2) { + Tcl_WrongNumArgs(interp, 1, objv, + "body ?handler ...? ?finally script?"); return TCL_ERROR; } - c = argv[1][1]; - length = strlen(argv[1]); - if ((c == 'a') && (strncmp(argv[1], "variable", length) == 0) - && (length >= 2)) { - char *p; - int flags, length; - TraceVarInfo *tvarPtr; - - if (argc != 5) { - Tcl_AppendResult(interp, "wrong # args: should be \"", - argv[0], " variable name ops command\"", (char *) NULL); + bodyObj = objv[1]; + handlersObj = Tcl_NewObj(); + bodyShared = 0; + haveHandlers = 0; + for (i=2 ; i<objc ; i++) { + int type; + Tcl_Obj *info[5]; + + if (Tcl_GetIndexFromObj(interp, objv[i], handlerNames, "handler type", + 0, &type) != TCL_OK) { + Tcl_DecrRefCount(handlersObj); return TCL_ERROR; } + switch ((enum Handlers) type) { + case TryFinally: /* finally script */ + if (i < objc-2) { + Tcl_SetObjResult(interp, Tcl_NewStringObj( + "finally clause must be last", -1)); + Tcl_DecrRefCount(handlersObj); + Tcl_SetErrorCode(interp, "TCL", "OPERATION", "TRY", "FINALLY", + "NONTERMINAL", NULL); + return TCL_ERROR; + } else if (i == objc-1) { + Tcl_SetObjResult(interp, Tcl_NewStringObj( + "wrong # args to finally clause: must be" + " \"... finally script\"", -1)); + Tcl_DecrRefCount(handlersObj); + Tcl_SetErrorCode(interp, "TCL", "OPERATION", "TRY", "FINALLY", + "ARGUMENT", NULL); + return TCL_ERROR; + } + finallyObj = objv[++i]; + break; - flags = 0; - for (p = argv[3] ; *p != 0; p++) { - if (*p == 'r') { - flags |= TCL_TRACE_READS; - } else if (*p == 'w') { - flags |= TCL_TRACE_WRITES; - } else if (*p == 'u') { - flags |= TCL_TRACE_UNSETS; - } else { - goto badOps; + case TryOn: /* on code variableList script */ + if (i > objc-4) { + Tcl_SetObjResult(interp, Tcl_NewStringObj( + "wrong # args to on clause: must be \"... on code" + " variableList script\"", -1)); + Tcl_DecrRefCount(handlersObj); + Tcl_SetErrorCode(interp, "TCL", "OPERATION", "TRY", "ON", + "ARGUMENT", NULL); + return TCL_ERROR; } - } - if (flags == 0) { - goto badOps; - } + if (TclGetCompletionCodeFromObj(interp, objv[i+1], + &code) != TCL_OK) { + Tcl_DecrRefCount(handlersObj); + return TCL_ERROR; + } + info[2] = NULL; + goto commonHandler; + + case TryTrap: /* trap pattern variableList script */ + if (i > objc-4) { + Tcl_SetObjResult(interp, Tcl_NewStringObj( + "wrong # args to trap clause: " + "must be \"... trap pattern variableList script\"", + -1)); + Tcl_DecrRefCount(handlersObj); + Tcl_SetErrorCode(interp, "TCL", "OPERATION", "TRY", "TRAP", + "ARGUMENT", NULL); + return TCL_ERROR; + } + code = 1; + if (Tcl_ListObjLength(NULL, objv[i+1], &dummy) != TCL_OK) { + Tcl_SetObjResult(interp, Tcl_ObjPrintf( + "bad prefix '%s': must be a list", + Tcl_GetString(objv[i+1]))); + Tcl_DecrRefCount(handlersObj); + Tcl_SetErrorCode(interp, "TCL", "OPERATION", "TRY", "TRAP", + "EXNFORMAT", NULL); + return TCL_ERROR; + } + info[2] = objv[i+1]; - length = strlen(argv[4]); - tvarPtr = (TraceVarInfo *) ckalloc((unsigned) - (sizeof(TraceVarInfo) - sizeof(tvarPtr->command) + length + 1)); - tvarPtr->flags = flags; - tvarPtr->errMsg = NULL; - tvarPtr->length = length; - flags |= TCL_TRACE_UNSETS; - strcpy(tvarPtr->command, argv[4]); - if (Tcl_TraceVar(interp, argv[2], flags, TraceVarProc, - (ClientData) tvarPtr) != TCL_OK) { - ckfree((char *) tvarPtr); - return TCL_ERROR; + commonHandler: + if (Tcl_ListObjLength(interp, objv[i+2], &dummy) != TCL_OK) { + Tcl_DecrRefCount(handlersObj); + return TCL_ERROR; + } + + info[0] = objv[i]; /* type */ + TclNewIntObj(info[1], code); /* returnCode */ + if (info[2] == NULL) { /* errorCodePrefix */ + TclNewObj(info[2]); + } + info[3] = objv[i+2]; /* bindVariables */ + info[4] = objv[i+3]; /* script */ + + bodyShared = !strcmp(TclGetString(objv[i+3]), "-"); + Tcl_ListObjAppendElement(NULL, handlersObj, + Tcl_NewListObj(5, info)); + haveHandlers = 1; + i += 3; + break; } - } else if ((c == 'd') && (strncmp(argv[1], "vdelete", length) - && (length >= 2)) == 0) { - char *p; - int flags, length; - TraceVarInfo *tvarPtr; - ClientData clientData; + } + if (bodyShared) { + Tcl_SetObjResult(interp, Tcl_NewStringObj( + "last non-finally clause must not have a body of \"-\"", -1)); + Tcl_DecrRefCount(handlersObj); + Tcl_SetErrorCode(interp, "TCL", "OPERATION", "TRY", "BADFALLTHROUGH", + NULL); + return TCL_ERROR; + } + if (!haveHandlers) { + Tcl_DecrRefCount(handlersObj); + handlersObj = NULL; + } - if (argc != 5) { - Tcl_AppendResult(interp, "wrong # args: should be \"", - argv[0], " vdelete name ops command\"", (char *) NULL); - return TCL_ERROR; + /* + * Execute the body. + */ + + Tcl_NRAddCallback(interp, TryPostBody, handlersObj, finallyObj, + (ClientData)objv, INT2PTR(objc)); + return TclNREvalObjEx(interp, bodyObj, 0, + ((Interp *) interp)->cmdFramePtr, 1); +} + +/* + *---------------------------------------------------------------------- + * + * During -- + * + * This helper function patches together the updates to the interpreter's + * return options that are needed when things fail during the processing + * of a handler or finally script for the [try] command. + * + * Returns: + * The new option dictionary. + * + *---------------------------------------------------------------------- + */ + +static inline Tcl_Obj * +During( + Tcl_Interp *interp, + int resultCode, /* The result code from the just-evaluated + * script. */ + Tcl_Obj *oldOptions, /* The old option dictionary. */ + Tcl_Obj *errorInfo) /* An object to append to the errorinfo and + * release, or NULL if nothing is to be added. + * Designed to be used with Tcl_ObjPrintf. */ +{ + Tcl_Obj *during, *options; + + if (errorInfo != NULL) { + Tcl_AppendObjToErrorInfo(interp, errorInfo); + } + options = Tcl_GetReturnOptions(interp, resultCode); + TclNewLiteralStringObj(during, "-during"); + Tcl_IncrRefCount(during); + Tcl_DictObjPut(interp, options, during, oldOptions); + Tcl_DecrRefCount(during); + Tcl_IncrRefCount(options); + Tcl_DecrRefCount(oldOptions); + return options; +} + +/* + *---------------------------------------------------------------------- + * + * TryPostBody -- + * + * Callback to handle the outcome of the execution of the body of a 'try' + * command. + * + *---------------------------------------------------------------------- + */ + +static int +TryPostBody( + ClientData data[], + Tcl_Interp *interp, + int result) +{ + Tcl_Obj *resultObj, *options, *handlersObj, *finallyObj, *cmdObj, **objv; + int i, dummy, code, objc; + int numHandlers = 0; + + handlersObj = data[0]; + finallyObj = data[1]; + objv = data[2]; + objc = PTR2INT(data[3]); + + cmdObj = objv[0]; + + /* + * Check for limits/rewinding, which override normal trapping behaviour. + */ + + if (((Interp*) interp)->execEnvPtr->rewind || Tcl_LimitExceeded(interp)) { + Tcl_AppendObjToErrorInfo(interp, Tcl_ObjPrintf( + "\n (\"%s\" body line %d)", TclGetString(cmdObj), + Tcl_GetErrorLine(interp))); + if (handlersObj != NULL) { + Tcl_DecrRefCount(handlersObj); } + return TCL_ERROR; + } + + /* + * Basic processing of the outcome of the script, including adding of + * errorinfo trace. + */ + + if (result == TCL_ERROR) { + Tcl_AppendObjToErrorInfo(interp, Tcl_ObjPrintf( + "\n (\"%s\" body line %d)", TclGetString(cmdObj), + Tcl_GetErrorLine(interp))); + } + resultObj = Tcl_GetObjResult(interp); + Tcl_IncrRefCount(resultObj); + options = Tcl_GetReturnOptions(interp, result); + Tcl_IncrRefCount(options); + Tcl_ResetResult(interp); + + /* + * Handle the results. + */ + + if (handlersObj != NULL) { + int found = 0; + Tcl_Obj **handlers, **info; + + Tcl_ListObjGetElements(NULL, handlersObj, &numHandlers, &handlers); + for (i=0 ; i<numHandlers ; i++) { + Tcl_Obj *handlerBodyObj; + + Tcl_ListObjGetElements(NULL, handlers[i], &dummy, &info); + if (!found) { + Tcl_GetIntFromObj(NULL, info[1], &code); + if (code != result) { + continue; + } + + /* + * When processing an error, we must also perform list-prefix + * matching of the errorcode list. However, if this was an + * 'on' handler, the list that we are matching against will be + * empty. + */ + + if (code == TCL_ERROR) { + Tcl_Obj *errorCodeName, *errcode, **bits1, **bits2; + int len1, len2, j; + + TclNewLiteralStringObj(errorCodeName, "-errorcode"); + Tcl_DictObjGet(NULL, options, errorCodeName, &errcode); + Tcl_DecrRefCount(errorCodeName); + Tcl_ListObjGetElements(NULL, info[2], &len1, &bits1); + if (Tcl_ListObjGetElements(NULL, errcode, &len2, + &bits2) != TCL_OK) { + continue; + } + if (len2 < len1) { + continue; + } + for (j=0 ; j<len1 ; j++) { + if (strcmp(TclGetString(bits1[j]), + TclGetString(bits2[j])) != 0) { + /* + * Really want 'continue outerloop;', but C does + * not give us that. + */ + + goto didNotMatch; + } + } + } + + found = 1; + } + + /* + * Now we need to scan forward over "-" bodies. Note that we've + * already checked that the last body is not a "-", so this search + * will terminate successfully. + */ + + if (!strcmp(TclGetString(info[4]), "-")) { + continue; + } + + /* + * Bind the variables. We already know this is a list of variable + * names, but it might be empty. + */ - flags = 0; - for (p = argv[3] ; *p != 0; p++) { - if (*p == 'r') { - flags |= TCL_TRACE_READS; - } else if (*p == 'w') { - flags |= TCL_TRACE_WRITES; - } else if (*p == 'u') { - flags |= TCL_TRACE_UNSETS; + Tcl_ResetResult(interp); + result = TCL_ERROR; + Tcl_ListObjLength(NULL, info[3], &dummy); + if (dummy > 0) { + Tcl_Obj *varName; + + Tcl_ListObjIndex(NULL, info[3], 0, &varName); + if (Tcl_ObjSetVar2(interp, varName, NULL, resultObj, + TCL_LEAVE_ERR_MSG) == NULL) { + Tcl_DecrRefCount(resultObj); + goto handlerFailed; + } + Tcl_DecrRefCount(resultObj); + if (dummy > 1) { + Tcl_ListObjIndex(NULL, info[3], 1, &varName); + if (Tcl_ObjSetVar2(interp, varName, NULL, options, + TCL_LEAVE_ERR_MSG) == NULL) { + goto handlerFailed; + } + } } else { - goto badOps; + /* + * Dispose of the result to prevent a memleak. [Bug 2910044] + */ + + Tcl_DecrRefCount(resultObj); } - } - if (flags == 0) { - goto badOps; + + /* + * Evaluate the handler body and process the outcome. Note that we + * need to keep the kind of handler for debugging purposes, and in + * any case anything we want from info[] must be extracted right + * now because the info[] array is about to become invalid. There + * is very little refcount handling here however, since we know + * that the objects that we still want to refer to now were input + * arguments to [try] and so are still on the Tcl value stack. + */ + + handlerBodyObj = info[4]; + Tcl_NRAddCallback(interp, TryPostHandler, objv, options, info[0], + INT2PTR((finallyObj == NULL) ? 0 : objc - 1)); + Tcl_DecrRefCount(handlersObj); + return TclNREvalObjEx(interp, handlerBodyObj, 0, + ((Interp *) interp)->cmdFramePtr, 4*i + 5); + + handlerFailed: + resultObj = Tcl_GetObjResult(interp); + Tcl_IncrRefCount(resultObj); + options = During(interp, result, options, NULL); + break; + + didNotMatch: + continue; } /* - * Search through all of our traces on this variable to - * see if there's one with the given command. If so, then - * delete the first one that matches. + * No handler matched; get rid of the list of handlers. */ - length = strlen(argv[4]); - clientData = 0; - while ((clientData = Tcl_VarTraceInfo(interp, argv[2], 0, - TraceVarProc, clientData)) != 0) { - tvarPtr = (TraceVarInfo *) clientData; - if ((tvarPtr->length == length) && (tvarPtr->flags == flags) - && (strncmp(argv[4], tvarPtr->command, - (size_t) length) == 0)) { - Tcl_UntraceVar(interp, argv[2], flags | TCL_TRACE_UNSETS, - TraceVarProc, clientData); - if (tvarPtr->errMsg != NULL) { - ckfree(tvarPtr->errMsg); - } - ckfree((char *) tvarPtr); - break; - } - } - } else if ((c == 'i') && (strncmp(argv[1], "vinfo", length) == 0) - && (length >= 2)) { - ClientData clientData; - char ops[4], *p; - char *prefix = "{"; + Tcl_DecrRefCount(handlersObj); + } - if (argc != 3) { - Tcl_AppendResult(interp, "wrong # args: should be \"", - argv[0], " vinfo name\"", (char *) NULL); - return TCL_ERROR; - } - clientData = 0; - while ((clientData = Tcl_VarTraceInfo(interp, argv[2], 0, - TraceVarProc, clientData)) != 0) { - TraceVarInfo *tvarPtr = (TraceVarInfo *) clientData; - p = ops; - if (tvarPtr->flags & TCL_TRACE_READS) { - *p = 'r'; - p++; - } - if (tvarPtr->flags & TCL_TRACE_WRITES) { - *p = 'w'; - p++; - } - if (tvarPtr->flags & TCL_TRACE_UNSETS) { - *p = 'u'; - p++; - } - *p = '\0'; - Tcl_AppendResult(interp, prefix, (char *) NULL); - Tcl_AppendElement(interp, ops); - Tcl_AppendElement(interp, tvarPtr->command); - Tcl_AppendResult(interp, "}", (char *) NULL); - prefix = " {"; - } - } else { - Tcl_AppendResult(interp, "bad option \"", argv[1], - "\": should be variable, vdelete, or vinfo", - (char *) NULL); - return TCL_ERROR; + /* + * Process the finally clause. + */ + + if (finallyObj != NULL) { + Tcl_NRAddCallback(interp, TryPostFinal, resultObj, options, cmdObj, + NULL); + return TclNREvalObjEx(interp, finallyObj, 0, + ((Interp *) interp)->cmdFramePtr, objc - 1); } - return TCL_OK; - badOps: - Tcl_AppendResult(interp, "bad operations \"", argv[3], - "\": should be one or more of rwu", (char *) NULL); - return TCL_ERROR; + /* + * Install the correct result/options into the interpreter and clean up + * any temporary storage. + */ + + result = Tcl_SetReturnOptions(interp, options); + Tcl_DecrRefCount(options); + Tcl_SetObjResult(interp, resultObj); + Tcl_DecrRefCount(resultObj); + return result; } /* *---------------------------------------------------------------------- * - * TraceVarProc -- - * - * This procedure is called to handle variable accesses that have - * been traced using the "trace" command. + * TryPostHandler -- * - * Results: - * Normally returns NULL. If the trace command returns an error, - * then this procedure returns an error string. - * - * Side effects: - * Depends on the command associated with the trace. + * Callback to handle the outcome of the execution of a handler of a + * 'try' command. * *---------------------------------------------------------------------- */ - /* ARGSUSED */ -static char * -TraceVarProc(clientData, interp, name1, name2, flags) - ClientData clientData; /* Information about the variable trace. */ - Tcl_Interp *interp; /* Interpreter containing variable. */ - char *name1; /* Name of variable or array. */ - char *name2; /* Name of element within array; NULL means - * scalar variable is being referenced. */ - int flags; /* OR-ed bits giving operation and other - * information. */ +static int +TryPostHandler( + ClientData data[], + Tcl_Interp *interp, + int result) { - Interp *iPtr = (Interp *) interp; - TraceVarInfo *tvarPtr = (TraceVarInfo *) clientData; - char *result; - int code; - Interp dummy; - Tcl_DString cmd; - Tcl_Obj *saveObjPtr, *oldObjResultPtr; + Tcl_Obj *resultObj, *cmdObj, *options, *handlerKindObj, **objv; + Tcl_Obj *finallyObj; + int finally; - result = NULL; - if (tvarPtr->errMsg != NULL) { - ckfree(tvarPtr->errMsg); - tvarPtr->errMsg = NULL; - } - if ((tvarPtr->flags & flags) && !(flags & TCL_INTERP_DESTROYED)) { + objv = data[0]; + options = data[1]; + handlerKindObj = data[2]; + finally = PTR2INT(data[3]); - /* - * Generate a command to execute by appending list elements - * for the two variable names and the operation. The five - * extra characters are for three space, the opcode character, - * and the terminating null. - */ + cmdObj = objv[0]; + finallyObj = finally ? objv[finally] : 0; - if (name2 == NULL) { - name2 = ""; - } - Tcl_DStringInit(&cmd); - Tcl_DStringAppend(&cmd, tvarPtr->command, tvarPtr->length); - Tcl_DStringAppendElement(&cmd, name1); - Tcl_DStringAppendElement(&cmd, name2); - if (flags & TCL_TRACE_READS) { - Tcl_DStringAppend(&cmd, " r", 2); - } else if (flags & TCL_TRACE_WRITES) { - Tcl_DStringAppend(&cmd, " w", 2); - } else if (flags & TCL_TRACE_UNSETS) { - Tcl_DStringAppend(&cmd, " u", 2); - } + /* + * Check for limits/rewinding, which override normal trapping behaviour. + */ - /* - * Execute the command. Be careful to save and restore both the - * string and object results from the interpreter used for - * the command. We discard any object result the command returns. - */ + if (((Interp*) interp)->execEnvPtr->rewind || Tcl_LimitExceeded(interp)) { + options = During(interp, result, options, Tcl_ObjPrintf( + "\n (\"%s ... %s\" handler line %d)", + TclGetString(cmdObj), TclGetString(handlerKindObj), + Tcl_GetErrorLine(interp))); + Tcl_DecrRefCount(options); + return TCL_ERROR; + } - dummy.objResultPtr = Tcl_NewObj(); - Tcl_IncrRefCount(dummy.objResultPtr); - if (interp->freeProc == 0) { - dummy.freeProc = (Tcl_FreeProc *) 0; - dummy.result = ""; - Tcl_SetResult((Tcl_Interp *) &dummy, interp->result, - TCL_VOLATILE); - } else { - dummy.freeProc = interp->freeProc; - dummy.result = interp->result; - interp->freeProc = (Tcl_FreeProc *) 0; - } - - saveObjPtr = Tcl_GetObjResult(interp); - Tcl_IncrRefCount(saveObjPtr); - - code = Tcl_Eval(interp, Tcl_DStringValue(&cmd)); - if (code != TCL_OK) { /* copy error msg to result */ - tvarPtr->errMsg = (char *) - ckalloc((unsigned) (strlen(interp->result) + 1)); - strcpy(tvarPtr->errMsg, interp->result); - result = tvarPtr->errMsg; - Tcl_ResetResult(interp); /* must clear error state. */ - } + /* + * The handler result completely substitutes for the result of the body. + */ - /* - * Restore the interpreter's string result. - */ - - Tcl_SetResult(interp, dummy.result, - (dummy.freeProc == 0) ? TCL_VOLATILE : dummy.freeProc); + resultObj = Tcl_GetObjResult(interp); + Tcl_IncrRefCount(resultObj); + if (result == TCL_ERROR) { + options = During(interp, result, options, Tcl_ObjPrintf( + "\n (\"%s ... %s\" handler line %d)", + TclGetString(cmdObj), TclGetString(handlerKindObj), + Tcl_GetErrorLine(interp))); + } else { + Tcl_DecrRefCount(options); + options = Tcl_GetReturnOptions(interp, result); + Tcl_IncrRefCount(options); + } - /* - * Restore the interpreter's object result from saveObjPtr. - */ + /* + * Process the finally clause if it is present. + */ - oldObjResultPtr = iPtr->objResultPtr; - iPtr->objResultPtr = saveObjPtr; /* was incremented above */ - Tcl_DecrRefCount(oldObjResultPtr); + if (finallyObj != NULL) { + Interp *iPtr = (Interp *) interp; - Tcl_DecrRefCount(dummy.objResultPtr); - dummy.objResultPtr = NULL; - Tcl_DStringFree(&cmd); + Tcl_NRAddCallback(interp, TryPostFinal, resultObj, options, cmdObj, + NULL); + + /* The 'finally' script is always the last argument word. */ + return TclNREvalObjEx(interp, finallyObj, 0, iPtr->cmdFramePtr, + finally); } - if (flags & TCL_TRACE_DESTROYED) { - result = NULL; - if (tvarPtr->errMsg != NULL) { - ckfree(tvarPtr->errMsg); + + /* + * Install the correct result/options into the interpreter and clean up + * any temporary storage. + */ + + result = Tcl_SetReturnOptions(interp, options); + Tcl_DecrRefCount(options); + Tcl_SetObjResult(interp, resultObj); + Tcl_DecrRefCount(resultObj); + return result; +} + +/* + *---------------------------------------------------------------------- + * + * TryPostFinal -- + * + * Callback to handle the outcome of the execution of the finally script + * of a 'try' command. + * + *---------------------------------------------------------------------- + */ + +static int +TryPostFinal( + ClientData data[], + Tcl_Interp *interp, + int result) +{ + Tcl_Obj *resultObj, *options, *cmdObj; + + resultObj = data[0]; + options = data[1]; + cmdObj = data[2]; + + /* + * If the result wasn't OK, we need to adjust the result options. + */ + + if (result != TCL_OK) { + Tcl_DecrRefCount(resultObj); + resultObj = NULL; + if (result == TCL_ERROR) { + options = During(interp, result, options, Tcl_ObjPrintf( + "\n (\"%s ... finally\" body line %d)", + TclGetString(cmdObj), Tcl_GetErrorLine(interp))); + } else { + Tcl_Obj *origOptions = options; + + options = Tcl_GetReturnOptions(interp, result); + Tcl_IncrRefCount(options); + Tcl_DecrRefCount(origOptions); } - ckfree((char *) tvarPtr); + } + + /* + * Install the correct result/options into the interpreter and clean up + * any temporary storage. + */ + + result = Tcl_SetReturnOptions(interp, options); + Tcl_DecrRefCount(options); + if (resultObj != NULL) { + Tcl_SetObjResult(interp, resultObj); + Tcl_DecrRefCount(resultObj); } return result; } @@ -2122,65 +4706,124 @@ TraceVarProc(clientData, interp, name1, name2, flags) /* *---------------------------------------------------------------------- * - * Tcl_WhileCmd -- + * Tcl_WhileObjCmd -- * - * This procedure is invoked to process the "while" Tcl command. - * See the user documentation for details on what it does. + * This procedure is invoked to process the "while" Tcl command. See the + * user documentation for details on what it does. * - * With the bytecode compiler, this procedure is only called when - * a command name is computed at runtime, and is "while" or the name - * to which "while" was renamed: e.g., "set z while; $z {$i<100} {}" + * With the bytecode compiler, this procedure is only called when a + * command name is computed at runtime, and is "while" or the name to + * which "while" was renamed: e.g., "set z while; $z {$i<100} {}" * * Results: - * A standard Tcl result. + * A standard Tcl result. * * Side effects: - * See the user documentation. + * See the user documentation. * *---------------------------------------------------------------------- */ - /* ARGSUSED */ int -Tcl_WhileCmd(dummy, interp, argc, argv) - ClientData dummy; /* Not used. */ - Tcl_Interp *interp; /* Current interpreter. */ - int argc; /* Number of arguments. */ - char **argv; /* Argument strings. */ +Tcl_WhileObjCmd( + ClientData dummy, /* Not used. */ + Tcl_Interp *interp, /* Current interpreter. */ + int objc, /* Number of arguments. */ + Tcl_Obj *const objv[]) /* Argument objects. */ { - int result, value; + return Tcl_NRCallObjProc(interp, TclNRWhileObjCmd, dummy, objc, objv); +} - if (argc != 3) { - Tcl_AppendResult(interp, "wrong # args: should be \"", - argv[0], " test command\"", (char *) NULL); - return TCL_ERROR; - } +int +TclNRWhileObjCmd( + ClientData dummy, /* Not used. */ + Tcl_Interp *interp, /* Current interpreter. */ + int objc, /* Number of arguments. */ + Tcl_Obj *const objv[]) /* Argument objects. */ +{ + ForIterData *iterPtr; - while (1) { - result = Tcl_ExprBoolean(interp, argv[1], &value); - if (result != TCL_OK) { - return result; - } - if (!value) { - break; - } - result = Tcl_Eval(interp, argv[2]); - if ((result != TCL_OK) && (result != TCL_CONTINUE)) { - if (result == TCL_ERROR) { - char msg[60]; - sprintf(msg, "\n (\"while\" body line %d)", - interp->errorLine); - Tcl_AddErrorInfo(interp, msg); - } - break; - } - } - if (result == TCL_BREAK) { - result = TCL_OK; - } - if (result == TCL_OK) { - Tcl_ResetResult(interp); + if (objc != 3) { + Tcl_WrongNumArgs(interp, 1, objv, "test command"); + return TCL_ERROR; } - return result; + + /* + * We reuse [for]'s callback, passing a NULL for the 'next' script. + */ + + TclSmallAllocEx(interp, sizeof(ForIterData), iterPtr); + iterPtr->cond = objv[1]; + iterPtr->body = objv[2]; + iterPtr->next = NULL; + iterPtr->msg = "\n (\"while\" body line %d)"; + iterPtr->word = 2; + + TclNRAddCallback(interp, TclNRForIterCallback, iterPtr, NULL, + NULL, NULL); + return TCL_OK; } + +/* + *---------------------------------------------------------------------- + * + * TclListLines -- + * + * ??? + * + * Results: + * Filled in array of line numbers? + * + * Side effects: + * None. + * + *---------------------------------------------------------------------- + */ +void +TclListLines( + Tcl_Obj *listObj, /* Pointer to obj holding a string with list + * structure. Assumed to be valid. Assumed to + * contain n elements. */ + int line, /* Line the list as a whole starts on. */ + int n, /* #elements in lines */ + int *lines, /* Array of line numbers, to fill. */ + Tcl_Obj *const *elems) /* The list elems as Tcl_Obj*, in need of + * derived continuation data */ +{ + const char *listStr = Tcl_GetString(listObj); + const char *listHead = listStr; + int i, length = strlen(listStr); + const char *element = NULL, *next = NULL; + ContLineLoc *clLocPtr = TclContinuationsGet(listObj); + int *clNext = (clLocPtr ? &clLocPtr->loc[0] : NULL); + + for (i = 0; i < n; i++) { + TclFindElement(NULL, listStr, length, &element, &next, NULL, NULL); + + TclAdvanceLines(&line, listStr, element); + /* Leading whitespace */ + TclAdvanceContinuations(&line, &clNext, element - listHead); + if (elems && clNext) { + TclContinuationsEnterDerived(elems[i], element-listHead, clNext); + } + lines[i] = line; + length -= (next - listStr); + TclAdvanceLines(&line, element, next); + /* Element */ + listStr = next; + + if (*element == 0) { + /* ASSERT i == n */ + break; + } + } +} + +/* + * Local Variables: + * mode: c + * c-basic-offset: 4 + * fill-column: 78 + * End: + */ |
