diff options
author | dkf <donal.k.fellows@manchester.ac.uk> | 2007-11-22 16:39:57 (GMT) |
---|---|---|
committer | dkf <donal.k.fellows@manchester.ac.uk> | 2007-11-22 16:39:57 (GMT) |
commit | 14a34c4087e1034699a9b588da8b0a9927479f45 (patch) | |
tree | bd55f87de299c03dc8081b5c4a78548493babbd1 | |
parent | 42acb79e42e426c61ca95ebfac491b77ac807f29 (diff) | |
download | tcl-14a34c4087e1034699a9b588da8b0a9927479f45.zip tcl-14a34c4087e1034699a9b588da8b0a9927479f45.tar.gz tcl-14a34c4087e1034699a9b588da8b0a9927479f45.tar.bz2 |
Rewrote the [string] and [dict] implementations to be ready for conversion
to ensembles.
-rw-r--r-- | ChangeLog | 6 | ||||
-rw-r--r-- | generic/tclCmdMZ.c | 3034 | ||||
-rw-r--r-- | generic/tclDictObj.c | 136 |
3 files changed, 2033 insertions, 1143 deletions
@@ -1,4 +1,8 @@ -2007-11-22 Donal K. Fellows <donal.k.fellows@man.ac.uk> +2007-11-22 Donal K. Fellows <donal.k.fellows@manchester.ac.uk> + + * generic/tclCmdMZ.c (Tcl_StringObjCmd): Rewrote the [string] and + * generic/tclDictObj.c (Tcl_DictObjCmd): [dict] implementations to be + ready for conversion to ensembles. * tests/string.test (string-12.22): Flag shimmering bug found in [string range]. diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c index 662b201..c421d28 100644 --- a/generic/tclCmdMZ.c +++ b/generic/tclCmdMZ.c @@ -15,7 +15,7 @@ * See the file "license.terms" for information on usage and redistribution of * this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclCmdMZ.c,v 1.158 2007/11/19 11:13:10 dkf Exp $ + * RCS: @(#) $Id: tclCmdMZ.c,v 1.159 2007/11/22 16:39:57 dkf Exp $ */ #include "tclInt.h" @@ -1093,16 +1093,11 @@ Tcl_SplitObjCmd( /* *---------------------------------------------------------------------- * - * Tcl_StringObjCmd -- - * - * This procedure is invoked to process the "string" Tcl command. See the - * user documentation for details on what it does. Note that this command - * only functions correctly on properly formed Tcl UTF strings. + * StringFirstCmd -- * - * Note that the primary methods here (equal, compare, match, ...) have - * bytecode equivalents. You will find the code for those in - * tclExecute.c. The code here will only be used in the non-bc case (like - * in an 'eval'). + * This procedure is invoked to process the "string first" Tcl command. + * See the user documentation for details on what it does. Note that this + * command only functions correctly on properly formed Tcl UTF strings. * * Results: * A standard Tcl result. @@ -1113,1336 +1108,2217 @@ Tcl_SplitObjCmd( *---------------------------------------------------------------------- */ -int -Tcl_StringObjCmd( +static int +StringFirstCmd( ClientData dummy, /* Not used. */ Tcl_Interp *interp, /* Current interpreter. */ int objc, /* Number of arguments. */ - Tcl_Obj *CONST objv[]) /* Argument objects. */ + Tcl_Obj *const objv[]) /* Argument objects. */ { - int index, left, right; - char *string1, *string2; - int length1, length2; - static CONST char *options[] = { - "bytelength", "compare", "equal", "first", - "index", "is", "last", "length", - "map", "match", "range", "repeat", - "replace", "reverse", "tolower", "toupper", - "totitle", "trim", "trimleft", "trimright", - "wordend", "wordstart", NULL - }; - enum options { - STR_BYTELENGTH, STR_COMPARE, STR_EQUAL, STR_FIRST, - STR_INDEX, STR_IS, STR_LAST, STR_LENGTH, - STR_MAP, STR_MATCH, STR_RANGE, STR_REPEAT, - STR_REPLACE, STR_REVERSE, STR_TOLOWER, STR_TOUPPER, - STR_TOTITLE, STR_TRIM, STR_TRIMLEFT, STR_TRIMRIGHT, - STR_WORDEND, STR_WORDSTART - }; - - if (objc < 2) { - Tcl_WrongNumArgs(interp, 1, objv, "option arg ?arg ...?"); - return TCL_ERROR; - } + Tcl_UniChar *ustring1, *ustring2; + int match, start, length1, length2; - if (Tcl_GetIndexFromObj(interp, objv[1], options, "option", 0, - &index) != TCL_OK) { + if (objc < 4 || objc > 5) { + Tcl_WrongNumArgs(interp, 2, objv, + "needleString haystackString ?startIndex?"); return TCL_ERROR; } - switch ((enum options) index) { - case STR_EQUAL: - case STR_COMPARE: { - /* - * Remember to keep code here in some sync with the byte-compiled - * versions in tclExecute.c (INST_STR_EQ, INST_STR_NEQ and - * INST_STR_CMP as well as the expr string comparison in - * INST_EQ/INST_NEQ/INST_LT/...). - */ - - int i, match, length, nocase = 0, reqlength = -1; - typedef int (*strCmpFn_t)(const char *, const char *, unsigned int); - strCmpFn_t strCmpFn; + /* + * We are searching string2 for the sequence string1. + */ - if (objc < 4 || objc > 7) { - str_cmp_args: - Tcl_WrongNumArgs(interp, 2, objv, - "?-nocase? ?-length int? string1 string2"); - return TCL_ERROR; - } + match = -1; + start = 0; + length2 = -1; - for (i = 2; i < objc-2; i++) { - string2 = TclGetStringFromObj(objv[i], &length2); - if ((length2 > 1) - && strncmp(string2, "-nocase", (size_t)length2) == 0) { - nocase = 1; - } else if ((length2 > 1) - && strncmp(string2, "-length", (size_t)length2) == 0) { - if (i+1 >= objc-2) { - goto str_cmp_args; - } - ++i; - if (TclGetIntFromObj(interp, objv[i], - &reqlength) != TCL_OK) { - return TCL_ERROR; - } - } else { - Tcl_AppendResult(interp, "bad option \"", string2, - "\": must be -nocase or -length", NULL); - return TCL_ERROR; - } - } + ustring1 = Tcl_GetUnicodeFromObj(objv[2], &length1); + ustring2 = Tcl_GetUnicodeFromObj(objv[3], &length2); + if (objc == 5) { /* - * From now on, we only access the two objects at the end of the - * argument array. + * If a startIndex is specified, we will need to fast forward to that + * point in the string before we think about a match. */ - objv += objc-2; - - if ((reqlength == 0) || (objv[0] == objv[1])) { + if (TclGetIntForIndexM(interp, objv[4], length2-1, &start) != TCL_OK){ + return TCL_ERROR; + } + if (start >= length2) { + goto str_first_done; + } else if (start > 0) { + ustring2 += start; + length2 -= start; + } else if (start < 0) { /* - * Always match at 0 chars of if it is the same obj. + * Invalid start index mapped to string start; Bug #423581 */ - Tcl_SetObjResult(interp, - Tcl_NewBooleanObj((enum options) index == STR_EQUAL)); - break; - } else if (!nocase && objv[0]->typePtr == &tclByteArrayType && - objv[1]->typePtr == &tclByteArrayType) { - /* - * Use binary versions of comparisons since that won't cause undue - * type conversions and it is much faster. Only do this if we're - * case-sensitive (which is all that really makes sense with byte - * arrays anyway, and we have no memcasecmp() for some - * reason... :^) - */ + start = 0; + } + } - string1 = (char *) Tcl_GetByteArrayFromObj(objv[0], &length1); - string2 = (char *) Tcl_GetByteArrayFromObj(objv[1], &length2); - strCmpFn = (strCmpFn_t) memcmp; - } else if ((objv[0]->typePtr == &tclStringType) - && (objv[1]->typePtr == &tclStringType)) { - /* - * Do a unicode-specific comparison if both of the args are of - * String type. In benchmark testing this proved the most - * efficient check between the unicode and string comparison - * operations. - */ + if (length1 > 0) { + register Tcl_UniChar *p, *end; - string1 = (char *) Tcl_GetUnicodeFromObj(objv[0], &length1); - string2 = (char *) Tcl_GetUnicodeFromObj(objv[1], &length2); - strCmpFn = (strCmpFn_t) - (nocase ? Tcl_UniCharNcasecmp : Tcl_UniCharNcmp); - } else { + end = ustring2 + length2 - length1 + 1; + for (p = ustring2; p < end; p++) { /* - * As a catch-all we will work with UTF-8. We cannot use memcmp() - * as that is unsafe with any string containing NULL (\xC0\x80 in - * Tcl's utf rep). We can use the more efficient TclpUtfNcmp2 if - * we are case-sensitive and no specific length was requested. + * Scan forward to find the first character. */ - string1 = (char *) TclGetStringFromObj(objv[0], &length1); - string2 = (char *) TclGetStringFromObj(objv[1], &length2); - if ((reqlength < 0) && !nocase) { - strCmpFn = (strCmpFn_t) TclpUtfNcmp2; - } else { - length1 = Tcl_NumUtfChars(string1, length1); - length2 = Tcl_NumUtfChars(string2, length2); - strCmpFn = (strCmpFn_t) - (nocase ? Tcl_UtfNcasecmp : Tcl_UtfNcmp); + if ((*p == *ustring1) && (TclUniCharNcmp(ustring1, p, + (unsigned long) length1) == 0)) { + match = p - ustring2; + break; } } + } - if (((enum options) index == STR_EQUAL) - && (reqlength < 0) && (length1 != length2)) { - match = 1; /* This will be reversed below. */ - } else { - length = (length1 < length2) ? length1 : length2; - if (reqlength > 0 && reqlength < length) { - length = reqlength; - } else if (reqlength < 0) { - /* - * The requested length is negative, so we ignore it by - * setting it to length + 1 so we correct the match var. - */ + /* + * Compute the character index of the matching string by counting the + * number of characters before the match. + */ - reqlength = length + 1; - } + if ((match != -1) && (objc == 5)) { + match += start; + } - match = strCmpFn(string1, string2, (unsigned) length); - if ((match == 0) && (reqlength > length)) { - match = length1 - length2; - } - } + str_first_done: + Tcl_SetObjResult(interp, Tcl_NewIntObj(match)); + return TCL_OK; +} + +/* + *---------------------------------------------------------------------- + * + * StringLastCmd -- + * + * This procedure is invoked to process the "string last" Tcl command. + * See the user documentation for details on what it does. Note that this + * command only functions correctly on properly formed Tcl UTF strings. + * + * Results: + * A standard Tcl result. + * + * Side effects: + * See the user documentation. + * + *---------------------------------------------------------------------- + */ - if ((enum options) index == STR_EQUAL) { - Tcl_SetObjResult(interp, Tcl_NewBooleanObj(match ? 0 : 1)); - } else { - Tcl_SetObjResult(interp, Tcl_NewIntObj( - (match > 0) ? 1 : (match < 0) ? -1 : 0)); - } - break; +static int +StringLastCmd( + ClientData dummy, /* Not used. */ + Tcl_Interp *interp, /* Current interpreter. */ + int objc, /* Number of arguments. */ + Tcl_Obj *const objv[]) /* Argument objects. */ +{ + Tcl_UniChar *ustring1, *ustring2, *p; + int match, start, length1, length2; + + if (objc < 4 || objc > 5) { + Tcl_WrongNumArgs(interp, 2, objv, + "needleString haystackString ?startIndex?"); + return TCL_ERROR; } - case STR_FIRST: { - Tcl_UniChar *ustring1, *ustring2; - int match, start; - if (objc < 4 || objc > 5) { - Tcl_WrongNumArgs(interp, 2, objv, - "needleString haystackString ?startIndex?"); - return TCL_ERROR; - } + /* + * We are searching string2 for the sequence string1. + */ + match = -1; + start = 0; + length2 = -1; + + ustring1 = Tcl_GetUnicodeFromObj(objv[2], &length1); + ustring2 = Tcl_GetUnicodeFromObj(objv[3], &length2); + + if (objc == 5) { /* - * We are searching string2 for the sequence string1. + * If a startIndex is specified, we will need to restrict the string + * range to that char index in the string */ - match = -1; - start = 0; - length2 = -1; - - ustring1 = Tcl_GetUnicodeFromObj(objv[2], &length1); - ustring2 = Tcl_GetUnicodeFromObj(objv[3], &length2); + if (TclGetIntForIndexM(interp, objv[4], length2-1, &start) != TCL_OK){ + return TCL_ERROR; + } + if (start < 0) { + goto str_last_done; + } else if (start < length2) { + p = ustring2 + start + 1 - length1; + } else { + p = ustring2 + length2 - length1; + } + } else { + p = ustring2 + length2 - length1; + } - if (objc == 5) { + if (length1 > 0) { + for (; p >= ustring2; p--) { /* - * If a startIndex is specified, we will need to fast forward to - * that point in the string before we think about a match. + * Scan backwards to find the first character. */ - if (TclGetIntForIndexM(interp, objv[4], length2 - 1, - &start) != TCL_OK) { - return TCL_ERROR; - } - if (start >= length2) { - goto str_first_done; - } else if (start > 0) { - ustring2 += start; - length2 -= start; - } else if (start < 0) { - /* - * Invalid start index mapped to string start; Bug #423581 - */ - - start = 0; + if ((*p == *ustring1) && !memcmp(ustring1, p, + sizeof(Tcl_UniChar) * (size_t)length1)) { + match = p - ustring2; + break; } } + } + + str_last_done: + Tcl_SetObjResult(interp, Tcl_NewIntObj(match)); + return TCL_OK; +} + +/* + *---------------------------------------------------------------------- + * + * StringIndexCmd -- + * + * This procedure is invoked to process the "string index" Tcl command. + * See the user documentation for details on what it does. Note that this + * command only functions correctly on properly formed Tcl UTF strings. + * + * Results: + * A standard Tcl result. + * + * Side effects: + * See the user documentation. + * + *---------------------------------------------------------------------- + */ - if (length1 > 0) { - register Tcl_UniChar *p, *end; +static int +StringIndexCmd( + ClientData dummy, /* Not used. */ + Tcl_Interp *interp, /* Current interpreter. */ + int objc, /* Number of arguments. */ + Tcl_Obj *const objv[]) /* Argument objects. */ +{ + int length, index; - end = ustring2 + length2 - length1 + 1; - for (p = ustring2; p < end; p++) { - /* - * Scan forward to find the first character. - */ - if ((*p == *ustring1) && (TclUniCharNcmp(ustring1, p, - (unsigned long) length1) == 0)) { - match = p - ustring2; - break; - } - } - } + if (objc != 4) { + Tcl_WrongNumArgs(interp, 2, objv, "string charIndex"); + return TCL_ERROR; + } - /* - * Compute the character index of the matching string by counting the - * number of characters before the match. - */ + /* + * If we have a ByteArray object, avoid indexing in the Utf string since + * the byte array contains one byte per character. Otherwise, use the + * Unicode string rep to get the index'th char. + */ - if ((match != -1) && (objc == 5)) { - match += start; - } + if (objv[2]->typePtr == &tclByteArrayType) { + const unsigned char *string = + Tcl_GetByteArrayFromObj(objv[2], &length); - str_first_done: - Tcl_SetObjResult(interp, Tcl_NewIntObj(match)); - break; - } - case STR_INDEX: { - if (objc != 4) { - Tcl_WrongNumArgs(interp, 2, objv, "string charIndex"); + if (TclGetIntForIndexM(interp, objv[3], length-1, &index) != TCL_OK){ return TCL_ERROR; } - + if ((index >= 0) && (index < length)) { + Tcl_SetObjResult(interp, Tcl_NewByteArrayObj(string + index, 1)); + } + } else { /* - * If we have a ByteArray object, avoid indexing in the Utf string - * since the byte array contains one byte per character. Otherwise, - * use the Unicode string rep to get the index'th char. + * Get Unicode char length to calulate what 'end' means. */ - if (objv[2]->typePtr == &tclByteArrayType) { - string1 = (char *) Tcl_GetByteArrayFromObj(objv[2], &length1); - - if (TclGetIntForIndexM(interp, objv[3], length1 - 1, - &index) != TCL_OK) { - return TCL_ERROR; - } - if ((index >= 0) && (index < length1)) { - Tcl_SetObjResult(interp, Tcl_NewByteArrayObj( - (unsigned char *)(&string1[index]), 1)); - } - } else { - /* - * Get Unicode char length to calulate what 'end' means. - */ - - length1 = Tcl_GetCharLength(objv[2]); + length = Tcl_GetCharLength(objv[2]); - if (TclGetIntForIndexM(interp, objv[3], length1 - 1, - &index) != TCL_OK) { - return TCL_ERROR; - } - if ((index >= 0) && (index < length1)) { - char buf[TCL_UTF_MAX]; - Tcl_UniChar ch; + if (TclGetIntForIndexM(interp, objv[3], length-1, &index) != TCL_OK){ + return TCL_ERROR; + } + if ((index >= 0) && (index < length)) { + char buf[TCL_UTF_MAX]; + Tcl_UniChar ch; - ch = Tcl_GetUniChar(objv[2], index); - length1 = Tcl_UniCharToUtf(ch, buf); - Tcl_SetObjResult(interp, Tcl_NewStringObj(buf, length1)); - } + ch = Tcl_GetUniChar(objv[2], index); + length = Tcl_UniCharToUtf(ch, buf); + Tcl_SetObjResult(interp, Tcl_NewStringObj(buf, length)); } - break; } - case STR_IS: { - char *end, *stop; - Tcl_UniChar ch; + return TCL_OK; +} + +/* + *---------------------------------------------------------------------- + * + * StringIsCmd -- + * + * This procedure is invoked to process the "string is" Tcl command. See + * the user documentation for details on what it does. Note that this + * command only functions correctly on properly formed Tcl UTF strings. + * + * Results: + * A standard Tcl result. + * + * Side effects: + * See the user documentation. + * + *---------------------------------------------------------------------- + */ - /* - * The UniChar comparison function - */ +static int +StringIsCmd( + ClientData dummy, /* Not used. */ + Tcl_Interp *interp, /* Current interpreter. */ + int objc, /* Number of arguments. */ + Tcl_Obj *const objv[]) /* Argument objects. */ +{ + const char *string1, *string2, *end, *stop; + Tcl_UniChar ch; + int (*chcomp)(int) = NULL; /* The UniChar comparison function. */ + int i, failat = 0, result = 1, strict = 0, index, length1, length2; + Tcl_Obj *objPtr, *failVarObj = NULL; + Tcl_WideInt w; + + static const char *isOptions[] = { + "alnum", "alpha", "ascii", "control", + "boolean", "digit", "double", "false", + "graph", "integer", "list", "lower", + "print", "punct", "space", "true", + "upper", "wideinteger", "wordchar", "xdigit", + NULL + }; + enum isOptions { + STR_IS_ALNUM, STR_IS_ALPHA, STR_IS_ASCII, STR_IS_CONTROL, + STR_IS_BOOL, STR_IS_DIGIT, STR_IS_DOUBLE, STR_IS_FALSE, + STR_IS_GRAPH, STR_IS_INT, STR_IS_LIST, STR_IS_LOWER, + STR_IS_PRINT, STR_IS_PUNCT, STR_IS_SPACE, STR_IS_TRUE, + STR_IS_UPPER, STR_IS_WIDE, STR_IS_WORD, STR_IS_XDIGIT + }; - int (*chcomp)(int) = NULL; - int i, failat = 0, result = 1, strict = 0; - Tcl_Obj *objPtr, *failVarObj = NULL; - Tcl_WideInt w; - - static CONST char *isOptions[] = { - "alnum", "alpha", "ascii", "control", - "boolean", "digit", "double", "false", - "graph", "integer", "list", "lower", - "print", "punct", "space", "true", - "upper", "wideinteger", "wordchar", "xdigit", - NULL - }; - enum isOptions { - STR_IS_ALNUM, STR_IS_ALPHA, STR_IS_ASCII, STR_IS_CONTROL, - STR_IS_BOOL, STR_IS_DIGIT, STR_IS_DOUBLE, STR_IS_FALSE, - STR_IS_GRAPH, STR_IS_INT, STR_IS_LIST, STR_IS_LOWER, - STR_IS_PRINT, STR_IS_PUNCT, STR_IS_SPACE, STR_IS_TRUE, - STR_IS_UPPER, STR_IS_WIDE, STR_IS_WORD, STR_IS_XDIGIT - }; + if (objc < 4 || objc > 7) { + Tcl_WrongNumArgs(interp, 2, objv, + "class ?-strict? ?-failindex var? str"); + return TCL_ERROR; + } + if (Tcl_GetIndexFromObj(interp, objv[2], isOptions, "class", 0, + &index) != TCL_OK) { + return TCL_ERROR; + } - if (objc < 4 || objc > 7) { - Tcl_WrongNumArgs(interp, 2, objv, - "class ?-strict? ?-failindex var? str"); - return TCL_ERROR; - } - if (Tcl_GetIndexFromObj(interp, objv[2], isOptions, "class", 0, - &index) != TCL_OK) { - return TCL_ERROR; - } - if (objc != 4) { - for (i = 3; i < objc-1; i++) { - string2 = TclGetStringFromObj(objv[i], &length2); - if ((length2 > 1) && - strncmp(string2, "-strict", (size_t) length2) == 0) { - strict = 1; - } else if ((length2 > 1) && - strncmp(string2, "-failindex", (size_t)length2) == 0){ - if (i+1 >= objc-1) { - Tcl_WrongNumArgs(interp, 3, objv, - "?-strict? ?-failindex var? str"); - return TCL_ERROR; - } - failVarObj = objv[++i]; - } else { - Tcl_AppendResult(interp, "bad option \"", string2, - "\": must be -strict or -failindex", NULL); + if (objc != 4) { + for (i = 3; i < objc-1; i++) { + string2 = TclGetStringFromObj(objv[i], &length2); + if ((length2 > 1) && + strncmp(string2, "-strict", (size_t) length2) == 0) { + strict = 1; + } else if ((length2 > 1) && + strncmp(string2, "-failindex", (size_t)length2) == 0){ + if (i+1 >= objc-1) { + Tcl_WrongNumArgs(interp, 3, objv, + "?-strict? ?-failindex var? str"); return TCL_ERROR; } + failVarObj = objv[++i]; + } else { + Tcl_AppendResult(interp, "bad option \"", string2, + "\": must be -strict or -failindex", NULL); + return TCL_ERROR; } } + } - /* - * We get the objPtr so that we can short-cut for some classes by - * checking the object type (int and double), but we need the string - * otherwise, because we don't want any conversion of type occuring - * (as, for example, Tcl_Get*FromObj would do - */ + /* + * We get the objPtr so that we can short-cut for some classes by checking + * the object type (int and double), but we need the string otherwise, + * because we don't want any conversion of type occuring (as, for example, + * Tcl_Get*FromObj would do). + */ - objPtr = objv[objc-1]; - string1 = TclGetStringFromObj(objPtr, &length1); - if (length1 == 0 && index != STR_IS_LIST) { - if (strict) { - result = 0; - } - goto str_is_done; + objPtr = objv[objc-1]; + string1 = TclGetStringFromObj(objPtr, &length1); + if (length1 == 0 && index != STR_IS_LIST) { + if (strict) { + result = 0; } - end = string1 + length1; + goto str_is_done; + } + end = string1 + length1; - /* - * When entering here, result == 1 and failat == 0 - */ + /* + * When entering here, result == 1 and failat == 0. + */ - switch ((enum isOptions) index) { - case STR_IS_ALNUM: - chcomp = Tcl_UniCharIsAlnum; - break; - case STR_IS_ALPHA: - chcomp = Tcl_UniCharIsAlpha; - break; - case STR_IS_ASCII: - chcomp = UniCharIsAscii; - break; - case STR_IS_BOOL: - case STR_IS_TRUE: - case STR_IS_FALSE: - if (TCL_OK != Tcl_ConvertToType(NULL, objPtr, &tclBooleanType)) { - result = 0; - } else if ((((enum isOptions) index == STR_IS_TRUE) && - objPtr->internalRep.longValue == 0) || - (((enum isOptions) index == STR_IS_FALSE) && - objPtr->internalRep.longValue != 0)) { - result = 0; - } - break; - case STR_IS_CONTROL: - chcomp = Tcl_UniCharIsControl; - break; - case STR_IS_DIGIT: - chcomp = Tcl_UniCharIsDigit; - break; - case STR_IS_DOUBLE: { - /* TODO */ - if ((objPtr->typePtr == &tclDoubleType) || - (objPtr->typePtr == &tclIntType) || + switch ((enum isOptions) index) { + case STR_IS_ALNUM: + chcomp = Tcl_UniCharIsAlnum; + break; + case STR_IS_ALPHA: + chcomp = Tcl_UniCharIsAlpha; + break; + case STR_IS_ASCII: + chcomp = UniCharIsAscii; + break; + case STR_IS_BOOL: + case STR_IS_TRUE: + case STR_IS_FALSE: + if (TCL_OK != Tcl_ConvertToType(NULL, objPtr, &tclBooleanType)) { + result = 0; + } else if ((((enum isOptions) index == STR_IS_TRUE) && + objPtr->internalRep.longValue == 0) + || (((enum isOptions) index == STR_IS_FALSE) && + objPtr->internalRep.longValue != 0)) { + result = 0; + } + break; + case STR_IS_CONTROL: + chcomp = Tcl_UniCharIsControl; + break; + case STR_IS_DIGIT: + chcomp = Tcl_UniCharIsDigit; + break; + case STR_IS_DOUBLE: { + /* TODO */ + if ((objPtr->typePtr == &tclDoubleType) || + (objPtr->typePtr == &tclIntType) || #ifndef NO_WIDE_TYPE - (objPtr->typePtr == &tclWideIntType) || + (objPtr->typePtr == &tclWideIntType) || #endif - (objPtr->typePtr == &tclBignumType)) { - break; - } - if (TclParseNumber(NULL, objPtr, NULL, NULL, -1, - (const char **) &stop, 0) != TCL_OK) { + (objPtr->typePtr == &tclBignumType)) { + break; + } + if (TclParseNumber(NULL, objPtr, NULL, NULL, -1, + (const char **) &stop, 0) != TCL_OK) { + result = 0; + failat = 0; + } else { + failat = stop - string1; + if (stop < end) { result = 0; - failat = 0; - } else { - failat = stop - string1; - if (stop < end) { - result = 0; - TclFreeIntRep(objPtr); - objPtr->typePtr = NULL; - } + TclFreeIntRep(objPtr); + objPtr->typePtr = NULL; } + } + break; + } + case STR_IS_GRAPH: + chcomp = Tcl_UniCharIsGraph; + break; + case STR_IS_INT: + case STR_IS_WIDE: + if ((((enum isOptions) index) == STR_IS_INT) + && (TCL_OK == TclGetIntFromObj(NULL, objPtr, &i))) { break; } - case STR_IS_GRAPH: - chcomp = Tcl_UniCharIsGraph; + if ((((enum isOptions) index) == STR_IS_WIDE) + && (TCL_OK == Tcl_GetWideIntFromObj(NULL, objPtr, &w))) { break; - case STR_IS_INT: - case STR_IS_WIDE: - if ((((enum isOptions) index) == STR_IS_INT) - && (TCL_OK == TclGetIntFromObj(NULL, objPtr, &i))) { - break; - } - if ((((enum isOptions) index) == STR_IS_WIDE) - && (TCL_OK == Tcl_GetWideIntFromObj(NULL, objPtr, &w))) { - break; - } + } - result = 0; + result = 0; + + if (failVarObj == NULL) { + /* + * Don't bother computing the failure point if we're not going to + * return it. + */ - if (failVarObj == NULL) { + break; + } + if (TclParseNumber(NULL, objPtr, NULL, NULL, -1, + (const char **) &stop, TCL_PARSE_INTEGER_ONLY) == TCL_OK) { + if (stop == end) { /* - * Don't bother computing the failure point if we're not - * going to return it. + * Entire string parses as an integer, but rejected by + * Tcl_Get(Wide)IntFromObj() so we must have overflowed the + * target type, and our convention is to return failure at + * index -1 in that situation. */ - break; - } - if (TclParseNumber(NULL, objPtr, NULL, NULL, -1, - (const char **) &stop, TCL_PARSE_INTEGER_ONLY) == TCL_OK) { - if (stop == end) { - /* - * Entire string parses as an integer, but rejected by - * Tcl_Get(Wide)IntFromObj() so we must have overflowed - * the target type, and our convention is to return - * failure at index -1 in that situation. - */ - failat = -1; - } else { - /* - * Some prefix parsed as an integer, but not the whole - * string, so return failure index as the point where - * parsing stopped. Clear out the internal rep, since - * keeping it would leave *objPtr in an inconsistent - * state. - */ - failat = stop - string1; - TclFreeIntRep(objPtr); - objPtr->typePtr = NULL; - } + + failat = -1; } else { - /* No prefix is a valid integer. Fail at beginning. */ - failat = 0; + /* + * Some prefix parsed as an integer, but not the whole string, + * so return failure index as the point where parsing stopped. + * Clear out the internal rep, since keeping it would leave + * *objPtr in an inconsistent state. + */ + + failat = stop - string1; + TclFreeIntRep(objPtr); + objPtr->typePtr = NULL; } - break; - case STR_IS_LIST: + } else { /* - * We ignore the strictness here, since empty strings are always - * well-formed lists. + * No prefix is a valid integer. Fail at beginning. */ - if (TCL_OK == TclListObjLength(NULL, objPtr, &length2)) { - break; - } + failat = 0; + } + break; + case STR_IS_LIST: + /* + * We ignore the strictness here, since empty strings are always + * well-formed lists. + */ - if (failVarObj != NULL) { - /* - * Need to figure out where the list parsing failed, which is - * fairly expensive. This is adapted from the core of - * SetListFromAny(). - */ + if (TCL_OK == TclListObjLength(NULL, objPtr, &length2)) { + break; + } + + if (failVarObj != NULL) { + /* + * Need to figure out where the list parsing failed, which is + * fairly expensive. This is adapted from the core of + * SetListFromAny(). + */ - const char *elemStart, *nextElem, *limit; - int lenRemain, elemSize, hasBrace; - register const char *p; + const char *elemStart, *nextElem, *limit; + int lenRemain, elemSize, hasBrace; + register const char *p; - limit = string1 + length1; - failat = -1; - for (p=string1, lenRemain=length1; lenRemain > 0; - p = nextElem, lenRemain = (limit-nextElem)) { - if (TCL_ERROR == TclFindElement(NULL, p, lenRemain, - &elemStart, &nextElem, &elemSize, &hasBrace)) { - /* - * This is the simplest way of getting the number of - * characters parsed. Note that this is not the same - * as the number of bytes when parsing strings with - * non-ASCII characters in them. - */ + limit = string1 + length1; + failat = -1; + for (p=string1, lenRemain=length1; lenRemain > 0; + p = nextElem, lenRemain = (limit-nextElem)) { + if (TCL_ERROR == TclFindElement(NULL, p, lenRemain, + &elemStart, &nextElem, &elemSize, &hasBrace)) { + /* + * This is the simplest way of getting the number of + * characters parsed. Note that this is not the same as + * the number of bytes when parsing strings with non-ASCII + * characters in them. + */ - Tcl_Obj *tmpStr; + Tcl_Obj *tmpStr; - /* - * Skip leading spaces first. This is only really an - * issue if it is the first "element" that has the - * failure. - */ + /* + * Skip leading spaces first. This is only really an issue + * if it is the first "element" that has the failure. + */ - while (isspace(UCHAR(*p))) { /* INTL: ? */ - p++; - } - tmpStr = Tcl_NewStringObj(string1, p-string1); - failat = Tcl_GetCharLength(tmpStr); - TclDecrRefCount(tmpStr); - break; + while (isspace(UCHAR(*p))) { /* INTL: ? */ + p++; } - } - } - result = 0; - break; - case STR_IS_LOWER: - chcomp = Tcl_UniCharIsLower; - break; - case STR_IS_PRINT: - chcomp = Tcl_UniCharIsPrint; - break; - case STR_IS_PUNCT: - chcomp = Tcl_UniCharIsPunct; - break; - case STR_IS_SPACE: - chcomp = Tcl_UniCharIsSpace; - break; - case STR_IS_UPPER: - chcomp = Tcl_UniCharIsUpper; - break; - case STR_IS_WORD: - chcomp = Tcl_UniCharIsWordChar; - break; - case STR_IS_XDIGIT: - for (; string1 < end; string1++, failat++) { - /* INTL: We assume unicode is bad for this class. */ - if ((*((unsigned char *)string1) >= 0xC0) || - !isxdigit(*(unsigned char *)string1)) { - result = 0; + tmpStr = Tcl_NewStringObj(string1, p-string1); + failat = Tcl_GetCharLength(tmpStr); + TclDecrRefCount(tmpStr); break; } } - break; } - if (chcomp != NULL) { - for (; string1 < end; string1 += length2, failat++) { - length2 = TclUtfToUniChar(string1, &ch); - if (!chcomp(ch)) { - result = 0; - break; - } + result = 0; + break; + case STR_IS_LOWER: + chcomp = Tcl_UniCharIsLower; + break; + case STR_IS_PRINT: + chcomp = Tcl_UniCharIsPrint; + break; + case STR_IS_PUNCT: + chcomp = Tcl_UniCharIsPunct; + break; + case STR_IS_SPACE: + chcomp = Tcl_UniCharIsSpace; + break; + case STR_IS_UPPER: + chcomp = Tcl_UniCharIsUpper; + break; + case STR_IS_WORD: + chcomp = Tcl_UniCharIsWordChar; + break; + case STR_IS_XDIGIT: + for (; string1 < end; string1++, failat++) { + /* INTL: We assume unicode is bad for this class. */ + if ((*((unsigned char *)string1) >= 0xC0) || + !isxdigit(*(unsigned char *)string1)) { + result = 0; + break; + } + } + break; + } + if (chcomp != NULL) { + for (; string1 < end; string1 += length2, failat++) { + length2 = TclUtfToUniChar(string1, &ch); + if (!chcomp(ch)) { + result = 0; + break; } } + } - /* - * Only set the failVarObj when we will return 0 and we have indicated - * a valid fail index (>= 0). - */ + /* + * Only set the failVarObj when we will return 0 and we have indicated a + * valid fail index (>= 0). + */ - str_is_done: - if ((result == 0) && (failVarObj != NULL) && - Tcl_ObjSetVar2(interp, failVarObj, NULL, Tcl_NewIntObj(failat), - TCL_LEAVE_ERR_MSG) == NULL) { - return TCL_ERROR; - } - Tcl_SetObjResult(interp, Tcl_NewBooleanObj(result)); - break; + str_is_done: + if ((result == 0) && (failVarObj != NULL) && + Tcl_ObjSetVar2(interp, failVarObj, NULL, Tcl_NewIntObj(failat), + TCL_LEAVE_ERR_MSG) == NULL) { + return TCL_ERROR; + } + Tcl_SetObjResult(interp, Tcl_NewBooleanObj(result)); + return TCL_OK; +} + +static int +UniCharIsAscii( + int character) +{ + return (character >= 0) && (character < 0x80); +} + +/* + *---------------------------------------------------------------------- + * + * StringMapCmd -- + * + * This procedure is invoked to process the "string map" Tcl command. See + * the user documentation for details on what it does. Note that this + * command only functions correctly on properly formed Tcl UTF strings. + * + * Results: + * A standard Tcl result. + * + * Side effects: + * See the user documentation. + * + *---------------------------------------------------------------------- + */ + +static int +StringMapCmd( + ClientData dummy, /* Not used. */ + Tcl_Interp *interp, /* Current interpreter. */ + int objc, /* Number of arguments. */ + Tcl_Obj *const objv[]) /* Argument objects. */ +{ + int length1, length2, mapElemc, index; + int nocase = 0, mapWithDict = 0, copySource = 0; + Tcl_Obj **mapElemv, *sourceObj, *resultPtr; + Tcl_UniChar *ustring1, *ustring2, *p, *end; + int (*strCmpFn)(const Tcl_UniChar*, const Tcl_UniChar*, unsigned long); + + if (objc < 4 || objc > 5) { + Tcl_WrongNumArgs(interp, 2, objv, "?-nocase? charMap string"); + return TCL_ERROR; } - case STR_LAST: { - Tcl_UniChar *ustring1, *ustring2, *p; - int match, start; - if (objc < 4 || objc > 5) { - Tcl_WrongNumArgs(interp, 2, objv, - "needleString haystackString ?startIndex?"); + if (objc == 5) { + const char *string = TclGetStringFromObj(objv[2], &length2); + + if ((length2 > 1) && + strncmp(string, "-nocase", (size_t) length2) == 0) { + nocase = 1; + } else { + Tcl_AppendResult(interp, "bad option \"", string, + "\": must be -nocase", NULL); return TCL_ERROR; } + } - /* - * We are searching string2 for the sequence string1. - */ + /* + * This test is tricky, but has to be that way or you get other strange + * inconsistencies (see test string-10.20 for illustration why!) + */ - match = -1; - start = 0; - length2 = -1; + if (objv[objc-2]->typePtr == &tclDictType && objv[objc-2]->bytes == NULL){ + int i, done; + Tcl_DictSearch search; - ustring1 = Tcl_GetUnicodeFromObj(objv[2], &length1); - ustring2 = Tcl_GetUnicodeFromObj(objv[3], &length2); + /* + * We know the type exactly, so all dict operations will succeed for + * sure. This shortens this code quite a bit. + */ - if (objc == 5) { + Tcl_DictObjSize(interp, objv[objc-2], &mapElemc); + if (mapElemc == 0) { /* - * If a startIndex is specified, we will need to restrict the - * string range to that char index in the string + * Empty charMap, just return whatever string was given. */ - if (TclGetIntForIndexM(interp, objv[4], length2 - 1, - &start) != TCL_OK) { - return TCL_ERROR; - } - if (start < 0) { - goto str_last_done; - } else if (start < length2) { - p = ustring2 + start + 1 - length1; - } else { - p = ustring2 + length2 - length1; - } - } else { - p = ustring2 + length2 - length1; + Tcl_SetObjResult(interp, objv[objc-1]); + return TCL_OK; } - if (length1 > 0) { - for (; p >= ustring2; p--) { - /* - * Scan backwards to find the first character. - */ + mapElemc *= 2; + mapWithDict = 1; - if ((*p == *ustring1) && !memcmp(ustring1, p, - sizeof(Tcl_UniChar) * (size_t)length1)) { - match = p - ustring2; - break; - } - } - } + /* + * Copy the dictionary out into an array; that's the easiest way to + * adapt this code... + */ - str_last_done: - Tcl_SetObjResult(interp, Tcl_NewIntObj(match)); - break; - } - case STR_BYTELENGTH: - case STR_LENGTH: - if (objc != 3) { - Tcl_WrongNumArgs(interp, 2, objv, "string"); + mapElemv = (Tcl_Obj **) + TclStackAlloc(interp, sizeof(Tcl_Obj *) * mapElemc); + Tcl_DictObjFirst(interp, objv[objc-2], &search, mapElemv+0, + mapElemv+1, &done); + for (i=2 ; i<mapElemc ; i+=2) { + Tcl_DictObjNext(&search, mapElemv+i, mapElemv+i+1, &done); + } + Tcl_DictObjDone(&search); + } else { + if (TclListObjGetElements(interp, objv[objc-2], &mapElemc, + &mapElemv) != TCL_OK) { return TCL_ERROR; } + if (mapElemc == 0) { + /* + * empty charMap, just return whatever string was given. + */ - if ((enum options) index == STR_BYTELENGTH) { - (void) TclGetStringFromObj(objv[2], &length1); - } else { + Tcl_SetObjResult(interp, objv[objc-1]); + return TCL_OK; + } else if (mapElemc & 1) { /* - * If we have a ByteArray object, avoid recomputing the string - * since the byte array contains one byte per character. - * Otherwise, use the Unicode string rep to calculate the length. + * The charMap must be an even number of key/value items. */ - if (objv[2]->typePtr == &tclByteArrayType) { - (void) Tcl_GetByteArrayFromObj(objv[2], &length1); - } else { - length1 = Tcl_GetCharLength(objv[2]); - } - } - Tcl_SetObjResult(interp, Tcl_NewIntObj(length1)); - break; - case STR_MAP: { - int mapElemc, nocase = 0, mapWithDict = 0, copySource = 0; - Tcl_Obj **mapElemv, *sourceObj, *resultPtr; - Tcl_UniChar *ustring1, *ustring2, *p, *end; - int (*strCmpFn)(CONST Tcl_UniChar*, CONST Tcl_UniChar*, unsigned long); - - if (objc < 4 || objc > 5) { - Tcl_WrongNumArgs(interp, 2, objv, "?-nocase? charMap string"); + Tcl_SetObjResult(interp, + Tcl_NewStringObj("char map list unbalanced", -1)); return TCL_ERROR; } + } - if (objc == 5) { - string2 = TclGetStringFromObj(objv[2], &length2); - if ((length2 > 1) && - strncmp(string2, "-nocase", (size_t) length2) == 0) { - nocase = 1; - } else { - Tcl_AppendResult(interp, "bad option \"", string2, - "\": must be -nocase", NULL); - return TCL_ERROR; - } - } + /* + * Take a copy of the source string object if it is the same as the map + * string to cut out nasty sharing crashes. [Bug 1018562] + */ + if (objv[objc-2] == objv[objc-1]) { + sourceObj = Tcl_DuplicateObj(objv[objc-1]); + copySource = 1; + } else { + sourceObj = objv[objc-1]; + } + ustring1 = Tcl_GetUnicodeFromObj(sourceObj, &length1); + if (length1 == 0) { /* - * This test is tricky, but has to be that way or you get other - * strange inconsistencies (see test string-10.20 for illustration - * why!) + * Empty input string, just stop now. */ - if (objv[objc-2]->typePtr == &tclDictType && - objv[objc-2]->bytes == NULL) { - int i, done; - Tcl_DictSearch search; + goto done; + } + end = ustring1 + length1; - /* - * We know the type exactly, so all dict operations will succeed - * for sure. This shortens this code quite a bit. - */ + strCmpFn = (nocase ? Tcl_UniCharNcasecmp : Tcl_UniCharNcmp); - Tcl_DictObjSize(interp, objv[objc-2], &mapElemc); - if (mapElemc == 0) { - /* - * Empty charMap, just return whatever string was given. - */ + /* + * Force result to be Unicode + */ - Tcl_SetObjResult(interp, objv[objc-1]); - return TCL_OK; - } + resultPtr = Tcl_NewUnicodeObj(ustring1, 0); + + if (mapElemc == 2) { + /* + * Special case for one map pair which avoids the extra for loop and + * extra calls to get Unicode data. The algorithm is otherwise + * identical to the multi-pair case. This will be >30% faster on + * larger strings. + */ - mapElemc *= 2; - mapWithDict = 1; + int mapLen; + Tcl_UniChar *mapString, u2lc; + ustring2 = Tcl_GetUnicodeFromObj(mapElemv[0], &length2); + p = ustring1; + if ((length2 > length1) || (length2 == 0)) { /* - * Copy the dictionary out into an array; that's the easiest way - * to adapt this code... + * Match string is either longer than input or empty. */ - mapElemv = (Tcl_Obj **) - TclStackAlloc(interp, sizeof(Tcl_Obj *) * mapElemc); - Tcl_DictObjFirst(interp, objv[objc-2], &search, mapElemv+0, - mapElemv+1, &done); - for (i=2 ; i<mapElemc ; i+=2) { - Tcl_DictObjNext(&search, mapElemv+i, mapElemv+i+1, &done); - } - Tcl_DictObjDone(&search); + ustring1 = end; } else { - if (TclListObjGetElements(interp, objv[objc-2], - &mapElemc, &mapElemv) != TCL_OK) { - return TCL_ERROR; + mapString = Tcl_GetUnicodeFromObj(mapElemv[1], &mapLen); + u2lc = (nocase ? Tcl_UniCharToLower(*ustring2) : 0); + for (; ustring1 < end; ustring1++) { + if (((*ustring1 == *ustring2) || + (nocase&&Tcl_UniCharToLower(*ustring1)==u2lc)) && + (length2==1 || strCmpFn(ustring1, ustring2, + (unsigned long) length2) == 0)) { + if (p != ustring1) { + Tcl_AppendUnicodeToObj(resultPtr, p, ustring1-p); + p = ustring1 + length2; + } else { + p += length2; + } + ustring1 = p - 1; + + Tcl_AppendUnicodeToObj(resultPtr, mapString, mapLen); + } } - if (mapElemc == 0) { - /* - * empty charMap, just return whatever string was given. - */ + } + } else { + Tcl_UniChar **mapStrings, *u2lc = NULL; + int *mapLens; - Tcl_SetObjResult(interp, objv[objc-1]); - return TCL_OK; - } else if (mapElemc & 1) { + /* + * Precompute pointers to the unicode string and length. This saves us + * repeated function calls later, significantly speeding up the + * algorithm. We only need the lowercase first char in the nocase + * case. + */ + + mapStrings = (Tcl_UniChar **) TclStackAlloc(interp, + mapElemc * 2 * sizeof(Tcl_UniChar *)); + mapLens = (int *) TclStackAlloc(interp, mapElemc * 2 * sizeof(int)); + if (nocase) { + u2lc = (Tcl_UniChar *) TclStackAlloc(interp, + mapElemc * sizeof(Tcl_UniChar)); + } + for (index = 0; index < mapElemc; index++) { + mapStrings[index] = Tcl_GetUnicodeFromObj(mapElemv[index], + mapLens+index); + if (nocase && ((index % 2) == 0)) { + u2lc[index/2] = Tcl_UniCharToLower(*mapStrings[index]); + } + } + for (p = ustring1; ustring1 < end; ustring1++) { + for (index = 0; index < mapElemc; index += 2) { /* - * The charMap must be an even number of key/value items. + * Get the key string to match on. */ - Tcl_SetObjResult(interp, Tcl_NewStringObj( - "char map list unbalanced", -1)); - return TCL_ERROR; + ustring2 = mapStrings[index]; + length2 = mapLens[index]; + if ((length2 > 0) && ((*ustring1 == *ustring2) || (nocase && + (Tcl_UniCharToLower(*ustring1) == u2lc[index/2]))) && + /* Restrict max compare length. */ + (end-ustring1 >= length2) && ((length2 == 1) || + !strCmpFn(ustring2, ustring1, (unsigned) length2))) { + if (p != ustring1) { + /* + * Put the skipped chars onto the result first. + */ + + Tcl_AppendUnicodeToObj(resultPtr, p, ustring1-p); + p = ustring1 + length2; + } else { + p += length2; + } + + /* + * Adjust len to be full length of matched string. + */ + + ustring1 = p - 1; + + /* + * Append the map value to the unicode string. + */ + + Tcl_AppendUnicodeToObj(resultPtr, + mapStrings[index+1], mapLens[index+1]); + break; + } } } - + if (nocase) { + TclStackFree(interp, u2lc); + } + TclStackFree(interp, mapLens); + TclStackFree(interp, mapStrings); + } + if (p != ustring1) { /* - * Take a copy of the source string object if it is the same as the - * map string to cut out nasty sharing crashes. [Bug 1018562] + * Put the rest of the unmapped chars onto result. */ - if (objv[objc-2] == objv[objc-1]) { - sourceObj = Tcl_DuplicateObj(objv[objc-1]); - copySource = 1; + Tcl_AppendUnicodeToObj(resultPtr, p, ustring1 - p); + } + Tcl_SetObjResult(interp, resultPtr); + done: + if (mapWithDict) { + TclStackFree(interp, mapElemv); + } + if (copySource) { + Tcl_DecrRefCount(sourceObj); + } + return TCL_OK; +} + +/* + *---------------------------------------------------------------------- + * + * StringMatchCmd -- + * + * This procedure is invoked to process the "string match" Tcl command. + * See the user documentation for details on what it does. Note that this + * command only functions correctly on properly formed Tcl UTF strings. + * + * Results: + * A standard Tcl result. + * + * Side effects: + * See the user documentation. + * + *---------------------------------------------------------------------- + */ + +static int +StringMatchCmd( + ClientData dummy, /* Not used. */ + Tcl_Interp *interp, /* Current interpreter. */ + int objc, /* Number of arguments. */ + Tcl_Obj *const objv[]) /* Argument objects. */ +{ + Tcl_UniChar *ustring1, *ustring2; + int length1, length2, nocase = 0; + + if (objc < 4 || objc > 5) { + Tcl_WrongNumArgs(interp, 2, objv, "?-nocase? pattern string"); + return TCL_ERROR; + } + + if (objc == 5) { + const char *string = TclGetStringFromObj(objv[2], &length2); + + if ((length2 > 1) && + strncmp(string, "-nocase", (size_t) length2) == 0) { + nocase = 1; } else { - sourceObj = objv[objc-1]; + Tcl_AppendResult(interp, "bad option \"", string, + "\": must be -nocase", NULL); + return TCL_ERROR; } - ustring1 = Tcl_GetUnicodeFromObj(sourceObj, &length1); - if (length1 == 0) { - /* - * Empty input string, just stop now. - */ + } + ustring1 = Tcl_GetUnicodeFromObj(objv[objc-1], &length1); + ustring2 = Tcl_GetUnicodeFromObj(objv[objc-2], &length2); + Tcl_SetObjResult(interp, Tcl_NewBooleanObj( + TclUniCharMatch(ustring1, length1, ustring2, length2, nocase))); + return TCL_OK; +} + +/* + *---------------------------------------------------------------------- + * + * StringRangeCmd -- + * + * This procedure is invoked to process the "string range" Tcl command. + * See the user documentation for details on what it does. Note that this + * command only functions correctly on properly formed Tcl UTF strings. + * + * Results: + * A standard Tcl result. + * + * Side effects: + * See the user documentation. + * + *---------------------------------------------------------------------- + */ - if (mapWithDict) { - TclStackFree(interp, mapElemv); - } - if (copySource) { - Tcl_DecrRefCount(sourceObj); - } - break; - } - end = ustring1 + length1; +static int +StringRangeCmd( + ClientData dummy, /* Not used. */ + Tcl_Interp *interp, /* Current interpreter. */ + int objc, /* Number of arguments. */ + Tcl_Obj *const objv[]) /* Argument objects. */ +{ + const unsigned char *string; + int length, first, last; - strCmpFn = (nocase ? Tcl_UniCharNcasecmp : Tcl_UniCharNcmp); + if (objc != 5) { + Tcl_WrongNumArgs(interp, 2, objv, "string first last"); + return TCL_ERROR; + } + + /* + * If we have a ByteArray object, avoid indexing in the Utf string since + * the byte array contains one byte per character. Otherwise, use the + * Unicode string rep to get the range. + */ + if (objv[2]->typePtr == &tclByteArrayType) { + string = Tcl_GetByteArrayFromObj(objv[2], &length); + length--; + } else { /* - * Force result to be Unicode + * Get the length in actual characters. */ - resultPtr = Tcl_NewUnicodeObj(ustring1, 0); - if (mapElemc == 2) { + string = NULL; + length = Tcl_GetCharLength(objv[2]) - 1; + } + + if (TclGetIntForIndexM(interp, objv[3], length, &first) != TCL_OK || + TclGetIntForIndexM(interp, objv[4], length, &last) != TCL_OK) { + return TCL_ERROR; + } + + if (first < 0) { + first = 0; + } + if (last >= length) { + last = length; + } + if (last >= first) { + if (string != NULL) { /* - * Special case for one map pair which avoids the extra for loop - * and extra calls to get Unicode data. The algorithm is otherwise - * identical to the multi-pair case. This will be >30% faster on - * larger strings. + * Reread the string to prevent shimmering nasties. */ - int mapLen; - Tcl_UniChar *mapString, u2lc; + string = Tcl_GetByteArrayFromObj(objv[2], &length); + Tcl_SetObjResult(interp, + Tcl_NewByteArrayObj(string+first, last - first + 1)); + } else { + Tcl_SetObjResult(interp, Tcl_GetRange(objv[2], first, last)); + } + } + return TCL_OK; +} + +/* + *---------------------------------------------------------------------- + * + * StringReptCmd -- + * + * This procedure is invoked to process the "string repeat" Tcl command. + * See the user documentation for details on what it does. Note that this + * command only functions correctly on properly formed Tcl UTF strings. + * + * Results: + * A standard Tcl result. + * + * Side effects: + * See the user documentation. + * + *---------------------------------------------------------------------- + */ - ustring2 = Tcl_GetUnicodeFromObj(mapElemv[0], &length2); - p = ustring1; - if ((length2 > length1) || (length2 == 0)) { - /* - * Match string is either longer than input or empty. - */ +static int +StringReptCmd( + ClientData dummy, /* Not used. */ + Tcl_Interp *interp, /* Current interpreter. */ + int objc, /* Number of arguments. */ + Tcl_Obj *const objv[]) /* Argument objects. */ +{ + const char *string1; + char *string2; + int count, index, length1, length2; + Tcl_Obj *resultPtr; - ustring1 = end; - } else { - mapString = Tcl_GetUnicodeFromObj(mapElemv[1], &mapLen); - u2lc = (nocase ? Tcl_UniCharToLower(*ustring2) : 0); - for (; ustring1 < end; ustring1++) { - if (((*ustring1 == *ustring2) || - (nocase&&Tcl_UniCharToLower(*ustring1)==u2lc)) && - (length2==1 || strCmpFn(ustring1, ustring2, - (unsigned long) length2) == 0)) { - if (p != ustring1) { - Tcl_AppendUnicodeToObj(resultPtr, p, ustring1-p); - p = ustring1 + length2; - } else { - p += length2; - } - ustring1 = p - 1; - - Tcl_AppendUnicodeToObj(resultPtr, mapString, mapLen); - } - } - } - } else { - Tcl_UniChar **mapStrings, *u2lc = NULL; - int *mapLens; + if (objc != 4) { + Tcl_WrongNumArgs(interp, 2, objv, "string count"); + return TCL_ERROR; + } - /* - * Precompute pointers to the unicode string and length. This - * saves us repeated function calls later, significantly speeding - * up the algorithm. We only need the lowercase first char in the - * nocase case. - */ + if (TclGetIntFromObj(interp, objv[3], &count) != TCL_OK) { + return TCL_ERROR; + } - mapStrings = (Tcl_UniChar **) TclStackAlloc(interp, - (mapElemc * 2) * sizeof(Tcl_UniChar *)); - mapLens = (int *) TclStackAlloc(interp, - (mapElemc * 2) * sizeof(int)); - if (nocase) { - u2lc = (Tcl_UniChar *) TclStackAlloc(interp, - (mapElemc) * sizeof(Tcl_UniChar)); - } - for (index = 0; index < mapElemc; index++) { - mapStrings[index] = Tcl_GetUnicodeFromObj(mapElemv[index], - &(mapLens[index])); - if (nocase && ((index % 2) == 0)) { - u2lc[index/2] = Tcl_UniCharToLower(*mapStrings[index]); - } - } - for (p = ustring1; ustring1 < end; ustring1++) { - for (index = 0; index < mapElemc; index += 2) { - /* - * Get the key string to match on. - */ + /* + * Check for cases that allow us to skip copying stuff. + */ - ustring2 = mapStrings[index]; - length2 = mapLens[index]; - if ((length2 > 0) && ((*ustring1 == *ustring2) || - (nocase && (Tcl_UniCharToLower(*ustring1) == - u2lc[index/2]))) && - /* Restrict max compare length. */ - ((end - ustring1) >= length2) && - ((length2 == 1) || strCmpFn(ustring2, ustring1, - (unsigned long) length2) == 0)) { - if (p != ustring1) { - /* - * Put the skipped chars onto the result first. - */ - - Tcl_AppendUnicodeToObj(resultPtr, p, ustring1-p); - p = ustring1 + length2; - } else { - p += length2; - } + if (count == 1) { + Tcl_SetObjResult(interp, objv[2]); + goto done; + } else if (count < 1) { + goto done; + } + string1 = TclGetStringFromObj(objv[2], &length1); + if (length1 <= 0) { + goto done; + } - /* - * Adjust len to be full length of matched string. - */ + /* + * Only build up a string that has data. Instead of building it up with + * repeated appends, we just allocate the necessary space once and copy + * the string value in. Check for overflow with back-division. [Bug + * #714106] + */ - ustring1 = p - 1; + length2 = length1 * count + 1; + if ((length2-1) / count != length1) { + Tcl_SetObjResult(interp, Tcl_ObjPrintf( + "string size overflow, must be less than %d", INT_MAX)); + return TCL_ERROR; + } - /* - * Append the map value to the unicode string. - */ + /* + * Include space for the NUL. + */ - Tcl_AppendUnicodeToObj(resultPtr, - mapStrings[index+1], mapLens[index+1]); - break; - } - } - } - if (nocase) { - TclStackFree(interp, u2lc); - } - TclStackFree(interp, mapLens); - TclStackFree(interp, mapStrings); - } - if (p != ustring1) { - /* - * Put the rest of the unmapped chars onto result. - */ + string2 = attemptckalloc((size_t) length2); + if (string2 == NULL) { + /* + * Alloc failed. Note that in this case we try to do an error message + * since this is a case that's most likely when the alloc is large and + * that's easy to do with this API. Note that if we fail allocating a + * short string, this will likely keel over too (and fatally). + */ + + Tcl_SetObjResult(interp, Tcl_ObjPrintf( + "string size overflow, out of memory allocating %d bytes", + length2)); + return TCL_ERROR; + } + for (index = 0; index < count; index++) { + memcpy(string2 + (length1 * index), string1, (size_t) length1); + } + string2[length2-1] = '\0'; + + /* + * We have to directly assign this instead of using Tcl_SetStringObj (and + * indirectly TclInitStringRep) because that makes another copy of the + * data. + */ + + TclNewObj(resultPtr); + resultPtr->bytes = string2; + resultPtr->length = length2-1; + Tcl_SetObjResult(interp, resultPtr); + + done: + return TCL_OK; +} + +/* + *---------------------------------------------------------------------- + * + * StringRplcCmd -- + * + * This procedure is invoked to process the "string replace" Tcl command. + * See the user documentation for details on what it does. Note that this + * command only functions correctly on properly formed Tcl UTF strings. + * + * Results: + * A standard Tcl result. + * + * Side effects: + * See the user documentation. + * + *---------------------------------------------------------------------- + */ + +static int +StringRplcCmd( + ClientData dummy, /* Not used. */ + Tcl_Interp *interp, /* Current interpreter. */ + int objc, /* Number of arguments. */ + Tcl_Obj *const objv[]) /* Argument objects. */ +{ + Tcl_UniChar *ustring; + int first, last, length; + + if (objc < 5 || objc > 6) { + Tcl_WrongNumArgs(interp, 2, objv, "string first last ?string?"); + return TCL_ERROR; + } + + ustring = Tcl_GetUnicodeFromObj(objv[2], &length); + length--; + + if (TclGetIntForIndexM(interp, objv[3], length, &first) != TCL_OK || + TclGetIntForIndexM(interp, objv[4], length, &last) != TCL_OK){ + return TCL_ERROR; + } + + if ((last < first) || (last < 0) || (first > length)) { + Tcl_SetObjResult(interp, objv[2]); + } else { + Tcl_Obj *resultPtr; - Tcl_AppendUnicodeToObj(resultPtr, p, ustring1 - p); + if (first < 0) { + first = 0; } - if (mapWithDict) { - TclStackFree(interp, mapElemv); + + resultPtr = Tcl_NewUnicodeObj(ustring, first); + if (objc == 6) { + Tcl_AppendObjToObj(resultPtr, objv[5]); } - if (copySource) { - Tcl_DecrRefCount(sourceObj); + if (last < length) { + Tcl_AppendUnicodeToObj(resultPtr, ustring + last + 1, + length - last); } Tcl_SetObjResult(interp, resultPtr); - break; } - case STR_MATCH: { - Tcl_UniChar *ustring1, *ustring2; - int nocase = 0; + return TCL_OK; +} + +/* + *---------------------------------------------------------------------- + * + * StringRevCmd -- + * + * This procedure is invoked to process the "string reverse" Tcl command. + * See the user documentation for details on what it does. Note that this + * command only functions correctly on properly formed Tcl UTF strings. + * + * Results: + * A standard Tcl result. + * + * Side effects: + * See the user documentation. + * + *---------------------------------------------------------------------- + */ - if (objc < 4 || objc > 5) { - Tcl_WrongNumArgs(interp, 2, objv, "?-nocase? pattern string"); - return TCL_ERROR; +static int +StringRevCmd( + ClientData dummy, /* Not used. */ + Tcl_Interp *interp, /* Current interpreter. */ + int objc, /* Number of arguments. */ + Tcl_Obj *const objv[]) /* Argument objects. */ +{ + if (objc != 3) { + Tcl_WrongNumArgs(interp, 2, objv, "string"); + return TCL_ERROR; + } + + Tcl_SetObjResult(interp, TclStringObjReverse(objv[2])); + return TCL_OK; +} + +/* + *---------------------------------------------------------------------- + * + * StringStartCmd -- + * + * This procedure is invoked to process the "string wordstart" Tcl + * command. See the user documentation for details on what it does. Note + * that this command only functions correctly on properly formed Tcl UTF + * strings. + * + * Results: + * A standard Tcl result. + * + * Side effects: + * See the user documentation. + * + *---------------------------------------------------------------------- + */ + +static int +StringStartCmd( + ClientData dummy, /* Not used. */ + Tcl_Interp *interp, /* Current interpreter. */ + int objc, /* Number of arguments. */ + Tcl_Obj *const objv[]) /* Argument objects. */ +{ + Tcl_UniChar ch; + const char *p, *string; + int cur, index, length, numChars; + + if (objc != 4) { + Tcl_WrongNumArgs(interp, 2, objv, "string index"); + return TCL_ERROR; + } + + string = TclGetStringFromObj(objv[2], &length); + numChars = Tcl_NumUtfChars(string, length); + if (TclGetIntForIndexM(interp, objv[3], numChars-1, &index) != TCL_OK) { + return TCL_ERROR; + } + if (index >= numChars) { + index = numChars - 1; + } + cur = 0; + if (index > 0) { + p = Tcl_UtfAtIndex(string, index); + for (cur = index; cur >= 0; cur--) { + TclUtfToUniChar(p, &ch); + if (!Tcl_UniCharIsWordChar(ch)) { + break; + } + p = Tcl_UtfPrev(p, string); } + if (cur != index) { + cur += 1; + } + } + Tcl_SetObjResult(interp, Tcl_NewIntObj(cur)); + return TCL_OK; +} + +/* + *---------------------------------------------------------------------- + * + * StringEndCmd -- + * + * This procedure is invoked to process the "string wordend" Tcl command. + * See the user documentation for details on what it does. Note that this + * command only functions correctly on properly formed Tcl UTF strings. + * + * Results: + * A standard Tcl result. + * + * Side effects: + * See the user documentation. + * + *---------------------------------------------------------------------- + */ - if (objc == 5) { - string2 = TclGetStringFromObj(objv[2], &length2); - if ((length2 > 1) && - strncmp(string2, "-nocase", (size_t) length2) == 0) { - nocase = 1; - } else { - Tcl_AppendResult(interp, "bad option \"", string2, - "\": must be -nocase", NULL); - return TCL_ERROR; +static int +StringEndCmd( + ClientData dummy, /* Not used. */ + Tcl_Interp *interp, /* Current interpreter. */ + int objc, /* Number of arguments. */ + Tcl_Obj *const objv[]) /* Argument objects. */ +{ + Tcl_UniChar ch; + const char *p, *end, *string; + int cur, index, length, numChars; + + if (objc != 4) { + Tcl_WrongNumArgs(interp, 2, objv, "string index"); + return TCL_ERROR; + } + + string = TclGetStringFromObj(objv[2], &length); + numChars = Tcl_NumUtfChars(string, length); + if (TclGetIntForIndexM(interp, objv[3], numChars-1, &index) != TCL_OK) { + return TCL_ERROR; + } + if (index < 0) { + index = 0; + } + if (index < numChars) { + p = Tcl_UtfAtIndex(string, index); + end = string+length; + for (cur = index; p < end; cur++) { + p += TclUtfToUniChar(p, &ch); + if (!Tcl_UniCharIsWordChar(ch)) { + break; } } - ustring1 = Tcl_GetUnicodeFromObj(objv[objc-1], &length1); - ustring2 = Tcl_GetUnicodeFromObj(objv[objc-2], &length2); - Tcl_SetObjResult(interp, Tcl_NewBooleanObj(TclUniCharMatch( - ustring1, length1, ustring2, length2, nocase))); - break; + if (cur == index) { + cur++; + } + } else { + cur = numChars; + } + Tcl_SetObjResult(interp, Tcl_NewIntObj(cur)); + return TCL_OK; +} + +/* + *---------------------------------------------------------------------- + * + * StringEqualCmd -- + * + * This procedure is invoked to process the "string equal" Tcl command. + * See the user documentation for details on what it does. Note that this + * command only functions correctly on properly formed Tcl UTF strings. + * + * Results: + * A standard Tcl result. + * + * Side effects: + * See the user documentation. + * + *---------------------------------------------------------------------- + */ + +static int +StringEqualCmd( + ClientData dummy, /* Not used. */ + Tcl_Interp *interp, /* Current interpreter. */ + int objc, /* Number of arguments. */ + Tcl_Obj *const objv[]) /* Argument objects. */ +{ + /* + * Remember to keep code here in some sync with the byte-compiled versions + * in tclExecute.c (INST_STR_EQ, INST_STR_NEQ and INST_STR_CMP as well as + * the expr string comparison in INST_EQ/INST_NEQ/INST_LT/...). + */ + + char *string1, *string2; + int length1, length2, i, match, length, nocase = 0, reqlength = -1; + typedef int (*strCmpFn_t)(const char *, const char *, unsigned int); + strCmpFn_t strCmpFn; + + if (objc < 4 || objc > 7) { + str_cmp_args: + Tcl_WrongNumArgs(interp, 2, objv, + "?-nocase? ?-length int? string1 string2"); + return TCL_ERROR; } - case STR_RANGE: { - int first, last; - if (objc != 5) { - Tcl_WrongNumArgs(interp, 2, objv, "string first last"); + for (i = 2; i < objc-2; i++) { + string2 = TclGetStringFromObj(objv[i], &length2); + if ((length2 > 1) && !strncmp(string2, "-nocase", (size_t)length2)) { + nocase = 1; + } else if ((length2 > 1) + && !strncmp(string2, "-length", (size_t)length2)) { + if (i+1 >= objc-2) { + goto str_cmp_args; + } + ++i; + if (TclGetIntFromObj(interp, objv[i], &reqlength) != TCL_OK) { + return TCL_ERROR; + } + } else { + Tcl_AppendResult(interp, "bad option \"", string2, + "\": must be -nocase or -length", NULL); return TCL_ERROR; } + } + + /* + * From now on, we only access the two objects at the end of the argument + * array. + */ + + objv += objc-2; + + if ((reqlength == 0) || (objv[0] == objv[1])) { + /* + * Always match at 0 chars of if it is the same obj. + */ + + Tcl_SetObjResult(interp, Tcl_NewBooleanObj(1)); + return TCL_OK; + } + + if (!nocase && objv[0]->typePtr == &tclByteArrayType && + objv[1]->typePtr == &tclByteArrayType) { + /* + * Use binary versions of comparisons since that won't cause undue + * type conversions and it is much faster. Only do this if we're + * case-sensitive (which is all that really makes sense with byte + * arrays anyway, and we have no memcasecmp() for some reason... :^) + */ + string1 = (char *) Tcl_GetByteArrayFromObj(objv[0], &length1); + string2 = (char *) Tcl_GetByteArrayFromObj(objv[1], &length2); + strCmpFn = (strCmpFn_t) memcmp; + } else if ((objv[0]->typePtr == &tclStringType) + && (objv[1]->typePtr == &tclStringType)) { + /* + * Do a unicode-specific comparison if both of the args are of String + * type. In benchmark testing this proved the most efficient check + * between the unicode and string comparison operations. + */ + + string1 = (char *) Tcl_GetUnicodeFromObj(objv[0], &length1); + string2 = (char *) Tcl_GetUnicodeFromObj(objv[1], &length2); + strCmpFn = (strCmpFn_t) + (nocase ? Tcl_UniCharNcasecmp : Tcl_UniCharNcmp); + } else { /* - * If we have a ByteArray object, avoid indexing in the Utf string - * since the byte array contains one byte per character. Otherwise, - * use the Unicode string rep to get the range. + * As a catch-all we will work with UTF-8. We cannot use memcmp() as + * that is unsafe with any string containing NUL (\xC0\x80 in Tcl's + * utf rep). We can use the more efficient TclpUtfNcmp2 if we are + * case-sensitive and no specific length was requested. */ - if (objv[2]->typePtr == &tclByteArrayType) { - string1 = (char *) Tcl_GetByteArrayFromObj(objv[2], &length1); - length1--; + string1 = (char *) TclGetStringFromObj(objv[0], &length1); + string2 = (char *) TclGetStringFromObj(objv[1], &length2); + if ((reqlength < 0) && !nocase) { + strCmpFn = (strCmpFn_t) TclpUtfNcmp2; } else { + length1 = Tcl_NumUtfChars(string1, length1); + length2 = Tcl_NumUtfChars(string2, length2); + strCmpFn = (strCmpFn_t) (nocase ? Tcl_UtfNcasecmp : Tcl_UtfNcmp); + } + } + + if ((reqlength < 0) && (length1 != length2)) { + match = 1; /* This will be reversed below. */ + } else { + length = (length1 < length2) ? length1 : length2; + if (reqlength > 0 && reqlength < length) { + length = reqlength; + } else if (reqlength < 0) { /* - * Get the length in actual characters. + * The requested length is negative, so we ignore it by setting it + * to length + 1 so we correct the match var. */ - string1 = NULL; - length1 = Tcl_GetCharLength(objv[2]) - 1; + reqlength = length + 1; + } + + match = strCmpFn(string1, string2, (unsigned) length); + if ((match == 0) && (reqlength > length)) { + match = length1 - length2; } + } + + Tcl_SetObjResult(interp, Tcl_NewBooleanObj(match ? 0 : 1)); + return TCL_OK; +} + +/* + *---------------------------------------------------------------------- + * + * StringCmpCmd -- + * + * This procedure is invoked to process the "string compare" Tcl command. + * See the user documentation for details on what it does. Note that this + * command only functions correctly on properly formed Tcl UTF strings. + * + * Results: + * A standard Tcl result. + * + * Side effects: + * See the user documentation. + * + *---------------------------------------------------------------------- + */ + +static int +StringCmpCmd( + ClientData dummy, /* Not used. */ + Tcl_Interp *interp, /* Current interpreter. */ + int objc, /* Number of arguments. */ + Tcl_Obj *const objv[]) /* Argument objects. */ +{ + /* + * Remember to keep code here in some sync with the byte-compiled versions + * in tclExecute.c (INST_STR_EQ, INST_STR_NEQ and INST_STR_CMP as well as + * the expr string comparison in INST_EQ/INST_NEQ/INST_LT/...). + */ + + char *string1, *string2; + int length1, length2, i, match, length, nocase = 0, reqlength = -1; + typedef int (*strCmpFn_t)(const char *, const char *, unsigned int); + strCmpFn_t strCmpFn; + + if (objc < 4 || objc > 7) { + str_cmp_args: + Tcl_WrongNumArgs(interp, 2, objv, + "?-nocase? ?-length int? string1 string2"); + return TCL_ERROR; + } - if (TclGetIntForIndexM(interp, objv[3], length1, &first) != TCL_OK || - TclGetIntForIndexM(interp, objv[4], length1, &last) != TCL_OK) { + for (i = 2; i < objc-2; i++) { + string2 = TclGetStringFromObj(objv[i], &length2); + if ((length2 > 1) && !strncmp(string2, "-nocase", (size_t)length2)) { + nocase = 1; + } else if ((length2 > 1) + && !strncmp(string2, "-length", (size_t)length2)) { + if (i+1 >= objc-2) { + goto str_cmp_args; + } + ++i; + if (TclGetIntFromObj(interp, objv[i], &reqlength) != TCL_OK) { + return TCL_ERROR; + } + } else { + Tcl_AppendResult(interp, "bad option \"", string2, + "\": must be -nocase or -length", NULL); return TCL_ERROR; } + } + + /* + * From now on, we only access the two objects at the end of the argument + * array. + */ + + objv += objc-2; + + if ((reqlength == 0) || (objv[0] == objv[1])) { + /* + * Always match at 0 chars of if it is the same obj. + */ + + Tcl_SetObjResult(interp, Tcl_NewBooleanObj(0)); + return TCL_OK; + } + + if (!nocase && objv[0]->typePtr == &tclByteArrayType && + objv[1]->typePtr == &tclByteArrayType) { + /* + * Use binary versions of comparisons since that won't cause undue + * type conversions and it is much faster. Only do this if we're + * case-sensitive (which is all that really makes sense with byte + * arrays anyway, and we have no memcasecmp() for some reason... :^) + */ + + string1 = (char *) Tcl_GetByteArrayFromObj(objv[0], &length1); + string2 = (char *) Tcl_GetByteArrayFromObj(objv[1], &length2); + strCmpFn = (strCmpFn_t) memcmp; + } else if ((objv[0]->typePtr == &tclStringType) + && (objv[1]->typePtr == &tclStringType)) { + /* + * Do a unicode-specific comparison if both of the args are of String + * type. In benchmark testing this proved the most efficient check + * between the unicode and string comparison operations. + */ + + string1 = (char *) Tcl_GetUnicodeFromObj(objv[0], &length1); + string2 = (char *) Tcl_GetUnicodeFromObj(objv[1], &length2); + strCmpFn = (strCmpFn_t) + (nocase ? Tcl_UniCharNcasecmp : Tcl_UniCharNcmp); + } else { + /* + * As a catch-all we will work with UTF-8. We cannot use memcmp() as + * that is unsafe with any string containing NUL (\xC0\x80 in Tcl's + * utf rep). We can use the more efficient TclpUtfNcmp2 if we are + * case-sensitive and no specific length was requested. + */ + + string1 = (char *) TclGetStringFromObj(objv[0], &length1); + string2 = (char *) TclGetStringFromObj(objv[1], &length2); + if ((reqlength < 0) && !nocase) { + strCmpFn = (strCmpFn_t) TclpUtfNcmp2; + } else { + length1 = Tcl_NumUtfChars(string1, length1); + length2 = Tcl_NumUtfChars(string2, length2); + strCmpFn = (strCmpFn_t) (nocase ? Tcl_UtfNcasecmp : Tcl_UtfNcmp); + } + } + + length = (length1 < length2) ? length1 : length2; + if (reqlength > 0 && reqlength < length) { + length = reqlength; + } else if (reqlength < 0) { + /* + * The requested length is negative, so we ignore it by setting it to + * length + 1 so we correct the match var. + */ + + reqlength = length + 1; + } + + match = strCmpFn(string1, string2, (unsigned) length); + if ((match == 0) && (reqlength > length)) { + match = length1 - length2; + } + + Tcl_SetObjResult(interp, + Tcl_NewIntObj((match > 0) ? 1 : (match < 0) ? -1 : 0)); + return TCL_OK; +} + +/* + *---------------------------------------------------------------------- + * + * StringBytesCmd -- + * + * This procedure is invoked to process the "string bytelength" Tcl + * command. See the user documentation for details on what it does. Note + * that this command only functions correctly on properly formed Tcl UTF + * strings. + * + * Results: + * A standard Tcl result. + * + * Side effects: + * See the user documentation. + * + *---------------------------------------------------------------------- + */ + +static int +StringBytesCmd( + ClientData dummy, /* Not used. */ + Tcl_Interp *interp, /* Current interpreter. */ + int objc, /* Number of arguments. */ + Tcl_Obj *const objv[]) /* Argument objects. */ +{ + int length; + + if (objc != 3) { + Tcl_WrongNumArgs(interp, 2, objv, "string"); + return TCL_ERROR; + } + + (void) TclGetStringFromObj(objv[2], &length); + Tcl_SetObjResult(interp, Tcl_NewIntObj(length)); + return TCL_OK; +} + +/* + *---------------------------------------------------------------------- + * + * StringLenCmd -- + * + * This procedure is invoked to process the "string length" Tcl command. + * See the user documentation for details on what it does. Note that this + * command only functions correctly on properly formed Tcl UTF strings. + * + * Results: + * A standard Tcl result. + * + * Side effects: + * See the user documentation. + * + *---------------------------------------------------------------------- + */ + +static int +StringLenCmd( + ClientData dummy, /* Not used. */ + Tcl_Interp *interp, /* Current interpreter. */ + int objc, /* Number of arguments. */ + Tcl_Obj *const objv[]) /* Argument objects. */ +{ + int length; + + if (objc != 3) { + Tcl_WrongNumArgs(interp, 2, objv, "string"); + return TCL_ERROR; + } + + /* + * If we have a ByteArray object, avoid recomputing the string since the + * byte array contains one byte per character. Otherwise, use the Unicode + * string rep to calculate the length. + */ + + if (objv[2]->typePtr == &tclByteArrayType) { + (void) Tcl_GetByteArrayFromObj(objv[2], &length); + } else { + length = Tcl_GetCharLength(objv[2]); + } + Tcl_SetObjResult(interp, Tcl_NewIntObj(length)); + return TCL_OK; +} + +/* + *---------------------------------------------------------------------- + * + * StringLowerCmd -- + * + * This procedure is invoked to process the "string tolower" Tcl command. + * See the user documentation for details on what it does. Note that this + * command only functions correctly on properly formed Tcl UTF strings. + * + * Results: + * A standard Tcl result. + * + * Side effects: + * See the user documentation. + * + *---------------------------------------------------------------------- + */ +static int +StringLowerCmd( + ClientData dummy, /* Not used. */ + Tcl_Interp *interp, /* Current interpreter. */ + int objc, /* Number of arguments. */ + Tcl_Obj *const objv[]) /* Argument objects. */ +{ + int length1, length2; + char *string1, *string2; + + if (objc < 3 || objc > 5) { + Tcl_WrongNumArgs(interp, 2, objv, "string ?first? ?last?"); + return TCL_ERROR; + } + + string1 = TclGetStringFromObj(objv[2], &length1); + + if (objc == 3) { + Tcl_Obj *resultPtr = Tcl_NewStringObj(string1, length1); + + length1 = Tcl_UtfToLower(TclGetString(resultPtr)); + Tcl_SetObjLength(resultPtr, length1); + Tcl_SetObjResult(interp, resultPtr); + } else { + int first, last; + const char *start, *end; + Tcl_Obj *resultPtr; + + length1 = Tcl_NumUtfChars(string1, length1) - 1; + if (TclGetIntForIndexM(interp,objv[3],length1, &first) != TCL_OK) { + return TCL_ERROR; + } if (first < 0) { first = 0; } + last = first; + + if ((objc == 5) && (TclGetIntForIndexM(interp, objv[4], length1, + &last) != TCL_OK)) { + return TCL_ERROR; + } + if (last >= length1) { last = length1; } - if (last >= first) { - if (string1 != NULL) { - int numBytes = last - first + 1; - Tcl_SetObjResult(interp, Tcl_NewByteArrayObj( - (unsigned char *) &string1[first], numBytes)); - } else { - Tcl_SetObjResult(interp, - Tcl_GetRange(objv[2], first, last)); - } + if (last < first) { + Tcl_SetObjResult(interp, objv[2]); + return TCL_OK; } - break; + + start = Tcl_UtfAtIndex(string1, first); + end = Tcl_UtfAtIndex(start, last - first + 1); + resultPtr = Tcl_NewStringObj(string1, end - string1); + string2 = TclGetString(resultPtr) + (start - string1); + + length2 = Tcl_UtfToLower(string2); + Tcl_SetObjLength(resultPtr, length2 + (start - string1)); + + Tcl_AppendToObj(resultPtr, end, -1); + Tcl_SetObjResult(interp, resultPtr); + } + + return TCL_OK; +} + +/* + *---------------------------------------------------------------------- + * + * StringUpperCmd -- + * + * This procedure is invoked to process the "string toupper" Tcl command. + * See the user documentation for details on what it does. Note that this + * command only functions correctly on properly formed Tcl UTF strings. + * + * Results: + * A standard Tcl result. + * + * Side effects: + * See the user documentation. + * + *---------------------------------------------------------------------- + */ + +static int +StringUpperCmd( + ClientData dummy, /* Not used. */ + Tcl_Interp *interp, /* Current interpreter. */ + int objc, /* Number of arguments. */ + Tcl_Obj *const objv[]) /* Argument objects. */ +{ + int length1, length2; + char *string1, *string2; + + if (objc < 3 || objc > 5) { + Tcl_WrongNumArgs(interp, 2, objv, "string ?first? ?last?"); + return TCL_ERROR; } - case STR_REPEAT: { - int count; - if (objc != 4) { - Tcl_WrongNumArgs(interp, 2, objv, "string count"); + string1 = TclGetStringFromObj(objv[2], &length1); + + if (objc == 3) { + Tcl_Obj *resultPtr = Tcl_NewStringObj(string1, length1); + + length1 = Tcl_UtfToUpper(TclGetString(resultPtr)); + Tcl_SetObjLength(resultPtr, length1); + Tcl_SetObjResult(interp, resultPtr); + } else { + int first, last; + const char *start, *end; + Tcl_Obj *resultPtr; + + length1 = Tcl_NumUtfChars(string1, length1) - 1; + if (TclGetIntForIndexM(interp,objv[3],length1, &first) != TCL_OK) { return TCL_ERROR; } + if (first < 0) { + first = 0; + } + last = first; - if (TclGetIntFromObj(interp, objv[3], &count) != TCL_OK) { + if ((objc == 5) && (TclGetIntForIndexM(interp, objv[4], length1, + &last) != TCL_OK)) { return TCL_ERROR; } - if (count == 1) { + if (last >= length1) { + last = length1; + } + if (last < first) { Tcl_SetObjResult(interp, objv[2]); - } else if (count > 1) { - string1 = TclGetStringFromObj(objv[2], &length1); - if (length1 > 0) { - /* - * Only build up a string that has data. Instead of building - * it up with repeated appends, we just allocate the necessary - * space once and copy the string value in. Check for overflow - * with back-division. [Bug #714106] - */ + return TCL_OK; + } - Tcl_Obj *resultPtr; + start = Tcl_UtfAtIndex(string1, first); + end = Tcl_UtfAtIndex(start, last - first + 1); + resultPtr = Tcl_NewStringObj(string1, end - string1); + string2 = TclGetString(resultPtr) + (start - string1); - length2 = length1 * count; - if ((length2 / count) != length1) { - Tcl_SetObjResult(interp, Tcl_ObjPrintf( - "string size overflow, must be less than %d", - INT_MAX)); - return TCL_ERROR; - } + length2 = Tcl_UtfToUpper(string2); + Tcl_SetObjLength(resultPtr, length2 + (start - string1)); - /* - * Include space for the NULL. - */ + Tcl_AppendToObj(resultPtr, end, -1); + Tcl_SetObjResult(interp, resultPtr); + } - string2 = (char *) ckalloc((size_t) length2+1); - for (index = 0; index < count; index++) { - memcpy(string2 + (length1 * index), string1, - (size_t) length1); - } - string2[length2] = '\0'; + return TCL_OK; +} + +/* + *---------------------------------------------------------------------- + * + * StringTitleCmd -- + * + * This procedure is invoked to process the "string totitle" Tcl command. + * See the user documentation for details on what it does. Note that this + * command only functions correctly on properly formed Tcl UTF strings. + * + * Results: + * A standard Tcl result. + * + * Side effects: + * See the user documentation. + * + *---------------------------------------------------------------------- + */ - /* - * We have to directly assign this instead of using - * Tcl_SetStringObj (and indirectly TclInitStringRep) because - * that makes another copy of the data. - */ +static int +StringTitleCmd( + ClientData dummy, /* Not used. */ + Tcl_Interp *interp, /* Current interpreter. */ + int objc, /* Number of arguments. */ + Tcl_Obj *const objv[]) /* Argument objects. */ +{ + int length1, length2; + char *string1, *string2; - TclNewObj(resultPtr); - resultPtr->bytes = string2; - resultPtr->length = length2; - Tcl_SetObjResult(interp, resultPtr); - } - } - break; + if (objc < 3 || objc > 5) { + Tcl_WrongNumArgs(interp, 2, objv, "string ?first? ?last?"); + return TCL_ERROR; } - case STR_REPLACE: { - Tcl_UniChar *ustring1; + + string1 = TclGetStringFromObj(objv[2], &length1); + + if (objc == 3) { + Tcl_Obj *resultPtr = Tcl_NewStringObj(string1, length1); + + length1 = Tcl_UtfToTitle(TclGetString(resultPtr)); + Tcl_SetObjLength(resultPtr, length1); + Tcl_SetObjResult(interp, resultPtr); + } else { int first, last; + const char *start, *end; + Tcl_Obj *resultPtr; - if (objc < 5 || objc > 6) { - Tcl_WrongNumArgs(interp, 2, objv, "string first last ?string?"); + length1 = Tcl_NumUtfChars(string1, length1) - 1; + if (TclGetIntForIndexM(interp,objv[3],length1, &first) != TCL_OK) { return TCL_ERROR; } + if (first < 0) { + first = 0; + } + last = first; - ustring1 = Tcl_GetUnicodeFromObj(objv[2], &length1); - length1--; - - if (TclGetIntForIndexM(interp, objv[3], length1, &first) != TCL_OK || - TclGetIntForIndexM(interp, objv[4], length1, &last) != TCL_OK){ + if ((objc == 5) && (TclGetIntForIndexM(interp, objv[4], length1, + &last) != TCL_OK)) { return TCL_ERROR; } - if ((last < first) || (last < 0) || (first > length1)) { - Tcl_SetObjResult(interp, objv[2]); - } else { - Tcl_Obj *resultPtr; - if (first < 0) { - first = 0; - } - - resultPtr = Tcl_NewUnicodeObj(ustring1, first); - if (objc == 6) { - Tcl_AppendObjToObj(resultPtr, objv[5]); - } - if (last < length1) { - Tcl_AppendUnicodeToObj(resultPtr, ustring1 + last + 1, - length1 - last); - } - Tcl_SetObjResult(interp, resultPtr); + if (last >= length1) { + last = length1; } - break; - } - case STR_REVERSE: { - if (objc != 3) { - Tcl_WrongNumArgs(interp, 2, objv, "string"); - return TCL_ERROR; + if (last < first) { + Tcl_SetObjResult(interp, objv[2]); + return TCL_OK; } - Tcl_SetObjResult(interp, TclStringObjReverse(objv[2])); - break; + start = Tcl_UtfAtIndex(string1, first); + end = Tcl_UtfAtIndex(start, last - first + 1); + resultPtr = Tcl_NewStringObj(string1, end - string1); + string2 = TclGetString(resultPtr) + (start - string1); + + length2 = Tcl_UtfToTitle(string2); + Tcl_SetObjLength(resultPtr, length2 + (start - string1)); + + Tcl_AppendToObj(resultPtr, end, -1); + Tcl_SetObjResult(interp, resultPtr); } - case STR_TOLOWER: - case STR_TOUPPER: - case STR_TOTITLE: - if (objc < 3 || objc > 5) { - Tcl_WrongNumArgs(interp, 2, objv, "string ?first? ?last?"); - return TCL_ERROR; - } - string1 = TclGetStringFromObj(objv[2], &length1); + return TCL_OK; +} + +/* + *---------------------------------------------------------------------- + * + * StringTrimCmd -- + * + * This procedure is invoked to process the "string trim" Tcl command. + * See the user documentation for details on what it does. Note that this + * command only functions correctly on properly formed Tcl UTF strings. + * + * Results: + * A standard Tcl result. + * + * Side effects: + * See the user documentation. + * + *---------------------------------------------------------------------- + */ - if (objc == 3) { - Tcl_Obj *resultPtr = Tcl_NewStringObj(string1, length1); - if ((enum options) index == STR_TOLOWER) { - length1 = Tcl_UtfToLower(TclGetString(resultPtr)); - } else if ((enum options) index == STR_TOUPPER) { - length1 = Tcl_UtfToUpper(TclGetString(resultPtr)); - } else { - length1 = Tcl_UtfToTitle(TclGetString(resultPtr)); - } - Tcl_SetObjLength(resultPtr, length1); - Tcl_SetObjResult(interp, resultPtr); - } else { - int first, last; - CONST char *start, *end; - Tcl_Obj *resultPtr; +static int +StringTrimCmd( + ClientData dummy, /* Not used. */ + Tcl_Interp *interp, /* Current interpreter. */ + int objc, /* Number of arguments. */ + Tcl_Obj *const objv[]) /* Argument objects. */ +{ + Tcl_UniChar ch, trim; + register const char *p, *end; + const char *check, *checkEnd, *string1, *string2; + int offset, length1, length2; - length1 = Tcl_NumUtfChars(string1, length1) - 1; - if (TclGetIntForIndexM(interp,objv[3],length1, &first) != TCL_OK) { - return TCL_ERROR; - } - if (first < 0) { - first = 0; - } - last = first; + if (objc == 4) { + string2 = TclGetStringFromObj(objv[3], &length2); + } else if (objc == 3) { + string2 = " \t\n\r"; + length2 = strlen(string2); + } else { + Tcl_WrongNumArgs(interp, 2, objv, "string ?chars?"); + return TCL_ERROR; + } + string1 = TclGetStringFromObj(objv[2], &length1); + checkEnd = string2 + length2; - if ((objc == 5) && (TclGetIntForIndexM(interp, objv[4], length1, - &last) != TCL_OK)) { - return TCL_ERROR; - } + /* + * The outer loop iterates over the string. The inner loop iterates over + * the trim characters. The loops terminate as soon as a non-trim + * character is discovered and string1 is left pointing at the first + * non-trim character. + */ + + end = string1 + length1; + for (p = string1; p < end; p += offset) { + offset = TclUtfToUniChar(p, &ch); - if (last >= length1) { - last = length1; + for (check = string2; ; ) { + if (check >= checkEnd) { + p = end; + break; } - if (last < first) { - Tcl_SetObjResult(interp, objv[2]); + check += TclUtfToUniChar(check, &trim); + if (ch == trim) { + length1 -= offset; + string1 += offset; break; } + } + } - start = Tcl_UtfAtIndex(string1, first); - end = Tcl_UtfAtIndex(start, last - first + 1); - resultPtr = Tcl_NewStringObj(string1, end - string1); - string2 = TclGetString(resultPtr) + (start - string1); + /* + * The outer loop iterates over the string. The inner loop iterates over + * the trim characters. The loops terminate as soon as a non-trim + * character is discovered and length1 marks the last non-trim character. + */ - if ((enum options) index == STR_TOLOWER) { - length2 = Tcl_UtfToLower(string2); - } else if ((enum options) index == STR_TOUPPER) { - length2 = Tcl_UtfToUpper(string2); - } else { - length2 = Tcl_UtfToTitle(string2); + end = string1; + for (p = string1 + length1; p > end; ) { + p = Tcl_UtfPrev(p, string1); + offset = TclUtfToUniChar(p, &ch); + check = string2; + while (1) { + if (check >= checkEnd) { + p = end; + break; + } + check += TclUtfToUniChar(check, &trim); + if (ch == trim) { + length1 -= offset; + break; } - Tcl_SetObjLength(resultPtr, length2 + (start - string1)); - - Tcl_AppendToObj(resultPtr, end, -1); - Tcl_SetObjResult(interp, resultPtr); } - break; + } - case STR_TRIMLEFT: - left = 1; - right = 0; - goto dotrim; - case STR_TRIMRIGHT: - left = 0; - right = 1; - goto dotrim; - case STR_TRIM: { - Tcl_UniChar ch, trim; - register CONST char *p, *end; - char *check, *checkEnd; - int offset; - - left = 1; - right = 1; - - dotrim: - if (objc == 4) { - string2 = TclGetStringFromObj(objv[3], &length2); - } else if (objc == 3) { - string2 = " \t\n\r"; - length2 = strlen(string2); - } else { - Tcl_WrongNumArgs(interp, 2, objv, "string ?chars?"); - return TCL_ERROR; - } - string1 = TclGetStringFromObj(objv[2], &length1); - checkEnd = string2 + length2; + Tcl_SetObjResult(interp, Tcl_NewStringObj(string1, length1)); + return TCL_OK; +} + +/* + *---------------------------------------------------------------------- + * + * StringTrimLCmd -- + * + * This procedure is invoked to process the "string trimleft" Tcl + * command. See the user documentation for details on what it does. Note + * that this command only functions correctly on properly formed Tcl UTF + * strings. + * + * Results: + * A standard Tcl result. + * + * Side effects: + * See the user documentation. + * + *---------------------------------------------------------------------- + */ - if (left) { - end = string1 + length1; - /* - * The outer loop iterates over the string. The inner loop - * iterates over the trim characters. The loops terminate as soon - * as a non-trim character is discovered and string1 is left - * pointing at the first non-trim character. - */ +static int +StringTrimLCmd( + ClientData dummy, /* Not used. */ + Tcl_Interp *interp, /* Current interpreter. */ + int objc, /* Number of arguments. */ + Tcl_Obj *const objv[]) /* Argument objects. */ +{ + Tcl_UniChar ch, trim; + register const char *p, *end; + const char *check, *checkEnd, *string1, *string2; + int offset, length1, length2; - for (p = string1; p < end; p += offset) { - offset = TclUtfToUniChar(p, &ch); + if (objc == 4) { + string2 = TclGetStringFromObj(objv[3], &length2); + } else if (objc == 3) { + string2 = " \t\n\r"; + length2 = strlen(string2); + } else { + Tcl_WrongNumArgs(interp, 2, objv, "string ?chars?"); + return TCL_ERROR; + } + string1 = TclGetStringFromObj(objv[2], &length1); + checkEnd = string2 + length2; - for (check = string2; ; ) { - if (check >= checkEnd) { - p = end; - break; - } - check += TclUtfToUniChar(check, &trim); - if (ch == trim) { - length1 -= offset; - string1 += offset; - break; - } - } - } - } - if (right) { - end = string1; + /* + * The outer loop iterates over the string. The inner loop iterates over + * the trim characters. The loops terminate as soon as a non-trim + * character is discovered and string1 is left pointing at the first + * non-trim character. + */ - /* - * The outer loop iterates over the string. The inner loop - * iterates over the trim characters. The loops terminate as soon - * as a non-trim character is discovered and length1 marks the - * last non-trim character. - */ + end = string1 + length1; + for (p = string1; p < end; p += offset) { + offset = TclUtfToUniChar(p, &ch); - for (p = string1 + length1; p > end; ) { - p = Tcl_UtfPrev(p, string1); - offset = TclUtfToUniChar(p, &ch); - check = string2; - while (1) { - if (check >= checkEnd) { - p = end; - break; - } - check += TclUtfToUniChar(check, &trim); - if (ch == trim) { - length1 -= offset; - break; - } - } + for (check = string2; ; ) { + if (check >= checkEnd) { + p = end; + break; + } + check += TclUtfToUniChar(check, &trim); + if (ch == trim) { + length1 -= offset; + string1 += offset; + break; } } - Tcl_SetObjResult(interp, Tcl_NewStringObj(string1, length1)); - break; } - case STR_WORDEND: { - int cur; - Tcl_UniChar ch; - CONST char *p, *end; - int numChars; - if (objc != 4) { - Tcl_WrongNumArgs(interp, 2, objv, "string index"); - return TCL_ERROR; - } + Tcl_SetObjResult(interp, Tcl_NewStringObj(string1, length1)); + return TCL_OK; +} + +/* + *---------------------------------------------------------------------- + * + * StringTrimRCmd -- + * + * This procedure is invoked to process the "string trimright" Tcl + * command. See the user documentation for details on what it does. Note + * that this command only functions correctly on properly formed Tcl UTF + * strings. + * + * Results: + * A standard Tcl result. + * + * Side effects: + * See the user documentation. + * + *---------------------------------------------------------------------- + */ - string1 = TclGetStringFromObj(objv[2], &length1); - numChars = Tcl_NumUtfChars(string1, length1); - if (TclGetIntForIndexM(interp, objv[3], numChars-1, &index) != TCL_OK){ - return TCL_ERROR; - } - if (index < 0) { - index = 0; - } - if (index < numChars) { - p = Tcl_UtfAtIndex(string1, index); - end = string1+length1; - for (cur = index; p < end; cur++) { - p += TclUtfToUniChar(p, &ch); - if (!Tcl_UniCharIsWordChar(ch)) { - break; - } - } - if (cur == index) { - cur++; - } - } else { - cur = numChars; - } - Tcl_SetObjResult(interp, Tcl_NewIntObj(cur)); - break; +static int +StringTrimRCmd( + ClientData dummy, /* Not used. */ + Tcl_Interp *interp, /* Current interpreter. */ + int objc, /* Number of arguments. */ + Tcl_Obj *const objv[]) /* Argument objects. */ +{ + Tcl_UniChar ch, trim; + register const char *p, *end; + const char *check, *checkEnd, *string1, *string2; + int offset, length1, length2; + + if (objc == 4) { + string2 = TclGetStringFromObj(objv[3], &length2); + } else if (objc == 3) { + string2 = " \t\n\r"; + length2 = strlen(string2); + } else { + Tcl_WrongNumArgs(interp, 2, objv, "string ?chars?"); + return TCL_ERROR; } - case STR_WORDSTART: { - int cur; - Tcl_UniChar ch; - CONST char *p; - int numChars; + string1 = TclGetStringFromObj(objv[2], &length1); + checkEnd = string2 + length2; - if (objc != 4) { - Tcl_WrongNumArgs(interp, 2, objv, "string index"); - return TCL_ERROR; - } + /* + * The outer loop iterates over the string. The inner loop iterates over + * the trim characters. The loops terminate as soon as a non-trim + * character is discovered and length1 marks the last non-trim character. + */ - string1 = TclGetStringFromObj(objv[2], &length1); - numChars = Tcl_NumUtfChars(string1, length1); - if (TclGetIntForIndexM(interp, objv[3], numChars-1, &index) != TCL_OK){ - return TCL_ERROR; - } - if (index >= numChars) { - index = numChars - 1; - } - cur = 0; - if (index > 0) { - p = Tcl_UtfAtIndex(string1, index); - for (cur = index; cur >= 0; cur--) { - TclUtfToUniChar(p, &ch); - if (!Tcl_UniCharIsWordChar(ch)) { - break; - } - p = Tcl_UtfPrev(p, string1); + end = string1; + for (p = string1 + length1; p > end; ) { + p = Tcl_UtfPrev(p, string1); + offset = TclUtfToUniChar(p, &ch); + check = string2; + while (1) { + if (check >= checkEnd) { + p = end; + break; } - if (cur != index) { - cur += 1; + check += TclUtfToUniChar(check, &trim); + if (ch == trim) { + length1 -= offset; + break; } } - Tcl_SetObjResult(interp, Tcl_NewIntObj(cur)); - break; - } } + + Tcl_SetObjResult(interp, Tcl_NewStringObj(string1, length1)); return TCL_OK; } -static int -UniCharIsAscii( - int character) +/* + *---------------------------------------------------------------------- + * + * Tcl_StringObjCmd -- + * + * This procedure is invoked to process the "string" Tcl command. See the + * user documentation for details on what it does. Note that this command + * only functions correctly on properly formed Tcl UTF strings. + * + * Note that the primary methods here (equal, compare, match, ...) have + * bytecode equivalents. You will find the code for those in + * tclExecute.c. The code here will only be used in the non-bc case (like + * in an 'eval'). + * + * Results: + * A standard Tcl result. + * + * Side effects: + * See the user documentation. + * + *---------------------------------------------------------------------- + */ + +int +Tcl_StringObjCmd( + ClientData dummy, /* Not used. */ + Tcl_Interp *interp, /* Current interpreter. */ + int objc, /* Number of arguments. */ + Tcl_Obj *const objv[]) /* Argument objects. */ { - return (character >= 0) && (character < 0x80); + static const EnsembleImplMap stringImplMap[] = { + {"bytelength", StringBytesCmd, NULL}, + {"compare", StringCmpCmd, NULL}, + {"equal", StringEqualCmd, NULL}, + {"first", StringFirstCmd, NULL}, + {"index", StringIndexCmd, NULL}, + {"is", StringIsCmd, NULL}, + {"last", StringLastCmd, NULL}, + {"length", StringLenCmd, NULL}, + {"map", StringMapCmd, NULL}, + {"match", StringMatchCmd, NULL}, + {"range", StringRangeCmd, NULL}, + {"repeat", StringReptCmd, NULL}, + {"replace", StringRplcCmd, NULL}, + {"reverse", StringRevCmd, NULL}, + {"tolower", StringLowerCmd, NULL}, + {"toupper", StringUpperCmd, NULL}, + {"totitle", StringTitleCmd, NULL}, + {"trim", StringTrimCmd, NULL}, + {"trimleft", StringTrimLCmd, NULL}, + {"trimright", StringTrimRCmd, NULL}, + {"wordend", StringEndCmd, NULL}, + {"wordstart", StringStartCmd, NULL}, + {NULL} + }; + + int index; + + if (objc < 2) { + Tcl_WrongNumArgs(interp, 1, objv, "option arg ?arg ...?"); + return TCL_ERROR; + } + if (Tcl_GetIndexFromObjStruct(interp, objv[1], &stringImplMap[0].name, + sizeof(EnsembleImplMap), "option", 0, &index) != TCL_OK) { + return TCL_ERROR; + } + return stringImplMap[index].proc(dummy, interp, objc, objv); } /* diff --git a/generic/tclDictObj.c b/generic/tclDictObj.c index 1a81260..734b57b 100644 --- a/generic/tclDictObj.c +++ b/generic/tclDictObj.c @@ -9,7 +9,7 @@ * See the file "license.terms" for information on usage and redistribution of * this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclDictObj.c,v 1.53 2007/11/20 20:43:11 dkf Exp $ + * RCS: @(#) $Id: tclDictObj.c,v 1.54 2007/11/22 16:39:58 dkf Exp $ */ #include "tclInt.h" @@ -25,43 +25,43 @@ struct Dict; */ static void DeleteDict(struct Dict *dict); -static int DictAppendCmd(Tcl_Interp *interp, +static int DictAppendCmd(ClientData dummy, Tcl_Interp *interp, int objc, Tcl_Obj *const *objv); -static int DictCreateCmd(Tcl_Interp *interp, +static int DictCreateCmd(ClientData dummy, Tcl_Interp *interp, int objc, Tcl_Obj *const *objv); -static int DictExistsCmd(Tcl_Interp *interp, +static int DictExistsCmd(ClientData dummy, Tcl_Interp *interp, int objc, Tcl_Obj *const *objv); -static int DictFilterCmd(Tcl_Interp *interp, +static int DictFilterCmd(ClientData dummy, Tcl_Interp *interp, int objc, Tcl_Obj *const *objv); -static int DictForCmd(Tcl_Interp *interp, +static int DictForCmd(ClientData dummy, Tcl_Interp *interp, int objc, Tcl_Obj *const *objv); -static int DictGetCmd(Tcl_Interp *interp, +static int DictGetCmd(ClientData dummy, Tcl_Interp *interp, int objc, Tcl_Obj *const *objv); -static int DictIncrCmd(Tcl_Interp *interp, +static int DictIncrCmd(ClientData dummy, Tcl_Interp *interp, int objc, Tcl_Obj *const *objv); -static int DictInfoCmd(Tcl_Interp *interp, +static int DictInfoCmd(ClientData dummy, Tcl_Interp *interp, int objc, Tcl_Obj *const *objv); -static int DictKeysCmd(Tcl_Interp *interp, +static int DictKeysCmd(ClientData dummy, Tcl_Interp *interp, int objc, Tcl_Obj *const *objv); -static int DictLappendCmd(Tcl_Interp *interp, +static int DictLappendCmd(ClientData dummy, Tcl_Interp *interp, int objc, Tcl_Obj *const *objv); -static int DictMergeCmd(Tcl_Interp *interp, +static int DictMergeCmd(ClientData dummy, Tcl_Interp *interp, int objc, Tcl_Obj *const *objv); -static int DictRemoveCmd(Tcl_Interp *interp, +static int DictRemoveCmd(ClientData dummy, Tcl_Interp *interp, int objc, Tcl_Obj *const *objv); -static int DictReplaceCmd(Tcl_Interp *interp, +static int DictReplaceCmd(ClientData dummy, Tcl_Interp *interp, int objc, Tcl_Obj *const *objv); -static int DictSetCmd(Tcl_Interp *interp, +static int DictSetCmd(ClientData dummy, Tcl_Interp *interp, int objc, Tcl_Obj *const *objv); -static int DictSizeCmd(Tcl_Interp *interp, +static int DictSizeCmd(ClientData dummy, Tcl_Interp *interp, int objc, Tcl_Obj *const *objv); -static int DictUnsetCmd(Tcl_Interp *interp, +static int DictUnsetCmd(ClientData dummy, Tcl_Interp *interp, int objc, Tcl_Obj *const *objv); -static int DictValuesCmd(Tcl_Interp *interp, +static int DictUpdateCmd(ClientData dummy, Tcl_Interp *interp, int objc, Tcl_Obj *const *objv); -static int DictUpdateCmd(Tcl_Interp *interp, +static int DictValuesCmd(ClientData dummy, Tcl_Interp *interp, int objc, Tcl_Obj *const *objv); -static int DictWithCmd(Tcl_Interp *interp, +static int DictWithCmd(ClientData dummy, Tcl_Interp *interp, int objc, Tcl_Obj *const *objv); static void DupDictInternalRep(Tcl_Obj *srcPtr, Tcl_Obj *copyPtr); static void FreeDictInternalRep(Tcl_Obj *dictPtr); @@ -76,6 +76,33 @@ static inline Tcl_HashEntry *CreateChainEntry(struct Dict *dict, static inline int DeleteChainEntry(struct Dict *dict, Tcl_Obj *keyPtr); /* + * Table of dict subcommand names and implementations. + */ + +static const EnsembleImplMap implementationMap[] = { + {"append", DictAppendCmd, NULL/*TclCompileDictAppendCmd*/}, + {"create", DictCreateCmd, NULL}, + {"exists", DictExistsCmd, NULL}, + {"filter", DictFilterCmd, NULL}, + {"for", DictForCmd, NULL/*TclCompileDictForCmd*/}, + {"get", DictGetCmd, NULL/*TclCompileDictGetCmd*/}, + {"incr", DictIncrCmd, NULL/*TclCompileDictIncrCmd*/}, + {"info", DictInfoCmd, NULL}, + {"keys", DictKeysCmd, NULL}, + {"lappend", DictLappendCmd, NULL/*TclCompileDictLappendCmd*/}, + {"merge", DictMergeCmd, NULL}, + {"remove", DictRemoveCmd, NULL}, + {"replace", DictReplaceCmd, NULL}, + {"set", DictSetCmd, NULL/*TclCompileDictSetCmd*/}, + {"size", DictSizeCmd, NULL}, + {"unset", DictUnsetCmd, NULL}, + {"update", DictUpdateCmd, NULL/*TclCompileDictUpdateCmd*/}, + {"values", DictValuesCmd, NULL}, + {"with", DictWithCmd, NULL}, + {NULL} +}; + +/* * Internal representation of the entries in the hash table that backs a * dictionary. */ @@ -136,6 +163,9 @@ Tcl_ObjType tclDictType = { * table defined in the tclObj.c code. This version differs in that it * allocates a bit more space in each hash entry in order to hold the pointers * used to keep the hash entries in a linked list. + * + * Note that this type of hash table is *only* suitable for direct use in + * *this* file. Everything else should use the dict iterator API. */ static Tcl_HashKeyType chainHashType = { @@ -1459,6 +1489,7 @@ Tcl_DbNewDictObj( static int DictCreateCmd( + ClientData dummy, Tcl_Interp *interp, int objc, Tcl_Obj *const *objv) @@ -1508,6 +1539,7 @@ DictCreateCmd( static int DictGetCmd( + ClientData dummy, Tcl_Interp *interp, int objc, Tcl_Obj *const *objv) @@ -1598,6 +1630,7 @@ DictGetCmd( static int DictReplaceCmd( + ClientData dummy, Tcl_Interp *interp, int objc, Tcl_Obj *const *objv) @@ -1649,6 +1682,7 @@ DictReplaceCmd( static int DictRemoveCmd( + ClientData dummy, Tcl_Interp *interp, int objc, Tcl_Obj *const *objv) @@ -1700,6 +1734,7 @@ DictRemoveCmd( static int DictMergeCmd( + ClientData dummy, Tcl_Interp *interp, int objc, Tcl_Obj *const *objv) @@ -1785,6 +1820,7 @@ DictMergeCmd( static int DictKeysCmd( + ClientData dummy, Tcl_Interp *interp, int objc, Tcl_Obj *const *objv) @@ -1866,6 +1902,7 @@ DictKeysCmd( static int DictValuesCmd( + ClientData dummy, Tcl_Interp *interp, int objc, Tcl_Obj *const *objv) @@ -1922,6 +1959,7 @@ DictValuesCmd( static int DictSizeCmd( + ClientData dummy, Tcl_Interp *interp, int objc, Tcl_Obj *const *objv) @@ -1959,6 +1997,7 @@ DictSizeCmd( static int DictExistsCmd( + ClientData dummy, Tcl_Interp *interp, int objc, Tcl_Obj *const *objv) @@ -2008,6 +2047,7 @@ DictExistsCmd( static int DictInfoCmd( + ClientData dummy, Tcl_Interp *interp, int objc, Tcl_Obj *const *objv) @@ -2057,6 +2097,7 @@ DictInfoCmd( static int DictIncrCmd( + ClientData dummy, Tcl_Interp *interp, int objc, Tcl_Obj *const *objv) @@ -2169,6 +2210,7 @@ DictIncrCmd( static int DictLappendCmd( + ClientData dummy, Tcl_Interp *interp, int objc, Tcl_Obj *const *objv) @@ -2255,6 +2297,7 @@ DictLappendCmd( static int DictAppendCmd( + ClientData dummy, Tcl_Interp *interp, int objc, Tcl_Obj *const *objv) @@ -2326,6 +2369,7 @@ DictAppendCmd( static int DictForCmd( + ClientData dummy, Tcl_Interp *interp, int objc, Tcl_Obj *const *objv) @@ -2450,6 +2494,7 @@ DictForCmd( static int DictSetCmd( + ClientData dummy, Tcl_Interp *interp, int objc, Tcl_Obj *const *objv) @@ -2509,6 +2554,7 @@ DictSetCmd( static int DictUnsetCmd( + ClientData dummy, Tcl_Interp *interp, int objc, Tcl_Obj *const *objv) @@ -2567,6 +2613,7 @@ DictUnsetCmd( static int DictFilterCmd( + ClientData dummy, Tcl_Interp *interp, int objc, Tcl_Obj *const *objv) @@ -2826,6 +2873,7 @@ DictFilterCmd( static int DictUpdateCmd( + ClientData clientData, Tcl_Interp *interp, int objc, Tcl_Obj *const *objv) @@ -2952,6 +3000,7 @@ DictUpdateCmd( static int DictWithCmd( + ClientData dummy, Tcl_Interp *interp, int objc, Tcl_Obj *const *objv) @@ -3138,61 +3187,22 @@ DictWithCmd( int Tcl_DictObjCmd( - /*ignored*/ ClientData clientData, + ClientData clientData, Tcl_Interp *interp, int objc, Tcl_Obj *const *objv) { - static const char *subcommands[] = { - "append", "create", "exists", "filter", "for", - "get", "incr", "info", "keys", "lappend", "merge", - "remove", "replace", "set", "size", "unset", - "update", "values", "with", NULL - }; - enum DictSubcommands { - DICT_APPEND, DICT_CREATE, DICT_EXISTS, DICT_FILTER, DICT_FOR, - DICT_GET, DICT_INCR, DICT_INFO, DICT_KEYS, DICT_LAPPEND, DICT_MERGE, - DICT_REMOVE, DICT_REPLACE, DICT_SET, DICT_SIZE, DICT_UNSET, - DICT_UPDATE, DICT_VALUES, DICT_WITH - }; int index; if (objc < 2) { Tcl_WrongNumArgs(interp, 1, objv, "subcommand ?arg ...?"); return TCL_ERROR; } - if (Tcl_GetIndexFromObj(interp, objv[1], subcommands, "subcommand", - 0, &index) != TCL_OK) { + if (Tcl_GetIndexFromObjStruct(interp, objv[1], &implementationMap[0].name, + sizeof(EnsembleImplMap), "subcommand", 0, &index) != TCL_OK) { return TCL_ERROR; } - switch ((enum DictSubcommands) index) { - case DICT_APPEND: return DictAppendCmd(interp, objc, objv); - case DICT_CREATE: return DictCreateCmd(interp, objc, objv); - case DICT_EXISTS: return DictExistsCmd(interp, objc, objv); - case DICT_FILTER: return DictFilterCmd(interp, objc, objv); - case DICT_FOR: return DictForCmd(interp, objc, objv); - case DICT_GET: return DictGetCmd(interp, objc, objv); - case DICT_INCR: return DictIncrCmd(interp, objc, objv); - case DICT_INFO: return DictInfoCmd(interp, objc, objv); - case DICT_KEYS: return DictKeysCmd(interp, objc, objv); - case DICT_LAPPEND: return DictLappendCmd(interp, objc, objv); - case DICT_MERGE: return DictMergeCmd(interp, objc, objv); - case DICT_REMOVE: return DictRemoveCmd(interp, objc, objv); - case DICT_REPLACE: return DictReplaceCmd(interp, objc, objv); - case DICT_SET: return DictSetCmd(interp, objc, objv); - case DICT_SIZE: return DictSizeCmd(interp, objc, objv); - case DICT_UNSET: return DictUnsetCmd(interp, objc, objv); - case DICT_UPDATE: return DictUpdateCmd(interp, objc, objv); - case DICT_VALUES: return DictValuesCmd(interp, objc, objv); - case DICT_WITH: return DictWithCmd(interp, objc, objv); - } - Tcl_Panic("unexpected fallthrough"); - - /* - * Next line is NOT REACHED - stops compliler complaint though... - */ - - return TCL_ERROR; + return implementationMap[index].proc(clientData, interp, objc, objv); } /* |