diff options
Diffstat (limited to 'generic/tclCmdMZ.c')
-rw-r--r-- | generic/tclCmdMZ.c | 524 |
1 files changed, 249 insertions, 275 deletions
diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c index ba1fc41..f94c094 100644 --- a/generic/tclCmdMZ.c +++ b/generic/tclCmdMZ.c @@ -258,7 +258,7 @@ Tcl_RegexpObjCmd( stringLength = Tcl_GetCharLength(objPtr); if (startIndex) { - TclGetIntForIndexM(NULL, startIndex, stringLength, &offset); + TclGetIntForIndexM(interp, startIndex, stringLength, &offset); Tcl_DecrRefCount(startIndex); if (offset < 0) { offset = 0; @@ -309,7 +309,7 @@ Tcl_RegexpObjCmd( eflags = 0; } else if (offset > stringLength) { eflags = TCL_REG_NOTBOL; - } else if (Tcl_GetUniChar(objPtr, offset-1) == (Tcl_UniChar)'\n') { + } else if (Tcl_GetUniChar(objPtr, offset-1) == '\n') { eflags = 0; } else { eflags = TCL_REG_NOTBOL; @@ -487,26 +487,27 @@ Tcl_RegsubObjCmd( Tcl_Obj *const objv[]) /* Argument objects. */ { int idx, result, cflags, all, wlen, wsublen, numMatches, offset; - int start, end, subStart, subEnd, match; + int start, end, subStart, subEnd, match, command, numParts; Tcl_RegExp regExpr; Tcl_RegExpInfo info; Tcl_Obj *resultPtr, *subPtr, *objPtr, *startIndex = NULL; - Tcl_UniChar ch, *wsrc, *wfirstChar, *wstring, *wsubspec, *wend; + Tcl_UniChar ch, *wsrc, *wfirstChar, *wstring, *wsubspec = 0, *wend; static const char *const options[] = { - "-all", "-nocase", "-expanded", - "-line", "-linestop", "-lineanchor", "-start", + "-all", "-command", "-expanded", "-line", + "-linestop", "-lineanchor", "-nocase", "-start", "--", NULL }; enum options { - REGSUB_ALL, REGSUB_NOCASE, REGSUB_EXPANDED, - REGSUB_LINE, REGSUB_LINESTOP, REGSUB_LINEANCHOR, REGSUB_START, + REGSUB_ALL, REGSUB_COMMAND, REGSUB_EXPANDED, REGSUB_LINE, + REGSUB_LINESTOP, REGSUB_LINEANCHOR, REGSUB_NOCASE, REGSUB_START, REGSUB_LAST }; cflags = TCL_REG_ADVANCED; all = 0; offset = 0; + command = 0; resultPtr = NULL; for (idx = 1; idx < objc; idx++) { @@ -528,6 +529,9 @@ Tcl_RegsubObjCmd( case REGSUB_NOCASE: cflags |= TCL_REG_NOCASE; break; + case REGSUB_COMMAND: + command = 1; + break; case REGSUB_EXPANDED: cflags |= TCL_REG_EXPANDED; break; @@ -578,14 +582,14 @@ Tcl_RegsubObjCmd( if (startIndex) { int stringLength = Tcl_GetCharLength(objv[1]); - TclGetIntForIndexM(NULL, startIndex, stringLength, &offset); + TclGetIntForIndexM(interp, startIndex, stringLength, &offset); Tcl_DecrRefCount(startIndex); if (offset < 0) { offset = 0; } } - if (all && (offset == 0) + if (all && (offset == 0) && (command == 0) && (strpbrk(TclGetString(objv[2]), "&\\") == NULL) && (strpbrk(TclGetString(objv[0]), "*+?{}()[].\\|^$") == NULL)) { /* @@ -593,9 +597,9 @@ Tcl_RegsubObjCmd( * slightly modified version of the one pair STR_MAP code. */ - int slen, nocase; + int slen, nocase, wsrclc; int (*strCmpFn)(const Tcl_UniChar*,const Tcl_UniChar*,unsigned long); - Tcl_UniChar *p, wsrclc; + Tcl_UniChar *p; numMatches = 0; nocase = (cflags & TCL_REG_NOCASE); @@ -661,6 +665,28 @@ Tcl_RegsubObjCmd( return TCL_ERROR; } + if (command) { + /* + * In command-prefix mode, we require that the third non-option + * argument be a list, so we enforce that here. Afterwards, we fetch + * the RE compilation again in case objv[0] and objv[2] are the same + * object. (If they aren't, that's cheap to do.) + */ + + if (Tcl_ListObjLength(interp, objv[2], &numParts) != TCL_OK) { + return TCL_ERROR; + } + if (numParts < 1) { + Tcl_SetObjResult(interp, Tcl_NewStringObj( + "command prefix must be a list of at least one element", + -1)); + Tcl_SetErrorCode(interp, "TCL", "OPERATION", "REGSUB", + "CMDEMPTY", NULL); + return TCL_ERROR; + } + regExpr = Tcl_GetRegExpFromObj(interp, objv[0], cflags); + } + /* * Make sure to avoid problems where the objects are shared. This can * cause RegExpObj <> UnicodeObj shimmering that causes data corruption. @@ -678,7 +704,9 @@ Tcl_RegsubObjCmd( } else { subPtr = objv[2]; } - wsubspec = Tcl_GetUnicodeFromObj(subPtr, &wsublen); + if (!command) { + wsubspec = Tcl_GetUnicodeFromObj(subPtr, &wsublen); + } result = TCL_OK; @@ -737,6 +765,90 @@ Tcl_RegsubObjCmd( Tcl_AppendUnicodeToObj(resultPtr, wstring + offset, start); /* + * In command-prefix mode, the substitutions are added as quoted + * arguments to the subSpec to form a command, that is then executed + * and the result used as the string to substitute in. Actually, + * everything is passed through Tcl_EvalObjv, as that's much faster. + */ + + if (command) { + Tcl_Obj **args = NULL, **parts; + int numArgs; + + Tcl_ListObjGetElements(interp, subPtr, &numParts, &parts); + numArgs = numParts + info.nsubs + 1; + args = ckalloc(sizeof(Tcl_Obj*) * numArgs); + memcpy(args, parts, sizeof(Tcl_Obj*) * numParts); + + for (idx = 0 ; idx <= info.nsubs ; idx++) { + subStart = info.matches[idx].start; + subEnd = info.matches[idx].end; + if ((subStart >= 0) && (subEnd >= 0)) { + args[idx + numParts] = Tcl_NewUnicodeObj( + wstring + offset + subStart, subEnd - subStart); + } else { + args[idx + numParts] = Tcl_NewObj(); + } + Tcl_IncrRefCount(args[idx + numParts]); + } + + /* + * At this point, we're locally holding the references to the + * argument words we added for this time round the loop, and the + * subPtr is holding the references to the words that the user + * supplied directly. None are zero-refcount, which is important + * because Tcl_EvalObjv is "hairy monster" in terms of refcount + * handling, being able to optionally add references to any of its + * argument words. We'll drop the local refs immediately + * afterwards; subPtr is handled in the main exit stanza. + */ + + result = Tcl_EvalObjv(interp, numArgs, args, 0); + for (idx = 0 ; idx <= info.nsubs ; idx++) { + TclDecrRefCount(args[idx + numParts]); + } + ckfree(args); + if (result != TCL_OK) { + if (result == TCL_ERROR) { + Tcl_AppendObjToErrorInfo(interp, Tcl_ObjPrintf( + "\n (%s substitution computation script)", + options[REGSUB_COMMAND])); + } + goto done; + } + + Tcl_AppendObjToObj(resultPtr, Tcl_GetObjResult(interp)); + Tcl_ResetResult(interp); + + /* + * Refetch the unicode, in case the representation was smashed by + * the user code. + */ + + wstring = Tcl_GetUnicodeFromObj(objPtr, &wlen); + + offset += end; + if (end == 0 || start == end) { + /* + * Always consume at least one character of the input string + * in order to prevent infinite loops, even when we + * technically matched the empty string; we must not match + * again at the same spot. + */ + + if (offset < wlen) { + Tcl_AppendUnicodeToObj(resultPtr, wstring + offset, 1); + } + offset++; + } + if (all) { + continue; + } else { + break; + } + } + + /* * Append the subSpec argument to the variable, making appropriate * substitutions. This code is a bit hairy because of the backslash * conventions and because the code saves up ranges of characters in @@ -1061,7 +1173,7 @@ Tcl_SplitObjCmd( int objc, /* Number of arguments. */ Tcl_Obj *const objv[]) /* Argument objects. */ { - Tcl_UniChar ch; + Tcl_UniChar ch = 0; int len; const char *splitChars; const char *stringPtr; @@ -1104,13 +1216,22 @@ Tcl_SplitObjCmd( Tcl_InitHashTable(&charReuseTable, TCL_ONE_WORD_KEYS); for ( ; stringPtr < end; stringPtr += len) { + int fullchar; len = TclUtfToUniChar(stringPtr, &ch); + fullchar = ch; + +#if TCL_UTF_MAX <= 4 + if (!len) { + len += TclUtfToUniChar(stringPtr, &ch); + fullchar = (((fullchar & 0x3ff) << 10) | (ch & 0x3ff)) + 0x10000; + } +#endif /* * Assume Tcl_UniChar is an integral type... */ - hPtr = Tcl_CreateHashEntry(&charReuseTable, INT2PTR((int) ch), + hPtr = Tcl_CreateHashEntry(&charReuseTable, INT2PTR(fullchar), &isNew); if (isNew) { TclNewStringObj(objPtr, stringPtr, len); @@ -1146,7 +1267,7 @@ Tcl_SplitObjCmd( } else { const char *element, *p, *splitEnd; int splitLen; - Tcl_UniChar splitChar; + Tcl_UniChar splitChar = 0; /* * Normal case: split on any of a given set of characters. Discard @@ -1214,16 +1335,8 @@ StringFirstCmd( if (TCL_OK != TclGetIntForIndexM(interp, objv[3], size - 1, &start)) { return TCL_ERROR; } - - if (start < 0) { - start = 0; - } - if (start >= size) { - Tcl_SetObjResult(interp, Tcl_NewIntObj(-1)); - return TCL_OK; - } } - Tcl_SetObjResult(interp, Tcl_NewIntObj(TclStringFind(objv[1], + Tcl_SetObjResult(interp, Tcl_NewIntObj(TclStringFirst(objv[1], objv[2], start))); return TCL_OK; } @@ -1267,14 +1380,6 @@ StringLastCmd( if (TCL_OK != TclGetIntForIndexM(interp, objv[3], size - 1, &last)) { return TCL_ERROR; } - - if (last < 0) { - Tcl_SetObjResult(interp, Tcl_NewIntObj(-1)); - return TCL_OK; - } - if (last >= size) { - last = size - 1; - } } Tcl_SetObjResult(interp, Tcl_NewIntObj(TclStringLast(objv[1], objv[2], last))); @@ -1314,7 +1419,7 @@ StringIndexCmd( } /* - * Get the char length to calulate what 'end' means. + * Get the char length to calculate what 'end' means. */ length = Tcl_GetCharLength(objv[1]); @@ -1323,7 +1428,11 @@ StringIndexCmd( } if ((index >= 0) && (index < length)) { - Tcl_UniChar ch = Tcl_GetUniChar(objv[1], index); + int ch = Tcl_GetUniChar(objv[1], index); + + if (ch == -1) { + return TCL_OK; + } /* * If we have a ByteArray object, we're careful to generate a new @@ -1335,9 +1444,12 @@ StringIndexCmd( Tcl_SetObjResult(interp, Tcl_NewByteArrayObj(&uch, 1)); } else { - char buf[TCL_UTF_MAX]; + char buf[4]; length = Tcl_UniCharToUtf(ch, buf); + if (!length) { + length = Tcl_UniCharToUtf(-1, buf); + } Tcl_SetObjResult(interp, Tcl_NewStringObj(buf, length)); } } @@ -1370,7 +1482,7 @@ StringIsCmd( Tcl_Obj *const objv[]) /* Argument objects. */ { const char *string1, *end, *stop; - Tcl_UniChar ch; + Tcl_UniChar ch = 0; int (*chcomp)(int) = NULL; /* The UniChar comparison function. */ int i, failat = 0, result = 1, strict = 0, index, length1, length2; Tcl_Obj *objPtr, *failVarObj = NULL; @@ -1467,11 +1579,11 @@ StringIsCmd( string1 = TclGetStringFromObj(objPtr, &length1); result = length1 == 0; } - } else if (((index == STR_IS_TRUE) && - objPtr->internalRep.longValue == 0) - || ((index == STR_IS_FALSE) && - objPtr->internalRep.longValue != 0)) { - result = 0; + } else if (index != STR_IS_BOOL) { + TclGetBooleanFromObj(NULL, objPtr, &i); + if ((index == STR_IS_TRUE) ^ i) { + result = 0; + } } break; case STR_IS_CONTROL: @@ -1481,12 +1593,8 @@ StringIsCmd( chcomp = Tcl_UniCharIsDigit; break; case STR_IS_DOUBLE: { - /* TODO */ if ((objPtr->typePtr == &tclDoubleType) || (objPtr->typePtr == &tclIntType) || -#ifndef TCL_WIDE_INT_IS_LONG - (objPtr->typePtr == &tclWideIntType) || -#endif (objPtr->typePtr == &tclBignumType)) { break; } @@ -1515,15 +1623,8 @@ StringIsCmd( chcomp = Tcl_UniCharIsGraph; break; case STR_IS_INT: - if (TCL_OK == TclGetIntFromObj(NULL, objPtr, &i)) { - break; - } - goto failedIntParse; case STR_IS_ENTIER: if ((objPtr->typePtr == &tclIntType) || -#ifndef TCL_WIDE_INT_IS_LONG - (objPtr->typePtr == &tclWideIntType) || -#endif (objPtr->typePtr == &tclBignumType)) { break; } @@ -1565,11 +1666,10 @@ StringIsCmd( } break; case STR_IS_WIDE: - if (TCL_OK == Tcl_GetWideIntFromObj(NULL, objPtr, &w)) { + if (TCL_OK == TclGetWideIntFromObj(NULL, objPtr, &w)) { break; } - failedIntParse: string1 = TclGetStringFromObj(objPtr, &length1); if (length1 == 0) { if (strict) { @@ -1702,8 +1802,16 @@ StringIsCmd( } end = string1 + length1; for (; string1 < end; string1 += length2, failat++) { + int fullchar; length2 = TclUtfToUniChar(string1, &ch); - if (!chcomp(ch)) { + fullchar = ch; +#if TCL_UTF_MAX <= 4 + if (!length2) { + length2 = TclUtfToUniChar(string1, &ch); + fullchar = (((fullchar & 0x3ff) << 10) | (ch & 0x3ff)) + 0x10000; + } +#endif + if (!chcomp(fullchar)) { result = 0; break; } @@ -1736,7 +1844,7 @@ static int UniCharIsHexDigit( int character) { - return (character >= 0) && (character < 0x80) && isxdigit(character); + return (character >= 0) && (character < 0x80) && isxdigit(UCHAR(character)); } /* @@ -1891,8 +1999,8 @@ StringMapCmd( * larger strings. */ - int mapLen; - Tcl_UniChar *mapString, u2lc; + int mapLen, u2lc; + Tcl_UniChar *mapString; ustring2 = Tcl_GetUnicodeFromObj(mapElemv[0], &length2); p = ustring1; @@ -1923,8 +2031,8 @@ StringMapCmd( } } } else { - Tcl_UniChar **mapStrings, *u2lc = NULL; - int *mapLens; + Tcl_UniChar **mapStrings; + int *mapLens, *u2lc = NULL; /* * Precompute pointers to the unicode string and length. This saves us @@ -1936,7 +2044,7 @@ StringMapCmd( mapStrings = TclStackAlloc(interp, mapElemc*2*sizeof(Tcl_UniChar *)); mapLens = TclStackAlloc(interp, mapElemc * 2 * sizeof(int)); if (nocase) { - u2lc = TclStackAlloc(interp, mapElemc * sizeof(Tcl_UniChar)); + u2lc = TclStackAlloc(interp, mapElemc * sizeof(int)); } for (index = 0; index < mapElemc; index++) { mapStrings[index] = Tcl_GetUnicodeFromObj(mapElemv[index], @@ -2165,11 +2273,12 @@ StringReptCmd( return TCL_OK; } - if (TCL_OK != TclStringRepeat(interp, objv[1], count, &resultPtr)) { - return TCL_ERROR; + resultPtr = TclStringRepeat(interp, objv[1], count, TCL_STRING_IN_PLACE); + if (resultPtr) { + Tcl_SetObjResult(interp, resultPtr); + return TCL_OK; } - Tcl_SetObjResult(interp, resultPtr); - return TCL_OK; + return TCL_ERROR; } /* @@ -2197,42 +2306,50 @@ StringRplcCmd( int objc, /* Number of arguments. */ Tcl_Obj *const objv[]) /* Argument objects. */ { - Tcl_UniChar *ustring; - int first, last, length; + int first, last, length, end; if (objc < 4 || objc > 5) { Tcl_WrongNumArgs(interp, 1, objv, "string first last ?string?"); return TCL_ERROR; } - ustring = Tcl_GetUnicodeFromObj(objv[1], &length); - length--; + length = Tcl_GetCharLength(objv[1]); + end = length - 1; - if (TclGetIntForIndexM(interp, objv[2], length, &first) != TCL_OK || - TclGetIntForIndexM(interp, objv[3], length, &last) != TCL_OK){ + if (TclGetIntForIndexM(interp, objv[2], end, &first) != TCL_OK || + TclGetIntForIndexM(interp, objv[3], end, &last) != TCL_OK){ return TCL_ERROR; } - if ((last < first) || (last < 0) || (first > length)) { + /* + * The following test screens out most empty substrings as + * candidates for replacement. When they are detected, no + * replacement is done, and the result is the original string, + */ + if ((last < 0) || /* Range ends before start of string */ + (first > end) || /* Range begins after end of string */ + (last < first)) { /* Range begins after it starts */ + + /* + * BUT!!! when (end < 0) -- an empty original string -- we can + * have (first <= end < 0 <= last) and an empty string is permitted + * to be replaced. + */ Tcl_SetObjResult(interp, objv[1]); } else { Tcl_Obj *resultPtr; - ustring = Tcl_GetUnicodeFromObj(objv[1], &length); - length--; - if (first < 0) { first = 0; } - - resultPtr = Tcl_NewUnicodeObj(ustring, first); - if (objc == 5) { - Tcl_AppendObjToObj(resultPtr, objv[4]); - } - if (last < length) { - Tcl_AppendUnicodeToObj(resultPtr, ustring + last + 1, - length - last); + if (last > end) { + last = end; } + + resultPtr = TclStringReplace(interp, objv[1], first, + last + 1 - first, (objc == 5) ? objv[4] : NULL, + TCL_STRING_IN_PLACE); + Tcl_SetObjResult(interp, resultPtr); } return TCL_OK; @@ -2268,7 +2385,7 @@ StringRevCmd( return TCL_ERROR; } - Tcl_SetObjResult(interp, TclStringObjReverse(objv[1])); + Tcl_SetObjResult(interp, TclStringReverse(objv[1], TCL_STRING_IN_PLACE)); return TCL_OK; } @@ -2298,7 +2415,7 @@ StringStartCmd( int objc, /* Number of arguments. */ Tcl_Obj *const objv[]) /* Argument objects. */ { - Tcl_UniChar ch; + Tcl_UniChar ch = 0; const char *p, *string; int cur, index, length, numChars; @@ -2359,7 +2476,7 @@ StringEndCmd( int objc, /* Number of arguments. */ Tcl_Obj *const objv[]) /* Argument objects. */ { - Tcl_UniChar ch; + Tcl_UniChar ch = 0; const char *p, *end, *string; int cur, index, length, numChars; @@ -2427,10 +2544,8 @@ StringEqualCmd( * the expr string comparison in INST_EQ/INST_NEQ/INST_LT/...). */ - const char *string1, *string2; - int length1, length2, i, match, length, nocase = 0, reqlength = -1; - typedef int (*strCmpFn_t)(const char *, const char *, unsigned int); - strCmpFn_t strCmpFn; + const char *string2; + int length, i, match, nocase = 0, reqlength = -1; if (objc < 3 || objc > 6) { str_cmp_args: @@ -2440,11 +2555,11 @@ StringEqualCmd( } for (i = 1; i < objc-2; i++) { - string2 = TclGetStringFromObj(objv[i], &length2); - if ((length2 > 1) && !strncmp(string2, "-nocase", (size_t)length2)) { + string2 = TclGetStringFromObj(objv[i], &length); + if ((length > 1) && !strncmp(string2, "-nocase", (size_t)length)) { nocase = 1; - } else if ((length2 > 1) - && !strncmp(string2, "-length", (size_t)length2)) { + } else if ((length > 1) + && !strncmp(string2, "-length", (size_t)length)) { if (i+1 >= objc-2) { goto str_cmp_args; } @@ -2469,78 +2584,7 @@ StringEqualCmd( objv += objc-2; - if ((reqlength == 0) || (objv[0] == objv[1])) { - /* - * Always match at 0 chars of if it is the same obj. - */ - - Tcl_SetObjResult(interp, Tcl_NewBooleanObj(1)); - return TCL_OK; - } - - if (!nocase && TclIsPureByteArray(objv[0]) && - TclIsPureByteArray(objv[1])) { - /* - * Use binary versions of comparisons since that won't cause undue - * type conversions and it is much faster. Only do this if we're - * case-sensitive (which is all that really makes sense with byte - * arrays anyway, and we have no memcasecmp() for some reason... :^) - */ - - string1 = (char *) Tcl_GetByteArrayFromObj(objv[0], &length1); - string2 = (char *) Tcl_GetByteArrayFromObj(objv[1], &length2); - strCmpFn = (strCmpFn_t) memcmp; - } else if ((objv[0]->typePtr == &tclStringType) - && (objv[1]->typePtr == &tclStringType)) { - /* - * Do a unicode-specific comparison if both of the args are of String - * type. In benchmark testing this proved the most efficient check - * between the unicode and string comparison operations. - */ - - string1 = (char *) Tcl_GetUnicodeFromObj(objv[0], &length1); - string2 = (char *) Tcl_GetUnicodeFromObj(objv[1], &length2); - strCmpFn = (strCmpFn_t) - (nocase ? Tcl_UniCharNcasecmp : Tcl_UniCharNcmp); - } else { - /* - * As a catch-all we will work with UTF-8. We cannot use memcmp() as - * that is unsafe with any string containing NUL (\xC0\x80 in Tcl's - * utf rep). We can use the more efficient TclpUtfNcmp2 if we are - * case-sensitive and no specific length was requested. - */ - - string1 = (char *) TclGetStringFromObj(objv[0], &length1); - string2 = (char *) TclGetStringFromObj(objv[1], &length2); - if ((reqlength < 0) && !nocase) { - strCmpFn = (strCmpFn_t) TclpUtfNcmp2; - } else { - length1 = Tcl_NumUtfChars(string1, length1); - length2 = Tcl_NumUtfChars(string2, length2); - strCmpFn = (strCmpFn_t) (nocase ? Tcl_UtfNcasecmp : Tcl_UtfNcmp); - } - } - - if ((reqlength < 0) && (length1 != length2)) { - match = 1; /* This will be reversed below. */ - } else { - length = (length1 < length2) ? length1 : length2; - if (reqlength > 0 && reqlength < length) { - length = reqlength; - } else if (reqlength < 0) { - /* - * The requested length is negative, so we ignore it by setting it - * to length + 1 so we correct the match var. - */ - - reqlength = length + 1; - } - - match = strCmpFn(string1, string2, (unsigned) length); - if ((match == 0) && (reqlength > length)) { - match = length1 - length2; - } - } + match = TclStringCmp(objv[0], objv[1], 0, nocase, reqlength); Tcl_SetObjResult(interp, Tcl_NewBooleanObj(match ? 0 : 1)); return TCL_OK; @@ -2577,11 +2621,31 @@ StringCmpCmd( * the expr string comparison in INST_EQ/INST_NEQ/INST_LT/...). */ - const char *string1, *string2; - int length1, length2, i, match, length, nocase = 0, reqlength = -1; - typedef int (*strCmpFn_t)(const char *, const char *, unsigned int); - strCmpFn_t strCmpFn; + int match, nocase, reqlength, status; + + status = TclStringCmpOpts(interp, objc, objv, &nocase, &reqlength); + if (status != TCL_OK) { + return status; + } + objv += objc-2; + match = TclStringCmp(objv[0], objv[1], 0, nocase, reqlength); + Tcl_SetObjResult(interp, Tcl_NewIntObj(match)); + return TCL_OK; +} + +int TclStringCmpOpts( + Tcl_Interp *interp, /* Current interpreter. */ + int objc, /* Number of arguments. */ + Tcl_Obj *const objv[], /* Argument objects. */ + int *nocase, + int *reqlength) +{ + int i, length; + const char *string; + + *reqlength = -1; + *nocase = 0; if (objc < 3 || objc > 6) { str_cmp_args: Tcl_WrongNumArgs(interp, 1, objv, @@ -2590,106 +2654,27 @@ StringCmpCmd( } for (i = 1; i < objc-2; i++) { - string2 = TclGetStringFromObj(objv[i], &length2); - if ((length2 > 1) && !strncmp(string2, "-nocase", (size_t)length2)) { - nocase = 1; - } else if ((length2 > 1) - && !strncmp(string2, "-length", (size_t)length2)) { + string = TclGetStringFromObj(objv[i], &length); + if ((length > 1) && !strncmp(string, "-nocase", (size_t)length)) { + *nocase = 1; + } else if ((length > 1) + && !strncmp(string, "-length", (size_t)length)) { if (i+1 >= objc-2) { goto str_cmp_args; } i++; - if (TclGetIntFromObj(interp, objv[i], &reqlength) != TCL_OK) { + if (TclGetIntFromObj(interp, objv[i], reqlength) != TCL_OK) { return TCL_ERROR; } } else { Tcl_SetObjResult(interp, Tcl_ObjPrintf( "bad option \"%s\": must be -nocase or -length", - string2)); + string)); Tcl_SetErrorCode(interp, "TCL", "LOOKUP", "INDEX", "option", - string2, NULL); + string, NULL); return TCL_ERROR; } } - - /* - * From now on, we only access the two objects at the end of the argument - * array. - */ - - objv += objc-2; - - if ((reqlength == 0) || (objv[0] == objv[1])) { - /* - * Always match at 0 chars of if it is the same obj. - */ - - Tcl_SetObjResult(interp, Tcl_NewBooleanObj(0)); - return TCL_OK; - } - - if (!nocase && TclIsPureByteArray(objv[0]) && - TclIsPureByteArray(objv[1])) { - /* - * Use binary versions of comparisons since that won't cause undue - * type conversions and it is much faster. Only do this if we're - * case-sensitive (which is all that really makes sense with byte - * arrays anyway, and we have no memcasecmp() for some reason... :^) - */ - - string1 = (char *) Tcl_GetByteArrayFromObj(objv[0], &length1); - string2 = (char *) Tcl_GetByteArrayFromObj(objv[1], &length2); - strCmpFn = (strCmpFn_t) memcmp; - } else if ((objv[0]->typePtr == &tclStringType) - && (objv[1]->typePtr == &tclStringType)) { - /* - * Do a unicode-specific comparison if both of the args are of String - * type. In benchmark testing this proved the most efficient check - * between the unicode and string comparison operations. - */ - - string1 = (char *) Tcl_GetUnicodeFromObj(objv[0], &length1); - string2 = (char *) Tcl_GetUnicodeFromObj(objv[1], &length2); - strCmpFn = (strCmpFn_t) - (nocase ? Tcl_UniCharNcasecmp : Tcl_UniCharNcmp); - } else { - /* - * As a catch-all we will work with UTF-8. We cannot use memcmp() as - * that is unsafe with any string containing NUL (\xC0\x80 in Tcl's - * utf rep). We can use the more efficient TclpUtfNcmp2 if we are - * case-sensitive and no specific length was requested. - */ - - string1 = (char *) TclGetStringFromObj(objv[0], &length1); - string2 = (char *) TclGetStringFromObj(objv[1], &length2); - if ((reqlength < 0) && !nocase) { - strCmpFn = (strCmpFn_t) TclpUtfNcmp2; - } else { - length1 = Tcl_NumUtfChars(string1, length1); - length2 = Tcl_NumUtfChars(string2, length2); - strCmpFn = (strCmpFn_t) (nocase ? Tcl_UtfNcasecmp : Tcl_UtfNcmp); - } - } - - length = (length1 < length2) ? length1 : length2; - if (reqlength > 0 && reqlength < length) { - length = reqlength; - } else if (reqlength < 0) { - /* - * The requested length is negative, so we ignore it by setting it to - * length + 1 so we correct the match var. - */ - - reqlength = length + 1; - } - - match = strCmpFn(string1, string2, (unsigned) length); - if ((match == 0) && (reqlength > length)) { - match = length1 - length2; - } - - Tcl_SetObjResult(interp, - Tcl_NewIntObj((match > 0) ? 1 : (match < 0) ? -1 : 0)); return TCL_OK; } @@ -2717,7 +2702,6 @@ StringCatCmd( int objc, /* Number of arguments. */ Tcl_Obj *const objv[]) /* Argument objects. */ { - int code; Tcl_Obj *objResultPtr; if (objc < 2) { @@ -2727,23 +2711,15 @@ StringCatCmd( */ return TCL_OK; } - if (objc == 2) { - /* - * Other trivial case, single arg, just return it. - */ - Tcl_SetObjResult(interp, objv[1]); - return TCL_OK; - } - code = TclStringCatObjv(interp, /* inPlace */ 1, objc-1, objv+1, - &objResultPtr); + objResultPtr = TclStringCat(interp, objc-1, objv+1, TCL_STRING_IN_PLACE); - if (code == TCL_OK) { + if (objResultPtr) { Tcl_SetObjResult(interp, objResultPtr); return TCL_OK; } - return code; + return TCL_ERROR; } /* @@ -2764,7 +2740,6 @@ StringCatCmd( * *---------------------------------------------------------------------- */ - static int StringBytesCmd( ClientData dummy, /* Not used. */ @@ -3112,8 +3087,7 @@ StringTrimCmd( } string1 = TclGetStringFromObj(objv[1], &length1); - triml = TclTrimLeft(string1, length1, string2, length2); - trimr = TclTrimRight(string1 + triml, length1 - triml, string2, length2); + triml = TclTrim(string1, length1, string2, length2, &trimr); Tcl_SetObjResult(interp, Tcl_NewStringObj(string1 + triml, length1 - triml - trimr)); @@ -3418,7 +3392,7 @@ TclNRSwitchObjCmd( OPT_LAST }; typedef int (*strCmpFn_t)(const char *, const char *); - strCmpFn_t strCmpFn = strcmp; + strCmpFn_t strCmpFn = TclUtfCmp; mode = OPT_EXACT; foundmode = 0; @@ -4181,7 +4155,7 @@ TclNRTryObjCmd( } info[0] = objv[i]; /* type */ - TclNewLongObj(info[1], code); /* returnCode */ + TclNewIntObj(info[1], code); /* returnCode */ if (info[2] == NULL) { /* errorCodePrefix */ TclNewObj(info[2]); } |