From eb8c96f7b284ef2edd736fb55e5ff1ea399c631a Mon Sep 17 00:00:00 2001 From: hobbs Date: Fri, 26 May 2000 08:51:11 +0000 Subject: * generic/tclCmdMZ.c (Tcl_RegsubObjCmd): reworked to operate in Unicode, tweaked for performance. (Tcl_StringObjCmd) changed STR_FIRST/STR_LAST error message to something more understandable, reworked STR_FIRST, STR_LAST, STR_MAP, STR_MATCH, STR_RANGE, STR_REPLACE to operate in Unicode. Removed inneffectual STR_RANGE "special" ByteArray support. Optimized STR_MAP algorithm, especially optimized for one-pair case. Fixed possible mem overrun in STR_INDEX bytearray case. --- generic/tclCmdMZ.c | 535 +++++++++++++++++++++++++---------------------------- 1 file changed, 253 insertions(+), 282 deletions(-) diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c index cbb2f83..cc17067 100644 --- a/generic/tclCmdMZ.c +++ b/generic/tclCmdMZ.c @@ -8,12 +8,12 @@ * * Copyright (c) 1987-1993 The Regents of the University of California. * Copyright (c) 1994-1997 Sun Microsystems, Inc. - * Copyright (c) 1998-1999 by Scriptics Corporation. + * Copyright (c) 1998-2000 Scriptics Corporation. * * See the file "license.terms" for information on usage and redistribution * of this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclCmdMZ.c,v 1.26 2000/04/10 21:08:26 ericm Exp $ + * RCS: @(#) $Id: tclCmdMZ.c,v 1.27 2000/05/26 08:51:11 hobbs Exp $ */ #include "tclInt.h" @@ -61,7 +61,7 @@ typedef struct { static char * TraceVarProc _ANSI_ARGS_((ClientData clientData, Tcl_Interp *interp, char *name1, char *name2, int flags)); - + /* *---------------------------------------------------------------------- * @@ -444,11 +444,12 @@ Tcl_RegsubObjCmd(dummy, interp, objc, objv) int objc; /* Number of arguments. */ Tcl_Obj *CONST objv[]; /* Argument objects. */ { - int i, result, cflags, all, wlen, numMatches, offset; + int idx, result, cflags, all, wlen, wsublen, numMatches, offset; + int start, end, subStart, subEnd, match; Tcl_RegExp regExpr; + Tcl_RegExpInfo info; Tcl_Obj *resultPtr, *varPtr, *objPtr; - Tcl_UniChar *wstring; - char *subspec; + Tcl_UniChar ch, *wsrc, *wfirstChar, *wstring, *wsubspec; static char *options[] = { "-all", "-nocase", "-expanded", @@ -465,16 +466,16 @@ Tcl_RegsubObjCmd(dummy, interp, objc, objv) all = 0; offset = 0; - for (i = 1; i < objc; i++) { + for (idx = 1; idx < objc; idx++) { char *name; int index; - name = Tcl_GetString(objv[i]); + name = Tcl_GetString(objv[idx]); if (name[0] != '-') { break; } - if (Tcl_GetIndexFromObj(interp, objv[i], options, "switch", TCL_EXACT, - &index) != TCL_OK) { + if (Tcl_GetIndexFromObj(interp, objv[idx], options, "switch", + TCL_EXACT, &index) != TCL_OK) { return TCL_ERROR; } switch ((enum options) index) { @@ -503,10 +504,10 @@ Tcl_RegsubObjCmd(dummy, interp, objc, objv) break; } case REGSUB_START: { - if (++i >= objc) { + if (++idx >= objc) { goto endOfForLoop; } - if (Tcl_GetIntFromObj(interp, objv[i], &offset) != TCL_OK) { + if (Tcl_GetIntFromObj(interp, objv[idx], &offset) != TCL_OK) { return TCL_ERROR; } if (offset < 0) { @@ -515,35 +516,36 @@ Tcl_RegsubObjCmd(dummy, interp, objc, objv) break; } case REGSUB_LAST: { - i++; + idx++; goto endOfForLoop; } } } endOfForLoop: - if (objc - i != 4) { + if (objc - idx != 4) { Tcl_WrongNumArgs(interp, 1, objv, "?switches? exp string subSpec varName"); return TCL_ERROR; } - objv += i; + objv += idx; regExpr = Tcl_GetRegExpFromObj(interp, objv[0], cflags); if (regExpr == NULL) { return TCL_ERROR; } + objPtr = objv[1]; + wstring = Tcl_GetUnicode(objPtr); + wlen = Tcl_GetCharLength(objPtr); + wsubspec = Tcl_GetUnicode(objv[2]); + wsublen = Tcl_GetCharLength(objv[2]); + varPtr = objv[3]; + result = TCL_OK; - resultPtr = Tcl_NewObj(); + resultPtr = Tcl_NewUnicodeObj(wstring, 0); Tcl_IncrRefCount(resultPtr); - objPtr = objv[1]; - wlen = Tcl_GetCharLength(objPtr); - wstring = Tcl_GetUnicode(objPtr); - subspec = Tcl_GetString(objv[2]); - varPtr = objv[3]; - /* * The following loop is to handle multiple matches within the * same source string; each iteration handles one match and its @@ -553,10 +555,6 @@ Tcl_RegsubObjCmd(dummy, interp, objc, objv) numMatches = 0; for ( ; offset < wlen; ) { - int start, end, subStart, subEnd, match; - char *src, *firstChar; - char c; - Tcl_RegExpInfo info; /* * The flags argument is set if string is part of a larger string, @@ -598,22 +596,21 @@ Tcl_RegsubObjCmd(dummy, interp, objc, objv) * subSpec to reduce the number of calls to Tcl_SetVar. */ - src = subspec; - firstChar = subspec; - for (c = *src; c != '\0'; src++, c = *src) { - int index; - - if (c == '&') { - index = 0; - } else if (c == '\\') { - c = src[1]; - if ((c >= '0') && (c <= '9')) { - index = c - '0'; - } else if ((c == '\\') || (c == '&')) { - Tcl_AppendToObj(resultPtr, firstChar, src - firstChar); - Tcl_AppendToObj(resultPtr, &c, 1); - firstChar = src + 2; - src++; + wsrc = wfirstChar = wsubspec; + for (ch = *wsrc; ch != '\0'; wsrc++, ch = *wsrc) { + if (ch == '&') { + idx = 0; + } else if (ch == '\\') { + ch = wsrc[1]; + if ((ch >= '0') && (ch <= '9')) { + idx = ch - '0'; + } else if ((ch == '\\') || (ch == '&')) { + *wsrc = ch; + Tcl_AppendUnicodeToObj(resultPtr, wfirstChar, + wsrc - wfirstChar + 1); + *wsrc = '\\'; + wfirstChar = wsrc + 2; + wsrc++; continue; } else { continue; @@ -621,24 +618,25 @@ Tcl_RegsubObjCmd(dummy, interp, objc, objv) } else { continue; } - if (firstChar != src) { - Tcl_AppendToObj(resultPtr, firstChar, src - firstChar); + if (wfirstChar != wsrc) { + Tcl_AppendUnicodeToObj(resultPtr, wfirstChar, + wsrc - wfirstChar); } - if (index <= info.nsubs) { - subStart = info.matches[index].start; - subEnd = info.matches[index].end; + if (idx <= info.nsubs) { + subStart = info.matches[idx].start; + subEnd = info.matches[idx].end; if ((subStart >= 0) && (subEnd >= 0)) { Tcl_AppendUnicodeToObj(resultPtr, wstring + offset + subStart, subEnd - subStart); } } - if (*src == '\\') { - src++; + if (*wsrc == '\\') { + wsrc++; } - firstChar = src + 1; + wfirstChar = wsrc + 1; } - if (firstChar != src) { - Tcl_AppendToObj(resultPtr, firstChar, src - firstChar); + if (wfirstChar != wsrc) { + Tcl_AppendUnicodeToObj(resultPtr, wfirstChar, wsrc - wfirstChar); } if (end == 0) { /* @@ -648,8 +646,9 @@ Tcl_RegsubObjCmd(dummy, interp, objc, objv) Tcl_AppendUnicodeToObj(resultPtr, wstring + offset, 1); offset++; + } else { + offset += end; } - offset += end; if (!all) { break; } @@ -675,8 +674,8 @@ Tcl_RegsubObjCmd(dummy, interp, objc, objv) result = TCL_ERROR; } else { /* - * Set the interpreter's object result to an integer object holding the - * number of matches. + * Set the interpreter's object result to an integer object + * holding the number of matches. */ Tcl_SetIntObj(Tcl_GetObjResult(interp), numMatches); @@ -1021,10 +1020,10 @@ Tcl_StringObjCmd(dummy, interp, objc, objv) for (i = 2; i < objc-2; i++) { string2 = Tcl_GetStringFromObj(objv[i], &length2); if ((length2 > 1) - && strncmp(string2, "-nocase", (size_t) length2) == 0) { + && strncmp(string2, "-nocase", (size_t)length2) == 0) { nocase = 1; } else if ((length2 > 1) - && strncmp(string2, "-length", (size_t) length2) == 0) { + && strncmp(string2, "-length", (size_t)length2) == 0) { if (i+1 >= objc-2) { goto str_cmp_args; } @@ -1103,91 +1102,75 @@ Tcl_StringObjCmd(dummy, interp, objc, objv) break; } case STR_FIRST: { - register char *p, *end; - int match, utflen, start; + Tcl_UniChar *ustring1, *ustring2; + int match, start; if (objc < 4 || objc > 5) { Tcl_WrongNumArgs(interp, 2, objv, - "string1 string2 ?startIndex?"); + "subString string ?startIndex?"); return TCL_ERROR; } /* - * This algorithm fails on improperly formed UTF strings. * We are searching string2 for the sequence string1. */ match = -1; start = 0; - utflen = -1; - string1 = Tcl_GetStringFromObj(objv[2], &length1); - string2 = Tcl_GetStringFromObj(objv[3], &length2); + length2 = -1; + + ustring1 = Tcl_GetUnicode(objv[2]); + length1 = Tcl_GetCharLength(objv[2]); + ustring2 = Tcl_GetUnicode(objv[3]); + length2 = Tcl_GetCharLength(objv[3]); if (objc == 5) { /* - * If a startIndex is specified, we will need to fast forward - * to that point in the string before we think about a match + * If a startIndex is specified, we will need to fast + * forward to that point in the string before we think + * about a match */ - utflen = Tcl_NumUtfChars(string2, length2); - if (TclGetIntForIndex(interp, objv[4], utflen-1, - &start) != TCL_OK) { + if (TclGetIntForIndex(interp, objv[4], length2 - 1, + &start) != TCL_OK) { return TCL_ERROR; } - if (start >= utflen) { + if (start >= length2) { goto str_first_done; } else if (start > 0) { - if (length2 == utflen) { - /* no unicode chars */ - string2 += start; - length2 -= start; - } else { - char *s = Tcl_UtfAtIndex(string2, start); - length2 -= s - string2; - string2 = s; - } + ustring2 += start; + length2 -= start; } } if (length1 > 0) { - end = string2 + length2 - length1 + 1; - for (p = string2; p < end; p++) { + register Tcl_UniChar *p, *end; + + end = ustring2 + length2 - length1 + 1; + for (p = ustring2; p < end; p++) { /* * Scan forward to find the first character. */ - - p = memchr(p, *string1, (unsigned) (end - p)); - if (p == NULL) { - break; - } - if (memcmp(string1, p, (unsigned) length1) == 0) { - match = p - string2; + if ((*p == *ustring1) && + (Tcl_UniCharNcmp(ustring1, p, + (unsigned long) length1) == 0)) { + match = p - ustring2; break; } } } - /* * Compute the character index of the matching string by * counting the number of characters before the match. */ - str_first_done: - if (match != -1) { - if (objc == 4) { - match = Tcl_NumUtfChars(string2, match); - } else if (length2 == utflen) { - /* no unicode chars */ - match += start; - } else { - match = start + Tcl_NumUtfChars(string2, match); - } + if ((match != -1) && (objc == 5)) { + match += start; } + + str_first_done: Tcl_SetIntObj(resultPtr, match); break; } case STR_INDEX: { - char buf[TCL_UTF_MAX]; - Tcl_UniChar unichar; - if (objc != 4) { Tcl_WrongNumArgs(interp, 2, objv, "string charIndex"); return TCL_ERROR; @@ -1201,33 +1184,33 @@ Tcl_StringObjCmd(dummy, interp, objc, objv) */ if (objv[2]->typePtr == &tclByteArrayType) { - - string1 = (char *)Tcl_GetByteArrayFromObj(objv[2], &length1); + string1 = (char *) Tcl_GetByteArrayFromObj(objv[2], &length1); if (TclGetIntForIndex(interp, objv[3], length1 - 1, &index) != TCL_OK) { return TCL_ERROR; } - Tcl_SetByteArrayObj(resultPtr, - (unsigned char *)(&string1[index]), 1); + if ((index >= 0) && (index < length1)) { + Tcl_SetByteArrayObj(resultPtr, + (unsigned char *)(&string1[index]), 1); + } } else { - string1 = Tcl_GetStringFromObj(objv[2], &length1); - /* - * convert to Unicode internal rep to calulate what - * 'end' really means. + * Get Unicode char length to calulate what 'end' means. */ + length1 = Tcl_GetCharLength(objv[2]); - length2 = Tcl_GetCharLength(objv[2]); - - if (TclGetIntForIndex(interp, objv[3], length2 - 1, + if (TclGetIntForIndex(interp, objv[3], length1 - 1, &index) != TCL_OK) { return TCL_ERROR; } - if ((index >= 0) && (index < length2)) { - unichar = Tcl_GetUniChar(objv[2], index); - length2 = Tcl_UniCharToUtf((int)unichar, buf); - Tcl_SetStringObj(resultPtr, buf, length2); + if ((index >= 0) && (index < length1)) { + char buf[TCL_UTF_MAX]; + Tcl_UniChar ch; + + ch = Tcl_GetUniChar(objv[2], index); + length1 = Tcl_UniCharToUtf(ch, buf); + Tcl_SetStringObj(resultPtr, buf, length1); } } break; @@ -1275,7 +1258,8 @@ Tcl_StringObjCmd(dummy, interp, objc, objv) strncmp(string2, "-strict", (size_t) length2) == 0) { strict = 1; } else if ((length2 > 1) && - strncmp(string2, "-failindex", (size_t) length2) == 0) { + strncmp(string2, "-failindex", + (size_t) length2) == 0) { if (i+1 >= objc-1) { Tcl_WrongNumArgs(interp, 3, objv, "?-strict? ?-failindex var? str"); @@ -1508,78 +1492,63 @@ Tcl_StringObjCmd(dummy, interp, objc, objv) break; } case STR_LAST: { - register char *p; - int match, utflen, start; + Tcl_UniChar *ustring1, *ustring2, *p; + int match, start; if (objc < 4 || objc > 5) { Tcl_WrongNumArgs(interp, 2, objv, - "string1 string2 ?startIndex?"); + "subString string ?startIndex?"); return TCL_ERROR; } /* - * This algorithm fails on improperly formed UTF strings. + * We are searching string2 for the sequence string1. */ match = -1; start = 0; - utflen = -1; - string1 = Tcl_GetStringFromObj(objv[2], &length1); - string2 = Tcl_GetStringFromObj(objv[3], &length2); + length2 = -1; + + ustring1 = Tcl_GetUnicode(objv[2]); + length1 = Tcl_GetCharLength(objv[2]); + ustring2 = Tcl_GetUnicode(objv[3]); + length2 = Tcl_GetCharLength(objv[3]); if (objc == 5) { /* * If a startIndex is specified, we will need to restrict * the string range to that char index in the string */ - utflen = Tcl_NumUtfChars(string2, length2); - if (TclGetIntForIndex(interp, objv[4], utflen-1, - &start) != TCL_OK) { + if (TclGetIntForIndex(interp, objv[4], length2 - 1, + &start) != TCL_OK) { return TCL_ERROR; } if (start < 0) { goto str_last_done; - } else if (start < utflen) { - if (length2 == utflen) { - /* no unicode chars */ - p = string2 + start + 1 - length1; - } else { - p = Tcl_UtfAtIndex(string2, start+1) - length1; - } + } else if (start < length2) { + p = ustring2 + start + 1 - length1; } else { - p = string2 + length2 - length1; + p = ustring2 + length2 - length1; } } else { - p = string2 + length2 - length1; + p = ustring2 + length2 - length1; } if (length1 > 0) { - for (; p >= string2; p--) { + for (; p >= ustring2; p--) { /* * Scan backwards to find the first character. */ - - while ((p != string2) && (*p != *string1)) { - p--; - } - if (memcmp(string1, p, (unsigned) length1) == 0) { - match = p - string2; + if ((*p == *ustring1) && + (memcmp((char *) ustring1, (char *) p, (size_t) + (length1 * sizeof(Tcl_UniChar))) == 0)) { + match = p - ustring2; break; } } } - /* - * Compute the character index of the matching string by counting - * the number of characters before the match. - */ - str_last_done: - if (match != -1) { - if ((objc == 4) || (length2 != utflen)) { - /* only check when we've got unicode chars */ - match = Tcl_NumUtfChars(string2, match); - } - } + str_last_done: Tcl_SetIntObj(resultPtr, match); break; } @@ -1592,7 +1561,6 @@ Tcl_StringObjCmd(dummy, interp, objc, objv) if ((enum options) index == STR_BYTELENGTH) { (void) Tcl_GetStringFromObj(objv[2], &length1); - Tcl_SetIntObj(resultPtr, length1); } else { /* * If we have a ByteArray object, avoid recomputing the @@ -1603,20 +1571,19 @@ Tcl_StringObjCmd(dummy, interp, objc, objv) if (objv[2]->typePtr == &tclByteArrayType) { (void) Tcl_GetByteArrayFromObj(objv[2], &length1); - Tcl_SetIntObj(resultPtr, length1); } else { - Tcl_SetIntObj(resultPtr, - Tcl_GetCharLength(objv[2])); + length1 = Tcl_GetCharLength(objv[2]); } } + Tcl_SetIntObj(resultPtr, length1); break; } case STR_MAP: { - int uselen, mapElemc, len, nocase = 0; + int mapElemc, nocase = 0; Tcl_Obj **mapElemv; - char *end; - Tcl_UniChar ch; - int (*str_comp_fn)(); + Tcl_UniChar *ustring1, *ustring2, *p, *end; + int (*strCmpFn)(CONST Tcl_UniChar*, CONST Tcl_UniChar*, + unsigned long); if (objc < 4 || objc > 5) { Tcl_WrongNumArgs(interp, 2, objv, "?-nocase? charMap string"); @@ -1652,63 +1619,111 @@ Tcl_StringObjCmd(dummy, interp, objc, objv) Tcl_SetStringObj(resultPtr, "char map list unbalanced", -1); return TCL_ERROR; } - string1 = Tcl_GetStringFromObj(objv[objc-1], &length1); + objc--; + + ustring1 = Tcl_GetUnicode(objv[objc]); + length1 = Tcl_GetCharLength(objv[objc]); if (length1 == 0) { + /* + * Empty input string, just stop now + */ break; } - end = string1 + length1; + end = ustring1 + length1; - if (nocase) { - length1 = Tcl_NumUtfChars(string1, length1); - str_comp_fn = Tcl_UtfNcasecmp; - } else { - str_comp_fn = memcmp; - } + strCmpFn = (nocase) ? Tcl_UniCharNcasecmp : Tcl_UniCharNcmp; - for ( ; string1 < end; string1 += len) { - len = Tcl_UtfToUniChar(string1, &ch); - for (index = 0; index < mapElemc; index +=2) { - /* - * Get the key string to match on - */ - string2 = Tcl_GetStringFromObj(mapElemv[index], - &length2); - if (nocase) { - uselen = Tcl_NumUtfChars(string2, length2); - } else { - uselen = length2; + /* + * Force result to be Unicode + */ + Tcl_SetUnicodeObj(resultPtr, ustring1, 0); + + if (mapElemc == 2) { + /* + * Special case for one map pair which avoids the extra + * for loop and extra calls to get Unicode data. The + * algorithm is otherwise identical to the multi-pair case. + * This will be >30% faster on larger strings. + */ + Tcl_UniChar *mapString = Tcl_GetUnicode(mapElemv[1]); + int mapLen = Tcl_GetCharLength(mapElemv[1]); + ustring2 = Tcl_GetUnicode(mapElemv[0]); + length2 = Tcl_GetCharLength(mapElemv[0]); + for (p = ustring1; ustring1 < end; ustring1++) { + if ((length2 > 0) && + (nocase || (*ustring1 == *ustring2)) && + (strCmpFn(ustring1, ustring2, + (unsigned long) length2) == 0)) { + if (p != ustring1) { + Tcl_AppendUnicodeToObj(resultPtr, p, + ustring1 - p); + p = ustring1 + length2; + } else { + p += length2; + } + ustring1 = p - 1; + + Tcl_AppendUnicodeToObj(resultPtr, mapString, mapLen); } - if ((uselen > 0) && (uselen <= length1) && - (str_comp_fn(string2, string1, uselen) == 0)) { - /* - * Adjust len to be full length of matched string - * it has to be the BYTE length - */ - len = length2; + } + } else { + Tcl_UniChar **mapStrings = + (Tcl_UniChar **) ckalloc((mapElemc * 2) + * sizeof(Tcl_UniChar *)); + int *mapLens = + (int *) ckalloc((mapElemc * 2) * sizeof(int)); + /* + * Precompute pointers to the unicode string and length. + * This saves us repeated function calls later, + * significantly speeding up the algorithm. + */ + for (index = 0; index < mapElemc; index++) { + mapStrings[index] = Tcl_GetUnicode(mapElemv[index]); + mapLens[index] = Tcl_GetCharLength(mapElemv[index]); + } + for (p = ustring1; ustring1 < end; ustring1++) { + for (index = 0; index < mapElemc; index += 2) { /* - * Change string2 and length2 to the map value + * Get the key string to match on */ - string2 = Tcl_GetStringFromObj(mapElemv[index+1], - &length2); - Tcl_AppendToObj(resultPtr, string2, length2); - break; + ustring2 = mapStrings[index]; + length2 = mapLens[index]; + if ((length2 > 0) && + (nocase || (*ustring1 == *ustring2)) && + (strCmpFn(ustring2, ustring1, + (unsigned long) length2) == 0)) { + if (p != ustring1) { + /* + * Put the skipped chars onto the result first + */ + Tcl_AppendUnicodeToObj(resultPtr, p, + ustring1 - p); + p = ustring1 + length2; + } else { + p += length2; + } + /* + * Adjust len to be full length of matched string + */ + ustring1 = p - 1; + + /* + * Append the map value to the unicode string + */ + Tcl_AppendUnicodeToObj(resultPtr, + mapStrings[index+1], mapLens[index+1]); + break; + } } } - if (index == mapElemc) { - /* - * No match was found, put the char onto result - */ - Tcl_AppendToObj(resultPtr, string1, len); - } + ckfree((char *) mapStrings); + ckfree((char *) mapLens); + } + if (p != ustring1) { /* - * in nocase, length1 is in chars - * otherwise it is in bytes + * Put the rest of the unmapped chars onto result */ - if (nocase) { - length1--; - } else { - length1 -= len; - } + Tcl_AppendUnicodeToObj(resultPtr, p, ustring1 - p); } break; } @@ -1734,9 +1749,8 @@ Tcl_StringObjCmd(dummy, interp, objc, objv) } Tcl_SetBooleanObj(resultPtr, - Tcl_StringCaseMatch(Tcl_GetString(objv[objc-1]), - Tcl_GetString(objv[objc-2]), - nocase)); + Tcl_UniCharCaseMatch(Tcl_GetUnicode(objv[objc-1]), + Tcl_GetUnicode(objv[objc-2]), nocase)); break; } case STR_RANGE: { @@ -1748,64 +1762,24 @@ Tcl_StringObjCmd(dummy, interp, objc, objv) } /* - * If we have a ByteArray object, avoid indexing in the - * Utf string since the byte array contains one byte per - * character. Otherwise, use the Unicode string rep to - * get the range. + * Get the length in actual characters. */ + length1 = Tcl_GetCharLength(objv[2]) - 1; - if (objv[2]->typePtr == &tclByteArrayType) { - - string1 = (char *)Tcl_GetByteArrayFromObj(objv[2], &length1); - - if (TclGetIntForIndex(interp, objv[3], length1 - 1, - &first) != TCL_OK) { - return TCL_ERROR; - } - if (TclGetIntForIndex(interp, objv[4], length1 - 1, - &last) != TCL_OK) { - return TCL_ERROR; - } - if (first < 0) { - first = 0; - } - if (last >= length1 - 1) { - last = length1 - 1; - } - if (last >= first) { - int numBytes = last - first + 1; - resultPtr = Tcl_NewByteArrayObj( - (unsigned char *) &string1[first], numBytes); - Tcl_SetObjResult(interp, resultPtr); - } - } else { - string1 = Tcl_GetStringFromObj(objv[2], &length1); - - /* - * Convert to Unicode internal rep to calulate length and - * create a result object. - */ + if ((TclGetIntForIndex(interp, objv[3], length1, &first) != TCL_OK) + || (TclGetIntForIndex(interp, objv[4], length1, + &last) != TCL_OK)) { + return TCL_ERROR; + } - length2 = Tcl_GetCharLength(objv[2]) - 1; - - if (TclGetIntForIndex(interp, objv[3], length2, - &first) != TCL_OK) { - return TCL_ERROR; - } - if (TclGetIntForIndex(interp, objv[4], length2, - &last) != TCL_OK) { - return TCL_ERROR; - } - if (first < 0) { - first = 0; - } - if (last >= length2) { - last = length2; - } - if (last >= first) { - resultPtr = Tcl_GetRange(objv[2], first, last); - Tcl_SetObjResult(interp, resultPtr); - } + if (first < 0) { + first = 0; + } + if (last >= length1) { + last = length1; + } + if (last >= first) { + Tcl_SetObjResult(interp, Tcl_GetRange(objv[2], first, last)); } break; } @@ -1830,6 +1804,7 @@ Tcl_StringObjCmd(dummy, interp, objc, objv) break; } case STR_REPLACE: { + Tcl_UniChar *ustring1; int first, last; if (objc < 5 || objc > 6) { @@ -1838,33 +1813,29 @@ Tcl_StringObjCmd(dummy, interp, objc, objv) return TCL_ERROR; } - string1 = Tcl_GetStringFromObj(objv[2], &length1); - length1 = Tcl_NumUtfChars(string1, length1) - 1; - if (TclGetIntForIndex(interp, objv[3], length1, - &first) != TCL_OK) { - return TCL_ERROR; - } - if (TclGetIntForIndex(interp, objv[4], length1, - &last) != TCL_OK) { + ustring1 = Tcl_GetUnicode(objv[2]); + length1 = Tcl_GetCharLength(objv[2]) - 1; + + if ((TclGetIntForIndex(interp, objv[3], length1, &first) != TCL_OK) + || (TclGetIntForIndex(interp, objv[4], length1, + &last) != TCL_OK)) { return TCL_ERROR; } - if ((last < first) || (first > length1) || (last < 0)) { + + if ((last < first) || (last < 0) || (first > length1)) { Tcl_SetObjResult(interp, objv[2]); } else { - char *start, *end; - if (first < 0) { first = 0; } - start = Tcl_UtfAtIndex(string1, first); - end = Tcl_UtfAtIndex(start, ((last > length1) ? length1 : last) - - first + 1); - Tcl_SetStringObj(resultPtr, string1, start - string1); + + Tcl_SetUnicodeObj(resultPtr, ustring1, first); if (objc == 6) { Tcl_AppendObjToObj(resultPtr, objv[5]); } if (last < length1) { - Tcl_AppendToObj(resultPtr, end, -1); + Tcl_AppendUnicodeToObj(resultPtr, ustring1 + last + 1, + length1 - last); } } break; -- cgit v0.12