summaryrefslogtreecommitdiffstats
path: root/generic/tclCmdMZ.c
diff options
context:
space:
mode:
authorhobbs <hobbs>2000-05-26 08:51:11 (GMT)
committerhobbs <hobbs>2000-05-26 08:51:11 (GMT)
commiteb8c96f7b284ef2edd736fb55e5ff1ea399c631a (patch)
tree58b6034b2d4a816d2f5b30780b07796d2d8800dc /generic/tclCmdMZ.c
parentbce186e5e3868fcc8c95aa589ff668f6fb767758 (diff)
downloadtcl-eb8c96f7b284ef2edd736fb55e5ff1ea399c631a.zip
tcl-eb8c96f7b284ef2edd736fb55e5ff1ea399c631a.tar.gz
tcl-eb8c96f7b284ef2edd736fb55e5ff1ea399c631a.tar.bz2
* generic/tclCmdMZ.c (Tcl_RegsubObjCmd): reworked to operate in
Unicode, tweaked for performance. (Tcl_StringObjCmd) changed STR_FIRST/STR_LAST error message to something more understandable, reworked STR_FIRST, STR_LAST, STR_MAP, STR_MATCH, STR_RANGE, STR_REPLACE to operate in Unicode. Removed inneffectual STR_RANGE "special" ByteArray support. Optimized STR_MAP algorithm, especially optimized for one-pair case. Fixed possible mem overrun in STR_INDEX bytearray case.
Diffstat (limited to 'generic/tclCmdMZ.c')
-rw-r--r--generic/tclCmdMZ.c535
1 files changed, 253 insertions, 282 deletions
diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c
index cbb2f83..cc17067 100644
--- a/generic/tclCmdMZ.c
+++ b/generic/tclCmdMZ.c
@@ -8,12 +8,12 @@
*
* Copyright (c) 1987-1993 The Regents of the University of California.
* Copyright (c) 1994-1997 Sun Microsystems, Inc.
- * Copyright (c) 1998-1999 by Scriptics Corporation.
+ * Copyright (c) 1998-2000 Scriptics Corporation.
*
* See the file "license.terms" for information on usage and redistribution
* of this file, and for a DISCLAIMER OF ALL WARRANTIES.
*
- * RCS: @(#) $Id: tclCmdMZ.c,v 1.26 2000/04/10 21:08:26 ericm Exp $
+ * RCS: @(#) $Id: tclCmdMZ.c,v 1.27 2000/05/26 08:51:11 hobbs Exp $
*/
#include "tclInt.h"
@@ -61,7 +61,7 @@ typedef struct {
static char * TraceVarProc _ANSI_ARGS_((ClientData clientData,
Tcl_Interp *interp, char *name1, char *name2,
int flags));
-
+
/*
*----------------------------------------------------------------------
*
@@ -444,11 +444,12 @@ Tcl_RegsubObjCmd(dummy, interp, objc, objv)
int objc; /* Number of arguments. */
Tcl_Obj *CONST objv[]; /* Argument objects. */
{
- int i, result, cflags, all, wlen, numMatches, offset;
+ int idx, result, cflags, all, wlen, wsublen, numMatches, offset;
+ int start, end, subStart, subEnd, match;
Tcl_RegExp regExpr;
+ Tcl_RegExpInfo info;
Tcl_Obj *resultPtr, *varPtr, *objPtr;
- Tcl_UniChar *wstring;
- char *subspec;
+ Tcl_UniChar ch, *wsrc, *wfirstChar, *wstring, *wsubspec;
static char *options[] = {
"-all", "-nocase", "-expanded",
@@ -465,16 +466,16 @@ Tcl_RegsubObjCmd(dummy, interp, objc, objv)
all = 0;
offset = 0;
- for (i = 1; i < objc; i++) {
+ for (idx = 1; idx < objc; idx++) {
char *name;
int index;
- name = Tcl_GetString(objv[i]);
+ name = Tcl_GetString(objv[idx]);
if (name[0] != '-') {
break;
}
- if (Tcl_GetIndexFromObj(interp, objv[i], options, "switch", TCL_EXACT,
- &index) != TCL_OK) {
+ if (Tcl_GetIndexFromObj(interp, objv[idx], options, "switch",
+ TCL_EXACT, &index) != TCL_OK) {
return TCL_ERROR;
}
switch ((enum options) index) {
@@ -503,10 +504,10 @@ Tcl_RegsubObjCmd(dummy, interp, objc, objv)
break;
}
case REGSUB_START: {
- if (++i >= objc) {
+ if (++idx >= objc) {
goto endOfForLoop;
}
- if (Tcl_GetIntFromObj(interp, objv[i], &offset) != TCL_OK) {
+ if (Tcl_GetIntFromObj(interp, objv[idx], &offset) != TCL_OK) {
return TCL_ERROR;
}
if (offset < 0) {
@@ -515,35 +516,36 @@ Tcl_RegsubObjCmd(dummy, interp, objc, objv)
break;
}
case REGSUB_LAST: {
- i++;
+ idx++;
goto endOfForLoop;
}
}
}
endOfForLoop:
- if (objc - i != 4) {
+ if (objc - idx != 4) {
Tcl_WrongNumArgs(interp, 1, objv,
"?switches? exp string subSpec varName");
return TCL_ERROR;
}
- objv += i;
+ objv += idx;
regExpr = Tcl_GetRegExpFromObj(interp, objv[0], cflags);
if (regExpr == NULL) {
return TCL_ERROR;
}
+ objPtr = objv[1];
+ wstring = Tcl_GetUnicode(objPtr);
+ wlen = Tcl_GetCharLength(objPtr);
+ wsubspec = Tcl_GetUnicode(objv[2]);
+ wsublen = Tcl_GetCharLength(objv[2]);
+ varPtr = objv[3];
+
result = TCL_OK;
- resultPtr = Tcl_NewObj();
+ resultPtr = Tcl_NewUnicodeObj(wstring, 0);
Tcl_IncrRefCount(resultPtr);
- objPtr = objv[1];
- wlen = Tcl_GetCharLength(objPtr);
- wstring = Tcl_GetUnicode(objPtr);
- subspec = Tcl_GetString(objv[2]);
- varPtr = objv[3];
-
/*
* The following loop is to handle multiple matches within the
* same source string; each iteration handles one match and its
@@ -553,10 +555,6 @@ Tcl_RegsubObjCmd(dummy, interp, objc, objv)
numMatches = 0;
for ( ; offset < wlen; ) {
- int start, end, subStart, subEnd, match;
- char *src, *firstChar;
- char c;
- Tcl_RegExpInfo info;
/*
* The flags argument is set if string is part of a larger string,
@@ -598,22 +596,21 @@ Tcl_RegsubObjCmd(dummy, interp, objc, objv)
* subSpec to reduce the number of calls to Tcl_SetVar.
*/
- src = subspec;
- firstChar = subspec;
- for (c = *src; c != '\0'; src++, c = *src) {
- int index;
-
- if (c == '&') {
- index = 0;
- } else if (c == '\\') {
- c = src[1];
- if ((c >= '0') && (c <= '9')) {
- index = c - '0';
- } else if ((c == '\\') || (c == '&')) {
- Tcl_AppendToObj(resultPtr, firstChar, src - firstChar);
- Tcl_AppendToObj(resultPtr, &c, 1);
- firstChar = src + 2;
- src++;
+ wsrc = wfirstChar = wsubspec;
+ for (ch = *wsrc; ch != '\0'; wsrc++, ch = *wsrc) {
+ if (ch == '&') {
+ idx = 0;
+ } else if (ch == '\\') {
+ ch = wsrc[1];
+ if ((ch >= '0') && (ch <= '9')) {
+ idx = ch - '0';
+ } else if ((ch == '\\') || (ch == '&')) {
+ *wsrc = ch;
+ Tcl_AppendUnicodeToObj(resultPtr, wfirstChar,
+ wsrc - wfirstChar + 1);
+ *wsrc = '\\';
+ wfirstChar = wsrc + 2;
+ wsrc++;
continue;
} else {
continue;
@@ -621,24 +618,25 @@ Tcl_RegsubObjCmd(dummy, interp, objc, objv)
} else {
continue;
}
- if (firstChar != src) {
- Tcl_AppendToObj(resultPtr, firstChar, src - firstChar);
+ if (wfirstChar != wsrc) {
+ Tcl_AppendUnicodeToObj(resultPtr, wfirstChar,
+ wsrc - wfirstChar);
}
- if (index <= info.nsubs) {
- subStart = info.matches[index].start;
- subEnd = info.matches[index].end;
+ if (idx <= info.nsubs) {
+ subStart = info.matches[idx].start;
+ subEnd = info.matches[idx].end;
if ((subStart >= 0) && (subEnd >= 0)) {
Tcl_AppendUnicodeToObj(resultPtr,
wstring + offset + subStart, subEnd - subStart);
}
}
- if (*src == '\\') {
- src++;
+ if (*wsrc == '\\') {
+ wsrc++;
}
- firstChar = src + 1;
+ wfirstChar = wsrc + 1;
}
- if (firstChar != src) {
- Tcl_AppendToObj(resultPtr, firstChar, src - firstChar);
+ if (wfirstChar != wsrc) {
+ Tcl_AppendUnicodeToObj(resultPtr, wfirstChar, wsrc - wfirstChar);
}
if (end == 0) {
/*
@@ -648,8 +646,9 @@ Tcl_RegsubObjCmd(dummy, interp, objc, objv)
Tcl_AppendUnicodeToObj(resultPtr, wstring + offset, 1);
offset++;
+ } else {
+ offset += end;
}
- offset += end;
if (!all) {
break;
}
@@ -675,8 +674,8 @@ Tcl_RegsubObjCmd(dummy, interp, objc, objv)
result = TCL_ERROR;
} else {
/*
- * Set the interpreter's object result to an integer object holding the
- * number of matches.
+ * Set the interpreter's object result to an integer object
+ * holding the number of matches.
*/
Tcl_SetIntObj(Tcl_GetObjResult(interp), numMatches);
@@ -1021,10 +1020,10 @@ Tcl_StringObjCmd(dummy, interp, objc, objv)
for (i = 2; i < objc-2; i++) {
string2 = Tcl_GetStringFromObj(objv[i], &length2);
if ((length2 > 1)
- && strncmp(string2, "-nocase", (size_t) length2) == 0) {
+ && strncmp(string2, "-nocase", (size_t)length2) == 0) {
nocase = 1;
} else if ((length2 > 1)
- && strncmp(string2, "-length", (size_t) length2) == 0) {
+ && strncmp(string2, "-length", (size_t)length2) == 0) {
if (i+1 >= objc-2) {
goto str_cmp_args;
}
@@ -1103,91 +1102,75 @@ Tcl_StringObjCmd(dummy, interp, objc, objv)
break;
}
case STR_FIRST: {
- register char *p, *end;
- int match, utflen, start;
+ Tcl_UniChar *ustring1, *ustring2;
+ int match, start;
if (objc < 4 || objc > 5) {
Tcl_WrongNumArgs(interp, 2, objv,
- "string1 string2 ?startIndex?");
+ "subString string ?startIndex?");
return TCL_ERROR;
}
/*
- * This algorithm fails on improperly formed UTF strings.
* We are searching string2 for the sequence string1.
*/
match = -1;
start = 0;
- utflen = -1;
- string1 = Tcl_GetStringFromObj(objv[2], &length1);
- string2 = Tcl_GetStringFromObj(objv[3], &length2);
+ length2 = -1;
+
+ ustring1 = Tcl_GetUnicode(objv[2]);
+ length1 = Tcl_GetCharLength(objv[2]);
+ ustring2 = Tcl_GetUnicode(objv[3]);
+ length2 = Tcl_GetCharLength(objv[3]);
if (objc == 5) {
/*
- * If a startIndex is specified, we will need to fast forward
- * to that point in the string before we think about a match
+ * If a startIndex is specified, we will need to fast
+ * forward to that point in the string before we think
+ * about a match
*/
- utflen = Tcl_NumUtfChars(string2, length2);
- if (TclGetIntForIndex(interp, objv[4], utflen-1,
- &start) != TCL_OK) {
+ if (TclGetIntForIndex(interp, objv[4], length2 - 1,
+ &start) != TCL_OK) {
return TCL_ERROR;
}
- if (start >= utflen) {
+ if (start >= length2) {
goto str_first_done;
} else if (start > 0) {
- if (length2 == utflen) {
- /* no unicode chars */
- string2 += start;
- length2 -= start;
- } else {
- char *s = Tcl_UtfAtIndex(string2, start);
- length2 -= s - string2;
- string2 = s;
- }
+ ustring2 += start;
+ length2 -= start;
}
}
if (length1 > 0) {
- end = string2 + length2 - length1 + 1;
- for (p = string2; p < end; p++) {
+ register Tcl_UniChar *p, *end;
+
+ end = ustring2 + length2 - length1 + 1;
+ for (p = ustring2; p < end; p++) {
/*
* Scan forward to find the first character.
*/
-
- p = memchr(p, *string1, (unsigned) (end - p));
- if (p == NULL) {
- break;
- }
- if (memcmp(string1, p, (unsigned) length1) == 0) {
- match = p - string2;
+ if ((*p == *ustring1) &&
+ (Tcl_UniCharNcmp(ustring1, p,
+ (unsigned long) length1) == 0)) {
+ match = p - ustring2;
break;
}
}
}
-
/*
* Compute the character index of the matching string by
* counting the number of characters before the match.
*/
- str_first_done:
- if (match != -1) {
- if (objc == 4) {
- match = Tcl_NumUtfChars(string2, match);
- } else if (length2 == utflen) {
- /* no unicode chars */
- match += start;
- } else {
- match = start + Tcl_NumUtfChars(string2, match);
- }
+ if ((match != -1) && (objc == 5)) {
+ match += start;
}
+
+ str_first_done:
Tcl_SetIntObj(resultPtr, match);
break;
}
case STR_INDEX: {
- char buf[TCL_UTF_MAX];
- Tcl_UniChar unichar;
-
if (objc != 4) {
Tcl_WrongNumArgs(interp, 2, objv, "string charIndex");
return TCL_ERROR;
@@ -1201,33 +1184,33 @@ Tcl_StringObjCmd(dummy, interp, objc, objv)
*/
if (objv[2]->typePtr == &tclByteArrayType) {
-
- string1 = (char *)Tcl_GetByteArrayFromObj(objv[2], &length1);
+ string1 = (char *) Tcl_GetByteArrayFromObj(objv[2], &length1);
if (TclGetIntForIndex(interp, objv[3], length1 - 1,
&index) != TCL_OK) {
return TCL_ERROR;
}
- Tcl_SetByteArrayObj(resultPtr,
- (unsigned char *)(&string1[index]), 1);
+ if ((index >= 0) && (index < length1)) {
+ Tcl_SetByteArrayObj(resultPtr,
+ (unsigned char *)(&string1[index]), 1);
+ }
} else {
- string1 = Tcl_GetStringFromObj(objv[2], &length1);
-
/*
- * convert to Unicode internal rep to calulate what
- * 'end' really means.
+ * Get Unicode char length to calulate what 'end' means.
*/
+ length1 = Tcl_GetCharLength(objv[2]);
- length2 = Tcl_GetCharLength(objv[2]);
-
- if (TclGetIntForIndex(interp, objv[3], length2 - 1,
+ if (TclGetIntForIndex(interp, objv[3], length1 - 1,
&index) != TCL_OK) {
return TCL_ERROR;
}
- if ((index >= 0) && (index < length2)) {
- unichar = Tcl_GetUniChar(objv[2], index);
- length2 = Tcl_UniCharToUtf((int)unichar, buf);
- Tcl_SetStringObj(resultPtr, buf, length2);
+ if ((index >= 0) && (index < length1)) {
+ char buf[TCL_UTF_MAX];
+ Tcl_UniChar ch;
+
+ ch = Tcl_GetUniChar(objv[2], index);
+ length1 = Tcl_UniCharToUtf(ch, buf);
+ Tcl_SetStringObj(resultPtr, buf, length1);
}
}
break;
@@ -1275,7 +1258,8 @@ Tcl_StringObjCmd(dummy, interp, objc, objv)
strncmp(string2, "-strict", (size_t) length2) == 0) {
strict = 1;
} else if ((length2 > 1) &&
- strncmp(string2, "-failindex", (size_t) length2) == 0) {
+ strncmp(string2, "-failindex",
+ (size_t) length2) == 0) {
if (i+1 >= objc-1) {
Tcl_WrongNumArgs(interp, 3, objv,
"?-strict? ?-failindex var? str");
@@ -1508,78 +1492,63 @@ Tcl_StringObjCmd(dummy, interp, objc, objv)
break;
}
case STR_LAST: {
- register char *p;
- int match, utflen, start;
+ Tcl_UniChar *ustring1, *ustring2, *p;
+ int match, start;
if (objc < 4 || objc > 5) {
Tcl_WrongNumArgs(interp, 2, objv,
- "string1 string2 ?startIndex?");
+ "subString string ?startIndex?");
return TCL_ERROR;
}
/*
- * This algorithm fails on improperly formed UTF strings.
+ * We are searching string2 for the sequence string1.
*/
match = -1;
start = 0;
- utflen = -1;
- string1 = Tcl_GetStringFromObj(objv[2], &length1);
- string2 = Tcl_GetStringFromObj(objv[3], &length2);
+ length2 = -1;
+
+ ustring1 = Tcl_GetUnicode(objv[2]);
+ length1 = Tcl_GetCharLength(objv[2]);
+ ustring2 = Tcl_GetUnicode(objv[3]);
+ length2 = Tcl_GetCharLength(objv[3]);
if (objc == 5) {
/*
* If a startIndex is specified, we will need to restrict
* the string range to that char index in the string
*/
- utflen = Tcl_NumUtfChars(string2, length2);
- if (TclGetIntForIndex(interp, objv[4], utflen-1,
- &start) != TCL_OK) {
+ if (TclGetIntForIndex(interp, objv[4], length2 - 1,
+ &start) != TCL_OK) {
return TCL_ERROR;
}
if (start < 0) {
goto str_last_done;
- } else if (start < utflen) {
- if (length2 == utflen) {
- /* no unicode chars */
- p = string2 + start + 1 - length1;
- } else {
- p = Tcl_UtfAtIndex(string2, start+1) - length1;
- }
+ } else if (start < length2) {
+ p = ustring2 + start + 1 - length1;
} else {
- p = string2 + length2 - length1;
+ p = ustring2 + length2 - length1;
}
} else {
- p = string2 + length2 - length1;
+ p = ustring2 + length2 - length1;
}
if (length1 > 0) {
- for (; p >= string2; p--) {
+ for (; p >= ustring2; p--) {
/*
* Scan backwards to find the first character.
*/
-
- while ((p != string2) && (*p != *string1)) {
- p--;
- }
- if (memcmp(string1, p, (unsigned) length1) == 0) {
- match = p - string2;
+ if ((*p == *ustring1) &&
+ (memcmp((char *) ustring1, (char *) p, (size_t)
+ (length1 * sizeof(Tcl_UniChar))) == 0)) {
+ match = p - ustring2;
break;
}
}
}
- /*
- * Compute the character index of the matching string by counting
- * the number of characters before the match.
- */
- str_last_done:
- if (match != -1) {
- if ((objc == 4) || (length2 != utflen)) {
- /* only check when we've got unicode chars */
- match = Tcl_NumUtfChars(string2, match);
- }
- }
+ str_last_done:
Tcl_SetIntObj(resultPtr, match);
break;
}
@@ -1592,7 +1561,6 @@ Tcl_StringObjCmd(dummy, interp, objc, objv)
if ((enum options) index == STR_BYTELENGTH) {
(void) Tcl_GetStringFromObj(objv[2], &length1);
- Tcl_SetIntObj(resultPtr, length1);
} else {
/*
* If we have a ByteArray object, avoid recomputing the
@@ -1603,20 +1571,19 @@ Tcl_StringObjCmd(dummy, interp, objc, objv)
if (objv[2]->typePtr == &tclByteArrayType) {
(void) Tcl_GetByteArrayFromObj(objv[2], &length1);
- Tcl_SetIntObj(resultPtr, length1);
} else {
- Tcl_SetIntObj(resultPtr,
- Tcl_GetCharLength(objv[2]));
+ length1 = Tcl_GetCharLength(objv[2]);
}
}
+ Tcl_SetIntObj(resultPtr, length1);
break;
}
case STR_MAP: {
- int uselen, mapElemc, len, nocase = 0;
+ int mapElemc, nocase = 0;
Tcl_Obj **mapElemv;
- char *end;
- Tcl_UniChar ch;
- int (*str_comp_fn)();
+ Tcl_UniChar *ustring1, *ustring2, *p, *end;
+ int (*strCmpFn)(CONST Tcl_UniChar*, CONST Tcl_UniChar*,
+ unsigned long);
if (objc < 4 || objc > 5) {
Tcl_WrongNumArgs(interp, 2, objv, "?-nocase? charMap string");
@@ -1652,63 +1619,111 @@ Tcl_StringObjCmd(dummy, interp, objc, objv)
Tcl_SetStringObj(resultPtr, "char map list unbalanced", -1);
return TCL_ERROR;
}
- string1 = Tcl_GetStringFromObj(objv[objc-1], &length1);
+ objc--;
+
+ ustring1 = Tcl_GetUnicode(objv[objc]);
+ length1 = Tcl_GetCharLength(objv[objc]);
if (length1 == 0) {
+ /*
+ * Empty input string, just stop now
+ */
break;
}
- end = string1 + length1;
+ end = ustring1 + length1;
- if (nocase) {
- length1 = Tcl_NumUtfChars(string1, length1);
- str_comp_fn = Tcl_UtfNcasecmp;
- } else {
- str_comp_fn = memcmp;
- }
+ strCmpFn = (nocase) ? Tcl_UniCharNcasecmp : Tcl_UniCharNcmp;
- for ( ; string1 < end; string1 += len) {
- len = Tcl_UtfToUniChar(string1, &ch);
- for (index = 0; index < mapElemc; index +=2) {
- /*
- * Get the key string to match on
- */
- string2 = Tcl_GetStringFromObj(mapElemv[index],
- &length2);
- if (nocase) {
- uselen = Tcl_NumUtfChars(string2, length2);
- } else {
- uselen = length2;
+ /*
+ * Force result to be Unicode
+ */
+ Tcl_SetUnicodeObj(resultPtr, ustring1, 0);
+
+ if (mapElemc == 2) {
+ /*
+ * Special case for one map pair which avoids the extra
+ * for loop and extra calls to get Unicode data. The
+ * algorithm is otherwise identical to the multi-pair case.
+ * This will be >30% faster on larger strings.
+ */
+ Tcl_UniChar *mapString = Tcl_GetUnicode(mapElemv[1]);
+ int mapLen = Tcl_GetCharLength(mapElemv[1]);
+ ustring2 = Tcl_GetUnicode(mapElemv[0]);
+ length2 = Tcl_GetCharLength(mapElemv[0]);
+ for (p = ustring1; ustring1 < end; ustring1++) {
+ if ((length2 > 0) &&
+ (nocase || (*ustring1 == *ustring2)) &&
+ (strCmpFn(ustring1, ustring2,
+ (unsigned long) length2) == 0)) {
+ if (p != ustring1) {
+ Tcl_AppendUnicodeToObj(resultPtr, p,
+ ustring1 - p);
+ p = ustring1 + length2;
+ } else {
+ p += length2;
+ }
+ ustring1 = p - 1;
+
+ Tcl_AppendUnicodeToObj(resultPtr, mapString, mapLen);
}
- if ((uselen > 0) && (uselen <= length1) &&
- (str_comp_fn(string2, string1, uselen) == 0)) {
- /*
- * Adjust len to be full length of matched string
- * it has to be the BYTE length
- */
- len = length2;
+ }
+ } else {
+ Tcl_UniChar **mapStrings =
+ (Tcl_UniChar **) ckalloc((mapElemc * 2)
+ * sizeof(Tcl_UniChar *));
+ int *mapLens =
+ (int *) ckalloc((mapElemc * 2) * sizeof(int));
+ /*
+ * Precompute pointers to the unicode string and length.
+ * This saves us repeated function calls later,
+ * significantly speeding up the algorithm.
+ */
+ for (index = 0; index < mapElemc; index++) {
+ mapStrings[index] = Tcl_GetUnicode(mapElemv[index]);
+ mapLens[index] = Tcl_GetCharLength(mapElemv[index]);
+ }
+ for (p = ustring1; ustring1 < end; ustring1++) {
+ for (index = 0; index < mapElemc; index += 2) {
/*
- * Change string2 and length2 to the map value
+ * Get the key string to match on
*/
- string2 = Tcl_GetStringFromObj(mapElemv[index+1],
- &length2);
- Tcl_AppendToObj(resultPtr, string2, length2);
- break;
+ ustring2 = mapStrings[index];
+ length2 = mapLens[index];
+ if ((length2 > 0) &&
+ (nocase || (*ustring1 == *ustring2)) &&
+ (strCmpFn(ustring2, ustring1,
+ (unsigned long) length2) == 0)) {
+ if (p != ustring1) {
+ /*
+ * Put the skipped chars onto the result first
+ */
+ Tcl_AppendUnicodeToObj(resultPtr, p,
+ ustring1 - p);
+ p = ustring1 + length2;
+ } else {
+ p += length2;
+ }
+ /*
+ * Adjust len to be full length of matched string
+ */
+ ustring1 = p - 1;
+
+ /*
+ * Append the map value to the unicode string
+ */
+ Tcl_AppendUnicodeToObj(resultPtr,
+ mapStrings[index+1], mapLens[index+1]);
+ break;
+ }
}
}
- if (index == mapElemc) {
- /*
- * No match was found, put the char onto result
- */
- Tcl_AppendToObj(resultPtr, string1, len);
- }
+ ckfree((char *) mapStrings);
+ ckfree((char *) mapLens);
+ }
+ if (p != ustring1) {
/*
- * in nocase, length1 is in chars
- * otherwise it is in bytes
+ * Put the rest of the unmapped chars onto result
*/
- if (nocase) {
- length1--;
- } else {
- length1 -= len;
- }
+ Tcl_AppendUnicodeToObj(resultPtr, p, ustring1 - p);
}
break;
}
@@ -1734,9 +1749,8 @@ Tcl_StringObjCmd(dummy, interp, objc, objv)
}
Tcl_SetBooleanObj(resultPtr,
- Tcl_StringCaseMatch(Tcl_GetString(objv[objc-1]),
- Tcl_GetString(objv[objc-2]),
- nocase));
+ Tcl_UniCharCaseMatch(Tcl_GetUnicode(objv[objc-1]),
+ Tcl_GetUnicode(objv[objc-2]), nocase));
break;
}
case STR_RANGE: {
@@ -1748,64 +1762,24 @@ Tcl_StringObjCmd(dummy, interp, objc, objv)
}
/*
- * If we have a ByteArray object, avoid indexing in the
- * Utf string since the byte array contains one byte per
- * character. Otherwise, use the Unicode string rep to
- * get the range.
+ * Get the length in actual characters.
*/
+ length1 = Tcl_GetCharLength(objv[2]) - 1;
- if (objv[2]->typePtr == &tclByteArrayType) {
-
- string1 = (char *)Tcl_GetByteArrayFromObj(objv[2], &length1);
-
- if (TclGetIntForIndex(interp, objv[3], length1 - 1,
- &first) != TCL_OK) {
- return TCL_ERROR;
- }
- if (TclGetIntForIndex(interp, objv[4], length1 - 1,
- &last) != TCL_OK) {
- return TCL_ERROR;
- }
- if (first < 0) {
- first = 0;
- }
- if (last >= length1 - 1) {
- last = length1 - 1;
- }
- if (last >= first) {
- int numBytes = last - first + 1;
- resultPtr = Tcl_NewByteArrayObj(
- (unsigned char *) &string1[first], numBytes);
- Tcl_SetObjResult(interp, resultPtr);
- }
- } else {
- string1 = Tcl_GetStringFromObj(objv[2], &length1);
-
- /*
- * Convert to Unicode internal rep to calulate length and
- * create a result object.
- */
+ if ((TclGetIntForIndex(interp, objv[3], length1, &first) != TCL_OK)
+ || (TclGetIntForIndex(interp, objv[4], length1,
+ &last) != TCL_OK)) {
+ return TCL_ERROR;
+ }
- length2 = Tcl_GetCharLength(objv[2]) - 1;
-
- if (TclGetIntForIndex(interp, objv[3], length2,
- &first) != TCL_OK) {
- return TCL_ERROR;
- }
- if (TclGetIntForIndex(interp, objv[4], length2,
- &last) != TCL_OK) {
- return TCL_ERROR;
- }
- if (first < 0) {
- first = 0;
- }
- if (last >= length2) {
- last = length2;
- }
- if (last >= first) {
- resultPtr = Tcl_GetRange(objv[2], first, last);
- Tcl_SetObjResult(interp, resultPtr);
- }
+ if (first < 0) {
+ first = 0;
+ }
+ if (last >= length1) {
+ last = length1;
+ }
+ if (last >= first) {
+ Tcl_SetObjResult(interp, Tcl_GetRange(objv[2], first, last));
}
break;
}
@@ -1830,6 +1804,7 @@ Tcl_StringObjCmd(dummy, interp, objc, objv)
break;
}
case STR_REPLACE: {
+ Tcl_UniChar *ustring1;
int first, last;
if (objc < 5 || objc > 6) {
@@ -1838,33 +1813,29 @@ Tcl_StringObjCmd(dummy, interp, objc, objv)
return TCL_ERROR;
}
- string1 = Tcl_GetStringFromObj(objv[2], &length1);
- length1 = Tcl_NumUtfChars(string1, length1) - 1;
- if (TclGetIntForIndex(interp, objv[3], length1,
- &first) != TCL_OK) {
- return TCL_ERROR;
- }
- if (TclGetIntForIndex(interp, objv[4], length1,
- &last) != TCL_OK) {
+ ustring1 = Tcl_GetUnicode(objv[2]);
+ length1 = Tcl_GetCharLength(objv[2]) - 1;
+
+ if ((TclGetIntForIndex(interp, objv[3], length1, &first) != TCL_OK)
+ || (TclGetIntForIndex(interp, objv[4], length1,
+ &last) != TCL_OK)) {
return TCL_ERROR;
}
- if ((last < first) || (first > length1) || (last < 0)) {
+
+ if ((last < first) || (last < 0) || (first > length1)) {
Tcl_SetObjResult(interp, objv[2]);
} else {
- char *start, *end;
-
if (first < 0) {
first = 0;
}
- start = Tcl_UtfAtIndex(string1, first);
- end = Tcl_UtfAtIndex(start, ((last > length1) ? length1 : last)
- - first + 1);
- Tcl_SetStringObj(resultPtr, string1, start - string1);
+
+ Tcl_SetUnicodeObj(resultPtr, ustring1, first);
if (objc == 6) {
Tcl_AppendObjToObj(resultPtr, objv[5]);
}
if (last < length1) {
- Tcl_AppendToObj(resultPtr, end, -1);
+ Tcl_AppendUnicodeToObj(resultPtr, ustring1 + last + 1,
+ length1 - last);
}
}
break;