summaryrefslogtreecommitdiffstats
path: root/generic/tclCmdMZ.c
diff options
context:
space:
mode:
Diffstat (limited to 'generic/tclCmdMZ.c')
-rw-r--r--generic/tclCmdMZ.c524
1 files changed, 249 insertions, 275 deletions
diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c
index ba1fc41..f94c094 100644
--- a/generic/tclCmdMZ.c
+++ b/generic/tclCmdMZ.c
@@ -258,7 +258,7 @@ Tcl_RegexpObjCmd(
stringLength = Tcl_GetCharLength(objPtr);
if (startIndex) {
- TclGetIntForIndexM(NULL, startIndex, stringLength, &offset);
+ TclGetIntForIndexM(interp, startIndex, stringLength, &offset);
Tcl_DecrRefCount(startIndex);
if (offset < 0) {
offset = 0;
@@ -309,7 +309,7 @@ Tcl_RegexpObjCmd(
eflags = 0;
} else if (offset > stringLength) {
eflags = TCL_REG_NOTBOL;
- } else if (Tcl_GetUniChar(objPtr, offset-1) == (Tcl_UniChar)'\n') {
+ } else if (Tcl_GetUniChar(objPtr, offset-1) == '\n') {
eflags = 0;
} else {
eflags = TCL_REG_NOTBOL;
@@ -487,26 +487,27 @@ Tcl_RegsubObjCmd(
Tcl_Obj *const objv[]) /* Argument objects. */
{
int idx, result, cflags, all, wlen, wsublen, numMatches, offset;
- int start, end, subStart, subEnd, match;
+ int start, end, subStart, subEnd, match, command, numParts;
Tcl_RegExp regExpr;
Tcl_RegExpInfo info;
Tcl_Obj *resultPtr, *subPtr, *objPtr, *startIndex = NULL;
- Tcl_UniChar ch, *wsrc, *wfirstChar, *wstring, *wsubspec, *wend;
+ Tcl_UniChar ch, *wsrc, *wfirstChar, *wstring, *wsubspec = 0, *wend;
static const char *const options[] = {
- "-all", "-nocase", "-expanded",
- "-line", "-linestop", "-lineanchor", "-start",
+ "-all", "-command", "-expanded", "-line",
+ "-linestop", "-lineanchor", "-nocase", "-start",
"--", NULL
};
enum options {
- REGSUB_ALL, REGSUB_NOCASE, REGSUB_EXPANDED,
- REGSUB_LINE, REGSUB_LINESTOP, REGSUB_LINEANCHOR, REGSUB_START,
+ REGSUB_ALL, REGSUB_COMMAND, REGSUB_EXPANDED, REGSUB_LINE,
+ REGSUB_LINESTOP, REGSUB_LINEANCHOR, REGSUB_NOCASE, REGSUB_START,
REGSUB_LAST
};
cflags = TCL_REG_ADVANCED;
all = 0;
offset = 0;
+ command = 0;
resultPtr = NULL;
for (idx = 1; idx < objc; idx++) {
@@ -528,6 +529,9 @@ Tcl_RegsubObjCmd(
case REGSUB_NOCASE:
cflags |= TCL_REG_NOCASE;
break;
+ case REGSUB_COMMAND:
+ command = 1;
+ break;
case REGSUB_EXPANDED:
cflags |= TCL_REG_EXPANDED;
break;
@@ -578,14 +582,14 @@ Tcl_RegsubObjCmd(
if (startIndex) {
int stringLength = Tcl_GetCharLength(objv[1]);
- TclGetIntForIndexM(NULL, startIndex, stringLength, &offset);
+ TclGetIntForIndexM(interp, startIndex, stringLength, &offset);
Tcl_DecrRefCount(startIndex);
if (offset < 0) {
offset = 0;
}
}
- if (all && (offset == 0)
+ if (all && (offset == 0) && (command == 0)
&& (strpbrk(TclGetString(objv[2]), "&\\") == NULL)
&& (strpbrk(TclGetString(objv[0]), "*+?{}()[].\\|^$") == NULL)) {
/*
@@ -593,9 +597,9 @@ Tcl_RegsubObjCmd(
* slightly modified version of the one pair STR_MAP code.
*/
- int slen, nocase;
+ int slen, nocase, wsrclc;
int (*strCmpFn)(const Tcl_UniChar*,const Tcl_UniChar*,unsigned long);
- Tcl_UniChar *p, wsrclc;
+ Tcl_UniChar *p;
numMatches = 0;
nocase = (cflags & TCL_REG_NOCASE);
@@ -661,6 +665,28 @@ Tcl_RegsubObjCmd(
return TCL_ERROR;
}
+ if (command) {
+ /*
+ * In command-prefix mode, we require that the third non-option
+ * argument be a list, so we enforce that here. Afterwards, we fetch
+ * the RE compilation again in case objv[0] and objv[2] are the same
+ * object. (If they aren't, that's cheap to do.)
+ */
+
+ if (Tcl_ListObjLength(interp, objv[2], &numParts) != TCL_OK) {
+ return TCL_ERROR;
+ }
+ if (numParts < 1) {
+ Tcl_SetObjResult(interp, Tcl_NewStringObj(
+ "command prefix must be a list of at least one element",
+ -1));
+ Tcl_SetErrorCode(interp, "TCL", "OPERATION", "REGSUB",
+ "CMDEMPTY", NULL);
+ return TCL_ERROR;
+ }
+ regExpr = Tcl_GetRegExpFromObj(interp, objv[0], cflags);
+ }
+
/*
* Make sure to avoid problems where the objects are shared. This can
* cause RegExpObj <> UnicodeObj shimmering that causes data corruption.
@@ -678,7 +704,9 @@ Tcl_RegsubObjCmd(
} else {
subPtr = objv[2];
}
- wsubspec = Tcl_GetUnicodeFromObj(subPtr, &wsublen);
+ if (!command) {
+ wsubspec = Tcl_GetUnicodeFromObj(subPtr, &wsublen);
+ }
result = TCL_OK;
@@ -737,6 +765,90 @@ Tcl_RegsubObjCmd(
Tcl_AppendUnicodeToObj(resultPtr, wstring + offset, start);
/*
+ * In command-prefix mode, the substitutions are added as quoted
+ * arguments to the subSpec to form a command, that is then executed
+ * and the result used as the string to substitute in. Actually,
+ * everything is passed through Tcl_EvalObjv, as that's much faster.
+ */
+
+ if (command) {
+ Tcl_Obj **args = NULL, **parts;
+ int numArgs;
+
+ Tcl_ListObjGetElements(interp, subPtr, &numParts, &parts);
+ numArgs = numParts + info.nsubs + 1;
+ args = ckalloc(sizeof(Tcl_Obj*) * numArgs);
+ memcpy(args, parts, sizeof(Tcl_Obj*) * numParts);
+
+ for (idx = 0 ; idx <= info.nsubs ; idx++) {
+ subStart = info.matches[idx].start;
+ subEnd = info.matches[idx].end;
+ if ((subStart >= 0) && (subEnd >= 0)) {
+ args[idx + numParts] = Tcl_NewUnicodeObj(
+ wstring + offset + subStart, subEnd - subStart);
+ } else {
+ args[idx + numParts] = Tcl_NewObj();
+ }
+ Tcl_IncrRefCount(args[idx + numParts]);
+ }
+
+ /*
+ * At this point, we're locally holding the references to the
+ * argument words we added for this time round the loop, and the
+ * subPtr is holding the references to the words that the user
+ * supplied directly. None are zero-refcount, which is important
+ * because Tcl_EvalObjv is "hairy monster" in terms of refcount
+ * handling, being able to optionally add references to any of its
+ * argument words. We'll drop the local refs immediately
+ * afterwards; subPtr is handled in the main exit stanza.
+ */
+
+ result = Tcl_EvalObjv(interp, numArgs, args, 0);
+ for (idx = 0 ; idx <= info.nsubs ; idx++) {
+ TclDecrRefCount(args[idx + numParts]);
+ }
+ ckfree(args);
+ if (result != TCL_OK) {
+ if (result == TCL_ERROR) {
+ Tcl_AppendObjToErrorInfo(interp, Tcl_ObjPrintf(
+ "\n (%s substitution computation script)",
+ options[REGSUB_COMMAND]));
+ }
+ goto done;
+ }
+
+ Tcl_AppendObjToObj(resultPtr, Tcl_GetObjResult(interp));
+ Tcl_ResetResult(interp);
+
+ /*
+ * Refetch the unicode, in case the representation was smashed by
+ * the user code.
+ */
+
+ wstring = Tcl_GetUnicodeFromObj(objPtr, &wlen);
+
+ offset += end;
+ if (end == 0 || start == end) {
+ /*
+ * Always consume at least one character of the input string
+ * in order to prevent infinite loops, even when we
+ * technically matched the empty string; we must not match
+ * again at the same spot.
+ */
+
+ if (offset < wlen) {
+ Tcl_AppendUnicodeToObj(resultPtr, wstring + offset, 1);
+ }
+ offset++;
+ }
+ if (all) {
+ continue;
+ } else {
+ break;
+ }
+ }
+
+ /*
* Append the subSpec argument to the variable, making appropriate
* substitutions. This code is a bit hairy because of the backslash
* conventions and because the code saves up ranges of characters in
@@ -1061,7 +1173,7 @@ Tcl_SplitObjCmd(
int objc, /* Number of arguments. */
Tcl_Obj *const objv[]) /* Argument objects. */
{
- Tcl_UniChar ch;
+ Tcl_UniChar ch = 0;
int len;
const char *splitChars;
const char *stringPtr;
@@ -1104,13 +1216,22 @@ Tcl_SplitObjCmd(
Tcl_InitHashTable(&charReuseTable, TCL_ONE_WORD_KEYS);
for ( ; stringPtr < end; stringPtr += len) {
+ int fullchar;
len = TclUtfToUniChar(stringPtr, &ch);
+ fullchar = ch;
+
+#if TCL_UTF_MAX <= 4
+ if (!len) {
+ len += TclUtfToUniChar(stringPtr, &ch);
+ fullchar = (((fullchar & 0x3ff) << 10) | (ch & 0x3ff)) + 0x10000;
+ }
+#endif
/*
* Assume Tcl_UniChar is an integral type...
*/
- hPtr = Tcl_CreateHashEntry(&charReuseTable, INT2PTR((int) ch),
+ hPtr = Tcl_CreateHashEntry(&charReuseTable, INT2PTR(fullchar),
&isNew);
if (isNew) {
TclNewStringObj(objPtr, stringPtr, len);
@@ -1146,7 +1267,7 @@ Tcl_SplitObjCmd(
} else {
const char *element, *p, *splitEnd;
int splitLen;
- Tcl_UniChar splitChar;
+ Tcl_UniChar splitChar = 0;
/*
* Normal case: split on any of a given set of characters. Discard
@@ -1214,16 +1335,8 @@ StringFirstCmd(
if (TCL_OK != TclGetIntForIndexM(interp, objv[3], size - 1, &start)) {
return TCL_ERROR;
}
-
- if (start < 0) {
- start = 0;
- }
- if (start >= size) {
- Tcl_SetObjResult(interp, Tcl_NewIntObj(-1));
- return TCL_OK;
- }
}
- Tcl_SetObjResult(interp, Tcl_NewIntObj(TclStringFind(objv[1],
+ Tcl_SetObjResult(interp, Tcl_NewIntObj(TclStringFirst(objv[1],
objv[2], start)));
return TCL_OK;
}
@@ -1267,14 +1380,6 @@ StringLastCmd(
if (TCL_OK != TclGetIntForIndexM(interp, objv[3], size - 1, &last)) {
return TCL_ERROR;
}
-
- if (last < 0) {
- Tcl_SetObjResult(interp, Tcl_NewIntObj(-1));
- return TCL_OK;
- }
- if (last >= size) {
- last = size - 1;
- }
}
Tcl_SetObjResult(interp, Tcl_NewIntObj(TclStringLast(objv[1],
objv[2], last)));
@@ -1314,7 +1419,7 @@ StringIndexCmd(
}
/*
- * Get the char length to calulate what 'end' means.
+ * Get the char length to calculate what 'end' means.
*/
length = Tcl_GetCharLength(objv[1]);
@@ -1323,7 +1428,11 @@ StringIndexCmd(
}
if ((index >= 0) && (index < length)) {
- Tcl_UniChar ch = Tcl_GetUniChar(objv[1], index);
+ int ch = Tcl_GetUniChar(objv[1], index);
+
+ if (ch == -1) {
+ return TCL_OK;
+ }
/*
* If we have a ByteArray object, we're careful to generate a new
@@ -1335,9 +1444,12 @@ StringIndexCmd(
Tcl_SetObjResult(interp, Tcl_NewByteArrayObj(&uch, 1));
} else {
- char buf[TCL_UTF_MAX];
+ char buf[4];
length = Tcl_UniCharToUtf(ch, buf);
+ if (!length) {
+ length = Tcl_UniCharToUtf(-1, buf);
+ }
Tcl_SetObjResult(interp, Tcl_NewStringObj(buf, length));
}
}
@@ -1370,7 +1482,7 @@ StringIsCmd(
Tcl_Obj *const objv[]) /* Argument objects. */
{
const char *string1, *end, *stop;
- Tcl_UniChar ch;
+ Tcl_UniChar ch = 0;
int (*chcomp)(int) = NULL; /* The UniChar comparison function. */
int i, failat = 0, result = 1, strict = 0, index, length1, length2;
Tcl_Obj *objPtr, *failVarObj = NULL;
@@ -1467,11 +1579,11 @@ StringIsCmd(
string1 = TclGetStringFromObj(objPtr, &length1);
result = length1 == 0;
}
- } else if (((index == STR_IS_TRUE) &&
- objPtr->internalRep.longValue == 0)
- || ((index == STR_IS_FALSE) &&
- objPtr->internalRep.longValue != 0)) {
- result = 0;
+ } else if (index != STR_IS_BOOL) {
+ TclGetBooleanFromObj(NULL, objPtr, &i);
+ if ((index == STR_IS_TRUE) ^ i) {
+ result = 0;
+ }
}
break;
case STR_IS_CONTROL:
@@ -1481,12 +1593,8 @@ StringIsCmd(
chcomp = Tcl_UniCharIsDigit;
break;
case STR_IS_DOUBLE: {
- /* TODO */
if ((objPtr->typePtr == &tclDoubleType) ||
(objPtr->typePtr == &tclIntType) ||
-#ifndef TCL_WIDE_INT_IS_LONG
- (objPtr->typePtr == &tclWideIntType) ||
-#endif
(objPtr->typePtr == &tclBignumType)) {
break;
}
@@ -1515,15 +1623,8 @@ StringIsCmd(
chcomp = Tcl_UniCharIsGraph;
break;
case STR_IS_INT:
- if (TCL_OK == TclGetIntFromObj(NULL, objPtr, &i)) {
- break;
- }
- goto failedIntParse;
case STR_IS_ENTIER:
if ((objPtr->typePtr == &tclIntType) ||
-#ifndef TCL_WIDE_INT_IS_LONG
- (objPtr->typePtr == &tclWideIntType) ||
-#endif
(objPtr->typePtr == &tclBignumType)) {
break;
}
@@ -1565,11 +1666,10 @@ StringIsCmd(
}
break;
case STR_IS_WIDE:
- if (TCL_OK == Tcl_GetWideIntFromObj(NULL, objPtr, &w)) {
+ if (TCL_OK == TclGetWideIntFromObj(NULL, objPtr, &w)) {
break;
}
- failedIntParse:
string1 = TclGetStringFromObj(objPtr, &length1);
if (length1 == 0) {
if (strict) {
@@ -1702,8 +1802,16 @@ StringIsCmd(
}
end = string1 + length1;
for (; string1 < end; string1 += length2, failat++) {
+ int fullchar;
length2 = TclUtfToUniChar(string1, &ch);
- if (!chcomp(ch)) {
+ fullchar = ch;
+#if TCL_UTF_MAX <= 4
+ if (!length2) {
+ length2 = TclUtfToUniChar(string1, &ch);
+ fullchar = (((fullchar & 0x3ff) << 10) | (ch & 0x3ff)) + 0x10000;
+ }
+#endif
+ if (!chcomp(fullchar)) {
result = 0;
break;
}
@@ -1736,7 +1844,7 @@ static int
UniCharIsHexDigit(
int character)
{
- return (character >= 0) && (character < 0x80) && isxdigit(character);
+ return (character >= 0) && (character < 0x80) && isxdigit(UCHAR(character));
}
/*
@@ -1891,8 +1999,8 @@ StringMapCmd(
* larger strings.
*/
- int mapLen;
- Tcl_UniChar *mapString, u2lc;
+ int mapLen, u2lc;
+ Tcl_UniChar *mapString;
ustring2 = Tcl_GetUnicodeFromObj(mapElemv[0], &length2);
p = ustring1;
@@ -1923,8 +2031,8 @@ StringMapCmd(
}
}
} else {
- Tcl_UniChar **mapStrings, *u2lc = NULL;
- int *mapLens;
+ Tcl_UniChar **mapStrings;
+ int *mapLens, *u2lc = NULL;
/*
* Precompute pointers to the unicode string and length. This saves us
@@ -1936,7 +2044,7 @@ StringMapCmd(
mapStrings = TclStackAlloc(interp, mapElemc*2*sizeof(Tcl_UniChar *));
mapLens = TclStackAlloc(interp, mapElemc * 2 * sizeof(int));
if (nocase) {
- u2lc = TclStackAlloc(interp, mapElemc * sizeof(Tcl_UniChar));
+ u2lc = TclStackAlloc(interp, mapElemc * sizeof(int));
}
for (index = 0; index < mapElemc; index++) {
mapStrings[index] = Tcl_GetUnicodeFromObj(mapElemv[index],
@@ -2165,11 +2273,12 @@ StringReptCmd(
return TCL_OK;
}
- if (TCL_OK != TclStringRepeat(interp, objv[1], count, &resultPtr)) {
- return TCL_ERROR;
+ resultPtr = TclStringRepeat(interp, objv[1], count, TCL_STRING_IN_PLACE);
+ if (resultPtr) {
+ Tcl_SetObjResult(interp, resultPtr);
+ return TCL_OK;
}
- Tcl_SetObjResult(interp, resultPtr);
- return TCL_OK;
+ return TCL_ERROR;
}
/*
@@ -2197,42 +2306,50 @@ StringRplcCmd(
int objc, /* Number of arguments. */
Tcl_Obj *const objv[]) /* Argument objects. */
{
- Tcl_UniChar *ustring;
- int first, last, length;
+ int first, last, length, end;
if (objc < 4 || objc > 5) {
Tcl_WrongNumArgs(interp, 1, objv, "string first last ?string?");
return TCL_ERROR;
}
- ustring = Tcl_GetUnicodeFromObj(objv[1], &length);
- length--;
+ length = Tcl_GetCharLength(objv[1]);
+ end = length - 1;
- if (TclGetIntForIndexM(interp, objv[2], length, &first) != TCL_OK ||
- TclGetIntForIndexM(interp, objv[3], length, &last) != TCL_OK){
+ if (TclGetIntForIndexM(interp, objv[2], end, &first) != TCL_OK ||
+ TclGetIntForIndexM(interp, objv[3], end, &last) != TCL_OK){
return TCL_ERROR;
}
- if ((last < first) || (last < 0) || (first > length)) {
+ /*
+ * The following test screens out most empty substrings as
+ * candidates for replacement. When they are detected, no
+ * replacement is done, and the result is the original string,
+ */
+ if ((last < 0) || /* Range ends before start of string */
+ (first > end) || /* Range begins after end of string */
+ (last < first)) { /* Range begins after it starts */
+
+ /*
+ * BUT!!! when (end < 0) -- an empty original string -- we can
+ * have (first <= end < 0 <= last) and an empty string is permitted
+ * to be replaced.
+ */
Tcl_SetObjResult(interp, objv[1]);
} else {
Tcl_Obj *resultPtr;
- ustring = Tcl_GetUnicodeFromObj(objv[1], &length);
- length--;
-
if (first < 0) {
first = 0;
}
-
- resultPtr = Tcl_NewUnicodeObj(ustring, first);
- if (objc == 5) {
- Tcl_AppendObjToObj(resultPtr, objv[4]);
- }
- if (last < length) {
- Tcl_AppendUnicodeToObj(resultPtr, ustring + last + 1,
- length - last);
+ if (last > end) {
+ last = end;
}
+
+ resultPtr = TclStringReplace(interp, objv[1], first,
+ last + 1 - first, (objc == 5) ? objv[4] : NULL,
+ TCL_STRING_IN_PLACE);
+
Tcl_SetObjResult(interp, resultPtr);
}
return TCL_OK;
@@ -2268,7 +2385,7 @@ StringRevCmd(
return TCL_ERROR;
}
- Tcl_SetObjResult(interp, TclStringObjReverse(objv[1]));
+ Tcl_SetObjResult(interp, TclStringReverse(objv[1], TCL_STRING_IN_PLACE));
return TCL_OK;
}
@@ -2298,7 +2415,7 @@ StringStartCmd(
int objc, /* Number of arguments. */
Tcl_Obj *const objv[]) /* Argument objects. */
{
- Tcl_UniChar ch;
+ Tcl_UniChar ch = 0;
const char *p, *string;
int cur, index, length, numChars;
@@ -2359,7 +2476,7 @@ StringEndCmd(
int objc, /* Number of arguments. */
Tcl_Obj *const objv[]) /* Argument objects. */
{
- Tcl_UniChar ch;
+ Tcl_UniChar ch = 0;
const char *p, *end, *string;
int cur, index, length, numChars;
@@ -2427,10 +2544,8 @@ StringEqualCmd(
* the expr string comparison in INST_EQ/INST_NEQ/INST_LT/...).
*/
- const char *string1, *string2;
- int length1, length2, i, match, length, nocase = 0, reqlength = -1;
- typedef int (*strCmpFn_t)(const char *, const char *, unsigned int);
- strCmpFn_t strCmpFn;
+ const char *string2;
+ int length, i, match, nocase = 0, reqlength = -1;
if (objc < 3 || objc > 6) {
str_cmp_args:
@@ -2440,11 +2555,11 @@ StringEqualCmd(
}
for (i = 1; i < objc-2; i++) {
- string2 = TclGetStringFromObj(objv[i], &length2);
- if ((length2 > 1) && !strncmp(string2, "-nocase", (size_t)length2)) {
+ string2 = TclGetStringFromObj(objv[i], &length);
+ if ((length > 1) && !strncmp(string2, "-nocase", (size_t)length)) {
nocase = 1;
- } else if ((length2 > 1)
- && !strncmp(string2, "-length", (size_t)length2)) {
+ } else if ((length > 1)
+ && !strncmp(string2, "-length", (size_t)length)) {
if (i+1 >= objc-2) {
goto str_cmp_args;
}
@@ -2469,78 +2584,7 @@ StringEqualCmd(
objv += objc-2;
- if ((reqlength == 0) || (objv[0] == objv[1])) {
- /*
- * Always match at 0 chars of if it is the same obj.
- */
-
- Tcl_SetObjResult(interp, Tcl_NewBooleanObj(1));
- return TCL_OK;
- }
-
- if (!nocase && TclIsPureByteArray(objv[0]) &&
- TclIsPureByteArray(objv[1])) {
- /*
- * Use binary versions of comparisons since that won't cause undue
- * type conversions and it is much faster. Only do this if we're
- * case-sensitive (which is all that really makes sense with byte
- * arrays anyway, and we have no memcasecmp() for some reason... :^)
- */
-
- string1 = (char *) Tcl_GetByteArrayFromObj(objv[0], &length1);
- string2 = (char *) Tcl_GetByteArrayFromObj(objv[1], &length2);
- strCmpFn = (strCmpFn_t) memcmp;
- } else if ((objv[0]->typePtr == &tclStringType)
- && (objv[1]->typePtr == &tclStringType)) {
- /*
- * Do a unicode-specific comparison if both of the args are of String
- * type. In benchmark testing this proved the most efficient check
- * between the unicode and string comparison operations.
- */
-
- string1 = (char *) Tcl_GetUnicodeFromObj(objv[0], &length1);
- string2 = (char *) Tcl_GetUnicodeFromObj(objv[1], &length2);
- strCmpFn = (strCmpFn_t)
- (nocase ? Tcl_UniCharNcasecmp : Tcl_UniCharNcmp);
- } else {
- /*
- * As a catch-all we will work with UTF-8. We cannot use memcmp() as
- * that is unsafe with any string containing NUL (\xC0\x80 in Tcl's
- * utf rep). We can use the more efficient TclpUtfNcmp2 if we are
- * case-sensitive and no specific length was requested.
- */
-
- string1 = (char *) TclGetStringFromObj(objv[0], &length1);
- string2 = (char *) TclGetStringFromObj(objv[1], &length2);
- if ((reqlength < 0) && !nocase) {
- strCmpFn = (strCmpFn_t) TclpUtfNcmp2;
- } else {
- length1 = Tcl_NumUtfChars(string1, length1);
- length2 = Tcl_NumUtfChars(string2, length2);
- strCmpFn = (strCmpFn_t) (nocase ? Tcl_UtfNcasecmp : Tcl_UtfNcmp);
- }
- }
-
- if ((reqlength < 0) && (length1 != length2)) {
- match = 1; /* This will be reversed below. */
- } else {
- length = (length1 < length2) ? length1 : length2;
- if (reqlength > 0 && reqlength < length) {
- length = reqlength;
- } else if (reqlength < 0) {
- /*
- * The requested length is negative, so we ignore it by setting it
- * to length + 1 so we correct the match var.
- */
-
- reqlength = length + 1;
- }
-
- match = strCmpFn(string1, string2, (unsigned) length);
- if ((match == 0) && (reqlength > length)) {
- match = length1 - length2;
- }
- }
+ match = TclStringCmp(objv[0], objv[1], 0, nocase, reqlength);
Tcl_SetObjResult(interp, Tcl_NewBooleanObj(match ? 0 : 1));
return TCL_OK;
@@ -2577,11 +2621,31 @@ StringCmpCmd(
* the expr string comparison in INST_EQ/INST_NEQ/INST_LT/...).
*/
- const char *string1, *string2;
- int length1, length2, i, match, length, nocase = 0, reqlength = -1;
- typedef int (*strCmpFn_t)(const char *, const char *, unsigned int);
- strCmpFn_t strCmpFn;
+ int match, nocase, reqlength, status;
+
+ status = TclStringCmpOpts(interp, objc, objv, &nocase, &reqlength);
+ if (status != TCL_OK) {
+ return status;
+ }
+ objv += objc-2;
+ match = TclStringCmp(objv[0], objv[1], 0, nocase, reqlength);
+ Tcl_SetObjResult(interp, Tcl_NewIntObj(match));
+ return TCL_OK;
+}
+
+int TclStringCmpOpts(
+ Tcl_Interp *interp, /* Current interpreter. */
+ int objc, /* Number of arguments. */
+ Tcl_Obj *const objv[], /* Argument objects. */
+ int *nocase,
+ int *reqlength)
+{
+ int i, length;
+ const char *string;
+
+ *reqlength = -1;
+ *nocase = 0;
if (objc < 3 || objc > 6) {
str_cmp_args:
Tcl_WrongNumArgs(interp, 1, objv,
@@ -2590,106 +2654,27 @@ StringCmpCmd(
}
for (i = 1; i < objc-2; i++) {
- string2 = TclGetStringFromObj(objv[i], &length2);
- if ((length2 > 1) && !strncmp(string2, "-nocase", (size_t)length2)) {
- nocase = 1;
- } else if ((length2 > 1)
- && !strncmp(string2, "-length", (size_t)length2)) {
+ string = TclGetStringFromObj(objv[i], &length);
+ if ((length > 1) && !strncmp(string, "-nocase", (size_t)length)) {
+ *nocase = 1;
+ } else if ((length > 1)
+ && !strncmp(string, "-length", (size_t)length)) {
if (i+1 >= objc-2) {
goto str_cmp_args;
}
i++;
- if (TclGetIntFromObj(interp, objv[i], &reqlength) != TCL_OK) {
+ if (TclGetIntFromObj(interp, objv[i], reqlength) != TCL_OK) {
return TCL_ERROR;
}
} else {
Tcl_SetObjResult(interp, Tcl_ObjPrintf(
"bad option \"%s\": must be -nocase or -length",
- string2));
+ string));
Tcl_SetErrorCode(interp, "TCL", "LOOKUP", "INDEX", "option",
- string2, NULL);
+ string, NULL);
return TCL_ERROR;
}
}
-
- /*
- * From now on, we only access the two objects at the end of the argument
- * array.
- */
-
- objv += objc-2;
-
- if ((reqlength == 0) || (objv[0] == objv[1])) {
- /*
- * Always match at 0 chars of if it is the same obj.
- */
-
- Tcl_SetObjResult(interp, Tcl_NewBooleanObj(0));
- return TCL_OK;
- }
-
- if (!nocase && TclIsPureByteArray(objv[0]) &&
- TclIsPureByteArray(objv[1])) {
- /*
- * Use binary versions of comparisons since that won't cause undue
- * type conversions and it is much faster. Only do this if we're
- * case-sensitive (which is all that really makes sense with byte
- * arrays anyway, and we have no memcasecmp() for some reason... :^)
- */
-
- string1 = (char *) Tcl_GetByteArrayFromObj(objv[0], &length1);
- string2 = (char *) Tcl_GetByteArrayFromObj(objv[1], &length2);
- strCmpFn = (strCmpFn_t) memcmp;
- } else if ((objv[0]->typePtr == &tclStringType)
- && (objv[1]->typePtr == &tclStringType)) {
- /*
- * Do a unicode-specific comparison if both of the args are of String
- * type. In benchmark testing this proved the most efficient check
- * between the unicode and string comparison operations.
- */
-
- string1 = (char *) Tcl_GetUnicodeFromObj(objv[0], &length1);
- string2 = (char *) Tcl_GetUnicodeFromObj(objv[1], &length2);
- strCmpFn = (strCmpFn_t)
- (nocase ? Tcl_UniCharNcasecmp : Tcl_UniCharNcmp);
- } else {
- /*
- * As a catch-all we will work with UTF-8. We cannot use memcmp() as
- * that is unsafe with any string containing NUL (\xC0\x80 in Tcl's
- * utf rep). We can use the more efficient TclpUtfNcmp2 if we are
- * case-sensitive and no specific length was requested.
- */
-
- string1 = (char *) TclGetStringFromObj(objv[0], &length1);
- string2 = (char *) TclGetStringFromObj(objv[1], &length2);
- if ((reqlength < 0) && !nocase) {
- strCmpFn = (strCmpFn_t) TclpUtfNcmp2;
- } else {
- length1 = Tcl_NumUtfChars(string1, length1);
- length2 = Tcl_NumUtfChars(string2, length2);
- strCmpFn = (strCmpFn_t) (nocase ? Tcl_UtfNcasecmp : Tcl_UtfNcmp);
- }
- }
-
- length = (length1 < length2) ? length1 : length2;
- if (reqlength > 0 && reqlength < length) {
- length = reqlength;
- } else if (reqlength < 0) {
- /*
- * The requested length is negative, so we ignore it by setting it to
- * length + 1 so we correct the match var.
- */
-
- reqlength = length + 1;
- }
-
- match = strCmpFn(string1, string2, (unsigned) length);
- if ((match == 0) && (reqlength > length)) {
- match = length1 - length2;
- }
-
- Tcl_SetObjResult(interp,
- Tcl_NewIntObj((match > 0) ? 1 : (match < 0) ? -1 : 0));
return TCL_OK;
}
@@ -2717,7 +2702,6 @@ StringCatCmd(
int objc, /* Number of arguments. */
Tcl_Obj *const objv[]) /* Argument objects. */
{
- int code;
Tcl_Obj *objResultPtr;
if (objc < 2) {
@@ -2727,23 +2711,15 @@ StringCatCmd(
*/
return TCL_OK;
}
- if (objc == 2) {
- /*
- * Other trivial case, single arg, just return it.
- */
- Tcl_SetObjResult(interp, objv[1]);
- return TCL_OK;
- }
- code = TclStringCatObjv(interp, /* inPlace */ 1, objc-1, objv+1,
- &objResultPtr);
+ objResultPtr = TclStringCat(interp, objc-1, objv+1, TCL_STRING_IN_PLACE);
- if (code == TCL_OK) {
+ if (objResultPtr) {
Tcl_SetObjResult(interp, objResultPtr);
return TCL_OK;
}
- return code;
+ return TCL_ERROR;
}
/*
@@ -2764,7 +2740,6 @@ StringCatCmd(
*
*----------------------------------------------------------------------
*/
-
static int
StringBytesCmd(
ClientData dummy, /* Not used. */
@@ -3112,8 +3087,7 @@ StringTrimCmd(
}
string1 = TclGetStringFromObj(objv[1], &length1);
- triml = TclTrimLeft(string1, length1, string2, length2);
- trimr = TclTrimRight(string1 + triml, length1 - triml, string2, length2);
+ triml = TclTrim(string1, length1, string2, length2, &trimr);
Tcl_SetObjResult(interp,
Tcl_NewStringObj(string1 + triml, length1 - triml - trimr));
@@ -3418,7 +3392,7 @@ TclNRSwitchObjCmd(
OPT_LAST
};
typedef int (*strCmpFn_t)(const char *, const char *);
- strCmpFn_t strCmpFn = strcmp;
+ strCmpFn_t strCmpFn = TclUtfCmp;
mode = OPT_EXACT;
foundmode = 0;
@@ -4181,7 +4155,7 @@ TclNRTryObjCmd(
}
info[0] = objv[i]; /* type */
- TclNewLongObj(info[1], code); /* returnCode */
+ TclNewIntObj(info[1], code); /* returnCode */
if (info[2] == NULL) { /* errorCodePrefix */
TclNewObj(info[2]);
}