summary refs log tree commit diff stats
path: root/generic/tclCmdMZ.c
diff options
context:
space:
mode:
author stanton <stanton> 1999-05-06 18:46:42 (GMT)
committer stanton <stanton> 1999-05-06 18:46:42 (GMT)
commit f832cd22b120385368e264c684cf8d874014bf3b (patch)
tree 9c149c65795f698ce02226359670d8bc28d9895a /generic/tclCmdMZ.c
parent a23a8f73b3f2aba2722a1363e2d822018fbf504c (diff)
download tcl-f832cd22b120385368e264c684cf8d874014bf3b.zip
tcl-f832cd22b120385368e264c684cf8d874014bf3b.tar.gz
tcl-f832cd22b120385368e264c684cf8d874014bf3b.tar.bz2
* doc/string.n:
* tests/cmdIL.test: * tests/cmdMZ.test: * tests/error.test: * tests/ioCmd.test: * tests/lindex.test: * tests/linsert.test: * tests/lrange.test: * tests/lreplace.test: * tests/string.test: * tests/cmdIL.test: * generic/tclUtil.c: * generic/tclCmdMZ.c: Replaced "string icompare/iequal" with -nocase and -length switches to "string compare/equal". Added a -nocase option to "string map". Changed index syntax to allow integer or end?-integer? instead of a full expression. This is much simpler with safeTcl scripts since it avoids double substitution issues.
Diffstat (limited to 'generic/tclCmdMZ.c')
-rw-r--r-- generic/tclCmdMZ.c 466
1 files changed, 371 insertions, 95 deletions
diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c
index 90b9687..5a3833b 100644
--- a/generic/tclCmdMZ.c
+++ b/generic/tclCmdMZ.c
@@ -13,7 +13,7 @@
* See the file "license.terms" for information on usage and redistribution
* of this file, and for a DISCLAIMER OF ALL WARRANTIES.
*
- * RCS: @(#) $Id: tclCmdMZ.c,v 1.5 1999/05/04 02:57:55 stanton Exp $
+ * RCS: @(#) $Id: tclCmdMZ.c,v 1.6 1999/05/06 18:46:42 stanton Exp $
*/
#include "tclInt.h"
@@ -807,23 +807,21 @@ Tcl_StringObjCmd(dummy, interp, objc, objv)
int length1, length2;
static char *options[] = {
"bytelength", "compare", "equal", "first",
- "icompare", "iequal", "index",
- "last", "length", "map",
- "match", "range", "repeat", "replace",
- "tolower", "toupper", "totitle",
+ "index", "is", "last", "length",
+ "map", "match", "range", "repeat",
+ "replace", "tolower", "toupper", "totitle",
"trim", "trimleft", "trimright",
"wordend", "wordstart", (char *) NULL
};
enum options {
STR_BYTELENGTH, STR_COMPARE, STR_EQUAL, STR_FIRST,
- STR_ICOMPARE, STR_IEQUAL, STR_INDEX,
- STR_LAST, STR_LENGTH, STR_MAP,
- STR_MATCH, STR_RANGE, STR_REPEAT, STR_REPLACE,
- STR_TOLOWER, STR_TOUPPER, STR_TOTITLE,
+ STR_INDEX, STR_IS, STR_LAST, STR_LENGTH,
+ STR_MAP, STR_MATCH, STR_RANGE, STR_REPEAT,
+ STR_REPLACE, STR_TOLOWER, STR_TOUPPER, STR_TOTITLE,
STR_TRIM, STR_TRIMLEFT, STR_TRIMRIGHT,
STR_WORDEND, STR_WORDSTART
};
-
+
if (objc < 2) {
Tcl_WrongNumArgs(interp, 1, objv, "option arg ?arg ...?");
return TCL_ERROR;
@@ -838,37 +836,73 @@ Tcl_StringObjCmd(dummy, interp, objc, objv)
switch ((enum options) index) {
case STR_EQUAL:
case STR_COMPARE: {
- int match, length, reqlength = -1;
+ int i, match, length, nocase = 0, reqlength = -1;
- if (!(objc == 4 || objc == 5)) {
- Tcl_WrongNumArgs(interp, 2, objv, "string1 string2 ?length?");
- return TCL_ERROR;
- }
- if ((objc == 5) &&
- Tcl_GetIntFromObj(interp, objv[4], &reqlength) != TCL_OK) {
+ if (objc < 4 || objc > 7) {
+ str_cmp_args:
+ Tcl_WrongNumArgs(interp, 2, objv,
+ "?-nocase? ?-length int? string1 string2");
return TCL_ERROR;
}
- string1 = Tcl_GetStringFromObj(objv[2], &length1);
- string2 = Tcl_GetStringFromObj(objv[3], &length2);
+ for (i = 2; i < objc-2; i++) {
+ string2 = Tcl_GetStringFromObj(objv[i], &length2);
+ if ((length2 > 1)
+ && strncmp(string2, "-nocase", length2) == 0) {
+ nocase = 1;
+ } else if ((length2 > 1)
+ && strncmp(string2, "-length", length2) == 0) {
+ if (i+1 >= objc-2) {
+ goto str_cmp_args;
+ }
+ if (Tcl_GetIntFromObj(interp, objv[++i],
+ &reqlength) != TCL_OK) {
+ return TCL_ERROR;
+ }
+ } else {
+ Tcl_AppendStringsToObj(resultPtr, "bad option \"",
+ string2, "\": must be -nocase or -length",
+ (char *) NULL);
+ return TCL_ERROR;
+ }
+ }
+ string1 = Tcl_GetStringFromObj(objv[objc-2], &length1);
+ string2 = Tcl_GetStringFromObj(objv[objc-1], &length2);
+ /*
+ * This is the min length IN BYTES of the two strings
+ */
length = (length1 < length2) ? length1 : length2;
- if ((reqlength >= 0) && (reqlength < length)) {
- Tcl_UniChar ch1, ch2;
+ if (reqlength == 0) {
/*
- * reqlength must be interpreted as chars, not bytes
- * we will only enter here when both strings are of
- * at least reqlength chars long (no need for \0 check)
+ * Anything matches at 0 chars, right?
*/
match = 0;
- while (reqlength-- > 0) {
- string1 += Tcl_UtfToUniChar(string1, &ch1);
- string2 += Tcl_UtfToUniChar(string2, &ch2);
- if (ch1 != ch2) {
- match = ch1 - ch2;
- break;
- }
+ } else if (nocase || ((reqlength > 0) && (reqlength < length))) {
+ /*
+ * with -nocase or -length we have to check true char length
+ * as it could be smaller than expected
+ */
+ length1 = Tcl_NumUtfChars(string1, length1);
+ length2 = Tcl_NumUtfChars(string2, length2);
+ length = (length1 < length2) ? length1 : length2;
+ /*
+ * Do the reqlength check again, against 0 as well for
+ * the benefit of nocase
+ */
+ if ((reqlength > 0) && (reqlength < length)) {
+ length = reqlength;
+ }
+ if (nocase) {
+ match = Tcl_UtfNcasecmp(string1, string2,
+ (unsigned)length);
+ } else {
+ match = Tcl_UtfNcmp(string1, string2,
+ (unsigned) length);
+ }
+ if ((match == 0) && (reqlength >= length)) {
+ match = length1 - length2;
}
} else {
match = memcmp(string1, string2, (unsigned) length);
@@ -876,55 +910,8 @@ Tcl_StringObjCmd(dummy, interp, objc, objv)
match = length1 - length2;
}
}
- if ((enum options) index == STR_EQUAL) {
- Tcl_SetIntObj(resultPtr, (match) ? 0 : 1);
- } else {
- Tcl_SetIntObj(resultPtr, ((match > 0) ? 1 :
- (match < 0) ? -1 : 0));
- }
- break;
- }
- case STR_IEQUAL:
- case STR_ICOMPARE: {
- int match, length, reqlength = -1;
- Tcl_UniChar ch1, ch2;
-
- if (objc < 4 || objc > 5) {
- Tcl_WrongNumArgs(interp, 2, objv, "string1 string2 ?length?");
- return TCL_ERROR;
- }
- if ((objc == 5) &&
- Tcl_GetIntFromObj(interp, objv[4], &reqlength) != TCL_OK) {
- return TCL_ERROR;
- }
- string1 = Tcl_GetStringFromObj(objv[2], &length1);
- string2 = Tcl_GetStringFromObj(objv[3], &length2);
-
- length = (length1 < length2) ? length1 : length2;
- if ((reqlength >= 0) && (reqlength < length)) {
- length = reqlength;
- }
- /*
- * length must be interpreted as chars, not bytes
- * we will only enter here when both strings are of
- * at least length chars long (no need for \0 check)
- */
- match = 0;
- while (length-- > 0) {
- string1 += Tcl_UtfToUniChar(string1, &ch1);
- string2 += Tcl_UtfToUniChar(string2, &ch2);
- if ((ch1 != ch2) &&
- (Tcl_UniCharToLower(ch1) != Tcl_UniCharToLower(ch2))) {
- match = ch1 - ch2;
- break;
- }
- }
- if ((match == 0) && (reqlength >= length)) {
- match = length1 - length2;
- }
-
- if ((enum options) index == STR_IEQUAL) {
+ if ((enum options) index == STR_EQUAL) {
Tcl_SetIntObj(resultPtr, (match) ? 0 : 1);
} else {
Tcl_SetIntObj(resultPtr, ((match > 0) ? 1 :
@@ -1002,6 +989,253 @@ Tcl_StringObjCmd(dummy, interp, objc, objv)
}
break;
}
+ case STR_IS: {
+ char *end;
+ Tcl_UniChar ch;
+ int (*chcomp)(int) = NULL; /* The UniChar comparison function */
+ int i, failat = 0, result = 1, strict = 0;
+ Tcl_Obj *objPtr, *failVarObj = NULL;
+
+ static char *isOptions[] = {
+ "alnum", "alpha", "ascii",
+ "boolean", "digit", "double",
+ "false", "integer", "lower",
+ "space", "true", "upper",
+ "wordchar", (char *) NULL
+ };
+ enum isOptions {
+ STR_IS_ALNUM, STR_IS_ALPHA, STR_IS_ASCII,
+ STR_IS_BOOL, STR_IS_DIGIT, STR_IS_DOUBLE,
+ STR_IS_FALSE, STR_IS_INT, STR_IS_LOWER,
+ STR_IS_SPACE, STR_IS_TRUE, STR_IS_UPPER,
+ STR_IS_WORD
+ };
+
+ if (objc < 4 || objc > 7) {
+ Tcl_WrongNumArgs(interp, 2, objv,
+ "class ?-strict? ?-failindex var? str");
+ return TCL_ERROR;
+ }
+ if (Tcl_GetIndexFromObj(interp, objv[2], isOptions, "class", 0,
+ &index) != TCL_OK) {
+ return TCL_ERROR;
+ }
+ if (objc != 4) {
+ for (i = 3; i < objc-1; i++) {
+ string2 = Tcl_GetStringFromObj(objv[i], &length2);
+ if ((length2 > 1) &&
+ strncmp(string2, "-strict", length2) == 0) {
+ strict = 1;
+ } else if ((length2 > 1) &&
+ strncmp(string2, "-failindex", length2) == 0) {
+ if (i+1 >= objc-1) {
+ Tcl_WrongNumArgs(interp, 3, objv,
+ "?-strict? ?-failindex var? str");
+ return TCL_ERROR;
+ }
+ failVarObj = objv[++i];
+ } else {
+ Tcl_AppendStringsToObj(resultPtr, "bad option \"",
+ string2, "\": must be -strict or -failindex",
+ (char *) NULL);
+ return TCL_ERROR;
+ }
+ }
+ }
+
+ /*
+ * We get the objPtr so that we can short-cut for some classes
+ * by checking the object type (int and double), but we need
+ * the string otherwise, because we don't want any conversion
+ * of type occurring (as, for example, Tcl_Get*FromObj would do).
+ */
+ objPtr = objv[objc-1];
+ string1 = Tcl_GetStringFromObj(objPtr, &length1);
+ if (length1 == 0) {
+ if (strict) {
+ result = 0;
+ }
+ goto str_is_done;
+ }
+ end = string1 + length1;
+
+ /*
+ * When entering here, result == 1 and failat == 0
+ */
+ switch ((enum isOptions) index) {
+ case STR_IS_ALNUM:
+ chcomp = Tcl_UniCharIsAlnum;
+ break;
+ case STR_IS_ALPHA:
+ chcomp = Tcl_UniCharIsAlpha;
+ break;
+ case STR_IS_ASCII:
+ for (; string1 < end; string1++, failat++) {
+ /*
+ * This is a valid check in unicode, because all
+ * bytes < 0xC0 are single byte chars (but isascii
+ * limits that def'n to 0x80).
+ */
+ if (*((unsigned char *)string1) >= 0x80) {
+ result = 0;
+ break;
+ }
+ }
+ break;
+ case STR_IS_BOOL:
+ case STR_IS_TRUE:
+ case STR_IS_FALSE:
+ if (objPtr->typePtr == &tclBooleanType) {
+ if ((((enum isOptions) index == STR_IS_TRUE) &&
+ objPtr->internalRep.longValue == 0) ||
+ (((enum isOptions) index == STR_IS_FALSE) &&
+ objPtr->internalRep.longValue != 0)) {
+ result = 0;
+ }
+ } else if ((Tcl_GetBoolean(NULL, string1, &i)
+ == TCL_ERROR) ||
+ (((enum isOptions) index == STR_IS_TRUE) &&
+ i == 0) ||
+ (((enum isOptions) index == STR_IS_FALSE) &&
+ i != 0)) {
+ result = 0;
+ }
+ break;
+ case STR_IS_DIGIT:
+ chcomp = Tcl_UniCharIsDigit;
+ break;
+ case STR_IS_DOUBLE: {
+ char *stop;
+
+ if ((objPtr->typePtr == &tclDoubleType) ||
+ (objPtr->typePtr == &tclIntType)) {
+ break;
+ }
+ /*
+ * This is adapted from Tcl_GetDouble
+ *
+ * The danger in this function is that
+ * "12345678901234567890" is an acceptable 'double',
+ * but will later be interp'd as an int by something
+ * like [expr]. Therefore, we check to see if it looks
+ * like an int, and if so we do a range check on it.
+ * If strtoul gets to the end, we know we either
+ * received an acceptable int, or over/underflow
+ */
+ if (TclLooksLikeInt(string1, length1)) {
+ errno = 0;
+ strtoul(string1, &stop, 0);
+ if (stop == end) {
+ if (errno == ERANGE) {
+ result = 0;
+ failat = -1;
+ }
+ break;
+ }
+ }
+ errno = 0;
+ strtod(string1, &stop); /* INTL: Tcl source. */
+ if (errno == ERANGE) {
+ /*
+ * if (errno == ERANGE), then it was an over/underflow
+ * problem, but in this method, we only want to know
+ * yes or no, so bad flow returns 0 (false) and sets
+ * the failVarObj to the string length.
+ */
+ result = 0;
+ failat = -1;
+ } else if (stop == string1) {
+ /*
+ * In this case, nothing like a number was found
+ */
+ result = 0;
+ failat = 0;
+ } else {
+ /*
+ * Assume we sucked up one char per byte
+ * and then we go onto SPACE, since we are
+ * allowed trailing whitespace
+ */
+ failat = stop - string1;
+ string1 = stop;
+ chcomp = Tcl_UniCharIsSpace;
+ }
+ break;
+ }
+ case STR_IS_INT: {
+ char *stop;
+
+ if ((objPtr->typePtr == &tclIntType) ||
+ (Tcl_GetInt(NULL, string1, &i) == TCL_OK)) {
+ break;
+ }
+ /*
+ * Like STR_IS_DOUBLE, but we use strtoul instead of strtod.
+ * Since Tcl_GetInt already failed, we set result to 0.
+ */
+ result = 0;
+ errno = 0;
+ strtoul(string1, &stop, 0); /* INTL: Tcl source. */
+ if (errno == ERANGE) {
+ /*
+ * if (errno == ERANGE), then it was an over/underflow
+ * problem, but in this method, we only want to know
+ * yes or no, so bad flow returns 0 (false) and sets
+ * the failVarObj to the string length.
+ */
+ failat = -1;
+ } else if (stop == string1) {
+ /*
+ * In this case, nothing like a number was found
+ */
+ failat = 0;
+ } else {
+ /*
+ * Assume we sucked up one char per byte
+ * and then we go onto SPACE, since we are
+ * allowed trailing whitespace
+ */
+ failat = stop - string1;
+ string1 = stop;
+ chcomp = Tcl_UniCharIsSpace;
+ }
+ break;
+ }
+ case STR_IS_LOWER:
+ chcomp = Tcl_UniCharIsLower;
+ break;
+ case STR_IS_SPACE:
+ chcomp = Tcl_UniCharIsSpace;
+ break;
+ case STR_IS_UPPER:
+ chcomp = Tcl_UniCharIsUpper;
+ break;
+ case STR_IS_WORD:
+ chcomp = Tcl_UniCharIsWordChar;
+ break;
+ }
+ if (chcomp != NULL) {
+ for (; string1 < end; string1 += length2, failat++) {
+ length2 = Tcl_UtfToUniChar(string1, &ch);
+ if (!chcomp(ch)) {
+ result = 0;
+ break;
+ }
+ }
+ }
+ str_is_done:
+ /*
+ * Only set the failVarObj when we will return 0
+ * and we have indicated a valid fail index (>= 0)
+ */
+ if ((result == 0) && (failVarObj != NULL) &&
+ Tcl_ObjSetVar2(interp, failVarObj, NULL, Tcl_NewIntObj(failat),
+ TCL_LEAVE_ERR_MSG) == NULL) {
+ return TCL_ERROR;
+ }
+ Tcl_SetBooleanObj(resultPtr, result);
+ break;
+ }
case STR_LAST: {
register char *p;
int match;
@@ -1073,21 +1307,40 @@ Tcl_StringObjCmd(dummy, interp, objc, objv)
break;
}
case STR_MAP: {
- int mapElemc, len;
+ int uselen, mapElemc, len, nocase = 0;
Tcl_Obj **mapElemv;
char *end;
Tcl_UniChar ch;
+ int (*str_comp_fn)();
- if (objc != 4) {
- Tcl_WrongNumArgs(interp, 2, objv, "charMap string");
+ if (objc < 4 || objc > 5) {
+ Tcl_WrongNumArgs(interp, 2, objv, "?-nocase? charMap string");
return TCL_ERROR;
}
+ if (objc == 5) {
+ string2 = Tcl_GetStringFromObj(objv[2], &length2);
+ if ((length2 > 1) &&
+ strncmp(string2, "-nocase", length2) == 0) {
+ nocase = 1;
+ } else {
+ Tcl_AppendStringsToObj(resultPtr, "bad option \"",
+ string2, "\": must be -nocase",
+ (char *) NULL);
+ return TCL_ERROR;
+ }
+ }
+
if (Tcl_ListObjGetElements(interp, objv[objc-2], &mapElemc,
&mapElemv) != TCL_OK) {
return TCL_ERROR;
}
- if (mapElemc & 1) {
+ if (mapElemc == 0) {
+ /*
+ * empty charMap, just return whatever string was given
+ */
+ Tcl_SetObjResult(interp, objv[objc-1]);
+ } else if (mapElemc & 1) {
/*
* The charMap must be an even number of key/value items
*/
@@ -1100,21 +1353,35 @@ Tcl_StringObjCmd(dummy, interp, objc, objv)
}
end = string1 + length1;
+ if (nocase) {
+ length1 = Tcl_NumUtfChars(string1, length1);
+ str_comp_fn = Tcl_UtfNcasecmp;
+ } else {
+ str_comp_fn = memcmp;
+ }
+
for ( ; string1 < end; string1 += len) {
len = Tcl_UtfToUniChar(string1, &ch);
for (index = 0; index < mapElemc; index +=2) {
/*
* Get the key string to match on
*/
- string2 = Tcl_GetStringFromObj(mapElemv[index], &length2);
- if ((*string2 == *string1) &&
- (memcmp(string2, string1, length2) == 0)) {
+ string2 = Tcl_GetStringFromObj(mapElemv[index],
+ &length2);
+ if (nocase) {
+ uselen = Tcl_NumUtfChars(string2, length2);
+ } else {
+ uselen = length2;
+ }
+ if ((uselen <= length1) &&
+ (str_comp_fn(string2, string1, uselen) == 0)) {
/*
* Adjust len to be full length of matched string
+ * it has to be the BYTE length
*/
len = length2;
/*
- * Change string2 and length2 to the replacement value
+ * Change string2 and length2 to the map value
*/
string2 = Tcl_GetStringFromObj(mapElemv[index+1],
&length2);
@@ -1128,6 +1395,15 @@ Tcl_StringObjCmd(dummy, interp, objc, objv)
*/
Tcl_AppendToObj(resultPtr, string1, len);
}
+ /*
+ * in nocase, length1 is in chars
+ * otherwise it is in bytes
+ */
+ if (nocase) {
+ length1--;
+ } else {
+ length1 -= len;
+ }
}
break;
}
@@ -1246,19 +1522,19 @@ Tcl_StringObjCmd(dummy, interp, objc, objv)
if (objc == 3) {
/*
- * Since the result object is not a shared object, it is
- * safe to copy the string into the result and do the
- * conversion in place. The conversion may change the length
- * of the string, so reset the length after conversion.
- */
+ * Since the result object is not a shared object, it is
+ * safe to copy the string into the result and do the
+ * conversion in place. The conversion may change the length
+ * of the string, so reset the length after conversion.
+ */
Tcl_SetStringObj(resultPtr, string1, length1);
if ((enum options) index == STR_TOLOWER) {
- length1 = Tcl_UtfToLower(Tcl_GetStringFromObj(resultPtr, NULL));
+ length1 = Tcl_UtfToLower(Tcl_GetString(resultPtr));
} else if ((enum options) index == STR_TOUPPER) {
- length1 = Tcl_UtfToUpper(Tcl_GetStringFromObj(resultPtr, NULL));
+ length1 = Tcl_UtfToUpper(Tcl_GetString(resultPtr));
} else {
- length1 = Tcl_UtfToTitle(Tcl_GetStringFromObj(resultPtr, NULL));
+ length1 = Tcl_UtfToTitle(Tcl_GetString(resultPtr));
}
Tcl_SetObjLength(resultPtr, length1);
} else {