summaryrefslogtreecommitdiffstats
path: root/generic/tclCmdMZ.c
diff options
context:
space:
mode:
authordkf <donal.k.fellows@manchester.ac.uk>2007-11-22 16:39:57 (GMT)
committerdkf <donal.k.fellows@manchester.ac.uk>2007-11-22 16:39:57 (GMT)
commit14a34c4087e1034699a9b588da8b0a9927479f45 (patch)
treebd55f87de299c03dc8081b5c4a78548493babbd1 /generic/tclCmdMZ.c
parent42acb79e42e426c61ca95ebfac491b77ac807f29 (diff)
downloadtcl-14a34c4087e1034699a9b588da8b0a9927479f45.zip
tcl-14a34c4087e1034699a9b588da8b0a9927479f45.tar.gz
tcl-14a34c4087e1034699a9b588da8b0a9927479f45.tar.bz2
Rewrote the [string] and [dict] implementations to be ready for conversion
to ensembles.
Diffstat (limited to 'generic/tclCmdMZ.c')
-rw-r--r--generic/tclCmdMZ.c3034
1 files changed, 1955 insertions, 1079 deletions
diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c
index 662b201..c421d28 100644
--- a/generic/tclCmdMZ.c
+++ b/generic/tclCmdMZ.c
@@ -15,7 +15,7 @@
* See the file "license.terms" for information on usage and redistribution of
* this file, and for a DISCLAIMER OF ALL WARRANTIES.
*
- * RCS: @(#) $Id: tclCmdMZ.c,v 1.158 2007/11/19 11:13:10 dkf Exp $
+ * RCS: @(#) $Id: tclCmdMZ.c,v 1.159 2007/11/22 16:39:57 dkf Exp $
*/
#include "tclInt.h"
@@ -1093,16 +1093,11 @@ Tcl_SplitObjCmd(
/*
*----------------------------------------------------------------------
*
- * Tcl_StringObjCmd --
- *
- * This procedure is invoked to process the "string" Tcl command. See the
- * user documentation for details on what it does. Note that this command
- * only functions correctly on properly formed Tcl UTF strings.
+ * StringFirstCmd --
*
- * Note that the primary methods here (equal, compare, match, ...) have
- * bytecode equivalents. You will find the code for those in
- * tclExecute.c. The code here will only be used in the non-bc case (like
- * in an 'eval').
+ * This procedure is invoked to process the "string first" Tcl command.
+ * See the user documentation for details on what it does. Note that this
+ * command only functions correctly on properly formed Tcl UTF strings.
*
* Results:
* A standard Tcl result.
@@ -1113,1336 +1108,2217 @@ Tcl_SplitObjCmd(
*----------------------------------------------------------------------
*/
-int
-Tcl_StringObjCmd(
+static int
+StringFirstCmd(
ClientData dummy, /* Not used. */
Tcl_Interp *interp, /* Current interpreter. */
int objc, /* Number of arguments. */
- Tcl_Obj *CONST objv[]) /* Argument objects. */
+ Tcl_Obj *const objv[]) /* Argument objects. */
{
- int index, left, right;
- char *string1, *string2;
- int length1, length2;
- static CONST char *options[] = {
- "bytelength", "compare", "equal", "first",
- "index", "is", "last", "length",
- "map", "match", "range", "repeat",
- "replace", "reverse", "tolower", "toupper",
- "totitle", "trim", "trimleft", "trimright",
- "wordend", "wordstart", NULL
- };
- enum options {
- STR_BYTELENGTH, STR_COMPARE, STR_EQUAL, STR_FIRST,
- STR_INDEX, STR_IS, STR_LAST, STR_LENGTH,
- STR_MAP, STR_MATCH, STR_RANGE, STR_REPEAT,
- STR_REPLACE, STR_REVERSE, STR_TOLOWER, STR_TOUPPER,
- STR_TOTITLE, STR_TRIM, STR_TRIMLEFT, STR_TRIMRIGHT,
- STR_WORDEND, STR_WORDSTART
- };
-
- if (objc < 2) {
- Tcl_WrongNumArgs(interp, 1, objv, "option arg ?arg ...?");
- return TCL_ERROR;
- }
+ Tcl_UniChar *ustring1, *ustring2;
+ int match, start, length1, length2;
- if (Tcl_GetIndexFromObj(interp, objv[1], options, "option", 0,
- &index) != TCL_OK) {
+ if (objc < 4 || objc > 5) {
+ Tcl_WrongNumArgs(interp, 2, objv,
+ "needleString haystackString ?startIndex?");
return TCL_ERROR;
}
- switch ((enum options) index) {
- case STR_EQUAL:
- case STR_COMPARE: {
- /*
- * Remember to keep code here in some sync with the byte-compiled
- * versions in tclExecute.c (INST_STR_EQ, INST_STR_NEQ and
- * INST_STR_CMP as well as the expr string comparison in
- * INST_EQ/INST_NEQ/INST_LT/...).
- */
-
- int i, match, length, nocase = 0, reqlength = -1;
- typedef int (*strCmpFn_t)(const char *, const char *, unsigned int);
- strCmpFn_t strCmpFn;
+ /*
+ * We are searching string2 for the sequence string1.
+ */
- if (objc < 4 || objc > 7) {
- str_cmp_args:
- Tcl_WrongNumArgs(interp, 2, objv,
- "?-nocase? ?-length int? string1 string2");
- return TCL_ERROR;
- }
+ match = -1;
+ start = 0;
+ length2 = -1;
- for (i = 2; i < objc-2; i++) {
- string2 = TclGetStringFromObj(objv[i], &length2);
- if ((length2 > 1)
- && strncmp(string2, "-nocase", (size_t)length2) == 0) {
- nocase = 1;
- } else if ((length2 > 1)
- && strncmp(string2, "-length", (size_t)length2) == 0) {
- if (i+1 >= objc-2) {
- goto str_cmp_args;
- }
- ++i;
- if (TclGetIntFromObj(interp, objv[i],
- &reqlength) != TCL_OK) {
- return TCL_ERROR;
- }
- } else {
- Tcl_AppendResult(interp, "bad option \"", string2,
- "\": must be -nocase or -length", NULL);
- return TCL_ERROR;
- }
- }
+ ustring1 = Tcl_GetUnicodeFromObj(objv[2], &length1);
+ ustring2 = Tcl_GetUnicodeFromObj(objv[3], &length2);
+ if (objc == 5) {
/*
- * From now on, we only access the two objects at the end of the
- * argument array.
+ * If a startIndex is specified, we will need to fast forward to that
+ * point in the string before we think about a match.
*/
- objv += objc-2;
-
- if ((reqlength == 0) || (objv[0] == objv[1])) {
+ if (TclGetIntForIndexM(interp, objv[4], length2-1, &start) != TCL_OK){
+ return TCL_ERROR;
+ }
+ if (start >= length2) {
+ goto str_first_done;
+ } else if (start > 0) {
+ ustring2 += start;
+ length2 -= start;
+ } else if (start < 0) {
/*
- * Always match at 0 chars of if it is the same obj.
+ * Invalid start index mapped to string start; Bug #423581
*/
- Tcl_SetObjResult(interp,
- Tcl_NewBooleanObj((enum options) index == STR_EQUAL));
- break;
- } else if (!nocase && objv[0]->typePtr == &tclByteArrayType &&
- objv[1]->typePtr == &tclByteArrayType) {
- /*
- * Use binary versions of comparisons since that won't cause undue
- * type conversions and it is much faster. Only do this if we're
- * case-sensitive (which is all that really makes sense with byte
- * arrays anyway, and we have no memcasecmp() for some
- * reason... :^)
- */
+ start = 0;
+ }
+ }
- string1 = (char *) Tcl_GetByteArrayFromObj(objv[0], &length1);
- string2 = (char *) Tcl_GetByteArrayFromObj(objv[1], &length2);
- strCmpFn = (strCmpFn_t) memcmp;
- } else if ((objv[0]->typePtr == &tclStringType)
- && (objv[1]->typePtr == &tclStringType)) {
- /*
- * Do a unicode-specific comparison if both of the args are of
- * String type. In benchmark testing this proved the most
- * efficient check between the unicode and string comparison
- * operations.
- */
+ if (length1 > 0) {
+ register Tcl_UniChar *p, *end;
- string1 = (char *) Tcl_GetUnicodeFromObj(objv[0], &length1);
- string2 = (char *) Tcl_GetUnicodeFromObj(objv[1], &length2);
- strCmpFn = (strCmpFn_t)
- (nocase ? Tcl_UniCharNcasecmp : Tcl_UniCharNcmp);
- } else {
+ end = ustring2 + length2 - length1 + 1;
+ for (p = ustring2; p < end; p++) {
/*
- * As a catch-all we will work with UTF-8. We cannot use memcmp()
- * as that is unsafe with any string containing NULL (\xC0\x80 in
- * Tcl's utf rep). We can use the more efficient TclpUtfNcmp2 if
- * we are case-sensitive and no specific length was requested.
+ * Scan forward to find the first character.
*/
- string1 = (char *) TclGetStringFromObj(objv[0], &length1);
- string2 = (char *) TclGetStringFromObj(objv[1], &length2);
- if ((reqlength < 0) && !nocase) {
- strCmpFn = (strCmpFn_t) TclpUtfNcmp2;
- } else {
- length1 = Tcl_NumUtfChars(string1, length1);
- length2 = Tcl_NumUtfChars(string2, length2);
- strCmpFn = (strCmpFn_t)
- (nocase ? Tcl_UtfNcasecmp : Tcl_UtfNcmp);
+ if ((*p == *ustring1) && (TclUniCharNcmp(ustring1, p,
+ (unsigned long) length1) == 0)) {
+ match = p - ustring2;
+ break;
}
}
+ }
- if (((enum options) index == STR_EQUAL)
- && (reqlength < 0) && (length1 != length2)) {
- match = 1; /* This will be reversed below. */
- } else {
- length = (length1 < length2) ? length1 : length2;
- if (reqlength > 0 && reqlength < length) {
- length = reqlength;
- } else if (reqlength < 0) {
- /*
- * The requested length is negative, so we ignore it by
- * setting it to length + 1 so we correct the match var.
- */
+ /*
+ * Compute the character index of the matching string by counting the
+ * number of characters before the match.
+ */
- reqlength = length + 1;
- }
+ if ((match != -1) && (objc == 5)) {
+ match += start;
+ }
- match = strCmpFn(string1, string2, (unsigned) length);
- if ((match == 0) && (reqlength > length)) {
- match = length1 - length2;
- }
- }
+ str_first_done:
+ Tcl_SetObjResult(interp, Tcl_NewIntObj(match));
+ return TCL_OK;
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * StringLastCmd --
+ *
+ * This procedure is invoked to process the "string last" Tcl command.
+ * See the user documentation for details on what it does. Note that this
+ * command only functions correctly on properly formed Tcl UTF strings.
+ *
+ * Results:
+ * A standard Tcl result.
+ *
+ * Side effects:
+ * See the user documentation.
+ *
+ *----------------------------------------------------------------------
+ */
- if ((enum options) index == STR_EQUAL) {
- Tcl_SetObjResult(interp, Tcl_NewBooleanObj(match ? 0 : 1));
- } else {
- Tcl_SetObjResult(interp, Tcl_NewIntObj(
- (match > 0) ? 1 : (match < 0) ? -1 : 0));
- }
- break;
+static int
+StringLastCmd(
+ ClientData dummy, /* Not used. */
+ Tcl_Interp *interp, /* Current interpreter. */
+ int objc, /* Number of arguments. */
+ Tcl_Obj *const objv[]) /* Argument objects. */
+{
+ Tcl_UniChar *ustring1, *ustring2, *p;
+ int match, start, length1, length2;
+
+ if (objc < 4 || objc > 5) {
+ Tcl_WrongNumArgs(interp, 2, objv,
+ "needleString haystackString ?startIndex?");
+ return TCL_ERROR;
}
- case STR_FIRST: {
- Tcl_UniChar *ustring1, *ustring2;
- int match, start;
- if (objc < 4 || objc > 5) {
- Tcl_WrongNumArgs(interp, 2, objv,
- "needleString haystackString ?startIndex?");
- return TCL_ERROR;
- }
+ /*
+ * We are searching string2 for the sequence string1.
+ */
+ match = -1;
+ start = 0;
+ length2 = -1;
+
+ ustring1 = Tcl_GetUnicodeFromObj(objv[2], &length1);
+ ustring2 = Tcl_GetUnicodeFromObj(objv[3], &length2);
+
+ if (objc == 5) {
/*
- * We are searching string2 for the sequence string1.
+ * If a startIndex is specified, we will need to restrict the string
+ * range to that char index in the string
*/
- match = -1;
- start = 0;
- length2 = -1;
-
- ustring1 = Tcl_GetUnicodeFromObj(objv[2], &length1);
- ustring2 = Tcl_GetUnicodeFromObj(objv[3], &length2);
+ if (TclGetIntForIndexM(interp, objv[4], length2-1, &start) != TCL_OK){
+ return TCL_ERROR;
+ }
+ if (start < 0) {
+ goto str_last_done;
+ } else if (start < length2) {
+ p = ustring2 + start + 1 - length1;
+ } else {
+ p = ustring2 + length2 - length1;
+ }
+ } else {
+ p = ustring2 + length2 - length1;
+ }
- if (objc == 5) {
+ if (length1 > 0) {
+ for (; p >= ustring2; p--) {
/*
- * If a startIndex is specified, we will need to fast forward to
- * that point in the string before we think about a match.
+ * Scan backwards to find the first character.
*/
- if (TclGetIntForIndexM(interp, objv[4], length2 - 1,
- &start) != TCL_OK) {
- return TCL_ERROR;
- }
- if (start >= length2) {
- goto str_first_done;
- } else if (start > 0) {
- ustring2 += start;
- length2 -= start;
- } else if (start < 0) {
- /*
- * Invalid start index mapped to string start; Bug #423581
- */
-
- start = 0;
+ if ((*p == *ustring1) && !memcmp(ustring1, p,
+ sizeof(Tcl_UniChar) * (size_t)length1)) {
+ match = p - ustring2;
+ break;
}
}
+ }
+
+ str_last_done:
+ Tcl_SetObjResult(interp, Tcl_NewIntObj(match));
+ return TCL_OK;
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * StringIndexCmd --
+ *
+ * This procedure is invoked to process the "string index" Tcl command.
+ * See the user documentation for details on what it does. Note that this
+ * command only functions correctly on properly formed Tcl UTF strings.
+ *
+ * Results:
+ * A standard Tcl result.
+ *
+ * Side effects:
+ * See the user documentation.
+ *
+ *----------------------------------------------------------------------
+ */
- if (length1 > 0) {
- register Tcl_UniChar *p, *end;
+static int
+StringIndexCmd(
+ ClientData dummy, /* Not used. */
+ Tcl_Interp *interp, /* Current interpreter. */
+ int objc, /* Number of arguments. */
+ Tcl_Obj *const objv[]) /* Argument objects. */
+{
+ int length, index;
- end = ustring2 + length2 - length1 + 1;
- for (p = ustring2; p < end; p++) {
- /*
- * Scan forward to find the first character.
- */
- if ((*p == *ustring1) && (TclUniCharNcmp(ustring1, p,
- (unsigned long) length1) == 0)) {
- match = p - ustring2;
- break;
- }
- }
- }
+ if (objc != 4) {
+ Tcl_WrongNumArgs(interp, 2, objv, "string charIndex");
+ return TCL_ERROR;
+ }
- /*
- * Compute the character index of the matching string by counting the
- * number of characters before the match.
- */
+ /*
+ * If we have a ByteArray object, avoid indexing in the Utf string since
+ * the byte array contains one byte per character. Otherwise, use the
+ * Unicode string rep to get the index'th char.
+ */
- if ((match != -1) && (objc == 5)) {
- match += start;
- }
+ if (objv[2]->typePtr == &tclByteArrayType) {
+ const unsigned char *string =
+ Tcl_GetByteArrayFromObj(objv[2], &length);
- str_first_done:
- Tcl_SetObjResult(interp, Tcl_NewIntObj(match));
- break;
- }
- case STR_INDEX: {
- if (objc != 4) {
- Tcl_WrongNumArgs(interp, 2, objv, "string charIndex");
+ if (TclGetIntForIndexM(interp, objv[3], length-1, &index) != TCL_OK){
return TCL_ERROR;
}
-
+ if ((index >= 0) && (index < length)) {
+ Tcl_SetObjResult(interp, Tcl_NewByteArrayObj(string + index, 1));
+ }
+ } else {
/*
- * If we have a ByteArray object, avoid indexing in the Utf string
- * since the byte array contains one byte per character. Otherwise,
- * use the Unicode string rep to get the index'th char.
+ * Get Unicode char length to calulate what 'end' means.
*/
- if (objv[2]->typePtr == &tclByteArrayType) {
- string1 = (char *) Tcl_GetByteArrayFromObj(objv[2], &length1);
-
- if (TclGetIntForIndexM(interp, objv[3], length1 - 1,
- &index) != TCL_OK) {
- return TCL_ERROR;
- }
- if ((index >= 0) && (index < length1)) {
- Tcl_SetObjResult(interp, Tcl_NewByteArrayObj(
- (unsigned char *)(&string1[index]), 1));
- }
- } else {
- /*
- * Get Unicode char length to calulate what 'end' means.
- */
-
- length1 = Tcl_GetCharLength(objv[2]);
+ length = Tcl_GetCharLength(objv[2]);
- if (TclGetIntForIndexM(interp, objv[3], length1 - 1,
- &index) != TCL_OK) {
- return TCL_ERROR;
- }
- if ((index >= 0) && (index < length1)) {
- char buf[TCL_UTF_MAX];
- Tcl_UniChar ch;
+ if (TclGetIntForIndexM(interp, objv[3], length-1, &index) != TCL_OK){
+ return TCL_ERROR;
+ }
+ if ((index >= 0) && (index < length)) {
+ char buf[TCL_UTF_MAX];
+ Tcl_UniChar ch;
- ch = Tcl_GetUniChar(objv[2], index);
- length1 = Tcl_UniCharToUtf(ch, buf);
- Tcl_SetObjResult(interp, Tcl_NewStringObj(buf, length1));
- }
+ ch = Tcl_GetUniChar(objv[2], index);
+ length = Tcl_UniCharToUtf(ch, buf);
+ Tcl_SetObjResult(interp, Tcl_NewStringObj(buf, length));
}
- break;
}
- case STR_IS: {
- char *end, *stop;
- Tcl_UniChar ch;
+ return TCL_OK;
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * StringIsCmd --
+ *
+ * This procedure is invoked to process the "string is" Tcl command. See
+ * the user documentation for details on what it does. Note that this
+ * command only functions correctly on properly formed Tcl UTF strings.
+ *
+ * Results:
+ * A standard Tcl result.
+ *
+ * Side effects:
+ * See the user documentation.
+ *
+ *----------------------------------------------------------------------
+ */
- /*
- * The UniChar comparison function
- */
+static int
+StringIsCmd(
+ ClientData dummy, /* Not used. */
+ Tcl_Interp *interp, /* Current interpreter. */
+ int objc, /* Number of arguments. */
+ Tcl_Obj *const objv[]) /* Argument objects. */
+{
+ const char *string1, *string2, *end, *stop;
+ Tcl_UniChar ch;
+ int (*chcomp)(int) = NULL; /* The UniChar comparison function. */
+ int i, failat = 0, result = 1, strict = 0, index, length1, length2;
+ Tcl_Obj *objPtr, *failVarObj = NULL;
+ Tcl_WideInt w;
+
+ static const char *isOptions[] = {
+ "alnum", "alpha", "ascii", "control",
+ "boolean", "digit", "double", "false",
+ "graph", "integer", "list", "lower",
+ "print", "punct", "space", "true",
+ "upper", "wideinteger", "wordchar", "xdigit",
+ NULL
+ };
+ enum isOptions {
+ STR_IS_ALNUM, STR_IS_ALPHA, STR_IS_ASCII, STR_IS_CONTROL,
+ STR_IS_BOOL, STR_IS_DIGIT, STR_IS_DOUBLE, STR_IS_FALSE,
+ STR_IS_GRAPH, STR_IS_INT, STR_IS_LIST, STR_IS_LOWER,
+ STR_IS_PRINT, STR_IS_PUNCT, STR_IS_SPACE, STR_IS_TRUE,
+ STR_IS_UPPER, STR_IS_WIDE, STR_IS_WORD, STR_IS_XDIGIT
+ };
- int (*chcomp)(int) = NULL;
- int i, failat = 0, result = 1, strict = 0;
- Tcl_Obj *objPtr, *failVarObj = NULL;
- Tcl_WideInt w;
-
- static CONST char *isOptions[] = {
- "alnum", "alpha", "ascii", "control",
- "boolean", "digit", "double", "false",
- "graph", "integer", "list", "lower",
- "print", "punct", "space", "true",
- "upper", "wideinteger", "wordchar", "xdigit",
- NULL
- };
- enum isOptions {
- STR_IS_ALNUM, STR_IS_ALPHA, STR_IS_ASCII, STR_IS_CONTROL,
- STR_IS_BOOL, STR_IS_DIGIT, STR_IS_DOUBLE, STR_IS_FALSE,
- STR_IS_GRAPH, STR_IS_INT, STR_IS_LIST, STR_IS_LOWER,
- STR_IS_PRINT, STR_IS_PUNCT, STR_IS_SPACE, STR_IS_TRUE,
- STR_IS_UPPER, STR_IS_WIDE, STR_IS_WORD, STR_IS_XDIGIT
- };
+ if (objc < 4 || objc > 7) {
+ Tcl_WrongNumArgs(interp, 2, objv,
+ "class ?-strict? ?-failindex var? str");
+ return TCL_ERROR;
+ }
+ if (Tcl_GetIndexFromObj(interp, objv[2], isOptions, "class", 0,
+ &index) != TCL_OK) {
+ return TCL_ERROR;
+ }
- if (objc < 4 || objc > 7) {
- Tcl_WrongNumArgs(interp, 2, objv,
- "class ?-strict? ?-failindex var? str");
- return TCL_ERROR;
- }
- if (Tcl_GetIndexFromObj(interp, objv[2], isOptions, "class", 0,
- &index) != TCL_OK) {
- return TCL_ERROR;
- }
- if (objc != 4) {
- for (i = 3; i < objc-1; i++) {
- string2 = TclGetStringFromObj(objv[i], &length2);
- if ((length2 > 1) &&
- strncmp(string2, "-strict", (size_t) length2) == 0) {
- strict = 1;
- } else if ((length2 > 1) &&
- strncmp(string2, "-failindex", (size_t)length2) == 0){
- if (i+1 >= objc-1) {
- Tcl_WrongNumArgs(interp, 3, objv,
- "?-strict? ?-failindex var? str");
- return TCL_ERROR;
- }
- failVarObj = objv[++i];
- } else {
- Tcl_AppendResult(interp, "bad option \"", string2,
- "\": must be -strict or -failindex", NULL);
+ if (objc != 4) {
+ for (i = 3; i < objc-1; i++) {
+ string2 = TclGetStringFromObj(objv[i], &length2);
+ if ((length2 > 1) &&
+ strncmp(string2, "-strict", (size_t) length2) == 0) {
+ strict = 1;
+ } else if ((length2 > 1) &&
+ strncmp(string2, "-failindex", (size_t)length2) == 0){
+ if (i+1 >= objc-1) {
+ Tcl_WrongNumArgs(interp, 3, objv,
+ "?-strict? ?-failindex var? str");
return TCL_ERROR;
}
+ failVarObj = objv[++i];
+ } else {
+ Tcl_AppendResult(interp, "bad option \"", string2,
+ "\": must be -strict or -failindex", NULL);
+ return TCL_ERROR;
}
}
+ }
- /*
- * We get the objPtr so that we can short-cut for some classes by
- * checking the object type (int and double), but we need the string
- * otherwise, because we don't want any conversion of type occuring
- * (as, for example, Tcl_Get*FromObj would do
- */
+ /*
+ * We get the objPtr so that we can short-cut for some classes by checking
+ * the object type (int and double), but we need the string otherwise,
+ * because we don't want any conversion of type occuring (as, for example,
+ * Tcl_Get*FromObj would do).
+ */
- objPtr = objv[objc-1];
- string1 = TclGetStringFromObj(objPtr, &length1);
- if (length1 == 0 && index != STR_IS_LIST) {
- if (strict) {
- result = 0;
- }
- goto str_is_done;
+ objPtr = objv[objc-1];
+ string1 = TclGetStringFromObj(objPtr, &length1);
+ if (length1 == 0 && index != STR_IS_LIST) {
+ if (strict) {
+ result = 0;
}
- end = string1 + length1;
+ goto str_is_done;
+ }
+ end = string1 + length1;
- /*
- * When entering here, result == 1 and failat == 0
- */
+ /*
+ * When entering here, result == 1 and failat == 0.
+ */
- switch ((enum isOptions) index) {
- case STR_IS_ALNUM:
- chcomp = Tcl_UniCharIsAlnum;
- break;
- case STR_IS_ALPHA:
- chcomp = Tcl_UniCharIsAlpha;
- break;
- case STR_IS_ASCII:
- chcomp = UniCharIsAscii;
- break;
- case STR_IS_BOOL:
- case STR_IS_TRUE:
- case STR_IS_FALSE:
- if (TCL_OK != Tcl_ConvertToType(NULL, objPtr, &tclBooleanType)) {
- result = 0;
- } else if ((((enum isOptions) index == STR_IS_TRUE) &&
- objPtr->internalRep.longValue == 0) ||
- (((enum isOptions) index == STR_IS_FALSE) &&
- objPtr->internalRep.longValue != 0)) {
- result = 0;
- }
- break;
- case STR_IS_CONTROL:
- chcomp = Tcl_UniCharIsControl;
- break;
- case STR_IS_DIGIT:
- chcomp = Tcl_UniCharIsDigit;
- break;
- case STR_IS_DOUBLE: {
- /* TODO */
- if ((objPtr->typePtr == &tclDoubleType) ||
- (objPtr->typePtr == &tclIntType) ||
+ switch ((enum isOptions) index) {
+ case STR_IS_ALNUM:
+ chcomp = Tcl_UniCharIsAlnum;
+ break;
+ case STR_IS_ALPHA:
+ chcomp = Tcl_UniCharIsAlpha;
+ break;
+ case STR_IS_ASCII:
+ chcomp = UniCharIsAscii;
+ break;
+ case STR_IS_BOOL:
+ case STR_IS_TRUE:
+ case STR_IS_FALSE:
+ if (TCL_OK != Tcl_ConvertToType(NULL, objPtr, &tclBooleanType)) {
+ result = 0;
+ } else if ((((enum isOptions) index == STR_IS_TRUE) &&
+ objPtr->internalRep.longValue == 0)
+ || (((enum isOptions) index == STR_IS_FALSE) &&
+ objPtr->internalRep.longValue != 0)) {
+ result = 0;
+ }
+ break;
+ case STR_IS_CONTROL:
+ chcomp = Tcl_UniCharIsControl;
+ break;
+ case STR_IS_DIGIT:
+ chcomp = Tcl_UniCharIsDigit;
+ break;
+ case STR_IS_DOUBLE: {
+ /* TODO */
+ if ((objPtr->typePtr == &tclDoubleType) ||
+ (objPtr->typePtr == &tclIntType) ||
#ifndef NO_WIDE_TYPE
- (objPtr->typePtr == &tclWideIntType) ||
+ (objPtr->typePtr == &tclWideIntType) ||
#endif
- (objPtr->typePtr == &tclBignumType)) {
- break;
- }
- if (TclParseNumber(NULL, objPtr, NULL, NULL, -1,
- (const char **) &stop, 0) != TCL_OK) {
+ (objPtr->typePtr == &tclBignumType)) {
+ break;
+ }
+ if (TclParseNumber(NULL, objPtr, NULL, NULL, -1,
+ (const char **) &stop, 0) != TCL_OK) {
+ result = 0;
+ failat = 0;
+ } else {
+ failat = stop - string1;
+ if (stop < end) {
result = 0;
- failat = 0;
- } else {
- failat = stop - string1;
- if (stop < end) {
- result = 0;
- TclFreeIntRep(objPtr);
- objPtr->typePtr = NULL;
- }
+ TclFreeIntRep(objPtr);
+ objPtr->typePtr = NULL;
}
+ }
+ break;
+ }
+ case STR_IS_GRAPH:
+ chcomp = Tcl_UniCharIsGraph;
+ break;
+ case STR_IS_INT:
+ case STR_IS_WIDE:
+ if ((((enum isOptions) index) == STR_IS_INT)
+ && (TCL_OK == TclGetIntFromObj(NULL, objPtr, &i))) {
break;
}
- case STR_IS_GRAPH:
- chcomp = Tcl_UniCharIsGraph;
+ if ((((enum isOptions) index) == STR_IS_WIDE)
+ && (TCL_OK == Tcl_GetWideIntFromObj(NULL, objPtr, &w))) {
break;
- case STR_IS_INT:
- case STR_IS_WIDE:
- if ((((enum isOptions) index) == STR_IS_INT)
- && (TCL_OK == TclGetIntFromObj(NULL, objPtr, &i))) {
- break;
- }
- if ((((enum isOptions) index) == STR_IS_WIDE)
- && (TCL_OK == Tcl_GetWideIntFromObj(NULL, objPtr, &w))) {
- break;
- }
+ }
- result = 0;
+ result = 0;
+
+ if (failVarObj == NULL) {
+ /*
+ * Don't bother computing the failure point if we're not going to
+ * return it.
+ */
- if (failVarObj == NULL) {
+ break;
+ }
+ if (TclParseNumber(NULL, objPtr, NULL, NULL, -1,
+ (const char **) &stop, TCL_PARSE_INTEGER_ONLY) == TCL_OK) {
+ if (stop == end) {
/*
- * Don't bother computing the failure point if we're not
- * going to return it.
+ * Entire string parses as an integer, but rejected by
+ * Tcl_Get(Wide)IntFromObj() so we must have overflowed the
+ * target type, and our convention is to return failure at
+ * index -1 in that situation.
*/
- break;
- }
- if (TclParseNumber(NULL, objPtr, NULL, NULL, -1,
- (const char **) &stop, TCL_PARSE_INTEGER_ONLY) == TCL_OK) {
- if (stop == end) {
- /*
- * Entire string parses as an integer, but rejected by
- * Tcl_Get(Wide)IntFromObj() so we must have overflowed
- * the target type, and our convention is to return
- * failure at index -1 in that situation.
- */
- failat = -1;
- } else {
- /*
- * Some prefix parsed as an integer, but not the whole
- * string, so return failure index as the point where
- * parsing stopped. Clear out the internal rep, since
- * keeping it would leave *objPtr in an inconsistent
- * state.
- */
- failat = stop - string1;
- TclFreeIntRep(objPtr);
- objPtr->typePtr = NULL;
- }
+
+ failat = -1;
} else {
- /* No prefix is a valid integer. Fail at beginning. */
- failat = 0;
+ /*
+ * Some prefix parsed as an integer, but not the whole string,
+ * so return failure index as the point where parsing stopped.
+ * Clear out the internal rep, since keeping it would leave
+ * *objPtr in an inconsistent state.
+ */
+
+ failat = stop - string1;
+ TclFreeIntRep(objPtr);
+ objPtr->typePtr = NULL;
}
- break;
- case STR_IS_LIST:
+ } else {
/*
- * We ignore the strictness here, since empty strings are always
- * well-formed lists.
+ * No prefix is a valid integer. Fail at beginning.
*/
- if (TCL_OK == TclListObjLength(NULL, objPtr, &length2)) {
- break;
- }
+ failat = 0;
+ }
+ break;
+ case STR_IS_LIST:
+ /*
+ * We ignore the strictness here, since empty strings are always
+ * well-formed lists.
+ */
- if (failVarObj != NULL) {
- /*
- * Need to figure out where the list parsing failed, which is
- * fairly expensive. This is adapted from the core of
- * SetListFromAny().
- */
+ if (TCL_OK == TclListObjLength(NULL, objPtr, &length2)) {
+ break;
+ }
+
+ if (failVarObj != NULL) {
+ /*
+ * Need to figure out where the list parsing failed, which is
+ * fairly expensive. This is adapted from the core of
+ * SetListFromAny().
+ */
- const char *elemStart, *nextElem, *limit;
- int lenRemain, elemSize, hasBrace;
- register const char *p;
+ const char *elemStart, *nextElem, *limit;
+ int lenRemain, elemSize, hasBrace;
+ register const char *p;
- limit = string1 + length1;
- failat = -1;
- for (p=string1, lenRemain=length1; lenRemain > 0;
- p = nextElem, lenRemain = (limit-nextElem)) {
- if (TCL_ERROR == TclFindElement(NULL, p, lenRemain,
- &elemStart, &nextElem, &elemSize, &hasBrace)) {
- /*
- * This is the simplest way of getting the number of
- * characters parsed. Note that this is not the same
- * as the number of bytes when parsing strings with
- * non-ASCII characters in them.
- */
+ limit = string1 + length1;
+ failat = -1;
+ for (p=string1, lenRemain=length1; lenRemain > 0;
+ p = nextElem, lenRemain = (limit-nextElem)) {
+ if (TCL_ERROR == TclFindElement(NULL, p, lenRemain,
+ &elemStart, &nextElem, &elemSize, &hasBrace)) {
+ /*
+ * This is the simplest way of getting the number of
+ * characters parsed. Note that this is not the same as
+ * the number of bytes when parsing strings with non-ASCII
+ * characters in them.
+ */
- Tcl_Obj *tmpStr;
+ Tcl_Obj *tmpStr;
- /*
- * Skip leading spaces first. This is only really an
- * issue if it is the first "element" that has the
- * failure.
- */
+ /*
+ * Skip leading spaces first. This is only really an issue
+ * if it is the first "element" that has the failure.
+ */
- while (isspace(UCHAR(*p))) { /* INTL: ? */
- p++;
- }
- tmpStr = Tcl_NewStringObj(string1, p-string1);
- failat = Tcl_GetCharLength(tmpStr);
- TclDecrRefCount(tmpStr);
- break;
+ while (isspace(UCHAR(*p))) { /* INTL: ? */
+ p++;
}
- }
- }
- result = 0;
- break;
- case STR_IS_LOWER:
- chcomp = Tcl_UniCharIsLower;
- break;
- case STR_IS_PRINT:
- chcomp = Tcl_UniCharIsPrint;
- break;
- case STR_IS_PUNCT:
- chcomp = Tcl_UniCharIsPunct;
- break;
- case STR_IS_SPACE:
- chcomp = Tcl_UniCharIsSpace;
- break;
- case STR_IS_UPPER:
- chcomp = Tcl_UniCharIsUpper;
- break;
- case STR_IS_WORD:
- chcomp = Tcl_UniCharIsWordChar;
- break;
- case STR_IS_XDIGIT:
- for (; string1 < end; string1++, failat++) {
- /* INTL: We assume unicode is bad for this class. */
- if ((*((unsigned char *)string1) >= 0xC0) ||
- !isxdigit(*(unsigned char *)string1)) {
- result = 0;
+ tmpStr = Tcl_NewStringObj(string1, p-string1);
+ failat = Tcl_GetCharLength(tmpStr);
+ TclDecrRefCount(tmpStr);
break;
}
}
- break;
}
- if (chcomp != NULL) {
- for (; string1 < end; string1 += length2, failat++) {
- length2 = TclUtfToUniChar(string1, &ch);
- if (!chcomp(ch)) {
- result = 0;
- break;
- }
+ result = 0;
+ break;
+ case STR_IS_LOWER:
+ chcomp = Tcl_UniCharIsLower;
+ break;
+ case STR_IS_PRINT:
+ chcomp = Tcl_UniCharIsPrint;
+ break;
+ case STR_IS_PUNCT:
+ chcomp = Tcl_UniCharIsPunct;
+ break;
+ case STR_IS_SPACE:
+ chcomp = Tcl_UniCharIsSpace;
+ break;
+ case STR_IS_UPPER:
+ chcomp = Tcl_UniCharIsUpper;
+ break;
+ case STR_IS_WORD:
+ chcomp = Tcl_UniCharIsWordChar;
+ break;
+ case STR_IS_XDIGIT:
+ for (; string1 < end; string1++, failat++) {
+ /* INTL: We assume unicode is bad for this class. */
+ if ((*((unsigned char *)string1) >= 0xC0) ||
+ !isxdigit(*(unsigned char *)string1)) {
+ result = 0;
+ break;
+ }
+ }
+ break;
+ }
+ if (chcomp != NULL) {
+ for (; string1 < end; string1 += length2, failat++) {
+ length2 = TclUtfToUniChar(string1, &ch);
+ if (!chcomp(ch)) {
+ result = 0;
+ break;
}
}
+ }
- /*
- * Only set the failVarObj when we will return 0 and we have indicated
- * a valid fail index (>= 0).
- */
+ /*
+ * Only set the failVarObj when we will return 0 and we have indicated a
+ * valid fail index (>= 0).
+ */
- str_is_done:
- if ((result == 0) && (failVarObj != NULL) &&
- Tcl_ObjSetVar2(interp, failVarObj, NULL, Tcl_NewIntObj(failat),
- TCL_LEAVE_ERR_MSG) == NULL) {
- return TCL_ERROR;
- }
- Tcl_SetObjResult(interp, Tcl_NewBooleanObj(result));
- break;
+ str_is_done:
+ if ((result == 0) && (failVarObj != NULL) &&
+ Tcl_ObjSetVar2(interp, failVarObj, NULL, Tcl_NewIntObj(failat),
+ TCL_LEAVE_ERR_MSG) == NULL) {
+ return TCL_ERROR;
+ }
+ Tcl_SetObjResult(interp, Tcl_NewBooleanObj(result));
+ return TCL_OK;
+}
+
+static int
+UniCharIsAscii(
+ int character)
+{
+ return (character >= 0) && (character < 0x80);
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * StringMapCmd --
+ *
+ * This procedure is invoked to process the "string map" Tcl command. See
+ * the user documentation for details on what it does. Note that this
+ * command only functions correctly on properly formed Tcl UTF strings.
+ *
+ * Results:
+ * A standard Tcl result.
+ *
+ * Side effects:
+ * See the user documentation.
+ *
+ *----------------------------------------------------------------------
+ */
+
+static int
+StringMapCmd(
+ ClientData dummy, /* Not used. */
+ Tcl_Interp *interp, /* Current interpreter. */
+ int objc, /* Number of arguments. */
+ Tcl_Obj *const objv[]) /* Argument objects. */
+{
+ int length1, length2, mapElemc, index;
+ int nocase = 0, mapWithDict = 0, copySource = 0;
+ Tcl_Obj **mapElemv, *sourceObj, *resultPtr;
+ Tcl_UniChar *ustring1, *ustring2, *p, *end;
+ int (*strCmpFn)(const Tcl_UniChar*, const Tcl_UniChar*, unsigned long);
+
+ if (objc < 4 || objc > 5) {
+ Tcl_WrongNumArgs(interp, 2, objv, "?-nocase? charMap string");
+ return TCL_ERROR;
}
- case STR_LAST: {
- Tcl_UniChar *ustring1, *ustring2, *p;
- int match, start;
- if (objc < 4 || objc > 5) {
- Tcl_WrongNumArgs(interp, 2, objv,
- "needleString haystackString ?startIndex?");
+ if (objc == 5) {
+ const char *string = TclGetStringFromObj(objv[2], &length2);
+
+ if ((length2 > 1) &&
+ strncmp(string, "-nocase", (size_t) length2) == 0) {
+ nocase = 1;
+ } else {
+ Tcl_AppendResult(interp, "bad option \"", string,
+ "\": must be -nocase", NULL);
return TCL_ERROR;
}
+ }
- /*
- * We are searching string2 for the sequence string1.
- */
+ /*
+ * This test is tricky, but has to be that way or you get other strange
+ * inconsistencies (see test string-10.20 for illustration why!)
+ */
- match = -1;
- start = 0;
- length2 = -1;
+ if (objv[objc-2]->typePtr == &tclDictType && objv[objc-2]->bytes == NULL){
+ int i, done;
+ Tcl_DictSearch search;
- ustring1 = Tcl_GetUnicodeFromObj(objv[2], &length1);
- ustring2 = Tcl_GetUnicodeFromObj(objv[3], &length2);
+ /*
+ * We know the type exactly, so all dict operations will succeed for
+ * sure. This shortens this code quite a bit.
+ */
- if (objc == 5) {
+ Tcl_DictObjSize(interp, objv[objc-2], &mapElemc);
+ if (mapElemc == 0) {
/*
- * If a startIndex is specified, we will need to restrict the
- * string range to that char index in the string
+ * Empty charMap, just return whatever string was given.
*/
- if (TclGetIntForIndexM(interp, objv[4], length2 - 1,
- &start) != TCL_OK) {
- return TCL_ERROR;
- }
- if (start < 0) {
- goto str_last_done;
- } else if (start < length2) {
- p = ustring2 + start + 1 - length1;
- } else {
- p = ustring2 + length2 - length1;
- }
- } else {
- p = ustring2 + length2 - length1;
+ Tcl_SetObjResult(interp, objv[objc-1]);
+ return TCL_OK;
}
- if (length1 > 0) {
- for (; p >= ustring2; p--) {
- /*
- * Scan backwards to find the first character.
- */
+ mapElemc *= 2;
+ mapWithDict = 1;
- if ((*p == *ustring1) && !memcmp(ustring1, p,
- sizeof(Tcl_UniChar) * (size_t)length1)) {
- match = p - ustring2;
- break;
- }
- }
- }
+ /*
+ * Copy the dictionary out into an array; that's the easiest way to
+ * adapt this code...
+ */
- str_last_done:
- Tcl_SetObjResult(interp, Tcl_NewIntObj(match));
- break;
- }
- case STR_BYTELENGTH:
- case STR_LENGTH:
- if (objc != 3) {
- Tcl_WrongNumArgs(interp, 2, objv, "string");
+ mapElemv = (Tcl_Obj **)
+ TclStackAlloc(interp, sizeof(Tcl_Obj *) * mapElemc);
+ Tcl_DictObjFirst(interp, objv[objc-2], &search, mapElemv+0,
+ mapElemv+1, &done);
+ for (i=2 ; i<mapElemc ; i+=2) {
+ Tcl_DictObjNext(&search, mapElemv+i, mapElemv+i+1, &done);
+ }
+ Tcl_DictObjDone(&search);
+ } else {
+ if (TclListObjGetElements(interp, objv[objc-2], &mapElemc,
+ &mapElemv) != TCL_OK) {
return TCL_ERROR;
}
+ if (mapElemc == 0) {
+ /*
+ * empty charMap, just return whatever string was given.
+ */
- if ((enum options) index == STR_BYTELENGTH) {
- (void) TclGetStringFromObj(objv[2], &length1);
- } else {
+ Tcl_SetObjResult(interp, objv[objc-1]);
+ return TCL_OK;
+ } else if (mapElemc & 1) {
/*
- * If we have a ByteArray object, avoid recomputing the string
- * since the byte array contains one byte per character.
- * Otherwise, use the Unicode string rep to calculate the length.
+ * The charMap must be an even number of key/value items.
*/
- if (objv[2]->typePtr == &tclByteArrayType) {
- (void) Tcl_GetByteArrayFromObj(objv[2], &length1);
- } else {
- length1 = Tcl_GetCharLength(objv[2]);
- }
- }
- Tcl_SetObjResult(interp, Tcl_NewIntObj(length1));
- break;
- case STR_MAP: {
- int mapElemc, nocase = 0, mapWithDict = 0, copySource = 0;
- Tcl_Obj **mapElemv, *sourceObj, *resultPtr;
- Tcl_UniChar *ustring1, *ustring2, *p, *end;
- int (*strCmpFn)(CONST Tcl_UniChar*, CONST Tcl_UniChar*, unsigned long);
-
- if (objc < 4 || objc > 5) {
- Tcl_WrongNumArgs(interp, 2, objv, "?-nocase? charMap string");
+ Tcl_SetObjResult(interp,
+ Tcl_NewStringObj("char map list unbalanced", -1));
return TCL_ERROR;
}
+ }
- if (objc == 5) {
- string2 = TclGetStringFromObj(objv[2], &length2);
- if ((length2 > 1) &&
- strncmp(string2, "-nocase", (size_t) length2) == 0) {
- nocase = 1;
- } else {
- Tcl_AppendResult(interp, "bad option \"", string2,
- "\": must be -nocase", NULL);
- return TCL_ERROR;
- }
- }
+ /*
+ * Take a copy of the source string object if it is the same as the map
+ * string to cut out nasty sharing crashes. [Bug 1018562]
+ */
+ if (objv[objc-2] == objv[objc-1]) {
+ sourceObj = Tcl_DuplicateObj(objv[objc-1]);
+ copySource = 1;
+ } else {
+ sourceObj = objv[objc-1];
+ }
+ ustring1 = Tcl_GetUnicodeFromObj(sourceObj, &length1);
+ if (length1 == 0) {
/*
- * This test is tricky, but has to be that way or you get other
- * strange inconsistencies (see test string-10.20 for illustration
- * why!)
+ * Empty input string, just stop now.
*/
- if (objv[objc-2]->typePtr == &tclDictType &&
- objv[objc-2]->bytes == NULL) {
- int i, done;
- Tcl_DictSearch search;
+ goto done;
+ }
+ end = ustring1 + length1;
- /*
- * We know the type exactly, so all dict operations will succeed
- * for sure. This shortens this code quite a bit.
- */
+ strCmpFn = (nocase ? Tcl_UniCharNcasecmp : Tcl_UniCharNcmp);
- Tcl_DictObjSize(interp, objv[objc-2], &mapElemc);
- if (mapElemc == 0) {
- /*
- * Empty charMap, just return whatever string was given.
- */
+ /*
+ * Force result to be Unicode
+ */
- Tcl_SetObjResult(interp, objv[objc-1]);
- return TCL_OK;
- }
+ resultPtr = Tcl_NewUnicodeObj(ustring1, 0);
+
+ if (mapElemc == 2) {
+ /*
+ * Special case for one map pair which avoids the extra for loop and
+ * extra calls to get Unicode data. The algorithm is otherwise
+ * identical to the multi-pair case. This will be >30% faster on
+ * larger strings.
+ */
- mapElemc *= 2;
- mapWithDict = 1;
+ int mapLen;
+ Tcl_UniChar *mapString, u2lc;
+ ustring2 = Tcl_GetUnicodeFromObj(mapElemv[0], &length2);
+ p = ustring1;
+ if ((length2 > length1) || (length2 == 0)) {
/*
- * Copy the dictionary out into an array; that's the easiest way
- * to adapt this code...
+ * Match string is either longer than input or empty.
*/
- mapElemv = (Tcl_Obj **)
- TclStackAlloc(interp, sizeof(Tcl_Obj *) * mapElemc);
- Tcl_DictObjFirst(interp, objv[objc-2], &search, mapElemv+0,
- mapElemv+1, &done);
- for (i=2 ; i<mapElemc ; i+=2) {
- Tcl_DictObjNext(&search, mapElemv+i, mapElemv+i+1, &done);
- }
- Tcl_DictObjDone(&search);
+ ustring1 = end;
} else {
- if (TclListObjGetElements(interp, objv[objc-2],
- &mapElemc, &mapElemv) != TCL_OK) {
- return TCL_ERROR;
+ mapString = Tcl_GetUnicodeFromObj(mapElemv[1], &mapLen);
+ u2lc = (nocase ? Tcl_UniCharToLower(*ustring2) : 0);
+ for (; ustring1 < end; ustring1++) {
+ if (((*ustring1 == *ustring2) ||
+ (nocase&&Tcl_UniCharToLower(*ustring1)==u2lc)) &&
+ (length2==1 || strCmpFn(ustring1, ustring2,
+ (unsigned long) length2) == 0)) {
+ if (p != ustring1) {
+ Tcl_AppendUnicodeToObj(resultPtr, p, ustring1-p);
+ p = ustring1 + length2;
+ } else {
+ p += length2;
+ }
+ ustring1 = p - 1;
+
+ Tcl_AppendUnicodeToObj(resultPtr, mapString, mapLen);
+ }
}
- if (mapElemc == 0) {
- /*
- * empty charMap, just return whatever string was given.
- */
+ }
+ } else {
+ Tcl_UniChar **mapStrings, *u2lc = NULL;
+ int *mapLens;
- Tcl_SetObjResult(interp, objv[objc-1]);
- return TCL_OK;
- } else if (mapElemc & 1) {
+ /*
+ * Precompute pointers to the unicode string and length. This saves us
+ * repeated function calls later, significantly speeding up the
+ * algorithm. We only need the lowercase first char in the nocase
+ * case.
+ */
+
+ mapStrings = (Tcl_UniChar **) TclStackAlloc(interp,
+ mapElemc * 2 * sizeof(Tcl_UniChar *));
+ mapLens = (int *) TclStackAlloc(interp, mapElemc * 2 * sizeof(int));
+ if (nocase) {
+ u2lc = (Tcl_UniChar *) TclStackAlloc(interp,
+ mapElemc * sizeof(Tcl_UniChar));
+ }
+ for (index = 0; index < mapElemc; index++) {
+ mapStrings[index] = Tcl_GetUnicodeFromObj(mapElemv[index],
+ mapLens+index);
+ if (nocase && ((index % 2) == 0)) {
+ u2lc[index/2] = Tcl_UniCharToLower(*mapStrings[index]);
+ }
+ }
+ for (p = ustring1; ustring1 < end; ustring1++) {
+ for (index = 0; index < mapElemc; index += 2) {
/*
- * The charMap must be an even number of key/value items.
+ * Get the key string to match on.
*/
- Tcl_SetObjResult(interp, Tcl_NewStringObj(
- "char map list unbalanced", -1));
- return TCL_ERROR;
+ ustring2 = mapStrings[index];
+ length2 = mapLens[index];
+ if ((length2 > 0) && ((*ustring1 == *ustring2) || (nocase &&
+ (Tcl_UniCharToLower(*ustring1) == u2lc[index/2]))) &&
+ /* Restrict max compare length. */
+ (end-ustring1 >= length2) && ((length2 == 1) ||
+ !strCmpFn(ustring2, ustring1, (unsigned) length2))) {
+ if (p != ustring1) {
+ /*
+ * Put the skipped chars onto the result first.
+ */
+
+ Tcl_AppendUnicodeToObj(resultPtr, p, ustring1-p);
+ p = ustring1 + length2;
+ } else {
+ p += length2;
+ }
+
+ /*
+ * Adjust len to be full length of matched string.
+ */
+
+ ustring1 = p - 1;
+
+ /*
+ * Append the map value to the unicode string.
+ */
+
+ Tcl_AppendUnicodeToObj(resultPtr,
+ mapStrings[index+1], mapLens[index+1]);
+ break;
+ }
}
}
-
+ if (nocase) {
+ TclStackFree(interp, u2lc);
+ }
+ TclStackFree(interp, mapLens);
+ TclStackFree(interp, mapStrings);
+ }
+ if (p != ustring1) {
/*
- * Take a copy of the source string object if it is the same as the
- * map string to cut out nasty sharing crashes. [Bug 1018562]
+ * Put the rest of the unmapped chars onto result.
*/
- if (objv[objc-2] == objv[objc-1]) {
- sourceObj = Tcl_DuplicateObj(objv[objc-1]);
- copySource = 1;
+ Tcl_AppendUnicodeToObj(resultPtr, p, ustring1 - p);
+ }
+ Tcl_SetObjResult(interp, resultPtr);
+ done:
+ if (mapWithDict) {
+ TclStackFree(interp, mapElemv);
+ }
+ if (copySource) {
+ Tcl_DecrRefCount(sourceObj);
+ }
+ return TCL_OK;
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * StringMatchCmd --
+ *
+ * This procedure is invoked to process the "string match" Tcl command.
+ * See the user documentation for details on what it does. Note that this
+ * command only functions correctly on properly formed Tcl UTF strings.
+ *
+ * Results:
+ * A standard Tcl result.
+ *
+ * Side effects:
+ * See the user documentation.
+ *
+ *----------------------------------------------------------------------
+ */
+
+static int
+StringMatchCmd(
+ ClientData dummy, /* Not used. */
+ Tcl_Interp *interp, /* Current interpreter. */
+ int objc, /* Number of arguments. */
+ Tcl_Obj *const objv[]) /* Argument objects. */
+{
+ Tcl_UniChar *ustring1, *ustring2;
+ int length1, length2, nocase = 0;
+
+ if (objc < 4 || objc > 5) {
+ Tcl_WrongNumArgs(interp, 2, objv, "?-nocase? pattern string");
+ return TCL_ERROR;
+ }
+
+ if (objc == 5) {
+ const char *string = TclGetStringFromObj(objv[2], &length2);
+
+ if ((length2 > 1) &&
+ strncmp(string, "-nocase", (size_t) length2) == 0) {
+ nocase = 1;
} else {
- sourceObj = objv[objc-1];
+ Tcl_AppendResult(interp, "bad option \"", string,
+ "\": must be -nocase", NULL);
+ return TCL_ERROR;
}
- ustring1 = Tcl_GetUnicodeFromObj(sourceObj, &length1);
- if (length1 == 0) {
- /*
- * Empty input string, just stop now.
- */
+ }
+ ustring1 = Tcl_GetUnicodeFromObj(objv[objc-1], &length1);
+ ustring2 = Tcl_GetUnicodeFromObj(objv[objc-2], &length2);
+ Tcl_SetObjResult(interp, Tcl_NewBooleanObj(
+ TclUniCharMatch(ustring1, length1, ustring2, length2, nocase)));
+ return TCL_OK;
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * StringRangeCmd --
+ *
+ * This procedure is invoked to process the "string range" Tcl command.
+ * See the user documentation for details on what it does. Note that this
+ * command only functions correctly on properly formed Tcl UTF strings.
+ *
+ * Results:
+ * A standard Tcl result.
+ *
+ * Side effects:
+ * See the user documentation.
+ *
+ *----------------------------------------------------------------------
+ */
- if (mapWithDict) {
- TclStackFree(interp, mapElemv);
- }
- if (copySource) {
- Tcl_DecrRefCount(sourceObj);
- }
- break;
- }
- end = ustring1 + length1;
+static int
+StringRangeCmd(
+ ClientData dummy, /* Not used. */
+ Tcl_Interp *interp, /* Current interpreter. */
+ int objc, /* Number of arguments. */
+ Tcl_Obj *const objv[]) /* Argument objects. */
+{
+ const unsigned char *string;
+ int length, first, last;
- strCmpFn = (nocase ? Tcl_UniCharNcasecmp : Tcl_UniCharNcmp);
+ if (objc != 5) {
+ Tcl_WrongNumArgs(interp, 2, objv, "string first last");
+ return TCL_ERROR;
+ }
+
+ /*
+ * If we have a ByteArray object, avoid indexing in the Utf string since
+ * the byte array contains one byte per character. Otherwise, use the
+ * Unicode string rep to get the range.
+ */
+ if (objv[2]->typePtr == &tclByteArrayType) {
+ string = Tcl_GetByteArrayFromObj(objv[2], &length);
+ length--;
+ } else {
/*
- * Force result to be Unicode
+ * Get the length in actual characters.
*/
- resultPtr = Tcl_NewUnicodeObj(ustring1, 0);
- if (mapElemc == 2) {
+ string = NULL;
+ length = Tcl_GetCharLength(objv[2]) - 1;
+ }
+
+ if (TclGetIntForIndexM(interp, objv[3], length, &first) != TCL_OK ||
+ TclGetIntForIndexM(interp, objv[4], length, &last) != TCL_OK) {
+ return TCL_ERROR;
+ }
+
+ if (first < 0) {
+ first = 0;
+ }
+ if (last >= length) {
+ last = length;
+ }
+ if (last >= first) {
+ if (string != NULL) {
/*
- * Special case for one map pair which avoids the extra for loop
- * and extra calls to get Unicode data. The algorithm is otherwise
- * identical to the multi-pair case. This will be >30% faster on
- * larger strings.
+ * Reread the string to prevent shimmering nasties.
*/
- int mapLen;
- Tcl_UniChar *mapString, u2lc;
+ string = Tcl_GetByteArrayFromObj(objv[2], &length);
+ Tcl_SetObjResult(interp,
+ Tcl_NewByteArrayObj(string+first, last - first + 1));
+ } else {
+ Tcl_SetObjResult(interp, Tcl_GetRange(objv[2], first, last));
+ }
+ }
+ return TCL_OK;
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * StringReptCmd --
+ *
+ * This procedure is invoked to process the "string repeat" Tcl command.
+ * See the user documentation for details on what it does. Note that this
+ * command only functions correctly on properly formed Tcl UTF strings.
+ *
+ * Results:
+ * A standard Tcl result.
+ *
+ * Side effects:
+ * See the user documentation.
+ *
+ *----------------------------------------------------------------------
+ */
- ustring2 = Tcl_GetUnicodeFromObj(mapElemv[0], &length2);
- p = ustring1;
- if ((length2 > length1) || (length2 == 0)) {
- /*
- * Match string is either longer than input or empty.
- */
+static int
+StringReptCmd(
+ ClientData dummy, /* Not used. */
+ Tcl_Interp *interp, /* Current interpreter. */
+ int objc, /* Number of arguments. */
+ Tcl_Obj *const objv[]) /* Argument objects. */
+{
+ const char *string1;
+ char *string2;
+ int count, index, length1, length2;
+ Tcl_Obj *resultPtr;
- ustring1 = end;
- } else {
- mapString = Tcl_GetUnicodeFromObj(mapElemv[1], &mapLen);
- u2lc = (nocase ? Tcl_UniCharToLower(*ustring2) : 0);
- for (; ustring1 < end; ustring1++) {
- if (((*ustring1 == *ustring2) ||
- (nocase&&Tcl_UniCharToLower(*ustring1)==u2lc)) &&
- (length2==1 || strCmpFn(ustring1, ustring2,
- (unsigned long) length2) == 0)) {
- if (p != ustring1) {
- Tcl_AppendUnicodeToObj(resultPtr, p, ustring1-p);
- p = ustring1 + length2;
- } else {
- p += length2;
- }
- ustring1 = p - 1;
-
- Tcl_AppendUnicodeToObj(resultPtr, mapString, mapLen);
- }
- }
- }
- } else {
- Tcl_UniChar **mapStrings, *u2lc = NULL;
- int *mapLens;
+ if (objc != 4) {
+ Tcl_WrongNumArgs(interp, 2, objv, "string count");
+ return TCL_ERROR;
+ }
- /*
- * Precompute pointers to the unicode string and length. This
- * saves us repeated function calls later, significantly speeding
- * up the algorithm. We only need the lowercase first char in the
- * nocase case.
- */
+ if (TclGetIntFromObj(interp, objv[3], &count) != TCL_OK) {
+ return TCL_ERROR;
+ }
- mapStrings = (Tcl_UniChar **) TclStackAlloc(interp,
- (mapElemc * 2) * sizeof(Tcl_UniChar *));
- mapLens = (int *) TclStackAlloc(interp,
- (mapElemc * 2) * sizeof(int));
- if (nocase) {
- u2lc = (Tcl_UniChar *) TclStackAlloc(interp,
- (mapElemc) * sizeof(Tcl_UniChar));
- }
- for (index = 0; index < mapElemc; index++) {
- mapStrings[index] = Tcl_GetUnicodeFromObj(mapElemv[index],
- &(mapLens[index]));
- if (nocase && ((index % 2) == 0)) {
- u2lc[index/2] = Tcl_UniCharToLower(*mapStrings[index]);
- }
- }
- for (p = ustring1; ustring1 < end; ustring1++) {
- for (index = 0; index < mapElemc; index += 2) {
- /*
- * Get the key string to match on.
- */
+ /*
+ * Check for cases that allow us to skip copying stuff.
+ */
- ustring2 = mapStrings[index];
- length2 = mapLens[index];
- if ((length2 > 0) && ((*ustring1 == *ustring2) ||
- (nocase && (Tcl_UniCharToLower(*ustring1) ==
- u2lc[index/2]))) &&
- /* Restrict max compare length. */
- ((end - ustring1) >= length2) &&
- ((length2 == 1) || strCmpFn(ustring2, ustring1,
- (unsigned long) length2) == 0)) {
- if (p != ustring1) {
- /*
- * Put the skipped chars onto the result first.
- */
-
- Tcl_AppendUnicodeToObj(resultPtr, p, ustring1-p);
- p = ustring1 + length2;
- } else {
- p += length2;
- }
+ if (count == 1) {
+ Tcl_SetObjResult(interp, objv[2]);
+ goto done;
+ } else if (count < 1) {
+ goto done;
+ }
+ string1 = TclGetStringFromObj(objv[2], &length1);
+ if (length1 <= 0) {
+ goto done;
+ }
- /*
- * Adjust len to be full length of matched string.
- */
+ /*
+ * Only build up a string that has data. Instead of building it up with
+ * repeated appends, we just allocate the necessary space once and copy
+ * the string value in. Check for overflow with back-division. [Bug
+ * #714106]
+ */
- ustring1 = p - 1;
+ length2 = length1 * count + 1;
+ if ((length2-1) / count != length1) {
+ Tcl_SetObjResult(interp, Tcl_ObjPrintf(
+ "string size overflow, must be less than %d", INT_MAX));
+ return TCL_ERROR;
+ }
- /*
- * Append the map value to the unicode string.
- */
+ /*
+ * Include space for the NUL.
+ */
- Tcl_AppendUnicodeToObj(resultPtr,
- mapStrings[index+1], mapLens[index+1]);
- break;
- }
- }
- }
- if (nocase) {
- TclStackFree(interp, u2lc);
- }
- TclStackFree(interp, mapLens);
- TclStackFree(interp, mapStrings);
- }
- if (p != ustring1) {
- /*
- * Put the rest of the unmapped chars onto result.
- */
+ string2 = attemptckalloc((size_t) length2);
+ if (string2 == NULL) {
+ /*
+ * Alloc failed. Note that in this case we try to do an error message
+ * since this is a case that's most likely when the alloc is large and
+ * that's easy to do with this API. Note that if we fail allocating a
+ * short string, this will likely keel over too (and fatally).
+ */
+
+ Tcl_SetObjResult(interp, Tcl_ObjPrintf(
+ "string size overflow, out of memory allocating %d bytes",
+ length2));
+ return TCL_ERROR;
+ }
+ for (index = 0; index < count; index++) {
+ memcpy(string2 + (length1 * index), string1, (size_t) length1);
+ }
+ string2[length2-1] = '\0';
+
+ /*
+ * We have to directly assign this instead of using Tcl_SetStringObj (and
+ * indirectly TclInitStringRep) because that makes another copy of the
+ * data.
+ */
+
+ TclNewObj(resultPtr);
+ resultPtr->bytes = string2;
+ resultPtr->length = length2-1;
+ Tcl_SetObjResult(interp, resultPtr);
+
+ done:
+ return TCL_OK;
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * StringRplcCmd --
+ *
+ * This procedure is invoked to process the "string replace" Tcl command.
+ * See the user documentation for details on what it does. Note that this
+ * command only functions correctly on properly formed Tcl UTF strings.
+ *
+ * Results:
+ * A standard Tcl result.
+ *
+ * Side effects:
+ * See the user documentation.
+ *
+ *----------------------------------------------------------------------
+ */
+
+static int
+StringRplcCmd(
+ ClientData dummy, /* Not used. */
+ Tcl_Interp *interp, /* Current interpreter. */
+ int objc, /* Number of arguments. */
+ Tcl_Obj *const objv[]) /* Argument objects. */
+{
+ Tcl_UniChar *ustring;
+ int first, last, length;
+
+ if (objc < 5 || objc > 6) {
+ Tcl_WrongNumArgs(interp, 2, objv, "string first last ?string?");
+ return TCL_ERROR;
+ }
+
+ ustring = Tcl_GetUnicodeFromObj(objv[2], &length);
+ length--;
+
+ if (TclGetIntForIndexM(interp, objv[3], length, &first) != TCL_OK ||
+ TclGetIntForIndexM(interp, objv[4], length, &last) != TCL_OK){
+ return TCL_ERROR;
+ }
+
+ if ((last < first) || (last < 0) || (first > length)) {
+ Tcl_SetObjResult(interp, objv[2]);
+ } else {
+ Tcl_Obj *resultPtr;
- Tcl_AppendUnicodeToObj(resultPtr, p, ustring1 - p);
+ if (first < 0) {
+ first = 0;
}
- if (mapWithDict) {
- TclStackFree(interp, mapElemv);
+
+ resultPtr = Tcl_NewUnicodeObj(ustring, first);
+ if (objc == 6) {
+ Tcl_AppendObjToObj(resultPtr, objv[5]);
}
- if (copySource) {
- Tcl_DecrRefCount(sourceObj);
+ if (last < length) {
+ Tcl_AppendUnicodeToObj(resultPtr, ustring + last + 1,
+ length - last);
}
Tcl_SetObjResult(interp, resultPtr);
- break;
}
- case STR_MATCH: {
- Tcl_UniChar *ustring1, *ustring2;
- int nocase = 0;
+ return TCL_OK;
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * StringRevCmd --
+ *
+ * This procedure is invoked to process the "string reverse" Tcl command.
+ * See the user documentation for details on what it does. Note that this
+ * command only functions correctly on properly formed Tcl UTF strings.
+ *
+ * Results:
+ * A standard Tcl result.
+ *
+ * Side effects:
+ * See the user documentation.
+ *
+ *----------------------------------------------------------------------
+ */
- if (objc < 4 || objc > 5) {
- Tcl_WrongNumArgs(interp, 2, objv, "?-nocase? pattern string");
- return TCL_ERROR;
+static int
+StringRevCmd(
+ ClientData dummy, /* Not used. */
+ Tcl_Interp *interp, /* Current interpreter. */
+ int objc, /* Number of arguments. */
+ Tcl_Obj *const objv[]) /* Argument objects. */
+{
+ if (objc != 3) {
+ Tcl_WrongNumArgs(interp, 2, objv, "string");
+ return TCL_ERROR;
+ }
+
+ Tcl_SetObjResult(interp, TclStringObjReverse(objv[2]));
+ return TCL_OK;
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * StringStartCmd --
+ *
+ * This procedure is invoked to process the "string wordstart" Tcl
+ * command. See the user documentation for details on what it does. Note
+ * that this command only functions correctly on properly formed Tcl UTF
+ * strings.
+ *
+ * Results:
+ * A standard Tcl result.
+ *
+ * Side effects:
+ * See the user documentation.
+ *
+ *----------------------------------------------------------------------
+ */
+
+static int
+StringStartCmd(
+ ClientData dummy, /* Not used. */
+ Tcl_Interp *interp, /* Current interpreter. */
+ int objc, /* Number of arguments. */
+ Tcl_Obj *const objv[]) /* Argument objects. */
+{
+ Tcl_UniChar ch;
+ const char *p, *string;
+ int cur, index, length, numChars;
+
+ if (objc != 4) {
+ Tcl_WrongNumArgs(interp, 2, objv, "string index");
+ return TCL_ERROR;
+ }
+
+ string = TclGetStringFromObj(objv[2], &length);
+ numChars = Tcl_NumUtfChars(string, length);
+ if (TclGetIntForIndexM(interp, objv[3], numChars-1, &index) != TCL_OK) {
+ return TCL_ERROR;
+ }
+ if (index >= numChars) {
+ index = numChars - 1;
+ }
+ cur = 0;
+ if (index > 0) {
+ p = Tcl_UtfAtIndex(string, index);
+ for (cur = index; cur >= 0; cur--) {
+ TclUtfToUniChar(p, &ch);
+ if (!Tcl_UniCharIsWordChar(ch)) {
+ break;
+ }
+ p = Tcl_UtfPrev(p, string);
}
+ if (cur != index) {
+ cur += 1;
+ }
+ }
+ Tcl_SetObjResult(interp, Tcl_NewIntObj(cur));
+ return TCL_OK;
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * StringEndCmd --
+ *
+ * This procedure is invoked to process the "string wordend" Tcl command.
+ * See the user documentation for details on what it does. Note that this
+ * command only functions correctly on properly formed Tcl UTF strings.
+ *
+ * Results:
+ * A standard Tcl result.
+ *
+ * Side effects:
+ * See the user documentation.
+ *
+ *----------------------------------------------------------------------
+ */
- if (objc == 5) {
- string2 = TclGetStringFromObj(objv[2], &length2);
- if ((length2 > 1) &&
- strncmp(string2, "-nocase", (size_t) length2) == 0) {
- nocase = 1;
- } else {
- Tcl_AppendResult(interp, "bad option \"", string2,
- "\": must be -nocase", NULL);
- return TCL_ERROR;
+static int
+StringEndCmd(
+ ClientData dummy, /* Not used. */
+ Tcl_Interp *interp, /* Current interpreter. */
+ int objc, /* Number of arguments. */
+ Tcl_Obj *const objv[]) /* Argument objects. */
+{
+ Tcl_UniChar ch;
+ const char *p, *end, *string;
+ int cur, index, length, numChars;
+
+ if (objc != 4) {
+ Tcl_WrongNumArgs(interp, 2, objv, "string index");
+ return TCL_ERROR;
+ }
+
+ string = TclGetStringFromObj(objv[2], &length);
+ numChars = Tcl_NumUtfChars(string, length);
+ if (TclGetIntForIndexM(interp, objv[3], numChars-1, &index) != TCL_OK) {
+ return TCL_ERROR;
+ }
+ if (index < 0) {
+ index = 0;
+ }
+ if (index < numChars) {
+ p = Tcl_UtfAtIndex(string, index);
+ end = string+length;
+ for (cur = index; p < end; cur++) {
+ p += TclUtfToUniChar(p, &ch);
+ if (!Tcl_UniCharIsWordChar(ch)) {
+ break;
}
}
- ustring1 = Tcl_GetUnicodeFromObj(objv[objc-1], &length1);
- ustring2 = Tcl_GetUnicodeFromObj(objv[objc-2], &length2);
- Tcl_SetObjResult(interp, Tcl_NewBooleanObj(TclUniCharMatch(
- ustring1, length1, ustring2, length2, nocase)));
- break;
+ if (cur == index) {
+ cur++;
+ }
+ } else {
+ cur = numChars;
+ }
+ Tcl_SetObjResult(interp, Tcl_NewIntObj(cur));
+ return TCL_OK;
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * StringEqualCmd --
+ *
+ * This procedure is invoked to process the "string equal" Tcl command.
+ * See the user documentation for details on what it does. Note that this
+ * command only functions correctly on properly formed Tcl UTF strings.
+ *
+ * Results:
+ * A standard Tcl result.
+ *
+ * Side effects:
+ * See the user documentation.
+ *
+ *----------------------------------------------------------------------
+ */
+
+static int
+StringEqualCmd(
+ ClientData dummy, /* Not used. */
+ Tcl_Interp *interp, /* Current interpreter. */
+ int objc, /* Number of arguments. */
+ Tcl_Obj *const objv[]) /* Argument objects. */
+{
+ /*
+ * Remember to keep code here in some sync with the byte-compiled versions
+ * in tclExecute.c (INST_STR_EQ, INST_STR_NEQ and INST_STR_CMP as well as
+ * the expr string comparison in INST_EQ/INST_NEQ/INST_LT/...).
+ */
+
+ char *string1, *string2;
+ int length1, length2, i, match, length, nocase = 0, reqlength = -1;
+ typedef int (*strCmpFn_t)(const char *, const char *, unsigned int);
+ strCmpFn_t strCmpFn;
+
+ if (objc < 4 || objc > 7) {
+ str_cmp_args:
+ Tcl_WrongNumArgs(interp, 2, objv,
+ "?-nocase? ?-length int? string1 string2");
+ return TCL_ERROR;
}
- case STR_RANGE: {
- int first, last;
- if (objc != 5) {
- Tcl_WrongNumArgs(interp, 2, objv, "string first last");
+ for (i = 2; i < objc-2; i++) {
+ string2 = TclGetStringFromObj(objv[i], &length2);
+ if ((length2 > 1) && !strncmp(string2, "-nocase", (size_t)length2)) {
+ nocase = 1;
+ } else if ((length2 > 1)
+ && !strncmp(string2, "-length", (size_t)length2)) {
+ if (i+1 >= objc-2) {
+ goto str_cmp_args;
+ }
+ ++i;
+ if (TclGetIntFromObj(interp, objv[i], &reqlength) != TCL_OK) {
+ return TCL_ERROR;
+ }
+ } else {
+ Tcl_AppendResult(interp, "bad option \"", string2,
+ "\": must be -nocase or -length", NULL);
return TCL_ERROR;
}
+ }
+
+ /*
+ * From now on, we only access the two objects at the end of the argument
+ * array.
+ */
+
+ objv += objc-2;
+
+ if ((reqlength == 0) || (objv[0] == objv[1])) {
+ /*
+ * Always match at 0 chars of if it is the same obj.
+ */
+
+ Tcl_SetObjResult(interp, Tcl_NewBooleanObj(1));
+ return TCL_OK;
+ }
+
+ if (!nocase && objv[0]->typePtr == &tclByteArrayType &&
+ objv[1]->typePtr == &tclByteArrayType) {
+ /*
+ * Use binary versions of comparisons since that won't cause undue
+ * type conversions and it is much faster. Only do this if we're
+ * case-sensitive (which is all that really makes sense with byte
+ * arrays anyway, and we have no memcasecmp() for some reason... :^)
+ */
+ string1 = (char *) Tcl_GetByteArrayFromObj(objv[0], &length1);
+ string2 = (char *) Tcl_GetByteArrayFromObj(objv[1], &length2);
+ strCmpFn = (strCmpFn_t) memcmp;
+ } else if ((objv[0]->typePtr == &tclStringType)
+ && (objv[1]->typePtr == &tclStringType)) {
+ /*
+ * Do a unicode-specific comparison if both of the args are of String
+ * type. In benchmark testing this proved the most efficient check
+ * between the unicode and string comparison operations.
+ */
+
+ string1 = (char *) Tcl_GetUnicodeFromObj(objv[0], &length1);
+ string2 = (char *) Tcl_GetUnicodeFromObj(objv[1], &length2);
+ strCmpFn = (strCmpFn_t)
+ (nocase ? Tcl_UniCharNcasecmp : Tcl_UniCharNcmp);
+ } else {
/*
- * If we have a ByteArray object, avoid indexing in the Utf string
- * since the byte array contains one byte per character. Otherwise,
- * use the Unicode string rep to get the range.
+ * As a catch-all we will work with UTF-8. We cannot use memcmp() as
+ * that is unsafe with any string containing NUL (\xC0\x80 in Tcl's
+ * utf rep). We can use the more efficient TclpUtfNcmp2 if we are
+ * case-sensitive and no specific length was requested.
*/
- if (objv[2]->typePtr == &tclByteArrayType) {
- string1 = (char *) Tcl_GetByteArrayFromObj(objv[2], &length1);
- length1--;
+ string1 = (char *) TclGetStringFromObj(objv[0], &length1);
+ string2 = (char *) TclGetStringFromObj(objv[1], &length2);
+ if ((reqlength < 0) && !nocase) {
+ strCmpFn = (strCmpFn_t) TclpUtfNcmp2;
} else {
+ length1 = Tcl_NumUtfChars(string1, length1);
+ length2 = Tcl_NumUtfChars(string2, length2);
+ strCmpFn = (strCmpFn_t) (nocase ? Tcl_UtfNcasecmp : Tcl_UtfNcmp);
+ }
+ }
+
+ if ((reqlength < 0) && (length1 != length2)) {
+ match = 1; /* This will be reversed below. */
+ } else {
+ length = (length1 < length2) ? length1 : length2;
+ if (reqlength > 0 && reqlength < length) {
+ length = reqlength;
+ } else if (reqlength < 0) {
/*
- * Get the length in actual characters.
+ * The requested length is negative, so we ignore it by setting it
+ * to length + 1 so we correct the match var.
*/
- string1 = NULL;
- length1 = Tcl_GetCharLength(objv[2]) - 1;
+ reqlength = length + 1;
+ }
+
+ match = strCmpFn(string1, string2, (unsigned) length);
+ if ((match == 0) && (reqlength > length)) {
+ match = length1 - length2;
}
+ }
+
+ Tcl_SetObjResult(interp, Tcl_NewBooleanObj(match ? 0 : 1));
+ return TCL_OK;
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * StringCmpCmd --
+ *
+ * This procedure is invoked to process the "string compare" Tcl command.
+ * See the user documentation for details on what it does. Note that this
+ * command only functions correctly on properly formed Tcl UTF strings.
+ *
+ * Results:
+ * A standard Tcl result.
+ *
+ * Side effects:
+ * See the user documentation.
+ *
+ *----------------------------------------------------------------------
+ */
+
+static int
+StringCmpCmd(
+ ClientData dummy, /* Not used. */
+ Tcl_Interp *interp, /* Current interpreter. */
+ int objc, /* Number of arguments. */
+ Tcl_Obj *const objv[]) /* Argument objects. */
+{
+ /*
+ * Remember to keep code here in some sync with the byte-compiled versions
+ * in tclExecute.c (INST_STR_EQ, INST_STR_NEQ and INST_STR_CMP as well as
+ * the expr string comparison in INST_EQ/INST_NEQ/INST_LT/...).
+ */
+
+ char *string1, *string2;
+ int length1, length2, i, match, length, nocase = 0, reqlength = -1;
+ typedef int (*strCmpFn_t)(const char *, const char *, unsigned int);
+ strCmpFn_t strCmpFn;
+
+ if (objc < 4 || objc > 7) {
+ str_cmp_args:
+ Tcl_WrongNumArgs(interp, 2, objv,
+ "?-nocase? ?-length int? string1 string2");
+ return TCL_ERROR;
+ }
- if (TclGetIntForIndexM(interp, objv[3], length1, &first) != TCL_OK ||
- TclGetIntForIndexM(interp, objv[4], length1, &last) != TCL_OK) {
+ for (i = 2; i < objc-2; i++) {
+ string2 = TclGetStringFromObj(objv[i], &length2);
+ if ((length2 > 1) && !strncmp(string2, "-nocase", (size_t)length2)) {
+ nocase = 1;
+ } else if ((length2 > 1)
+ && !strncmp(string2, "-length", (size_t)length2)) {
+ if (i+1 >= objc-2) {
+ goto str_cmp_args;
+ }
+ ++i;
+ if (TclGetIntFromObj(interp, objv[i], &reqlength) != TCL_OK) {
+ return TCL_ERROR;
+ }
+ } else {
+ Tcl_AppendResult(interp, "bad option \"", string2,
+ "\": must be -nocase or -length", NULL);
return TCL_ERROR;
}
+ }
+
+ /*
+ * From now on, we only access the two objects at the end of the argument
+ * array.
+ */
+
+ objv += objc-2;
+
+ if ((reqlength == 0) || (objv[0] == objv[1])) {
+ /*
+ * Always match at 0 chars of if it is the same obj.
+ */
+
+ Tcl_SetObjResult(interp, Tcl_NewBooleanObj(0));
+ return TCL_OK;
+ }
+
+ if (!nocase && objv[0]->typePtr == &tclByteArrayType &&
+ objv[1]->typePtr == &tclByteArrayType) {
+ /*
+ * Use binary versions of comparisons since that won't cause undue
+ * type conversions and it is much faster. Only do this if we're
+ * case-sensitive (which is all that really makes sense with byte
+ * arrays anyway, and we have no memcasecmp() for some reason... :^)
+ */
+
+ string1 = (char *) Tcl_GetByteArrayFromObj(objv[0], &length1);
+ string2 = (char *) Tcl_GetByteArrayFromObj(objv[1], &length2);
+ strCmpFn = (strCmpFn_t) memcmp;
+ } else if ((objv[0]->typePtr == &tclStringType)
+ && (objv[1]->typePtr == &tclStringType)) {
+ /*
+ * Do a unicode-specific comparison if both of the args are of String
+ * type. In benchmark testing this proved the most efficient check
+ * between the unicode and string comparison operations.
+ */
+
+ string1 = (char *) Tcl_GetUnicodeFromObj(objv[0], &length1);
+ string2 = (char *) Tcl_GetUnicodeFromObj(objv[1], &length2);
+ strCmpFn = (strCmpFn_t)
+ (nocase ? Tcl_UniCharNcasecmp : Tcl_UniCharNcmp);
+ } else {
+ /*
+ * As a catch-all we will work with UTF-8. We cannot use memcmp() as
+ * that is unsafe with any string containing NUL (\xC0\x80 in Tcl's
+ * utf rep). We can use the more efficient TclpUtfNcmp2 if we are
+ * case-sensitive and no specific length was requested.
+ */
+
+ string1 = (char *) TclGetStringFromObj(objv[0], &length1);
+ string2 = (char *) TclGetStringFromObj(objv[1], &length2);
+ if ((reqlength < 0) && !nocase) {
+ strCmpFn = (strCmpFn_t) TclpUtfNcmp2;
+ } else {
+ length1 = Tcl_NumUtfChars(string1, length1);
+ length2 = Tcl_NumUtfChars(string2, length2);
+ strCmpFn = (strCmpFn_t) (nocase ? Tcl_UtfNcasecmp : Tcl_UtfNcmp);
+ }
+ }
+
+ length = (length1 < length2) ? length1 : length2;
+ if (reqlength > 0 && reqlength < length) {
+ length = reqlength;
+ } else if (reqlength < 0) {
+ /*
+ * The requested length is negative, so we ignore it by setting it to
+ * length + 1 so we correct the match var.
+ */
+
+ reqlength = length + 1;
+ }
+
+ match = strCmpFn(string1, string2, (unsigned) length);
+ if ((match == 0) && (reqlength > length)) {
+ match = length1 - length2;
+ }
+
+ Tcl_SetObjResult(interp,
+ Tcl_NewIntObj((match > 0) ? 1 : (match < 0) ? -1 : 0));
+ return TCL_OK;
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * StringBytesCmd --
+ *
+ * This procedure is invoked to process the "string bytelength" Tcl
+ * command. See the user documentation for details on what it does. Note
+ * that this command only functions correctly on properly formed Tcl UTF
+ * strings.
+ *
+ * Results:
+ * A standard Tcl result.
+ *
+ * Side effects:
+ * See the user documentation.
+ *
+ *----------------------------------------------------------------------
+ */
+
+static int
+StringBytesCmd(
+ ClientData dummy, /* Not used. */
+ Tcl_Interp *interp, /* Current interpreter. */
+ int objc, /* Number of arguments. */
+ Tcl_Obj *const objv[]) /* Argument objects. */
+{
+ int length;
+
+ if (objc != 3) {
+ Tcl_WrongNumArgs(interp, 2, objv, "string");
+ return TCL_ERROR;
+ }
+
+ (void) TclGetStringFromObj(objv[2], &length);
+ Tcl_SetObjResult(interp, Tcl_NewIntObj(length));
+ return TCL_OK;
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * StringLenCmd --
+ *
+ * This procedure is invoked to process the "string length" Tcl command.
+ * See the user documentation for details on what it does. Note that this
+ * command only functions correctly on properly formed Tcl UTF strings.
+ *
+ * Results:
+ * A standard Tcl result.
+ *
+ * Side effects:
+ * See the user documentation.
+ *
+ *----------------------------------------------------------------------
+ */
+
+static int
+StringLenCmd(
+ ClientData dummy, /* Not used. */
+ Tcl_Interp *interp, /* Current interpreter. */
+ int objc, /* Number of arguments. */
+ Tcl_Obj *const objv[]) /* Argument objects. */
+{
+ int length;
+
+ if (objc != 3) {
+ Tcl_WrongNumArgs(interp, 2, objv, "string");
+ return TCL_ERROR;
+ }
+
+ /*
+ * If we have a ByteArray object, avoid recomputing the string since the
+ * byte array contains one byte per character. Otherwise, use the Unicode
+ * string rep to calculate the length.
+ */
+
+ if (objv[2]->typePtr == &tclByteArrayType) {
+ (void) Tcl_GetByteArrayFromObj(objv[2], &length);
+ } else {
+ length = Tcl_GetCharLength(objv[2]);
+ }
+ Tcl_SetObjResult(interp, Tcl_NewIntObj(length));
+ return TCL_OK;
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * StringLowerCmd --
+ *
+ * This procedure is invoked to process the "string tolower" Tcl command.
+ * See the user documentation for details on what it does. Note that this
+ * command only functions correctly on properly formed Tcl UTF strings.
+ *
+ * Results:
+ * A standard Tcl result.
+ *
+ * Side effects:
+ * See the user documentation.
+ *
+ *----------------------------------------------------------------------
+ */
+static int
+StringLowerCmd(
+ ClientData dummy, /* Not used. */
+ Tcl_Interp *interp, /* Current interpreter. */
+ int objc, /* Number of arguments. */
+ Tcl_Obj *const objv[]) /* Argument objects. */
+{
+ int length1, length2;
+ char *string1, *string2;
+
+ if (objc < 3 || objc > 5) {
+ Tcl_WrongNumArgs(interp, 2, objv, "string ?first? ?last?");
+ return TCL_ERROR;
+ }
+
+ string1 = TclGetStringFromObj(objv[2], &length1);
+
+ if (objc == 3) {
+ Tcl_Obj *resultPtr = Tcl_NewStringObj(string1, length1);
+
+ length1 = Tcl_UtfToLower(TclGetString(resultPtr));
+ Tcl_SetObjLength(resultPtr, length1);
+ Tcl_SetObjResult(interp, resultPtr);
+ } else {
+ int first, last;
+ const char *start, *end;
+ Tcl_Obj *resultPtr;
+
+ length1 = Tcl_NumUtfChars(string1, length1) - 1;
+ if (TclGetIntForIndexM(interp,objv[3],length1, &first) != TCL_OK) {
+ return TCL_ERROR;
+ }
if (first < 0) {
first = 0;
}
+ last = first;
+
+ if ((objc == 5) && (TclGetIntForIndexM(interp, objv[4], length1,
+ &last) != TCL_OK)) {
+ return TCL_ERROR;
+ }
+
if (last >= length1) {
last = length1;
}
- if (last >= first) {
- if (string1 != NULL) {
- int numBytes = last - first + 1;
- Tcl_SetObjResult(interp, Tcl_NewByteArrayObj(
- (unsigned char *) &string1[first], numBytes));
- } else {
- Tcl_SetObjResult(interp,
- Tcl_GetRange(objv[2], first, last));
- }
+ if (last < first) {
+ Tcl_SetObjResult(interp, objv[2]);
+ return TCL_OK;
}
- break;
+
+ start = Tcl_UtfAtIndex(string1, first);
+ end = Tcl_UtfAtIndex(start, last - first + 1);
+ resultPtr = Tcl_NewStringObj(string1, end - string1);
+ string2 = TclGetString(resultPtr) + (start - string1);
+
+ length2 = Tcl_UtfToLower(string2);
+ Tcl_SetObjLength(resultPtr, length2 + (start - string1));
+
+ Tcl_AppendToObj(resultPtr, end, -1);
+ Tcl_SetObjResult(interp, resultPtr);
+ }
+
+ return TCL_OK;
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * StringUpperCmd --
+ *
+ * This procedure is invoked to process the "string toupper" Tcl command.
+ * See the user documentation for details on what it does. Note that this
+ * command only functions correctly on properly formed Tcl UTF strings.
+ *
+ * Results:
+ * A standard Tcl result.
+ *
+ * Side effects:
+ * See the user documentation.
+ *
+ *----------------------------------------------------------------------
+ */
+
+static int
+StringUpperCmd(
+ ClientData dummy, /* Not used. */
+ Tcl_Interp *interp, /* Current interpreter. */
+ int objc, /* Number of arguments. */
+ Tcl_Obj *const objv[]) /* Argument objects. */
+{
+ int length1, length2;
+ char *string1, *string2;
+
+ if (objc < 3 || objc > 5) {
+ Tcl_WrongNumArgs(interp, 2, objv, "string ?first? ?last?");
+ return TCL_ERROR;
}
- case STR_REPEAT: {
- int count;
- if (objc != 4) {
- Tcl_WrongNumArgs(interp, 2, objv, "string count");
+ string1 = TclGetStringFromObj(objv[2], &length1);
+
+ if (objc == 3) {
+ Tcl_Obj *resultPtr = Tcl_NewStringObj(string1, length1);
+
+ length1 = Tcl_UtfToUpper(TclGetString(resultPtr));
+ Tcl_SetObjLength(resultPtr, length1);
+ Tcl_SetObjResult(interp, resultPtr);
+ } else {
+ int first, last;
+ const char *start, *end;
+ Tcl_Obj *resultPtr;
+
+ length1 = Tcl_NumUtfChars(string1, length1) - 1;
+ if (TclGetIntForIndexM(interp,objv[3],length1, &first) != TCL_OK) {
return TCL_ERROR;
}
+ if (first < 0) {
+ first = 0;
+ }
+ last = first;
- if (TclGetIntFromObj(interp, objv[3], &count) != TCL_OK) {
+ if ((objc == 5) && (TclGetIntForIndexM(interp, objv[4], length1,
+ &last) != TCL_OK)) {
return TCL_ERROR;
}
- if (count == 1) {
+ if (last >= length1) {
+ last = length1;
+ }
+ if (last < first) {
Tcl_SetObjResult(interp, objv[2]);
- } else if (count > 1) {
- string1 = TclGetStringFromObj(objv[2], &length1);
- if (length1 > 0) {
- /*
- * Only build up a string that has data. Instead of building
- * it up with repeated appends, we just allocate the necessary
- * space once and copy the string value in. Check for overflow
- * with back-division. [Bug #714106]
- */
+ return TCL_OK;
+ }
- Tcl_Obj *resultPtr;
+ start = Tcl_UtfAtIndex(string1, first);
+ end = Tcl_UtfAtIndex(start, last - first + 1);
+ resultPtr = Tcl_NewStringObj(string1, end - string1);
+ string2 = TclGetString(resultPtr) + (start - string1);
- length2 = length1 * count;
- if ((length2 / count) != length1) {
- Tcl_SetObjResult(interp, Tcl_ObjPrintf(
- "string size overflow, must be less than %d",
- INT_MAX));
- return TCL_ERROR;
- }
+ length2 = Tcl_UtfToUpper(string2);
+ Tcl_SetObjLength(resultPtr, length2 + (start - string1));
- /*
- * Include space for the NULL.
- */
+ Tcl_AppendToObj(resultPtr, end, -1);
+ Tcl_SetObjResult(interp, resultPtr);
+ }
- string2 = (char *) ckalloc((size_t) length2+1);
- for (index = 0; index < count; index++) {
- memcpy(string2 + (length1 * index), string1,
- (size_t) length1);
- }
- string2[length2] = '\0';
+ return TCL_OK;
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * StringTitleCmd --
+ *
+ * This procedure is invoked to process the "string totitle" Tcl command.
+ * See the user documentation for details on what it does. Note that this
+ * command only functions correctly on properly formed Tcl UTF strings.
+ *
+ * Results:
+ * A standard Tcl result.
+ *
+ * Side effects:
+ * See the user documentation.
+ *
+ *----------------------------------------------------------------------
+ */
- /*
- * We have to directly assign this instead of using
- * Tcl_SetStringObj (and indirectly TclInitStringRep) because
- * that makes another copy of the data.
- */
+static int
+StringTitleCmd(
+ ClientData dummy, /* Not used. */
+ Tcl_Interp *interp, /* Current interpreter. */
+ int objc, /* Number of arguments. */
+ Tcl_Obj *const objv[]) /* Argument objects. */
+{
+ int length1, length2;
+ char *string1, *string2;
- TclNewObj(resultPtr);
- resultPtr->bytes = string2;
- resultPtr->length = length2;
- Tcl_SetObjResult(interp, resultPtr);
- }
- }
- break;
+ if (objc < 3 || objc > 5) {
+ Tcl_WrongNumArgs(interp, 2, objv, "string ?first? ?last?");
+ return TCL_ERROR;
}
- case STR_REPLACE: {
- Tcl_UniChar *ustring1;
+
+ string1 = TclGetStringFromObj(objv[2], &length1);
+
+ if (objc == 3) {
+ Tcl_Obj *resultPtr = Tcl_NewStringObj(string1, length1);
+
+ length1 = Tcl_UtfToTitle(TclGetString(resultPtr));
+ Tcl_SetObjLength(resultPtr, length1);
+ Tcl_SetObjResult(interp, resultPtr);
+ } else {
int first, last;
+ const char *start, *end;
+ Tcl_Obj *resultPtr;
- if (objc < 5 || objc > 6) {
- Tcl_WrongNumArgs(interp, 2, objv, "string first last ?string?");
+ length1 = Tcl_NumUtfChars(string1, length1) - 1;
+ if (TclGetIntForIndexM(interp,objv[3],length1, &first) != TCL_OK) {
return TCL_ERROR;
}
+ if (first < 0) {
+ first = 0;
+ }
+ last = first;
- ustring1 = Tcl_GetUnicodeFromObj(objv[2], &length1);
- length1--;
-
- if (TclGetIntForIndexM(interp, objv[3], length1, &first) != TCL_OK ||
- TclGetIntForIndexM(interp, objv[4], length1, &last) != TCL_OK){
+ if ((objc == 5) && (TclGetIntForIndexM(interp, objv[4], length1,
+ &last) != TCL_OK)) {
return TCL_ERROR;
}
- if ((last < first) || (last < 0) || (first > length1)) {
- Tcl_SetObjResult(interp, objv[2]);
- } else {
- Tcl_Obj *resultPtr;
- if (first < 0) {
- first = 0;
- }
-
- resultPtr = Tcl_NewUnicodeObj(ustring1, first);
- if (objc == 6) {
- Tcl_AppendObjToObj(resultPtr, objv[5]);
- }
- if (last < length1) {
- Tcl_AppendUnicodeToObj(resultPtr, ustring1 + last + 1,
- length1 - last);
- }
- Tcl_SetObjResult(interp, resultPtr);
+ if (last >= length1) {
+ last = length1;
}
- break;
- }
- case STR_REVERSE: {
- if (objc != 3) {
- Tcl_WrongNumArgs(interp, 2, objv, "string");
- return TCL_ERROR;
+ if (last < first) {
+ Tcl_SetObjResult(interp, objv[2]);
+ return TCL_OK;
}
- Tcl_SetObjResult(interp, TclStringObjReverse(objv[2]));
- break;
+ start = Tcl_UtfAtIndex(string1, first);
+ end = Tcl_UtfAtIndex(start, last - first + 1);
+ resultPtr = Tcl_NewStringObj(string1, end - string1);
+ string2 = TclGetString(resultPtr) + (start - string1);
+
+ length2 = Tcl_UtfToTitle(string2);
+ Tcl_SetObjLength(resultPtr, length2 + (start - string1));
+
+ Tcl_AppendToObj(resultPtr, end, -1);
+ Tcl_SetObjResult(interp, resultPtr);
}
- case STR_TOLOWER:
- case STR_TOUPPER:
- case STR_TOTITLE:
- if (objc < 3 || objc > 5) {
- Tcl_WrongNumArgs(interp, 2, objv, "string ?first? ?last?");
- return TCL_ERROR;
- }
- string1 = TclGetStringFromObj(objv[2], &length1);
+ return TCL_OK;
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * StringTrimCmd --
+ *
+ * This procedure is invoked to process the "string trim" Tcl command.
+ * See the user documentation for details on what it does. Note that this
+ * command only functions correctly on properly formed Tcl UTF strings.
+ *
+ * Results:
+ * A standard Tcl result.
+ *
+ * Side effects:
+ * See the user documentation.
+ *
+ *----------------------------------------------------------------------
+ */
- if (objc == 3) {
- Tcl_Obj *resultPtr = Tcl_NewStringObj(string1, length1);
- if ((enum options) index == STR_TOLOWER) {
- length1 = Tcl_UtfToLower(TclGetString(resultPtr));
- } else if ((enum options) index == STR_TOUPPER) {
- length1 = Tcl_UtfToUpper(TclGetString(resultPtr));
- } else {
- length1 = Tcl_UtfToTitle(TclGetString(resultPtr));
- }
- Tcl_SetObjLength(resultPtr, length1);
- Tcl_SetObjResult(interp, resultPtr);
- } else {
- int first, last;
- CONST char *start, *end;
- Tcl_Obj *resultPtr;
+static int
+StringTrimCmd(
+ ClientData dummy, /* Not used. */
+ Tcl_Interp *interp, /* Current interpreter. */
+ int objc, /* Number of arguments. */
+ Tcl_Obj *const objv[]) /* Argument objects. */
+{
+ Tcl_UniChar ch, trim;
+ register const char *p, *end;
+ const char *check, *checkEnd, *string1, *string2;
+ int offset, length1, length2;
- length1 = Tcl_NumUtfChars(string1, length1) - 1;
- if (TclGetIntForIndexM(interp,objv[3],length1, &first) != TCL_OK) {
- return TCL_ERROR;
- }
- if (first < 0) {
- first = 0;
- }
- last = first;
+ if (objc == 4) {
+ string2 = TclGetStringFromObj(objv[3], &length2);
+ } else if (objc == 3) {
+ string2 = " \t\n\r";
+ length2 = strlen(string2);
+ } else {
+ Tcl_WrongNumArgs(interp, 2, objv, "string ?chars?");
+ return TCL_ERROR;
+ }
+ string1 = TclGetStringFromObj(objv[2], &length1);
+ checkEnd = string2 + length2;
- if ((objc == 5) && (TclGetIntForIndexM(interp, objv[4], length1,
- &last) != TCL_OK)) {
- return TCL_ERROR;
- }
+ /*
+ * The outer loop iterates over the string. The inner loop iterates over
+ * the trim characters. The loops terminate as soon as a non-trim
+ * character is discovered and string1 is left pointing at the first
+ * non-trim character.
+ */
+
+ end = string1 + length1;
+ for (p = string1; p < end; p += offset) {
+ offset = TclUtfToUniChar(p, &ch);
- if (last >= length1) {
- last = length1;
+ for (check = string2; ; ) {
+ if (check >= checkEnd) {
+ p = end;
+ break;
}
- if (last < first) {
- Tcl_SetObjResult(interp, objv[2]);
+ check += TclUtfToUniChar(check, &trim);
+ if (ch == trim) {
+ length1 -= offset;
+ string1 += offset;
break;
}
+ }
+ }
- start = Tcl_UtfAtIndex(string1, first);
- end = Tcl_UtfAtIndex(start, last - first + 1);
- resultPtr = Tcl_NewStringObj(string1, end - string1);
- string2 = TclGetString(resultPtr) + (start - string1);
+ /*
+ * The outer loop iterates over the string. The inner loop iterates over
+ * the trim characters. The loops terminate as soon as a non-trim
+ * character is discovered and length1 marks the last non-trim character.
+ */
- if ((enum options) index == STR_TOLOWER) {
- length2 = Tcl_UtfToLower(string2);
- } else if ((enum options) index == STR_TOUPPER) {
- length2 = Tcl_UtfToUpper(string2);
- } else {
- length2 = Tcl_UtfToTitle(string2);
+ end = string1;
+ for (p = string1 + length1; p > end; ) {
+ p = Tcl_UtfPrev(p, string1);
+ offset = TclUtfToUniChar(p, &ch);
+ check = string2;
+ while (1) {
+ if (check >= checkEnd) {
+ p = end;
+ break;
+ }
+ check += TclUtfToUniChar(check, &trim);
+ if (ch == trim) {
+ length1 -= offset;
+ break;
}
- Tcl_SetObjLength(resultPtr, length2 + (start - string1));
-
- Tcl_AppendToObj(resultPtr, end, -1);
- Tcl_SetObjResult(interp, resultPtr);
}
- break;
+ }
- case STR_TRIMLEFT:
- left = 1;
- right = 0;
- goto dotrim;
- case STR_TRIMRIGHT:
- left = 0;
- right = 1;
- goto dotrim;
- case STR_TRIM: {
- Tcl_UniChar ch, trim;
- register CONST char *p, *end;
- char *check, *checkEnd;
- int offset;
-
- left = 1;
- right = 1;
-
- dotrim:
- if (objc == 4) {
- string2 = TclGetStringFromObj(objv[3], &length2);
- } else if (objc == 3) {
- string2 = " \t\n\r";
- length2 = strlen(string2);
- } else {
- Tcl_WrongNumArgs(interp, 2, objv, "string ?chars?");
- return TCL_ERROR;
- }
- string1 = TclGetStringFromObj(objv[2], &length1);
- checkEnd = string2 + length2;
+ Tcl_SetObjResult(interp, Tcl_NewStringObj(string1, length1));
+ return TCL_OK;
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * StringTrimLCmd --
+ *
+ * This procedure is invoked to process the "string trimleft" Tcl
+ * command. See the user documentation for details on what it does. Note
+ * that this command only functions correctly on properly formed Tcl UTF
+ * strings.
+ *
+ * Results:
+ * A standard Tcl result.
+ *
+ * Side effects:
+ * See the user documentation.
+ *
+ *----------------------------------------------------------------------
+ */
- if (left) {
- end = string1 + length1;
- /*
- * The outer loop iterates over the string. The inner loop
- * iterates over the trim characters. The loops terminate as soon
- * as a non-trim character is discovered and string1 is left
- * pointing at the first non-trim character.
- */
+static int
+StringTrimLCmd(
+ ClientData dummy, /* Not used. */
+ Tcl_Interp *interp, /* Current interpreter. */
+ int objc, /* Number of arguments. */
+ Tcl_Obj *const objv[]) /* Argument objects. */
+{
+ Tcl_UniChar ch, trim;
+ register const char *p, *end;
+ const char *check, *checkEnd, *string1, *string2;
+ int offset, length1, length2;
- for (p = string1; p < end; p += offset) {
- offset = TclUtfToUniChar(p, &ch);
+ if (objc == 4) {
+ string2 = TclGetStringFromObj(objv[3], &length2);
+ } else if (objc == 3) {
+ string2 = " \t\n\r";
+ length2 = strlen(string2);
+ } else {
+ Tcl_WrongNumArgs(interp, 2, objv, "string ?chars?");
+ return TCL_ERROR;
+ }
+ string1 = TclGetStringFromObj(objv[2], &length1);
+ checkEnd = string2 + length2;
- for (check = string2; ; ) {
- if (check >= checkEnd) {
- p = end;
- break;
- }
- check += TclUtfToUniChar(check, &trim);
- if (ch == trim) {
- length1 -= offset;
- string1 += offset;
- break;
- }
- }
- }
- }
- if (right) {
- end = string1;
+ /*
+ * The outer loop iterates over the string. The inner loop iterates over
+ * the trim characters. The loops terminate as soon as a non-trim
+ * character is discovered and string1 is left pointing at the first
+ * non-trim character.
+ */
- /*
- * The outer loop iterates over the string. The inner loop
- * iterates over the trim characters. The loops terminate as soon
- * as a non-trim character is discovered and length1 marks the
- * last non-trim character.
- */
+ end = string1 + length1;
+ for (p = string1; p < end; p += offset) {
+ offset = TclUtfToUniChar(p, &ch);
- for (p = string1 + length1; p > end; ) {
- p = Tcl_UtfPrev(p, string1);
- offset = TclUtfToUniChar(p, &ch);
- check = string2;
- while (1) {
- if (check >= checkEnd) {
- p = end;
- break;
- }
- check += TclUtfToUniChar(check, &trim);
- if (ch == trim) {
- length1 -= offset;
- break;
- }
- }
+ for (check = string2; ; ) {
+ if (check >= checkEnd) {
+ p = end;
+ break;
+ }
+ check += TclUtfToUniChar(check, &trim);
+ if (ch == trim) {
+ length1 -= offset;
+ string1 += offset;
+ break;
}
}
- Tcl_SetObjResult(interp, Tcl_NewStringObj(string1, length1));
- break;
}
- case STR_WORDEND: {
- int cur;
- Tcl_UniChar ch;
- CONST char *p, *end;
- int numChars;
- if (objc != 4) {
- Tcl_WrongNumArgs(interp, 2, objv, "string index");
- return TCL_ERROR;
- }
+ Tcl_SetObjResult(interp, Tcl_NewStringObj(string1, length1));
+ return TCL_OK;
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * StringTrimRCmd --
+ *
+ * This procedure is invoked to process the "string trimright" Tcl
+ * command. See the user documentation for details on what it does. Note
+ * that this command only functions correctly on properly formed Tcl UTF
+ * strings.
+ *
+ * Results:
+ * A standard Tcl result.
+ *
+ * Side effects:
+ * See the user documentation.
+ *
+ *----------------------------------------------------------------------
+ */
- string1 = TclGetStringFromObj(objv[2], &length1);
- numChars = Tcl_NumUtfChars(string1, length1);
- if (TclGetIntForIndexM(interp, objv[3], numChars-1, &index) != TCL_OK){
- return TCL_ERROR;
- }
- if (index < 0) {
- index = 0;
- }
- if (index < numChars) {
- p = Tcl_UtfAtIndex(string1, index);
- end = string1+length1;
- for (cur = index; p < end; cur++) {
- p += TclUtfToUniChar(p, &ch);
- if (!Tcl_UniCharIsWordChar(ch)) {
- break;
- }
- }
- if (cur == index) {
- cur++;
- }
- } else {
- cur = numChars;
- }
- Tcl_SetObjResult(interp, Tcl_NewIntObj(cur));
- break;
+static int
+StringTrimRCmd(
+ ClientData dummy, /* Not used. */
+ Tcl_Interp *interp, /* Current interpreter. */
+ int objc, /* Number of arguments. */
+ Tcl_Obj *const objv[]) /* Argument objects. */
+{
+ Tcl_UniChar ch, trim;
+ register const char *p, *end;
+ const char *check, *checkEnd, *string1, *string2;
+ int offset, length1, length2;
+
+ if (objc == 4) {
+ string2 = TclGetStringFromObj(objv[3], &length2);
+ } else if (objc == 3) {
+ string2 = " \t\n\r";
+ length2 = strlen(string2);
+ } else {
+ Tcl_WrongNumArgs(interp, 2, objv, "string ?chars?");
+ return TCL_ERROR;
}
- case STR_WORDSTART: {
- int cur;
- Tcl_UniChar ch;
- CONST char *p;
- int numChars;
+ string1 = TclGetStringFromObj(objv[2], &length1);
+ checkEnd = string2 + length2;
- if (objc != 4) {
- Tcl_WrongNumArgs(interp, 2, objv, "string index");
- return TCL_ERROR;
- }
+ /*
+ * The outer loop iterates over the string. The inner loop iterates over
+ * the trim characters. The loops terminate as soon as a non-trim
+ * character is discovered and length1 marks the last non-trim character.
+ */
- string1 = TclGetStringFromObj(objv[2], &length1);
- numChars = Tcl_NumUtfChars(string1, length1);
- if (TclGetIntForIndexM(interp, objv[3], numChars-1, &index) != TCL_OK){
- return TCL_ERROR;
- }
- if (index >= numChars) {
- index = numChars - 1;
- }
- cur = 0;
- if (index > 0) {
- p = Tcl_UtfAtIndex(string1, index);
- for (cur = index; cur >= 0; cur--) {
- TclUtfToUniChar(p, &ch);
- if (!Tcl_UniCharIsWordChar(ch)) {
- break;
- }
- p = Tcl_UtfPrev(p, string1);
+ end = string1;
+ for (p = string1 + length1; p > end; ) {
+ p = Tcl_UtfPrev(p, string1);
+ offset = TclUtfToUniChar(p, &ch);
+ check = string2;
+ while (1) {
+ if (check >= checkEnd) {
+ p = end;
+ break;
}
- if (cur != index) {
- cur += 1;
+ check += TclUtfToUniChar(check, &trim);
+ if (ch == trim) {
+ length1 -= offset;
+ break;
}
}
- Tcl_SetObjResult(interp, Tcl_NewIntObj(cur));
- break;
- }
}
+
+ Tcl_SetObjResult(interp, Tcl_NewStringObj(string1, length1));
return TCL_OK;
}
-static int
-UniCharIsAscii(
- int character)
+/*
+ *----------------------------------------------------------------------
+ *
+ * Tcl_StringObjCmd --
+ *
+ * This procedure is invoked to process the "string" Tcl command. See the
+ * user documentation for details on what it does. Note that this command
+ * only functions correctly on properly formed Tcl UTF strings.
+ *
+ * Note that the primary methods here (equal, compare, match, ...) have
+ * bytecode equivalents. You will find the code for those in
+ * tclExecute.c. The code here will only be used in the non-bc case (like
+ * in an 'eval').
+ *
+ * Results:
+ * A standard Tcl result.
+ *
+ * Side effects:
+ * See the user documentation.
+ *
+ *----------------------------------------------------------------------
+ */
+
+int
+Tcl_StringObjCmd(
+ ClientData dummy, /* Not used. */
+ Tcl_Interp *interp, /* Current interpreter. */
+ int objc, /* Number of arguments. */
+ Tcl_Obj *const objv[]) /* Argument objects. */
{
- return (character >= 0) && (character < 0x80);
+ static const EnsembleImplMap stringImplMap[] = {
+ {"bytelength", StringBytesCmd, NULL},
+ {"compare", StringCmpCmd, NULL},
+ {"equal", StringEqualCmd, NULL},
+ {"first", StringFirstCmd, NULL},
+ {"index", StringIndexCmd, NULL},
+ {"is", StringIsCmd, NULL},
+ {"last", StringLastCmd, NULL},
+ {"length", StringLenCmd, NULL},
+ {"map", StringMapCmd, NULL},
+ {"match", StringMatchCmd, NULL},
+ {"range", StringRangeCmd, NULL},
+ {"repeat", StringReptCmd, NULL},
+ {"replace", StringRplcCmd, NULL},
+ {"reverse", StringRevCmd, NULL},
+ {"tolower", StringLowerCmd, NULL},
+ {"toupper", StringUpperCmd, NULL},
+ {"totitle", StringTitleCmd, NULL},
+ {"trim", StringTrimCmd, NULL},
+ {"trimleft", StringTrimLCmd, NULL},
+ {"trimright", StringTrimRCmd, NULL},
+ {"wordend", StringEndCmd, NULL},
+ {"wordstart", StringStartCmd, NULL},
+ {NULL}
+ };
+
+ int index;
+
+ if (objc < 2) {
+ Tcl_WrongNumArgs(interp, 1, objv, "option arg ?arg ...?");
+ return TCL_ERROR;
+ }
+ if (Tcl_GetIndexFromObjStruct(interp, objv[1], &stringImplMap[0].name,
+ sizeof(EnsembleImplMap), "option", 0, &index) != TCL_OK) {
+ return TCL_ERROR;
+ }
+ return stringImplMap[index].proc(dummy, interp, objc, objv);
}
/*