diff options
-rw-r--r-- | generic/tclCmdMZ.c | 212 | ||||
-rw-r--r-- | generic/tclExecute.c | 90 | ||||
-rw-r--r-- | generic/tclInt.h | 14 | ||||
-rw-r--r-- | generic/tclStringObj.c | 220 |
4 files changed, 285 insertions, 251 deletions
diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c index 7c979bb..efe4869 100644 --- a/generic/tclCmdMZ.c +++ b/generic/tclCmdMZ.c @@ -2546,10 +2546,8 @@ StringEqualCmd( * the expr string comparison in INST_EQ/INST_NEQ/INST_LT/...). */ - const char *string1, *string2; - int length1, length2, i, match, length, nocase = 0, reqlength = -1; - typedef int (*strCmpFn_t)(const char *, const char *, unsigned int); - strCmpFn_t strCmpFn; + const char *string2; + int length, i, match, nocase = 0, reqlength = -1; if (objc < 3 || objc > 6) { str_cmp_args: @@ -2559,11 +2557,11 @@ StringEqualCmd( } for (i = 1; i < objc-2; i++) { - string2 = TclGetStringFromObj(objv[i], &length2); - if ((length2 > 1) && !strncmp(string2, "-nocase", (size_t)length2)) { + string2 = TclGetStringFromObj(objv[i], &length); + if ((length > 1) && !strncmp(string2, "-nocase", (size_t)length)) { nocase = 1; - } else if ((length2 > 1) - && !strncmp(string2, "-length", (size_t)length2)) { + } else if ((length > 1) + && !strncmp(string2, "-length", (size_t)length)) { if (i+1 >= objc-2) { goto str_cmp_args; } @@ -2588,78 +2586,7 @@ StringEqualCmd( objv += objc-2; - if ((reqlength == 0) || (objv[0] == objv[1])) { - /* - * Always match at 0 chars of if it is the same obj. - */ - - Tcl_SetObjResult(interp, Tcl_NewBooleanObj(1)); - return TCL_OK; - } - - if (!nocase && TclIsPureByteArray(objv[0]) && - TclIsPureByteArray(objv[1])) { - /* - * Use binary versions of comparisons since that won't cause undue - * type conversions and it is much faster. Only do this if we're - * case-sensitive (which is all that really makes sense with byte - * arrays anyway, and we have no memcasecmp() for some reason... :^) - */ - - string1 = (char *) Tcl_GetByteArrayFromObj(objv[0], &length1); - string2 = (char *) Tcl_GetByteArrayFromObj(objv[1], &length2); - strCmpFn = (strCmpFn_t) memcmp; - } else if ((objv[0]->typePtr == &tclStringType) - && (objv[1]->typePtr == &tclStringType)) { - /* - * Do a unicode-specific comparison if both of the args are of String - * type. In benchmark testing this proved the most efficient check - * between the unicode and string comparison operations. - */ - - string1 = (char *) Tcl_GetUnicodeFromObj(objv[0], &length1); - string2 = (char *) Tcl_GetUnicodeFromObj(objv[1], &length2); - strCmpFn = (strCmpFn_t) - (nocase ? Tcl_UniCharNcasecmp : Tcl_UniCharNcmp); - } else { - /* - * As a catch-all we will work with UTF-8. We cannot use memcmp() as - * that is unsafe with any string containing NUL (\xC0\x80 in Tcl's - * utf rep). We can use the more efficient TclpUtfNcmp2 if we are - * case-sensitive and no specific length was requested. - */ - - string1 = (char *) TclGetStringFromObj(objv[0], &length1); - string2 = (char *) TclGetStringFromObj(objv[1], &length2); - if ((reqlength < 0) && !nocase) { - strCmpFn = (strCmpFn_t) TclpUtfNcmp2; - } else { - length1 = Tcl_NumUtfChars(string1, length1); - length2 = Tcl_NumUtfChars(string2, length2); - strCmpFn = (strCmpFn_t) (nocase ? Tcl_UtfNcasecmp : Tcl_UtfNcmp); - } - } - - if ((reqlength < 0) && (length1 != length2)) { - match = 1; /* This will be reversed below. */ - } else { - length = (length1 < length2) ? length1 : length2; - if (reqlength > 0 && reqlength < length) { - length = reqlength; - } else if (reqlength < 0) { - /* - * The requested length is negative, so we ignore it by setting it - * to length + 1 so we correct the match var. - */ - - reqlength = length + 1; - } - - match = strCmpFn(string1, string2, (unsigned) length); - if ((match == 0) && (reqlength > length)) { - match = length1 - length2; - } - } + match = TclStringCmp (objv[0], objv[1], 0, nocase, reqlength); Tcl_SetObjResult(interp, Tcl_NewBooleanObj(match ? 0 : 1)); return TCL_OK; @@ -2696,11 +2623,33 @@ StringCmpCmd( * the expr string comparison in INST_EQ/INST_NEQ/INST_LT/...). */ - const char *string1, *string2; - int length1, length2, i, match, length, nocase = 0, reqlength = -1; - typedef int (*strCmpFn_t)(const char *, const char *, unsigned int); - strCmpFn_t strCmpFn; + int match, nocase, reqlength, status; + + if ((status = TclStringCmpOpts(interp, objc, objv, &nocase, &reqlength)) + != TCL_OK) { + + return status; + } + + objv += objc-2; + match = TclStringCmp (objv[0], objv[1], 0, nocase, reqlength); + Tcl_SetObjResult(interp, Tcl_NewIntObj(match)); + return TCL_OK; +} + +int TclStringCmpOpts ( + Tcl_Interp *interp, /* Current interpreter. */ + int objc, /* Number of arguments. */ + Tcl_Obj *const objv[], /* Argument objects. */ + int *nocase, + int *reqlength +) +{ + int i, length; + const char *string; + *reqlength = -1; + *nocase = 0; if (objc < 3 || objc > 6) { str_cmp_args: Tcl_WrongNumArgs(interp, 1, objv, @@ -2709,106 +2658,27 @@ StringCmpCmd( } for (i = 1; i < objc-2; i++) { - string2 = TclGetStringFromObj(objv[i], &length2); - if ((length2 > 1) && !strncmp(string2, "-nocase", (size_t)length2)) { - nocase = 1; - } else if ((length2 > 1) - && !strncmp(string2, "-length", (size_t)length2)) { + string = TclGetStringFromObj(objv[i], &length); + if ((length > 1) && !strncmp(string, "-nocase", (size_t)length)) { + *nocase = 1; + } else if ((length > 1) + && !strncmp(string, "-length", (size_t)length)) { if (i+1 >= objc-2) { goto str_cmp_args; } i++; - if (TclGetIntFromObj(interp, objv[i], &reqlength) != TCL_OK) { + if (TclGetIntFromObj(interp, objv[i], reqlength) != TCL_OK) { return TCL_ERROR; } } else { Tcl_SetObjResult(interp, Tcl_ObjPrintf( "bad option \"%s\": must be -nocase or -length", - string2)); + string)); Tcl_SetErrorCode(interp, "TCL", "LOOKUP", "INDEX", "option", - string2, NULL); + string, NULL); return TCL_ERROR; } } - - /* - * From now on, we only access the two objects at the end of the argument - * array. - */ - - objv += objc-2; - - if ((reqlength == 0) || (objv[0] == objv[1])) { - /* - * Always match at 0 chars of if it is the same obj. - */ - - Tcl_SetObjResult(interp, Tcl_NewBooleanObj(0)); - return TCL_OK; - } - - if (!nocase && TclIsPureByteArray(objv[0]) && - TclIsPureByteArray(objv[1])) { - /* - * Use binary versions of comparisons since that won't cause undue - * type conversions and it is much faster. Only do this if we're - * case-sensitive (which is all that really makes sense with byte - * arrays anyway, and we have no memcasecmp() for some reason... :^) - */ - - string1 = (char *) Tcl_GetByteArrayFromObj(objv[0], &length1); - string2 = (char *) Tcl_GetByteArrayFromObj(objv[1], &length2); - strCmpFn = (strCmpFn_t) memcmp; - } else if ((objv[0]->typePtr == &tclStringType) - && (objv[1]->typePtr == &tclStringType)) { - /* - * Do a unicode-specific comparison if both of the args are of String - * type. In benchmark testing this proved the most efficient check - * between the unicode and string comparison operations. - */ - - string1 = (char *) Tcl_GetUnicodeFromObj(objv[0], &length1); - string2 = (char *) Tcl_GetUnicodeFromObj(objv[1], &length2); - strCmpFn = (strCmpFn_t) - (nocase ? Tcl_UniCharNcasecmp : Tcl_UniCharNcmp); - } else { - /* - * As a catch-all we will work with UTF-8. We cannot use memcmp() as - * that is unsafe with any string containing NUL (\xC0\x80 in Tcl's - * utf rep). We can use the more efficient TclpUtfNcmp2 if we are - * case-sensitive and no specific length was requested. - */ - - string1 = (char *) TclGetStringFromObj(objv[0], &length1); - string2 = (char *) TclGetStringFromObj(objv[1], &length2); - if ((reqlength < 0) && !nocase) { - strCmpFn = (strCmpFn_t) TclpUtfNcmp2; - } else { - length1 = Tcl_NumUtfChars(string1, length1); - length2 = Tcl_NumUtfChars(string2, length2); - strCmpFn = (strCmpFn_t) (nocase ? Tcl_UtfNcasecmp : Tcl_UtfNcmp); - } - } - - length = (length1 < length2) ? length1 : length2; - if (reqlength > 0 && reqlength < length) { - length = reqlength; - } else if (reqlength < 0) { - /* - * The requested length is negative, so we ignore it by setting it to - * length + 1 so we correct the match var. - */ - - reqlength = length + 1; - } - - match = strCmpFn(string1, string2, (unsigned) length); - if ((match == 0) && (reqlength > length)) { - match = length1 - length2; - } - - Tcl_SetObjResult(interp, - Tcl_NewIntObj((match > 0) ? 1 : (match < 0) ? -1 : 0)); return TCL_OK; } diff --git a/generic/tclExecute.c b/generic/tclExecute.c index b5b2918..fda50b2 100644 --- a/generic/tclExecute.c +++ b/generic/tclExecute.c @@ -5088,88 +5088,10 @@ TEBCresume( value2Ptr = OBJ_AT_TOS; valuePtr = OBJ_UNDER_TOS; - if (valuePtr == value2Ptr) { - match = 0; - } else { - /* - * We only need to check (in)equality when we have equal length - * strings. We can use memcmp in all (n)eq cases because we - * don't need to worry about lexical LE/BE variance. - */ - - typedef int (*memCmpFn_t)(const void*, const void*, size_t); - memCmpFn_t memCmpFn; + { int checkEq = ((*pc == INST_EQ) || (*pc == INST_NEQ) || (*pc == INST_STR_EQ) || (*pc == INST_STR_NEQ)); - - if (TclIsPureByteArray(valuePtr) - && TclIsPureByteArray(value2Ptr)) { - s1 = (char *) Tcl_GetByteArrayFromObj(valuePtr, &s1len); - s2 = (char *) Tcl_GetByteArrayFromObj(value2Ptr, &s2len); - memCmpFn = memcmp; - } else if ((valuePtr->typePtr == &tclStringType) - && (value2Ptr->typePtr == &tclStringType)) { - /* - * Do a unicode-specific comparison if both of the args are of - * String type. If the char length == byte length, we can do a - * memcmp. In benchmark testing this proved the most efficient - * check between the unicode and string comparison operations. - */ - - s1len = Tcl_GetCharLength(valuePtr); - s2len = Tcl_GetCharLength(value2Ptr); - if ((s1len == valuePtr->length) - && (valuePtr->bytes != NULL) - && (s2len == value2Ptr->length) - && (value2Ptr->bytes != NULL)) { - s1 = valuePtr->bytes; - s2 = value2Ptr->bytes; - memCmpFn = memcmp; - } else { - s1 = (char *) Tcl_GetUnicode(valuePtr); - s2 = (char *) Tcl_GetUnicode(value2Ptr); - if ( -#ifdef WORDS_BIGENDIAN - 1 -#else - checkEq -#endif - ) { - memCmpFn = memcmp; - s1len *= sizeof(Tcl_UniChar); - s2len *= sizeof(Tcl_UniChar); - } else { - memCmpFn = (memCmpFn_t) Tcl_UniCharNcmp; - } - } - } else { - /* - * strcmp can't do a simple memcmp in order to handle the - * special Tcl \xC0\x80 null encoding for utf-8. - */ - - s1 = TclGetStringFromObj(valuePtr, &s1len); - s2 = TclGetStringFromObj(value2Ptr, &s2len); - if (checkEq) { - memCmpFn = memcmp; - } else { - memCmpFn = (memCmpFn_t) TclpUtfNcmp2; - } - } - - if (checkEq && (s1len != s2len)) { - match = 1; - } else { - /* - * The comparison function should compare up to the minimum - * byte length only. - */ - match = memCmpFn(s1, s2, - (size_t) ((s1len < s2len) ? s1len : s2len)); - if (match == 0) { - match = s1len - s2len; - } - } + match = TclStringCmp(valuePtr, value2Ptr, checkEq, 0, -1); } /* @@ -5739,6 +5661,14 @@ TEBCresume( value2Ptr = OBJ_AT_TOS; valuePtr = OBJ_UNDER_TOS; + /* + Try to determine, without triggering generation of a string + representation, whether one value is not a number. + */ + if (TclCheckEmptyString(valuePtr) > 0 || TclCheckEmptyString(value2Ptr) > 0) { + goto stringCompare; + } + if (GetNumberFromObj(NULL, valuePtr, &ptr1, &type1) != TCL_OK || GetNumberFromObj(NULL, value2Ptr, &ptr2, &type2) != TCL_OK) { /* diff --git a/generic/tclInt.h b/generic/tclInt.h index e9afbbb..4a8b49d 100644 --- a/generic/tclInt.h +++ b/generic/tclInt.h @@ -2760,6 +2760,10 @@ MODULE_SCOPE long tclObjsShared[TCL_MAX_SHARED_OBJ_STATS]; MODULE_SCOPE char tclEmptyString; +enum CheckEmptyStringResult { + TCL_EMPTYSTRING_UNKNOWN = -1, TCL_EMPTYSTRING_NO, TCL_EMPTYSTRING_YES +}; + /* *---------------------------------------------------------------- * Procedures shared among Tcl modules but not used by the outside world, @@ -2900,6 +2904,7 @@ MODULE_SCOPE int TclCheckArrayTraces(Tcl_Interp *interp, Var *varPtr, Var *arrayPtr, Tcl_Obj *name, int index); MODULE_SCOPE int TclCheckBadOctal(Tcl_Interp *interp, const char *value); +MODULE_SCOPE int TclCheckEmptyString(Tcl_Obj *objPtr); MODULE_SCOPE int TclChanCaughtErrorBypass(Tcl_Interp *interp, Tcl_Channel chan); MODULE_SCOPE Tcl_ObjCmdProc TclChannelNamesCmd; @@ -3185,6 +3190,12 @@ MODULE_SCOPE void TclSpellFix(Tcl_Interp *interp, Tcl_Obj *bad, Tcl_Obj *fix); MODULE_SCOPE void * TclStackRealloc(Tcl_Interp *interp, void *ptr, int numBytes); + +typedef int (*memCmpFn_t)(const void*, const void*, size_t); +MODULE_SCOPE int TclStringCmp (Tcl_Obj *value1Ptr, Tcl_Obj *value2Ptr, + int checkEq, int nocase, int reqlength); +MODULE_SCOPE int TclStringCmpOpts (Tcl_Interp *interp, int objc, Tcl_Obj *const objv[], + int *nocase, int *reqlength); MODULE_SCOPE int TclStringMatch(const char *str, int strLen, const char *pattern, int ptnLen, int flags); MODULE_SCOPE int TclStringMatchObj(Tcl_Obj *stringObj, @@ -4540,6 +4551,9 @@ MODULE_SCOPE void TclDbInitNewObj(Tcl_Obj *objPtr, const char *file, */ MODULE_SCOPE int TclIsPureByteArray(Tcl_Obj *objPtr); +#define TclIsPureDict(objPtr) \ + (((objPtr)->bytes==NULL) && ((objPtr)->typePtr==&tclDictType)) + /* *---------------------------------------------------------------- diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c index 5915ce0..fd201ca 100644 --- a/generic/tclStringObj.c +++ b/generic/tclStringObj.c @@ -436,6 +436,7 @@ Tcl_GetCharLength( return length; } + /* * OK, need to work with the object as a string. */ @@ -454,6 +455,50 @@ Tcl_GetCharLength( } return numChars; } + + + +/* + *---------------------------------------------------------------------- + * + * TclCheckEmptyString -- + * + * Determine whether the string value of an object is or would be the + * empty string, without generating a string representation. + * + * Results: + * Returns 1 if empty, 0 if not, and -1 if unknown. + * + * Side effects: + * None. + * + *---------------------------------------------------------------------- + */ +int +TclCheckEmptyString ( + Tcl_Obj *objPtr +) { + int length = -1; + + if (objPtr->bytes == &tclEmptyString) { + return TCL_EMPTYSTRING_YES; + } + + if (TclListObjIsCanonical(objPtr)) { + Tcl_ListObjLength(NULL, objPtr, &length); + return length == 0; + } + + if (TclIsPureDict(objPtr)) { + Tcl_DictObjSize(NULL, objPtr, &length); + return length == 0; + } + + if (objPtr->bytes == NULL) { + return TCL_EMPTYSTRING_UNKNOWN; + } + return objPtr->length == 0; +} /* *---------------------------------------------------------------------- @@ -3271,6 +3316,181 @@ TclStringCat( /* *--------------------------------------------------------------------------- * + * TclStringCmp -- + * Compare two Tcl_Obj values as strings. + * + * Results: + * Like memcmp, return -1, 0, or 1. + * + * Side effects: + * String representations may be generated. Internal representation may + * be changed. + * + *--------------------------------------------------------------------------- + */ + +int TclStringCmp ( + Tcl_Obj *value1Ptr, + Tcl_Obj *value2Ptr, + int checkEq, /* comparison is only for equality */ + int nocase, /* comparison is not case sensitive */ + int reqlength /* requested length */ +) { + char *s1, *s2; + int empty, length, match, s1len, s2len; + memCmpFn_t memCmpFn; + + if ((reqlength == 0) || (value1Ptr == value2Ptr)) { + /* + * Always match at 0 chars of if it is the same obj. + */ + match = 0; + } else { + + if (!nocase && TclIsPureByteArray(value1Ptr) + && TclIsPureByteArray(value2Ptr)) { + /* + * Use binary versions of comparisons since that won't cause undue + * type conversions and it is much faster. Only do this if we're + * case-sensitive (which is all that really makes sense with byte + * arrays anyway, and we have no memcasecmp() for some reason... :^) + */ + s1 = (char *) Tcl_GetByteArrayFromObj(value1Ptr, &s1len); + s2 = (char *) Tcl_GetByteArrayFromObj(value2Ptr, &s2len); + memCmpFn = memcmp; + } else if ((value1Ptr->typePtr == &tclStringType) + && (value2Ptr->typePtr == &tclStringType)) { + /* + * Do a unicode-specific comparison if both of the args are of + * String type. If the char length == byte length, we can do a + * memcmp. In benchmark testing this proved the most efficient + * check between the unicode and string comparison operations. + */ + + if (nocase) { + s1 = (char *) Tcl_GetUnicodeFromObj(value1Ptr, &s1len); + s2 = (char *) Tcl_GetUnicodeFromObj(value2Ptr, &s2len); + memCmpFn = (memCmpFn_t)Tcl_UniCharNcasecmp; + } else { + s1len = Tcl_GetCharLength(value1Ptr); + s2len = Tcl_GetCharLength(value2Ptr); + if ((s1len == value1Ptr->length) + && (value1Ptr->bytes != NULL) + && (s2len == value2Ptr->length) + && (value2Ptr->bytes != NULL)) { + s1 = value1Ptr->bytes; + s2 = value2Ptr->bytes; + memCmpFn = memcmp; + } else { + s1 = (char *) Tcl_GetUnicode(value1Ptr); + s2 = (char *) Tcl_GetUnicode(value2Ptr); + if ( +#ifdef WORDS_BIGENDIAN + 1 +#else + checkEq +#endif + ) { + memCmpFn = memcmp; + s1len *= sizeof(Tcl_UniChar); + s2len *= sizeof(Tcl_UniChar); + } else { + memCmpFn = (memCmpFn_t) Tcl_UniCharNcmp; + } + } + } + } else { + if ((empty = TclCheckEmptyString(value1Ptr)) > 0) { + switch (TclCheckEmptyString(value2Ptr)) { + case -1: + s1 = 0; + s1len = 0; + s2 = TclGetStringFromObj(value2Ptr, &s2len); + break; + case 0: + match = -1; + goto matchdone; + case 1: + match = 0; + goto matchdone; + } + } else if (TclCheckEmptyString(value2Ptr) > 0) { + switch (empty) { + case -1: + s2 = 0; + s2len = 0; + s1 = TclGetStringFromObj(value1Ptr, &s1len); + break; + case 0: + match = 1; + goto matchdone; + case 1: + match = 0; + goto matchdone; + } + } else { + s1 = TclGetStringFromObj(value1Ptr, &s1len); + s2 = TclGetStringFromObj(value2Ptr, &s2len); + } + if (!nocase && checkEq) { + /* + * When we have equal-length we can check only for (in)equality. + * We can use memcmp in all (n)eq cases because we + * don't need to worry about lexical LE/BE variance. + */ + memCmpFn = memcmp; + } else { + + /* + * As a catch-all we will work with UTF-8. We cannot use memcmp() as + * that is unsafe with any string containing NUL (\xC0\x80 in Tcl's + * utf rep). We can use the more efficient TclpUtfNcmp2 if we are + * case-sensitive and no specific length was requested. + */ + + if ((reqlength < 0) && !nocase) { + memCmpFn = (memCmpFn_t) TclpUtfNcmp2; + } else { + s1len = Tcl_NumUtfChars(s1, s1len); + s2len = Tcl_NumUtfChars(s2, s2len); + memCmpFn = (memCmpFn_t) (nocase ? Tcl_UtfNcasecmp : Tcl_UtfNcmp); + } + } + } + + length = (s1len < s2len) ? s1len : s2len; + if (reqlength > 0 && reqlength < length) { + length = reqlength; + } else if (reqlength < 0) { + /* + * The requested length is negative, so we ignore it by setting it to + * length + 1 so we correct the match var. + */ + + reqlength = length + 1; + } + + if (checkEq && (s1len != s2len)) { + match = 1; /* This will be reversed below. */ + } else { + /* + * The comparison function should compare up to the minimum + * byte length only. + */ + match = memCmpFn(s1, s2, (size_t) length); + } + if ((match == 0) && (reqlength > length)) { + match = s1len - s2len; + } + match = (match > 0) ? 1 : (match < 0) ? -1 : 0; + } + matchdone: + return match; +} + +/* + *--------------------------------------------------------------------------- + * * TclStringFirst -- * * Implements the [string first] operation. |