diff options
Diffstat (limited to 'generic/tclStringObj.c')
| -rw-r--r-- | generic/tclStringObj.c | 222 |
1 files changed, 222 insertions, 0 deletions
diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c index a330a14..39358eb 100644 --- a/generic/tclStringObj.c +++ b/generic/tclStringObj.c @@ -436,6 +436,7 @@ Tcl_GetCharLength( return length; } + /* * OK, need to work with the object as a string. */ @@ -454,6 +455,50 @@ Tcl_GetCharLength( } return numChars; } + + + +/* + *---------------------------------------------------------------------- + * + * TclCheckEmptyString -- + * + * Determine whether the string value of an object is or would be the + * empty string, without generating a string representation. + * + * Results: + * Returns 1 if empty, 0 if not, and -1 if unknown. + * + * Side effects: + * None. + * + *---------------------------------------------------------------------- + */ +int +TclCheckEmptyString ( + Tcl_Obj *objPtr +) { + int length = -1; + + if (objPtr->bytes == &tclEmptyString) { + return TCL_EMPTYSTRING_YES; + } + + if (TclListObjIsCanonical(objPtr)) { + Tcl_ListObjLength(NULL, objPtr, &length); + return length == 0; + } + + if (TclIsPureDict(objPtr)) { + Tcl_DictObjSize(NULL, objPtr, &length); + return length == 0; + } + + if (objPtr->bytes == NULL) { + return TCL_EMPTYSTRING_UNKNOWN; + } + return objPtr->length == 0; +} /* *---------------------------------------------------------------------- @@ -3237,6 +3282,183 @@ TclStringCat( /* *--------------------------------------------------------------------------- * + * TclStringCmp -- + * Compare two Tcl_Obj values as strings. + * + * Results: + * Like memcmp, return -1, 0, or 1. + * + * Side effects: + * String representations may be generated. Internal representation may + * be changed. + * + *--------------------------------------------------------------------------- + */ + +int TclStringCmp ( + Tcl_Obj *value1Ptr, + Tcl_Obj *value2Ptr, + int checkEq, /* comparison is only for equality */ + int nocase, /* comparison is not case sensitive */ + int reqlength /* requested length */ +) { + char *s1, *s2; + int empty, length, match, s1len, s2len; + memCmpFn_t memCmpFn; + + if ((reqlength == 0) || (value1Ptr == value2Ptr)) { + /* + * Always match at 0 chars of if it is the same obj. + */ + match = 0; + } else { + + if (!nocase && TclIsPureByteArray(value1Ptr) + && TclIsPureByteArray(value2Ptr)) { + /* + * Use binary versions of comparisons since that won't cause undue + * type conversions and it is much faster. Only do this if we're + * case-sensitive (which is all that really makes sense with byte + * arrays anyway, and we have no memcasecmp() for some reason... :^) + */ + s1 = (char *) Tcl_GetByteArrayFromObj(value1Ptr, &s1len); + s2 = (char *) Tcl_GetByteArrayFromObj(value2Ptr, &s2len); + memCmpFn = memcmp; + } else if ((value1Ptr->typePtr == &tclStringType) + && (value2Ptr->typePtr == &tclStringType)) { + /* + * Do a unicode-specific comparison if both of the args are of + * String type. If the char length == byte length, we can do a + * memcmp. In benchmark testing this proved the most efficient + * check between the unicode and string comparison operations. + */ + + if (nocase) { + s1 = (char *) Tcl_GetUnicodeFromObj(value1Ptr, &s1len); + s2 = (char *) Tcl_GetUnicodeFromObj(value2Ptr, &s2len); + memCmpFn = (memCmpFn_t)Tcl_UniCharNcasecmp; + } else { + s1len = Tcl_GetCharLength(value1Ptr); + s2len = Tcl_GetCharLength(value2Ptr); + if ((s1len == value1Ptr->length) + && (value1Ptr->bytes != NULL) + && (s2len == value2Ptr->length) + && (value2Ptr->bytes != NULL)) { + s1 = value1Ptr->bytes; + s2 = value2Ptr->bytes; + memCmpFn = memcmp; + } else { + s1 = (char *) Tcl_GetUnicode(value1Ptr); + s2 = (char *) Tcl_GetUnicode(value2Ptr); + if ( +#ifdef WORDS_BIGENDIAN + 1 +#else + checkEq +#endif + ) { + memCmpFn = memcmp; + s1len *= sizeof(Tcl_UniChar); + s2len *= sizeof(Tcl_UniChar); + } else { + memCmpFn = (memCmpFn_t) Tcl_UniCharNcmp; + } + } + } + } else { + if ((empty = TclCheckEmptyString(value1Ptr)) > 0) { + switch (TclCheckEmptyString(value2Ptr)) { + case -1: + s1 = 0; + s1len = 0; + s2 = TclGetStringFromObj(value2Ptr, &s2len); + break; + case 0: + match = -1; + goto matchdone; + case 1: + default: /* avoid warn: `s2` may be used uninitialized */ + match = 0; + goto matchdone; + } + } else if (TclCheckEmptyString(value2Ptr) > 0) { + switch (empty) { + case -1: + s2 = 0; + s2len = 0; + s1 = TclGetStringFromObj(value1Ptr, &s1len); + break; + case 0: + match = 1; + goto matchdone; + case 1: + default: /* avoid warn: `s1` may be used uninitialized */ + match = 0; + goto matchdone; + } + } else { + s1 = TclGetStringFromObj(value1Ptr, &s1len); + s2 = TclGetStringFromObj(value2Ptr, &s2len); + } + if (!nocase && checkEq) { + /* + * When we have equal-length we can check only for (in)equality. + * We can use memcmp in all (n)eq cases because we + * don't need to worry about lexical LE/BE variance. + */ + memCmpFn = memcmp; + } else { + + /* + * As a catch-all we will work with UTF-8. We cannot use memcmp() as + * that is unsafe with any string containing NUL (\xC0\x80 in Tcl's + * utf rep). We can use the more efficient TclpUtfNcmp2 if we are + * case-sensitive and no specific length was requested. + */ + + if ((reqlength < 0) && !nocase) { + memCmpFn = (memCmpFn_t) TclpUtfNcmp2; + } else { + s1len = Tcl_NumUtfChars(s1, s1len); + s2len = Tcl_NumUtfChars(s2, s2len); + memCmpFn = (memCmpFn_t) (nocase ? Tcl_UtfNcasecmp : Tcl_UtfNcmp); + } + } + } + + length = (s1len < s2len) ? s1len : s2len; + if (reqlength > 0 && reqlength < length) { + length = reqlength; + } else if (reqlength < 0) { + /* + * The requested length is negative, so we ignore it by setting it to + * length + 1 so we correct the match var. + */ + + reqlength = length + 1; + } + + if (checkEq && (s1len != s2len)) { + match = 1; /* This will be reversed below. */ + } else { + /* + * The comparison function should compare up to the minimum + * byte length only. + */ + match = memCmpFn(s1, s2, (size_t) length); + } + if ((match == 0) && (reqlength > length)) { + match = s1len - s2len; + } + match = (match > 0) ? 1 : (match < 0) ? -1 : 0; + } + matchdone: + return match; +} + +/* + *--------------------------------------------------------------------------- + * * TclStringFirst -- * * Implements the [string first] operation. |
