diff options
Diffstat (limited to 'generic/tclStringObj.c')
-rw-r--r-- | generic/tclStringObj.c | 279 |
1 files changed, 157 insertions, 122 deletions
diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c index 42c4514..f9ac8d7 100644 --- a/generic/tclStringObj.c +++ b/generic/tclStringObj.c @@ -436,7 +436,6 @@ Tcl_GetCharLength( return length; } - /* * OK, need to work with the object as a string. */ @@ -455,8 +454,6 @@ Tcl_GetCharLength( } return numChars; } - - /* *---------------------------------------------------------------------- @@ -475,9 +472,9 @@ Tcl_GetCharLength( *---------------------------------------------------------------------- */ int -TclCheckEmptyString ( - Tcl_Obj *objPtr -) { +TclCheckEmptyString( + Tcl_Obj *objPtr) +{ int length = -1; if (objPtr->bytes == &tclEmptyString) { @@ -570,20 +567,22 @@ Tcl_GetUniChar( if (index >= stringPtr->numChars) { return -1; } - ch = stringPtr->unicode[index]; + ch = stringPtr->unicode[index]; #if TCL_UTF_MAX <= 4 - /* See: bug [11ae2be95dac9417] */ - if ((ch&0xF800) == 0xD800) { - if (ch&0x400) { - if ((index > 0) && ((stringPtr->unicode[index-1]&0xFC00) == 0xD800)) { - ch = -1; /* low surrogate preceded by high surrogate */ - } - } else if ((++index < stringPtr->numChars) - && ((stringPtr->unicode[index]&0xFC00) == 0xDC00)) { - /* high surrogate followed by low surrogate */ - ch = (((ch & 0x3FF) << 10) | (stringPtr->unicode[index] & 0x3FF)) + 0x10000; + /* See: bug [11ae2be95dac9417] */ + if ((ch & 0xF800) == 0xD800) { + if (ch & 0x400) { + if ((index > 0) + && ((stringPtr->unicode[index-1] & 0xFC00) == 0xD800)) { + ch = -1; /* low surrogate preceded by high surrogate */ } + } else if ((++index < stringPtr->numChars) + && ((stringPtr->unicode[index] & 0xFC00) == 0xDC00)) { + /* high surrogate followed by low surrogate */ + ch = (((ch & 0x3FF) << 10) | + (stringPtr->unicode[index] & 0x3FF)) + 0x10000; } + } #endif return ch; } @@ -689,7 +688,8 @@ Tcl_GetRange( if (first < 0) { first = 0; - } + } + /* * Optimize the case where we're really dealing with a bytearray object * we don't need to convert to a string to perform the substring operation. @@ -697,13 +697,14 @@ Tcl_GetRange( if (TclIsPureByteArray(objPtr)) { unsigned char *bytes = Tcl_GetByteArrayFromObj(objPtr, &length); + if (last >= length) { last = length - 1; } if (last < first) { return Tcl_NewObj(); } - return Tcl_NewByteArrayObj(bytes+first, last-first+1); + return Tcl_NewByteArrayObj(bytes + first, last - first + 1); } /* @@ -742,24 +743,25 @@ Tcl_GetRange( FillUnicodeRep(objPtr); stringPtr = GET_STRING(objPtr); } - if (last > stringPtr->numChars) { - last = stringPtr->numChars; - } - if (last < first) { - return Tcl_NewObj(); - } + if (last > stringPtr->numChars) { + last = stringPtr->numChars; + } + if (last < first) { + return Tcl_NewObj(); + } #if TCL_UTF_MAX <= 4 - /* See: bug [11ae2be95dac9417] */ - if ((first>0) && ((stringPtr->unicode[first]&0xFC00) == 0xDC00) - && ((stringPtr->unicode[first-1]&0xFC00) == 0xD800)) { - ++first; - } - if ((last+1<stringPtr->numChars) && ((stringPtr->unicode[last+1]&0xFC00) == 0xDC00) - && ((stringPtr->unicode[last]&0xFC00) == 0xD800)) { - ++last; - } + /* See: bug [11ae2be95dac9417] */ + if ((first > 0) && ((stringPtr->unicode[first] & 0xFC00) == 0xDC00) + && ((stringPtr->unicode[first-1] & 0xFC00) == 0xD800)) { + ++first; + } + if ((last + 1 < stringPtr->numChars) + && ((stringPtr->unicode[last+1] & 0xFC00) == 0xDC00) + && ((stringPtr->unicode[last] & 0xFC00) == 0xD800)) { + ++last; + } #endif - return Tcl_NewUnicodeObj(stringPtr->unicode + first, last-first+1); + return Tcl_NewUnicodeObj(stringPtr->unicode + first, last - first + 1); } /* @@ -1317,45 +1319,51 @@ Tcl_AppendObjToObj( /* * Handle append of one bytearray object to another as a special case. * Note that we only do this when the objects are pure so that the - * bytearray faithfully represent the true value; Otherwise - * appending the byte arrays together could lose information; + * bytearray faithfully represent the true value; Otherwise appending the + * byte arrays together could lose information; */ if ((TclIsPureByteArray(objPtr) || objPtr->bytes == &tclEmptyString) && TclIsPureByteArray(appendObjPtr)) { - /* * You might expect the code here to be * * bytes = Tcl_GetByteArrayFromObj(appendObjPtr, &length); * TclAppendBytesToByteArray(objPtr, bytes, length); * - * and essentially all of the time that would be fine. However, - * it would run into trouble in the case where objPtr and - * appendObjPtr point to the same thing. That may never be a - * good idea. It seems to violate Copy On Write, and we don't - * have any tests for the situation, since making any Tcl commands - * that call Tcl_AppendObjToObj() do that appears impossible - * (They honor Copy On Write!). For the sake of extensions that - * go off into that realm, though, here's a more complex approach - * that can handle all the cases. + * and essentially all of the time that would be fine. However, it + * would run into trouble in the case where objPtr and appendObjPtr + * point to the same thing. That may never be a good idea. It seems to + * violate Copy On Write, and we don't have any tests for the + * situation, since making any Tcl commands that call + * Tcl_AppendObjToObj() do that appears impossible (They honor Copy On + * Write!). For the sake of extensions that go off into that realm, + * though, here's a more complex approach that can handle all the + * cases. + * + * First, get the lengths. */ - /* Get lengths */ int lengthSrc; (void) Tcl_GetByteArrayFromObj(objPtr, &length); (void) Tcl_GetByteArrayFromObj(appendObjPtr, &lengthSrc); - /* Grow buffer enough for the append */ + /* + * Grow buffer enough for the append. + */ + TclAppendBytesToByteArray(objPtr, NULL, lengthSrc); - /* Reset objPtr back to the original value */ + /* + * Reset objPtr back to the original value. + */ + Tcl_SetByteArrayLength(objPtr, length); /* - * Now do the append knowing that buffer growth cannot cause - * any trouble. + * Now do the append knowing that buffer growth cannot cause any + * trouble. */ TclAppendBytesToByteArray(objPtr, @@ -1403,6 +1411,7 @@ Tcl_AppendObjToObj( numChars = stringPtr->numChars; if ((numChars >= 0) && (appendObjPtr->typePtr == &tclStringType)) { String *appendStringPtr = GET_STRING(appendObjPtr); + appendNumChars = appendStringPtr->numChars; } @@ -1988,7 +1997,8 @@ Tcl_AppendFormatToObj( format += step; step = TclUtfToUniChar(format, &ch); } - } else if ((ch == 't') || (ch == 'z') || (ch == 'q') || (ch == 'j') || (ch == 'L')) { + } else if ((ch == 't') || (ch == 'z') || (ch == 'q') || (ch == 'j') + || (ch == 'L')) { format += step; step = TclUtfToUniChar(format, &ch); useBig = 1; @@ -2513,7 +2523,7 @@ Tcl_AppendFormatToObj( /* *--------------------------------------------------------------------------- * - * Tcl_Format-- + * Tcl_Format -- * * Results: * A refcount zero Tcl_Obj. @@ -2904,7 +2914,10 @@ TclStringRepeat( Tcl_GetByteArrayFromObj(objResultPtr, NULL), (count - done) * length); } else if (unichar) { - /* Efficiently produce a pure Tcl_UniChar array result */ + /* + * Efficiently produce a pure Tcl_UniChar array result. + */ + if (!inPlace || Tcl_IsShared(objPtr)) { objResultPtr = Tcl_NewUnicodeObj(Tcl_GetUnicode(objPtr), length); } else { @@ -2930,7 +2943,10 @@ TclStringRepeat( Tcl_AppendUnicodeToObj(objResultPtr, Tcl_GetUnicode(objResultPtr), (count - done) * length); } else { - /* Efficiently concatenate string reps */ + /* + * Efficiently concatenate string reps. + */ + if (!inPlace || Tcl_IsShared(objPtr)) { objResultPtr = Tcl_NewStringObj(Tcl_GetString(objPtr), length); } else { @@ -3014,9 +3030,10 @@ TclStringCat( /* Value has a string rep. */ if (objPtr->length) { /* - * Non-empty string rep. Not a pure bytearray, so we - * won't create a pure bytearray + * Non-empty string rep. Not a pure bytearray, so we won't + * create a pure bytearray. */ + binary = 0; if ((objPtr->typePtr) && (objPtr->typePtr != &tclStringType)) { /* Prevent shimmer of non-string types. */ @@ -3037,8 +3054,12 @@ TclStringCat( } while (--oc && (binary || allowUniChar)); if (binary) { - /* Result will be pure byte array. Pre-size it */ - ov = objv; oc = objc; + /* + * Result will be pure byte array. Pre-size it + */ + + ov = objv; + oc = objc; do { Tcl_Obj *objPtr = *ov++; @@ -3058,8 +3079,12 @@ TclStringCat( } } while (--oc); } else if (allowUniChar && requestUniChar) { - /* Result will be pure Tcl_UniChar array. Pre-size it. */ - ov = objv; oc = objc; + /* + * Result will be pure Tcl_UniChar array. Pre-size it. + */ + + ov = objv; + oc = objc; do { Tcl_Obj *objPtr = *ov++; @@ -3104,9 +3129,9 @@ TclStringCat( } while (--oc && (length == 0) && (pendingPtr == NULL)); /* - * Either we found a possibly non-empty value, and we - * remember this index as the first and last such value so - * far seen, or (oc == 0) and all values are known empty, + * Either we found a possibly non-empty value, and we remember + * this index as the first and last such value so far seen, + * or (oc == 0) and all values are known empty, * so first = last = objc - 1 signals the right quick return. */ @@ -3118,10 +3143,9 @@ TclStringCat( /* assert ( pendingPtr != NULL ) */ /* - * There's a pending value followed by more values. - * Loop over remaining values generating strings until - * a non-empty value is found, or the pending value gets - * its string generated. + * There's a pending value followed by more values. Loop over + * remaining values generating strings until a non-empty value + * is found, or the pending value gets its string generated. */ do { @@ -3177,10 +3201,10 @@ TclStringCat( unsigned char *dst; /* - * Broken interface! Byte array value routines offer no way - * to handle failure to allocate enough space. Following - * stanza may panic. + * Broken interface! Byte array value routines offer no way to handle + * failure to allocate enough space. Following stanza may panic. */ + if (inPlace && !Tcl_IsShared(*objv)) { int start; @@ -3295,6 +3319,7 @@ TclStringCat( if ((objPtr->bytes == NULL) || (objPtr->length)) { int more; char *src = Tcl_GetStringFromObj(objPtr, &more); + memcpy(dst, src, (size_t) more); dst += more; } @@ -3320,7 +3345,7 @@ TclStringCat( * Compare two Tcl_Obj values as strings. * * Results: - * Like memcmp, return -1, 0, or 1. + * Like memcmp, return -1, 0, or 1. * * Side effects: * String representations may be generated. Internal representation may @@ -3329,13 +3354,13 @@ TclStringCat( *--------------------------------------------------------------------------- */ -int TclStringCmp ( - Tcl_Obj *value1Ptr, - Tcl_Obj *value2Ptr, - int checkEq, /* comparison is only for equality */ - int nocase, /* comparison is not case sensitive */ - int reqlength /* requested length */ -) { +int TclStringCmp( + Tcl_Obj *value1Ptr, + Tcl_Obj *value2Ptr, + int checkEq, /* comparison is only for equality */ + int nocase, /* comparison is not case sensitive */ + int reqlength) /* requested length */ +{ char *s1, *s2; int empty, length, match, s1len, s2len; memCmpFn_t memCmpFn; @@ -3355,6 +3380,7 @@ int TclStringCmp ( * case-sensitive (which is all that really makes sense with byte * arrays anyway, and we have no memcasecmp() for some reason... :^) */ + s1 = (char *) Tcl_GetByteArrayFromObj(value1Ptr, &s1len); s2 = (char *) Tcl_GetByteArrayFromObj(value2Ptr, &s2len); memCmpFn = memcmp; @@ -3402,31 +3428,31 @@ int TclStringCmp ( } else { if ((empty = TclCheckEmptyString(value1Ptr)) > 0) { switch (TclCheckEmptyString(value2Ptr)) { - case -1: + case -1: s1 = 0; s1len = 0; s2 = TclGetStringFromObj(value2Ptr, &s2len); break; - case 0: + case 0: match = -1; goto matchdone; - case 1: - default: /* avoid warn: `s2` may be used uninitialized */ + case 1: + default: /* avoid warn: `s2` may be used uninitialized */ match = 0; goto matchdone; } } else if (TclCheckEmptyString(value2Ptr) > 0) { switch (empty) { - case -1: + case -1: s2 = 0; s2len = 0; s1 = TclGetStringFromObj(value1Ptr, &s1len); break; - case 0: + case 0: match = 1; goto matchdone; - case 1: - default: /* avoid warn: `s1` may be used uninitialized */ + case 1: + default: /* avoid warn: `s1` may be used uninitialized */ match = 0; goto matchdone; } @@ -3436,18 +3462,19 @@ int TclStringCmp ( } if (!nocase && checkEq) { /* - * When we have equal-length we can check only for (in)equality. - * We can use memcmp in all (n)eq cases because we - * don't need to worry about lexical LE/BE variance. + * When we have equal-length we can check only for + * (in)equality. We can use memcmp in all (n)eq cases because + * we don't need to worry about lexical LE/BE variance. */ + memCmpFn = memcmp; } else { - /* - * As a catch-all we will work with UTF-8. We cannot use memcmp() as - * that is unsafe with any string containing NUL (\xC0\x80 in Tcl's - * utf rep). We can use the more efficient TclpUtfNcmp2 if we are - * case-sensitive and no specific length was requested. + * As a catch-all we will work with UTF-8. We cannot use + * memcmp() as that is unsafe with any string containing NUL + * (\xC0\x80 in Tcl's utf rep). We can use the more efficient + * TclpUtfNcmp2 if we are case-sensitive and no specific + * length was requested. */ if ((reqlength < 0) && !nocase) { @@ -3455,7 +3482,8 @@ int TclStringCmp ( } else { s1len = Tcl_NumUtfChars(s1, s1len); s2len = Tcl_NumUtfChars(s2, s2len); - memCmpFn = (memCmpFn_t) (nocase ? Tcl_UtfNcasecmp : Tcl_UtfNcmp); + memCmpFn = (memCmpFn_t) + (nocase ? Tcl_UtfNcasecmp : Tcl_UtfNcmp); } } } @@ -3465,8 +3493,8 @@ int TclStringCmp ( length = reqlength; } else if (reqlength < 0) { /* - * The requested length is negative, so we ignore it by setting it to - * length + 1 so we correct the match var. + * The requested length is negative, so we ignore it by setting it + * to length + 1 so we correct the match var. */ reqlength = length + 1; @@ -3474,11 +3502,12 @@ int TclStringCmp ( if (checkEq && (s1len != s2len)) { match = 1; /* This will be reversed below. */ - } else { + } else { /* - * The comparison function should compare up to the minimum - * byte length only. + * The comparison function should compare up to the minimum byte + * length only. */ + match = memCmpFn(s1, s2, (size_t) length); } if ((match == 0) && (reqlength > length)) { @@ -3486,7 +3515,7 @@ int TclStringCmp ( } match = (match > 0) ? 1 : (match < 0) ? -1 : 0; } - matchdone: + matchdone: return match; } @@ -3684,18 +3713,20 @@ static void ReverseBytes( unsigned char *to, /* Copy bytes into here... */ unsigned char *from, /* ...from here... */ - int count) /* Until this many are copied, */ + int count) /* Until this many are copied, */ /* reversing as you go. */ { unsigned char *src = from + count; + if (to == from) { /* Reversing in place */ while (--src > to) { unsigned char c = *src; + *src = *to; *to++ = c; } - } else { + } else { while (--src >= from) { *to++ = *src; } @@ -3744,7 +3775,10 @@ TclStringReverse( *to++ = *src; } } else { - /* Reversing in place */ + /* + * Reversing in place. + */ + while (--src > from) { ch = *src; *src = *from; @@ -3768,20 +3802,22 @@ TclStringReverse( /* * Either numChars == -1 and we don't know how many chars are * represented by objPtr->bytes and we need Pass 1 just in case, - * or numChars >= 0 and we know we have fewer chars than bytes, - * so we know there's a multibyte character needing Pass 1. + * or numChars >= 0 and we know we have fewer chars than bytes, so + * we know there's a multibyte character needing Pass 1. * * Pass 1. Reverse the bytes of each multi-byte character. */ + int charCount = 0; int bytesLeft = numBytes; while (bytesLeft) { /* - * NOTE: We know that the from buffer is NUL-terminated. - * It's part of the contract for objPtr->bytes values. - * Thus, we can skip calling Tcl_UtfCharComplete() here. + * NOTE: We know that the from buffer is NUL-terminated. It's + * part of the contract for objPtr->bytes values. Thus, we can + * skip calling Tcl_UtfCharComplete() here. */ + int bytesInChar = TclUtfToUniChar(from, &ch); ReverseBytes((unsigned char *)to, (unsigned char *)from, @@ -3811,19 +3847,18 @@ TclStringReverse( * * The result is a concatenation of a prefix from objPtr, characters * 0 through first-1, the insertPtr string value, and a suffix from - * objPtr, characters from first + count to the end. The effect is - * as if the inner substring of characters first through first+count-1 - * are removed and replaced with insertPtr. - * If insertPtr is NULL, it is treated as an empty string. - * When passed the flag TCL_STRING_IN_PLACE, this routine will try - * to do the work within objPtr, so long as no sharing forbids it. - * Without that request, or as needed, a new Tcl value will be allocated - * to be the result. + * objPtr, characters from first + count to the end. The effect is as if + * the inner substring of characters first through first+count-1 are + * removed and replaced with insertPtr. If insertPtr is NULL, it is + * treated as an empty string. When passed the flag TCL_STRING_IN_PLACE, + * this routine will try to do the work within objPtr, so long as no + * sharing forbids it. Without that request, or as needed, a new Tcl + * value will be allocated to be the result. * * Results: - * A Tcl value that is the result of the substring replacement. - * May return NULL in case of an error. When NULL is returned and - * interp is non-NULL, error information is left in interp + * A Tcl value that is the result of the substring replacement. May + * return NULL in case of an error. When NULL is returned and interp is + * non-NULL, error information is left in interp * *--------------------------------------------------------------------------- */ |