diff options
author | dgp <dgp@users.sourceforge.net> | 2009-02-14 23:07:17 (GMT) |
---|---|---|
committer | dgp <dgp@users.sourceforge.net> | 2009-02-14 23:07:17 (GMT) |
commit | 9325ac89905cf6cadf12491e007de22301362651 (patch) | |
tree | 0444ca159ecb1aa9c156bec35a4b82953ce34b43 | |
parent | 34bab026d12e407d9bf7e5d9eca2012f2e9a1f97 (diff) | |
download | tcl-9325ac89905cf6cadf12491e007de22301362651.zip tcl-9325ac89905cf6cadf12491e007de22301362651.tar.gz tcl-9325ac89905cf6cadf12491e007de22301362651.tar.bz2 |
* generic/tclStringObj.c: Revisions so that we avoid creating
the strange representation of an empty string with
objPtr->bytes == NULL and stringPtr->hasUnicode == 0. Instead in
the situations where that was being created, create a traditional
two-legged stork representation (objPtr->bytes = tclEmptyStringRep
and stringPtr->hasUnicode = 1). In the situations where the strange
rep was treated differently, continue to do so by testing
stringPtr->numChars == 0 to detect it. These changes make the code
more conventional, and so easier for new maintainers to pick up. They also
set up further simplifications.
-rw-r--r-- | ChangeLog | 11 | ||||
-rw-r--r-- | generic/tclStringObj.c | 124 |
2 files changed, 84 insertions, 51 deletions
@@ -1,5 +1,16 @@ 2009-02-14 Don Porter <dgp@users.sourceforge.net> + * generic/tclStringObj.c: Revisions so that we avoid creating + the strange representation of an empty string with + objPtr->bytes == NULL and stringPtr->hasUnicode == 0. Instead in + the situations where that was being created, create a traditional + two-legged stork representation (objPtr->bytes = tclEmptyStringRep + and stringPtr->hasUnicode = 1). In the situations where the strange + rep was treated differently, continue to do so by testing + stringPtr->numChars == 0 to detect it. These changes make the code + more conventional so easier for new maintainers to pick up. Also + sets up further simplifications. + * generic/tclTestObj.c: Revise updates to [teststringobj] so we don't get blocked by MODULE_SCOPE limits. diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c index 8d24001..9112572 100644 --- a/generic/tclStringObj.c +++ b/generic/tclStringObj.c @@ -33,7 +33,7 @@ * See the file "license.terms" for information on usage and redistribution of * this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclStringObj.c,v 1.104 2009/02/13 14:45:54 dgp Exp $ */ + * RCS: @(#) $Id: tclStringObj.c,v 1.105 2009/02/14 23:07:17 dgp Exp $ */ #include "tclInt.h" #include "tommath.h" @@ -64,6 +64,7 @@ static void FreeStringInternalRep(Tcl_Obj *objPtr); static int SetStringFromAny(Tcl_Interp *interp, Tcl_Obj *objPtr); static void SetUnicodeObj(Tcl_Obj *objPtr, const Tcl_UniChar *unicode, int numChars); +static int UnicodeLength(const Tcl_UniChar *unicode); static void UpdateStringOfString(Tcl_Obj *objPtr); /* @@ -406,7 +407,7 @@ Tcl_GetCharLength( * Disabled the auto-fill of the unicode rep when multi-byte * characters have been detected, on the YAGNI principle. 
*/ -#if 0 +#if 1 if (numChars < objPtr->length) { /* * Since we've just computed the number of chars, and not all @@ -723,7 +724,6 @@ Tcl_SetObjLength( } SetStringFromAny(NULL, objPtr); - stringPtr = GET_STRING(objPtr); /* @@ -761,6 +761,8 @@ Tcl_SetObjLength( objPtr->bytes[length] = 0; } + /* Note: here we can get an empty string != tclEmptyStringRep */ + /* * Invalidate the unicode data. */ @@ -779,16 +781,21 @@ Tcl_SetObjLength( SET_STRING(objPtr, stringPtr); stringPtr->uallocated = uallocated; } + + /* Mark the new end of the unicode string */ stringPtr->numChars = length; - stringPtr->hasUnicode = (length > 0); + stringPtr->unicode[length] = 0; + stringPtr->hasUnicode = 1; /* - * Ensure the string is NUL-terminated. + * Can only get here when objPtr->bytes == NULL. + * No need to invalidate the string rep. */ - stringPtr->unicode[length] = 0; - stringPtr->allocated = 0; - objPtr->length = 0; + if (length == 0) { + /* For the empty string case, set the string rep. */ + TclInitStringRep(objPtr, tclEmptyStringRep, 0); + } } } @@ -840,7 +847,6 @@ Tcl_AttemptSetObjLength( } SetStringFromAny(NULL, objPtr); - stringPtr = GET_STRING(objPtr); /* @@ -905,16 +911,21 @@ Tcl_AttemptSetObjLength( SET_STRING(objPtr, stringPtr); stringPtr->uallocated = uallocated; } + + /* Mark the new end of the unicode string */ + stringPtr->unicode[length] = 0; stringPtr->numChars = length; - stringPtr->hasUnicode = (length > 0); + stringPtr->hasUnicode = 1; /* - * Ensure the string is NUL-terminated. + * Can only get here when objPtr->bytes == NULL. + * No need to invalidate the string rep. */ - stringPtr->unicode[length] = 0; - stringPtr->allocated = 0; - objPtr->length = 0; + if (length == 0) { + /* For the empty string case, set the string rep. 
*/ + TclInitStringRep(objPtr, tclEmptyStringRep, 0); + } } return 1; } @@ -988,17 +999,22 @@ SetUnicodeObj( uallocated = STRING_UALLOC(numChars); stringPtr = stringAlloc(uallocated); + SET_STRING(objPtr, stringPtr); + objPtr->typePtr = &tclStringType; - stringPtr->numChars = numChars; stringPtr->uallocated = uallocated; - stringPtr->hasUnicode = (numChars > 0); - stringPtr->allocated = 0; memcpy(stringPtr->unicode, unicode, uallocated); stringPtr->unicode[numChars] = 0; + stringPtr->numChars = numChars; + stringPtr->hasUnicode = 1; TclInvalidateStringRep(objPtr); - objPtr->typePtr = &tclStringType; - SET_STRING(objPtr, stringPtr); + stringPtr->allocated = 0; + + if (numChars == 0) { + /* For the empty string case, set the string rep. */ + TclInitStringRep(objPtr, tclEmptyStringRep, 0); + } } /* @@ -1040,8 +1056,6 @@ Tcl_AppendLimitedToObj( Tcl_Panic("%s called with shared object", "Tcl_AppendLimitedToObj"); } - SetStringFromAny(NULL, objPtr); - if (length < 0) { length = (bytes ? strlen(bytes) : 0); } @@ -1064,8 +1078,10 @@ Tcl_AppendLimitedToObj( * objPtr's string rep. */ + SetStringFromAny(NULL, objPtr); stringPtr = GET_STRING(objPtr); - if (stringPtr->hasUnicode != 0) { + + if (stringPtr->hasUnicode && stringPtr->numChars > 0) { AppendUtfToUnicodeRep(objPtr, bytes, toCopy); } else { AppendUtfToUtfRep(objPtr, bytes, toCopy); @@ -1076,7 +1092,7 @@ Tcl_AppendLimitedToObj( } stringPtr = GET_STRING(objPtr); - if (stringPtr->hasUnicode != 0) { + if (stringPtr->hasUnicode && stringPtr->numChars > 0) { AppendUtfToUnicodeRep(objPtr, ellipsis, strlen(ellipsis)); } else { AppendUtfToUtfRep(objPtr, ellipsis, strlen(ellipsis)); @@ -1155,7 +1171,8 @@ Tcl_AppendUnicodeToObj( * objPtr's string rep. */ - if (stringPtr->hasUnicode != 0) { + /* TODO: shift appends to empty to work on Unicode? 
*/ + if (stringPtr->hasUnicode && stringPtr->numChars > 0) { AppendUnicodeToUnicodeRep(objPtr, unicode, length); } else { AppendUnicodeToUtfRep(objPtr, unicode, length); @@ -1223,21 +1240,23 @@ Tcl_AppendObjToObj( */ SetStringFromAny(NULL, objPtr); + stringPtr = GET_STRING(objPtr); /* * If objPtr has a valid Unicode rep, then get a Unicode string from * appendObjPtr and append it. */ - stringPtr = GET_STRING(objPtr); - if (stringPtr->hasUnicode != 0) { + /* TODO: Check that append to self works */ + + if (stringPtr->hasUnicode && stringPtr->numChars > 0) { /* * If appendObjPtr is not of the "String" type, don't convert it. */ if (appendObjPtr->typePtr == &tclStringType) { stringPtr = GET_STRING(appendObjPtr); - if ((stringPtr->numChars == -1) || (stringPtr->hasUnicode == 0)) { + if (stringPtr->hasUnicode == 0) { /* * If appendObjPtr is a string obj with no valid Unicode rep, * then fill its unicode rep. @@ -1267,6 +1286,7 @@ Tcl_AppendObjToObj( numChars = stringPtr->numChars; if ((numChars >= 0) && (appendObjPtr->typePtr == &tclStringType)) { stringPtr = GET_STRING(appendObjPtr); + /* TODO why is the == length test needed here? */ if ((stringPtr->numChars >= 0) && (stringPtr->numChars == length)) { numChars += stringPtr->numChars; allOneByteChars = 1; @@ -1386,6 +1406,7 @@ AppendUnicodeToUtfRep( stringPtr->numChars += numChars; } + /* TODO: Condition on (numChars > 0) ? or change caller & eliminate ? */ /* Invalidate the unicode rep */ stringPtr->hasUnicode = 0; } @@ -1580,6 +1601,7 @@ Tcl_AppendStringsToObjVA( } stringPtr = GET_STRING(objPtr); + /* TODO: pure unicode will crash! 
*/ if (oldLength + newLength > stringPtr->allocated) { /* * There isn't currently enough space in the string representation, so @@ -2708,12 +2730,16 @@ ExtendUnicodeRepWithString( SET_STRING(objPtr, stringPtr); } - stringPtr->hasUnicode = (needed > 0); + stringPtr->hasUnicode = 1; stringPtr->numChars = needed; for (dst=stringPtr->unicode + numOrigChars; numAppendChars-- > 0; dst++) { bytes += TclUtfToUniChar(bytes, dst); } *dst = 0; + if (needed == 0) { + /* For the empty string case, set the string rep. */ + TclInitStringRep(objPtr, tclEmptyStringRep, 0); + } } /* @@ -2744,9 +2770,6 @@ DupStringInternalRep( String *srcStringPtr = GET_STRING(srcPtr); String *copyStringPtr = NULL; - /* TODO: Consider not copying String intrep when just a utf string. */ - /* TODO: Consider not copying extra space. */ - /* * If the src obj is a string of 1-byte Utf chars, then copy the string * rep of the source object and create an "empty" Unicode internal rep for @@ -2754,28 +2777,31 @@ DupStringInternalRep( * the string rep of the new object. */ - if (srcStringPtr->hasUnicode == 0) { - copyStringPtr = (String *) ckalloc((unsigned) sizeof(String)); - copyStringPtr->uallocated = 0; - } else { + if (srcStringPtr->hasUnicode && srcStringPtr->numChars > 0) { + /* Copy the full allocation for the Unicode buffer. */ + /* TODO: consider a more limited copy to the min of + * the current uallocated value and twice the current numChars */ copyStringPtr = stringAlloc(srcStringPtr->uallocated); copyStringPtr->uallocated = srcStringPtr->uallocated; - memcpy(copyStringPtr->unicode, srcStringPtr->unicode, (size_t) srcStringPtr->numChars * sizeof(Tcl_UniChar)); copyStringPtr->unicode[srcStringPtr->numChars] = 0; + copyStringPtr->allocated = 0; + } else { + /* TODO: consider not bothering to make a String intrep. 
*/ + copyStringPtr = (String *) ckalloc((unsigned) sizeof(String)); + copyStringPtr->unicode[0] = 0; + copyStringPtr->uallocated = 0; + /* + * Tricky point: the string value was copied by generic object + * management code, so it doesn't contain any extra bytes that + * might exist in the source object. + */ + copyStringPtr->allocated = copyPtr->length; } copyStringPtr->numChars = srcStringPtr->numChars; copyStringPtr->hasUnicode = srcStringPtr->hasUnicode; - /* - * Tricky point: the string value was copied by generic object management - * code, so it doesn't contain any extra bytes that might exist in the - * source object. - */ - - copyStringPtr->allocated = copyPtr->length; - SET_STRING(copyPtr, copyStringPtr); copyPtr->typePtr = &tclStringType; } @@ -2871,17 +2897,12 @@ ExtendStringRepWithUnicode( } if (numChars == 0) { - if (objPtr->bytes == NULL) { - TclInitStringRep(objPtr, buf, 0); - } return 0; } if (objPtr->bytes == tclEmptyStringRep) { - TclInvalidateStringRep(objPtr); - /*stringPtr->allocated = 0;*/ - } - if (objPtr->bytes) { + objPtr->bytes = NULL; + } else if (objPtr->bytes) { size = objPtr->length; } else { objPtr->length = 0; @@ -2902,6 +2923,7 @@ ExtendStringRepWithUnicode( /* Grow space if needed */ if (size > stringPtr->allocated) { + /* TODO: Growth algorithm for appends ? */ objPtr->bytes = ckrealloc(objPtr->bytes, (unsigned) size+1); stringPtr->allocated = size; } |