diff options
author | dgp <dgp@users.sourceforge.net> | 2009-02-12 03:46:32 (GMT) |
---|---|---|
committer | dgp <dgp@users.sourceforge.net> | 2009-02-12 03:46:32 (GMT) |
commit | 17a69ae4cf88c0a60211daa415a3d7cd1d77238d (patch) | |
tree | 0508003028aa6113b3fdc4380de4f136a51ab199 | |
parent | 95504e489088e89fb179000ccf42553b620183d8 (diff) | |
download | tcl-17a69ae4cf88c0a60211daa415a3d7cd1d77238d.zip tcl-17a69ae4cf88c0a60211daa415a3d7cd1d77238d.tar.gz tcl-17a69ae4cf88c0a60211daa415a3d7cd1d77238d.tar.bz2 |
* generic/tclStringObj.c: Re-implemented AppendUnicodeToUtfRep
so that we no longer pass through Tcl_DStrings, which have their own
sets of problems when lengths overflow the int range. Now
AppendUnicodeToUtfRep and UpdateStringOfString share a common core
routine (ExtendStringRepWithUnicode).
-rw-r--r-- | ChangeLog | 5 | ||||
-rw-r--r-- | generic/tclStringObj.c | 133 |
2 files changed, 84 insertions, 54 deletions
@@ -1,5 +1,10 @@ 2009-02-11 Don Porter <dgp@users.sourceforge.net> + * generic/tclStringObj.c: Re-implemented AppendUnicodeToUtfRep + so that we no longer pass through Tcl_DStrings which have their own + sets of problems when lengths overflow the int range. Now AUTUR and + UpdateStringOfString share a common core routine. + * generic/tclStringObj.c: Changed type of the 'allocated' field * generic/tclTestObj.c: of the String struct (and the TestString counterpart) from size_t to int since only int values are diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c index 8e1aacf..5283e6e 100644 --- a/generic/tclStringObj.c +++ b/generic/tclStringObj.c @@ -33,7 +33,7 @@ * See the file "license.terms" for information on usage and redistribution of * this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclStringObj.c,v 1.97 2009/02/11 19:33:24 dgp Exp $ */ + * RCS: @(#) $Id: tclStringObj.c,v 1.98 2009/02/12 03:46:40 dgp Exp $ */ #include "tclInt.h" #include "tommath.h" @@ -42,6 +42,8 @@ * Prototypes for functions defined later in this file: */ +static void AppendPrintfToObjVA(Tcl_Obj *objPtr, + const char *format, va_list argList); static void AppendUnicodeToUnicodeRep(Tcl_Obj *objPtr, const Tcl_UniChar *unicode, int appendNumChars); static void AppendUnicodeToUtfRep(Tcl_Obj *objPtr, @@ -50,12 +52,12 @@ static void AppendUtfToUnicodeRep(Tcl_Obj *objPtr, const char *bytes, int numBytes); static void AppendUtfToUtfRep(Tcl_Obj *objPtr, const char *bytes, int numBytes); -static void FillUnicodeRep(Tcl_Obj *objPtr); -static void AppendPrintfToObjVA(Tcl_Obj *objPtr, - const char *format, va_list argList); -static void FreeStringInternalRep(Tcl_Obj *objPtr); static void DupStringInternalRep(Tcl_Obj *objPtr, Tcl_Obj *copyPtr); +static void ExtendStringRepWithUnicode(Tcl_Obj *objPtr, + const Tcl_UniChar *unicode, int numChars); +static void FillUnicodeRep(Tcl_Obj *objPtr); +static void FreeStringInternalRep(Tcl_Obj *objPtr); static int 
SetStringFromAny(Tcl_Interp *interp, Tcl_Obj *objPtr); static void SetUnicodeObj(Tcl_Obj *objPtr, const Tcl_UniChar *unicode, int numChars); @@ -1407,6 +1409,7 @@ AppendUnicodeToUnicodeRep( appendNumChars * sizeof(Tcl_UniChar)); stringPtr->unicode[numChars] = 0; stringPtr->numChars = numChars; + stringPtr->allocated = 0; TclInvalidateStringRep(objPtr); } @@ -1434,25 +1437,13 @@ AppendUnicodeToUtfRep( const Tcl_UniChar *unicode, /* String to convert to UTF. */ int numChars) /* Number of chars of "unicode" to convert. */ { - Tcl_DString dsPtr; - const char *bytes; + String *stringPtr = GET_STRING(objPtr); - if (numChars < 0) { - numChars = 0; - if (unicode) { - while (unicode[numChars] != 0) { - numChars++; - } - } - } - if (numChars == 0) { - return; - } + ExtendStringRepWithUnicode(objPtr, unicode, numChars); - Tcl_DStringInit(&dsPtr); - bytes = Tcl_UniCharToUtfDString(unicode, numChars, &dsPtr); - AppendUtfToUtfRep(objPtr, bytes, Tcl_DStringLength(&dsPtr)); - Tcl_DStringFree(&dsPtr); + /* Invalidate the unicode rep */ + stringPtr->numChars = -1; + stringPtr->hasUnicode = 0; } /* @@ -2661,9 +2652,12 @@ TclStringObjReverse( source[i++] = tmp; } TclInvalidateStringRep(objPtr); + stringPtr->allocated = 0; return objPtr; } + /* TODO: Document the dangers here! */ + bytes = TclGetString(objPtr); if (Tcl_IsShared(objPtr)) { char *dest; @@ -2881,46 +2875,77 @@ static void UpdateStringOfString( Tcl_Obj *objPtr) /* Object with string rep to update. */ { - int i, size; - Tcl_UniChar *unicode; - char dummy[TCL_UTF_MAX]; - char *dst; - String *stringPtr; + String *stringPtr = GET_STRING(objPtr); + ExtendStringRepWithUnicode(objPtr, stringPtr->unicode, stringPtr->numChars); + return; +} - stringPtr = GET_STRING(objPtr); - if (stringPtr->numChars <= 0) { - /* - * If there is no Unicode rep, or the string has 0 chars, then set - * the string rep to an empty string. 
- */ +static void +ExtendStringRepWithUnicode( + Tcl_Obj *objPtr, + const Tcl_UniChar *unicode, + int numChars) +{ + int i, size = 0; + char *dst, buf[TCL_UTF_MAX]; + + /* Pre-condition: this is the "string" Tcl_ObjType */ + String *stringPtr = GET_STRING(objPtr); - objPtr->bytes = tclEmptyStringRep; - objPtr->length = 0; - return; + if (numChars < 0) { + numChars = 0; + if (unicode) { + while (numChars >= 0 && unicode[numChars] != 0) { + numChars++; + } + if (numChars < 0) { + Tcl_Panic("max length for a Tcl value (%d chars) exceeded", + INT_MAX); + } } + } - unicode = stringPtr->unicode; + if (numChars == 0) { + if (objPtr->bytes == NULL) { + TclInitStringRep(objPtr, buf, 0); + } + return; + } - /* - * Translate the Unicode string to UTF. "size" will hold the amount of - * space the UTF string needs. - */ + if (objPtr->bytes == tclEmptyStringRep) { + TclInvalidateStringRep(objPtr); + /*stringPtr->allocated = 0;*/ + } + if (objPtr->bytes) { + size = objPtr->length; + } else { + objPtr->length = 0; + } + + /* + * TODO: Consider fast overallocation of numChars*TCL_UTF_MAX bytes. + * Then we could make one pass instead of two. Trade away memory + * efficiency for speed. 
+ */ - size = 0; - for (i = 0; i < stringPtr->numChars; i++) { - size += Tcl_UniCharToUtf((int) unicode[i], dummy); - } + for (i = 0; i < numChars && size >= 0; i++) { + size += Tcl_UniCharToUtf((int) unicode[i], buf); + } + if (size < 0) { + Tcl_Panic("max size for a Tcl value (%d bytes) exceeded", INT_MAX); + } - dst = (char *) ckalloc((unsigned) (size + 1)); - objPtr->bytes = dst; - objPtr->length = size; + /* Grow space if needed */ + if (size > stringPtr->allocated) { + objPtr->bytes = ckrealloc(objPtr->bytes, (unsigned) size+1); stringPtr->allocated = size; - - for (i = 0; i < stringPtr->numChars; i++) { - dst += Tcl_UniCharToUtf(unicode[i], dst); - } - *dst = '\0'; - return; + } + dst = objPtr->bytes + objPtr->length; + for (i = 0; i < numChars; i++) { + dst += Tcl_UniCharToUtf((int) unicode[i], dst); + } + objPtr->length = size; + objPtr->bytes[size] = '\0'; } /* |