summary refs log tree commit diff stats
diff options
context:
space:
mode:
author dgp <dgp@users.sourceforge.net> 2009-02-12 03:46:32 (GMT)
committer dgp <dgp@users.sourceforge.net> 2009-02-12 03:46:32 (GMT)
commit 17a69ae4cf88c0a60211daa415a3d7cd1d77238d (patch)
tree 0508003028aa6113b3fdc4380de4f136a51ab199
parent 95504e489088e89fb179000ccf42553b620183d8 (diff)
download tcl-17a69ae4cf88c0a60211daa415a3d7cd1d77238d.zip
tcl-17a69ae4cf88c0a60211daa415a3d7cd1d77238d.tar.gz
tcl-17a69ae4cf88c0a60211daa415a3d7cd1d77238d.tar.bz2
* generic/tclStringObj.c: Re-implemented AppendUnicodeToUtfRep
so that we no longer pass through Tcl_DStrings, which have their own sets of problems when lengths overflow the int range. Now AUTUR (AppendUnicodeToUtfRep) and UpdateStringOfString share a common core routine.
-rw-r--r-- ChangeLog 5
-rw-r--r-- generic/tclStringObj.c 133
2 files changed, 84 insertions, 54 deletions
diff --git a/ChangeLog b/ChangeLog
index cf18b17..cb2db93 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,10 @@
2009-02-11 Don Porter <dgp@users.sourceforge.net>
+ * generic/tclStringObj.c: Re-implemented AppendUnicodeToUtfRep
+ so that we no longer pass through Tcl_DStrings which have their own
+ sets of problems when lengths overflow the int range. Now AUTUR and
+ UpdateStringOfString share a common core routine.
+
* generic/tclStringObj.c: Changed type of the 'allocated' field
* generic/tclTestObj.c: of the String struct (and the
TestString counterpart) from size_t to int since only int values are
diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c
index 8e1aacf..5283e6e 100644
--- a/generic/tclStringObj.c
+++ b/generic/tclStringObj.c
@@ -33,7 +33,7 @@
* See the file "license.terms" for information on usage and redistribution of
* this file, and for a DISCLAIMER OF ALL WARRANTIES.
*
- * RCS: @(#) $Id: tclStringObj.c,v 1.97 2009/02/11 19:33:24 dgp Exp $ */
+ * RCS: @(#) $Id: tclStringObj.c,v 1.98 2009/02/12 03:46:40 dgp Exp $ */
#include "tclInt.h"
#include "tommath.h"
@@ -42,6 +42,8 @@
* Prototypes for functions defined later in this file:
*/
+static void AppendPrintfToObjVA(Tcl_Obj *objPtr,
+ const char *format, va_list argList);
static void AppendUnicodeToUnicodeRep(Tcl_Obj *objPtr,
const Tcl_UniChar *unicode, int appendNumChars);
static void AppendUnicodeToUtfRep(Tcl_Obj *objPtr,
@@ -50,12 +52,12 @@ static void AppendUtfToUnicodeRep(Tcl_Obj *objPtr,
const char *bytes, int numBytes);
static void AppendUtfToUtfRep(Tcl_Obj *objPtr,
const char *bytes, int numBytes);
-static void FillUnicodeRep(Tcl_Obj *objPtr);
-static void AppendPrintfToObjVA(Tcl_Obj *objPtr,
- const char *format, va_list argList);
-static void FreeStringInternalRep(Tcl_Obj *objPtr);
static void DupStringInternalRep(Tcl_Obj *objPtr,
Tcl_Obj *copyPtr);
+static void ExtendStringRepWithUnicode(Tcl_Obj *objPtr,
+ const Tcl_UniChar *unicode, int numChars);
+static void FillUnicodeRep(Tcl_Obj *objPtr);
+static void FreeStringInternalRep(Tcl_Obj *objPtr);
static int SetStringFromAny(Tcl_Interp *interp, Tcl_Obj *objPtr);
static void SetUnicodeObj(Tcl_Obj *objPtr,
const Tcl_UniChar *unicode, int numChars);
@@ -1407,6 +1409,7 @@ AppendUnicodeToUnicodeRep(
appendNumChars * sizeof(Tcl_UniChar));
stringPtr->unicode[numChars] = 0;
stringPtr->numChars = numChars;
+ stringPtr->allocated = 0;
TclInvalidateStringRep(objPtr);
}
@@ -1434,25 +1437,13 @@ AppendUnicodeToUtfRep(
const Tcl_UniChar *unicode, /* String to convert to UTF. */
int numChars) /* Number of chars of "unicode" to convert. */
{
- Tcl_DString dsPtr;
- const char *bytes;
+ String *stringPtr = GET_STRING(objPtr);
- if (numChars < 0) {
- numChars = 0;
- if (unicode) {
- while (unicode[numChars] != 0) {
- numChars++;
- }
- }
- }
- if (numChars == 0) {
- return;
- }
+ ExtendStringRepWithUnicode(objPtr, unicode, numChars);
- Tcl_DStringInit(&dsPtr);
- bytes = Tcl_UniCharToUtfDString(unicode, numChars, &dsPtr);
- AppendUtfToUtfRep(objPtr, bytes, Tcl_DStringLength(&dsPtr));
- Tcl_DStringFree(&dsPtr);
+ /* Invalidate the unicode rep */
+ stringPtr->numChars = -1;
+ stringPtr->hasUnicode = 0;
}
/*
@@ -2661,9 +2652,12 @@ TclStringObjReverse(
source[i++] = tmp;
}
TclInvalidateStringRep(objPtr);
+ stringPtr->allocated = 0;
return objPtr;
}
+ /* TODO: Document the dangers here! */
+
bytes = TclGetString(objPtr);
if (Tcl_IsShared(objPtr)) {
char *dest;
@@ -2881,46 +2875,77 @@ static void
UpdateStringOfString(
Tcl_Obj *objPtr) /* Object with string rep to update. */
{
- int i, size;
- Tcl_UniChar *unicode;
- char dummy[TCL_UTF_MAX];
- char *dst;
- String *stringPtr;
+ String *stringPtr = GET_STRING(objPtr);
+ ExtendStringRepWithUnicode(objPtr, stringPtr->unicode, stringPtr->numChars);
+ return;
+}
- stringPtr = GET_STRING(objPtr);
- if (stringPtr->numChars <= 0) {
- /*
- * If there is no Unicode rep, or the string has 0 chars, then set
- * the string rep to an empty string.
- */
+static void
+ExtendStringRepWithUnicode(
+ Tcl_Obj *objPtr,
+ const Tcl_UniChar *unicode,
+ int numChars)
+{
+ int i, size = 0;
+ char *dst, buf[TCL_UTF_MAX];
+
+ /* Pre-condition: this is the "string" Tcl_ObjType */
+ String *stringPtr = GET_STRING(objPtr);
- objPtr->bytes = tclEmptyStringRep;
- objPtr->length = 0;
- return;
+ if (numChars < 0) {
+ numChars = 0;
+ if (unicode) {
+ while (numChars >= 0 && unicode[numChars] != 0) {
+ numChars++;
+ }
+ if (numChars < 0) {
+ Tcl_Panic("max length for a Tcl value (%d chars) exceeded",
+ INT_MAX);
+ }
}
+ }
- unicode = stringPtr->unicode;
+ if (numChars == 0) {
+ if (objPtr->bytes == NULL) {
+ TclInitStringRep(objPtr, buf, 0);
+ }
+ return;
+ }
- /*
- * Translate the Unicode string to UTF. "size" will hold the amount of
- * space the UTF string needs.
- */
+ if (objPtr->bytes == tclEmptyStringRep) {
+ TclInvalidateStringRep(objPtr);
+ /*stringPtr->allocated = 0;*/
+ }
+ if (objPtr->bytes) {
+ size = objPtr->length;
+ } else {
+ objPtr->length = 0;
+ }
+
+ /*
+ * TODO: Consider fast overallocation of numChars*TCL_UTF_MAX bytes.
+ * Then we could make one pass instead of two. Trade away memory
+ * efficiency for speed.
+ */
- size = 0;
- for (i = 0; i < stringPtr->numChars; i++) {
- size += Tcl_UniCharToUtf((int) unicode[i], dummy);
- }
+ for (i = 0; i < numChars && size >= 0; i++) {
+ size += Tcl_UniCharToUtf((int) unicode[i], buf);
+ }
+ if (size < 0) {
+ Tcl_Panic("max size for a Tcl value (%d bytes) exceeded", INT_MAX);
+ }
- dst = (char *) ckalloc((unsigned) (size + 1));
- objPtr->bytes = dst;
- objPtr->length = size;
+ /* Grow space if needed */
+ if (size > stringPtr->allocated) {
+ objPtr->bytes = ckrealloc(objPtr->bytes, (unsigned) size+1);
stringPtr->allocated = size;
-
- for (i = 0; i < stringPtr->numChars; i++) {
- dst += Tcl_UniCharToUtf(unicode[i], dst);
- }
- *dst = '\0';
- return;
+ }
+ dst = objPtr->bytes + objPtr->length;
+ for (i = 0; i < numChars; i++) {
+ dst += Tcl_UniCharToUtf((int) unicode[i], dst);
+ }
+ objPtr->length = size;
+ objPtr->bytes[size] = '\0';
}
/*