summaryrefslogtreecommitdiffstats
path: root/generic
diff options
context:
space:
mode:
authorhershey <hershey>1999-06-15 01:16:21 (GMT)
committerhershey <hershey>1999-06-15 01:16:21 (GMT)
commit73d440a8ed4e3ef4fd1c30ce5708061a261396dc (patch)
treedc505f5e222817db5fe70f5b9eb5d01607656d12 /generic
parent9710a67498395708dbbe9b7e401cec3c6227b394 (diff)
downloadtcl-73d440a8ed4e3ef4fd1c30ce5708061a261396dc.zip
tcl-73d440a8ed4e3ef4fd1c30ce5708061a261396dc.tar.gz
tcl-73d440a8ed4e3ef4fd1c30ce5708061a261396dc.tar.bz2
Merged String and Unicode object types. Added new functions to
the puplic API: Tcl_NewUnicodeObj, Tcl_SetUnicodeObj, Tcl_GetUnicode, Tcl_GetUniChar, Tcl_GetCharLength, Tcl_GetRange, Tcl_AppendUnicodeToObj. Note: some stringObj tests are still failing--the teststringobj command still needs to be updated.
Diffstat (limited to 'generic')
-rw-r--r--generic/tcl.decls26
-rw-r--r--generic/tclCmdMZ.c32
-rw-r--r--generic/tclDecls.h57
-rw-r--r--generic/tclInt.h18
-rw-r--r--generic/tclObj.c3
-rw-r--r--generic/tclRegexp.c6
-rw-r--r--generic/tclStringObj.c1077
-rw-r--r--generic/tclStubInit.c9
-rw-r--r--generic/tclUnicodeObj.c882
9 files changed, 1113 insertions, 997 deletions
diff --git a/generic/tcl.decls b/generic/tcl.decls
index 8447520..8b9d46d 100644
--- a/generic/tcl.decls
+++ b/generic/tcl.decls
@@ -10,7 +10,7 @@
# See the file "license.terms" for information on usage and redistribution
# of this file, and for a DISCLAIMER OF ALL WARRANTIES.
#
-# RCS: @(#) $Id: tcl.decls,v 1.15 1999/06/10 04:28:49 stanton Exp $
+# RCS: @(#) $Id: tcl.decls,v 1.16 1999/06/15 01:16:21 hershey Exp $
library tcl
@@ -1284,7 +1284,29 @@ declare 376 generic {
declare 377 generic {
void Tcl_RegExpGetInfo(Tcl_RegExp regexp, Tcl_RegExpInfo *infoPtr)
}
-
+declare 378 generic {
+ Tcl_Obj * Tcl_NewUnicodeObj(Tcl_UniChar *unicode, int numChars)
+}
+declare 379 generic {
+ void Tcl_SetUnicodeObj(Tcl_Obj *objPtr, Tcl_UniChar *unicode, \
+ int numChars)
+}
+declare 380 generic {
+ int Tcl_GetCharLength (Tcl_Obj *objPtr)
+}
+declare 381 generic {
+ Tcl_UniChar Tcl_GetUniChar (Tcl_Obj *objPtr, int index)
+}
+declare 382 generic {
+ Tcl_UniChar * Tcl_GetUnicode (Tcl_Obj *objPtr)
+}
+declare 383 generic {
+ Tcl_Obj * Tcl_GetRange (Tcl_Obj *objPtr, int first, int last)
+}
+declare 384 generic {
+ void Tcl_AppendUnicodeToObj (register Tcl_Obj *objPtr, \
+ Tcl_UniChar *unicode, int length)
+}
##############################################################################
diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c
index 4f20815..3746cfc 100644
--- a/generic/tclCmdMZ.c
+++ b/generic/tclCmdMZ.c
@@ -13,7 +13,7 @@
* See the file "license.terms" for information on usage and redistribution
* of this file, and for a DISCLAIMER OF ALL WARRANTIES.
*
- * RCS: @(#) $Id: tclCmdMZ.c,v 1.14 1999/06/10 04:28:50 stanton Exp $
+ * RCS: @(#) $Id: tclCmdMZ.c,v 1.15 1999/06/15 01:16:22 hershey Exp $
*/
#include "tclInt.h"
@@ -274,7 +274,7 @@ Tcl_RegexpObjCmd(dummy, interp, objc, objv)
newPtr = Tcl_NewListObj(2, objs);
} else {
if (i <= info.nsubs) {
- newPtr = TclGetRangeFromObj(objPtr, info.matches[i].start,
+ newPtr = Tcl_GetRange(objPtr, info.matches[i].start,
info.matches[i].end - 1);
} else {
newPtr = Tcl_NewObj();
@@ -385,8 +385,8 @@ Tcl_RegsubObjCmd(dummy, interp, objc, objv)
Tcl_IncrRefCount(resultPtr);
objPtr = objv[1];
- wlen = TclGetUnicodeLengthFromObj(objPtr);
- wstring = TclGetUnicodeFromObj(objPtr);
+ wlen = Tcl_GetCharLength(objPtr);
+ wstring = Tcl_GetUnicode(objPtr);
subspec = Tcl_GetString(objv[2]);
varPtr = objv[3];
@@ -430,7 +430,7 @@ Tcl_RegsubObjCmd(dummy, interp, objc, objv)
Tcl_RegExpGetInfo(regExpr, &info);
start = info.matches[0].start;
end = info.matches[0].end;
- TclAppendUnicodeToObj(resultPtr, wstring + offset, start);
+ Tcl_AppendUnicodeToObj(resultPtr, wstring + offset, start);
/*
* Append the subSpec argument to the variable, making appropriate
@@ -468,7 +468,7 @@ Tcl_RegsubObjCmd(dummy, interp, objc, objv)
subStart = info.matches[index].start;
subEnd = info.matches[index].end;
if ((subStart >= 0) && (subEnd >= 0)) {
- TclAppendUnicodeToObj(resultPtr, wstring + offset + subStart,
+ Tcl_AppendUnicodeToObj(resultPtr, wstring + offset + subStart,
subEnd - subStart);
}
if (*src == '\\') {
@@ -485,7 +485,7 @@ Tcl_RegsubObjCmd(dummy, interp, objc, objv)
* in order to prevent infinite loops.
*/
- TclAppendUnicodeToObj(resultPtr, wstring + offset, 1);
+ Tcl_AppendUnicodeToObj(resultPtr, wstring + offset, 1);
offset++;
}
offset += end;
@@ -500,7 +500,7 @@ Tcl_RegsubObjCmd(dummy, interp, objc, objv)
*/
if ((offset < wlen) || (numMatches == 0)) {
- TclAppendUnicodeToObj(resultPtr, wstring + offset, wlen - offset);
+ Tcl_AppendUnicodeToObj(resultPtr, wstring + offset, wlen - offset);
}
if (Tcl_ObjSetVar2(interp, varPtr, NULL, resultPtr, 0) == NULL) {
Tcl_AppendResult(interp, "couldn't set variable \"",
@@ -973,8 +973,11 @@ Tcl_StringObjCmd(dummy, interp, objc, objv)
if (length2 == utflen) {
/* no unicode chars */
string2 += start;
+ length2 -= start;
} else {
- string2 = Tcl_UtfAtIndex(string2, start);
+ char *s = Tcl_UtfAtIndex(string2, start);
+ length2 -= s - string2;
+ string2 = s;
}
}
}
@@ -1049,14 +1052,14 @@ Tcl_StringObjCmd(dummy, interp, objc, objv)
* 'end' really means.
*/
- length2 = TclGetUnicodeLengthFromObj(objv[2]);
+ length2 = Tcl_GetCharLength(objv[2]);
if (TclGetIntForIndex(interp, objv[3], length2 - 1,
&index) != TCL_OK) {
return TCL_ERROR;
}
if ((index >= 0) && (index < length2)) {
- unichar = TclGetUniCharFromObj(objv[2], index);
+ unichar = Tcl_GetUniChar(objv[2], index);
length2 = Tcl_UniCharToUtf((int)unichar, buf);
Tcl_SetStringObj(resultPtr, buf, length2);
}
@@ -1432,7 +1435,7 @@ Tcl_StringObjCmd(dummy, interp, objc, objv)
Tcl_SetIntObj(resultPtr, length1);
} else {
Tcl_SetIntObj(resultPtr,
- TclGetUnicodeLengthFromObj(objv[2]));
+ Tcl_GetCharLength(objv[2]));
}
}
break;
@@ -1611,7 +1614,7 @@ Tcl_StringObjCmd(dummy, interp, objc, objv)
* create a result object.
*/
- length2 = TclGetUnicodeLengthFromObj(objv[2]) - 1;
+ length2 = Tcl_GetCharLength(objv[2]) - 1;
if (TclGetIntForIndex(interp, objv[3], length2,
&first) != TCL_OK) {
@@ -1628,7 +1631,7 @@ Tcl_StringObjCmd(dummy, interp, objc, objv)
last = length1 - 1;
}
if (last >= first) {
- resultPtr = TclGetRangeFromObj(objv[2], first, last);
+ resultPtr = Tcl_GetRange(objv[2], first, last);
Tcl_SetObjResult(interp, resultPtr);
}
}
@@ -1761,6 +1764,7 @@ Tcl_StringObjCmd(dummy, interp, objc, objv)
Tcl_SetStringObj(resultPtr, string1, start - string1);
Tcl_AppendToObj(resultPtr, string2, length2);
Tcl_AppendToObj(resultPtr, end, -1);
+ ckfree(string2);
}
break;
diff --git a/generic/tclDecls.h b/generic/tclDecls.h
index 2a7ac93..8cec11a 100644
--- a/generic/tclDecls.h
+++ b/generic/tclDecls.h
@@ -8,7 +8,7 @@
* See the file "license.terms" for information on usage and redistribution
* of this file, and for a DISCLAIMER OF ALL WARRANTIES.
*
- * RCS: @(#) $Id: tclDecls.h,v 1.15 1999/06/10 04:28:50 stanton Exp $
+ * RCS: @(#) $Id: tclDecls.h,v 1.16 1999/06/15 01:16:22 hershey Exp $
*/
#ifndef _TCLDECLS
@@ -1175,6 +1175,26 @@ EXTERN int Tcl_RegExpMatchObj _ANSI_ARGS_((Tcl_Interp * interp,
/* 377 */
EXTERN void Tcl_RegExpGetInfo _ANSI_ARGS_((Tcl_RegExp regexp,
Tcl_RegExpInfo * infoPtr));
+/* 378 */
+EXTERN Tcl_Obj * Tcl_NewUnicodeObj _ANSI_ARGS_((Tcl_UniChar * unicode,
+ int numChars));
+/* 379 */
+EXTERN void Tcl_SetUnicodeObj _ANSI_ARGS_((Tcl_Obj * objPtr,
+ Tcl_UniChar * unicode, int numChars));
+/* 380 */
+EXTERN int Tcl_GetCharLength _ANSI_ARGS_((Tcl_Obj * objPtr));
+/* 381 */
+EXTERN Tcl_UniChar Tcl_GetUniChar _ANSI_ARGS_((Tcl_Obj * objPtr,
+ int index));
+/* 382 */
+EXTERN Tcl_UniChar * Tcl_GetUnicode _ANSI_ARGS_((Tcl_Obj * objPtr));
+/* 383 */
+EXTERN Tcl_Obj * Tcl_GetRange _ANSI_ARGS_((Tcl_Obj * objPtr,
+ int first, int last));
+/* 384 */
+EXTERN void Tcl_AppendUnicodeToObj _ANSI_ARGS_((
+ register Tcl_Obj * objPtr,
+ Tcl_UniChar * unicode, int length));
typedef struct TclStubHooks {
struct TclPlatStubs *tclPlatStubs;
@@ -1620,6 +1640,13 @@ typedef struct TclStubs {
int (*tcl_UniCharIsPunct) _ANSI_ARGS_((int ch)); /* 375 */
int (*tcl_RegExpMatchObj) _ANSI_ARGS_((Tcl_Interp * interp, Tcl_RegExp regexp, Tcl_Obj * objPtr, int offset, int nmatches, int flags)); /* 376 */
void (*tcl_RegExpGetInfo) _ANSI_ARGS_((Tcl_RegExp regexp, Tcl_RegExpInfo * infoPtr)); /* 377 */
+ Tcl_Obj * (*tcl_NewUnicodeObj) _ANSI_ARGS_((Tcl_UniChar * unicode, int numChars)); /* 378 */
+ void (*tcl_SetUnicodeObj) _ANSI_ARGS_((Tcl_Obj * objPtr, Tcl_UniChar * unicode, int numChars)); /* 379 */
+ int (*tcl_GetCharLength) _ANSI_ARGS_((Tcl_Obj * objPtr)); /* 380 */
+ Tcl_UniChar (*tcl_GetUniChar) _ANSI_ARGS_((Tcl_Obj * objPtr, int index)); /* 381 */
+ Tcl_UniChar * (*tcl_GetUnicode) _ANSI_ARGS_((Tcl_Obj * objPtr)); /* 382 */
+ Tcl_Obj * (*tcl_GetRange) _ANSI_ARGS_((Tcl_Obj * objPtr, int first, int last)); /* 383 */
+ void (*tcl_AppendUnicodeToObj) _ANSI_ARGS_((register Tcl_Obj * objPtr, Tcl_UniChar * unicode, int length)); /* 384 */
} TclStubs;
#ifdef __cplusplus
@@ -3168,6 +3195,34 @@ extern TclStubs *tclStubsPtr;
#define Tcl_RegExpGetInfo \
(tclStubsPtr->tcl_RegExpGetInfo) /* 377 */
#endif
+#ifndef Tcl_NewUnicodeObj
+#define Tcl_NewUnicodeObj \
+ (tclStubsPtr->tcl_NewUnicodeObj) /* 378 */
+#endif
+#ifndef Tcl_SetUnicodeObj
+#define Tcl_SetUnicodeObj \
+ (tclStubsPtr->tcl_SetUnicodeObj) /* 379 */
+#endif
+#ifndef Tcl_GetCharLength
+#define Tcl_GetCharLength \
+ (tclStubsPtr->tcl_GetCharLength) /* 380 */
+#endif
+#ifndef Tcl_GetUniChar
+#define Tcl_GetUniChar \
+ (tclStubsPtr->tcl_GetUniChar) /* 381 */
+#endif
+#ifndef Tcl_GetUnicode
+#define Tcl_GetUnicode \
+ (tclStubsPtr->tcl_GetUnicode) /* 382 */
+#endif
+#ifndef Tcl_GetRange
+#define Tcl_GetRange \
+ (tclStubsPtr->tcl_GetRange) /* 383 */
+#endif
+#ifndef Tcl_AppendUnicodeToObj
+#define Tcl_AppendUnicodeToObj \
+ (tclStubsPtr->tcl_AppendUnicodeToObj) /* 384 */
+#endif
#endif /* defined(USE_TCL_STUBS) && !defined(USE_TCL_STUB_PROCS) */
diff --git a/generic/tclInt.h b/generic/tclInt.h
index d30d439..cfa587b 100644
--- a/generic/tclInt.h
+++ b/generic/tclInt.h
@@ -11,7 +11,7 @@
* See the file "license.terms" for information on usage and redistribution
* of this file, and for a DISCLAIMER OF ALL WARRANTIES.
*
- * RCS: @(#) $Id: tclInt.h,v 1.32 1999/06/10 04:28:51 stanton Exp $
+ * RCS: @(#) $Id: tclInt.h,v 1.33 1999/06/15 01:16:23 hershey Exp $
*/
#ifndef _TCLINT
@@ -1509,7 +1509,6 @@ extern Tcl_ObjType tclIntType;
extern Tcl_ObjType tclListType;
extern Tcl_ObjType tclProcBodyType;
extern Tcl_ObjType tclStringType;
-extern Tcl_ObjType tclUnicodeType;
/*
* The head of the list of free Tcl objects, and the total number of Tcl
@@ -1543,12 +1542,6 @@ EXTERN int TclAccess _ANSI_ARGS_((CONST char *path,
EXTERN int TclAccessDeleteProc _ANSI_ARGS_((TclAccessProc_ *proc));
EXTERN int TclAccessInsertProc _ANSI_ARGS_((TclAccessProc_ *proc));
EXTERN void TclAllocateFreeObjects _ANSI_ARGS_((void));
-EXTERN Tcl_Obj * TclAppendObjToUnicodeObj _ANSI_ARGS_((
- register Tcl_Obj *targetObjPtr,
- register Tcl_Obj *srcObjPtr));
-EXTERN void TclAppendUnicodeToObj _ANSI_ARGS_((
- register Tcl_Obj *objPtr, Tcl_UniChar *unichars,
- int length));
EXTERN int TclArraySet _ANSI_ARGS_((Tcl_Interp *interp,
Tcl_Obj *arrayNameObj, Tcl_Obj *arrayElemObj));
EXTERN int TclCleanupChildren _ANSI_ARGS_((Tcl_Interp *interp,
@@ -1641,13 +1634,6 @@ EXTERN int TclGetOpenMode _ANSI_ARGS_((Tcl_Interp *interp,
char *string, int *seekFlagPtr));
EXTERN Tcl_Command TclGetOriginalCommand _ANSI_ARGS_((
Tcl_Command command));
-EXTERN Tcl_Obj* TclGetRangeFromObj _ANSI_ARGS_((Tcl_Obj *objPtr,
- int first, int last));
-EXTERN Tcl_UniChar TclGetUniCharFromObj _ANSI_ARGS_((Tcl_Obj *objPtr,
- int index));
-EXTERN Tcl_UniChar * TclGetUnicodeFromObj _ANSI_ARGS_((Tcl_Obj *objPtr));
-EXTERN int TclGetUnicodeLengthFromObj _ANSI_ARGS_((
- Tcl_Obj *objPtr));
EXTERN int TclGlob _ANSI_ARGS_((Tcl_Interp *interp,
char *pattern, int noComplain));
EXTERN int TclGlobalInvoke _ANSI_ARGS_((Tcl_Interp *interp,
@@ -1695,8 +1681,6 @@ EXTERN Var * TclLookupVar _ANSI_ARGS_((Tcl_Interp *interp,
EXTERN int TclMathInProgress _ANSI_ARGS_((void));
EXTERN int TclNeedSpace _ANSI_ARGS_((char *start, char *end));
EXTERN Tcl_Obj * TclNewProcBodyObj _ANSI_ARGS_((Proc *procPtr));
-EXTERN Tcl_Obj * TclNewUnicodeObj _ANSI_ARGS_((Tcl_UniChar *unichars,
- int numChars));
EXTERN int TclObjCommandComplete _ANSI_ARGS_((Tcl_Obj *cmdPtr));
EXTERN int TclObjInterpProc _ANSI_ARGS_((ClientData clientData,
Tcl_Interp *interp, int objc,
diff --git a/generic/tclObj.c b/generic/tclObj.c
index 423df28..67be178 100644
--- a/generic/tclObj.c
+++ b/generic/tclObj.c
@@ -10,7 +10,7 @@
* See the file "license.terms" for information on usage and redistribution
* of this file, and for a DISCLAIMER OF ALL WARRANTIES.
*
- * RCS: @(#) $Id: tclObj.c,v 1.8 1999/06/08 02:59:25 hershey Exp $
+ * RCS: @(#) $Id: tclObj.c,v 1.9 1999/06/15 01:16:23 hershey Exp $
*/
#include "tclInt.h"
@@ -138,7 +138,6 @@ TclInitObjSubsystem()
Tcl_RegisterObjType(&tclListType);
Tcl_RegisterObjType(&tclByteCodeType);
Tcl_RegisterObjType(&tclProcBodyType);
- Tcl_RegisterObjType(&tclUnicodeType);
#ifdef TCL_COMPILE_STATS
Tcl_MutexLock(&tclObjMutex);
diff --git a/generic/tclRegexp.c b/generic/tclRegexp.c
index 7590c8d..3e28224 100644
--- a/generic/tclRegexp.c
+++ b/generic/tclRegexp.c
@@ -10,7 +10,7 @@
* See the file "license.terms" for information on usage and redistribution
* of this file, and for a DISCLAIMER OF ALL WARRANTIES.
*
- * RCS: @(#) $Id: tclRegexp.c,v 1.7 1999/06/10 04:28:51 stanton Exp $
+ * RCS: @(#) $Id: tclRegexp.c,v 1.8 1999/06/15 01:16:24 hershey Exp $
*/
#include "tclInt.h"
@@ -429,8 +429,8 @@ Tcl_RegExpMatchObj(interp, re, objPtr, offset, nmatches, flags)
Tcl_IncrRefCount(objPtr);
- udata = TclGetUnicodeFromObj(objPtr);
- length = TclGetUnicodeLengthFromObj(objPtr);
+ udata = Tcl_GetUnicode(objPtr);
+ length = Tcl_GetCharLength(objPtr);
/*
* Save the target object so we can extract strings from it later.
diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c
index c70bcb9..8dc6e90 100644
--- a/generic/tclStringObj.c
+++ b/generic/tclStringObj.c
@@ -9,13 +9,29 @@
* track of the extra space. Objects with this internal
* representation are called "expandable string objects".
*
+ * Since some string operations work with UTF strings and others require Unicode
+ format, the string obeject type stores one or both formats. If the object is
+ created with a Unicode string, then UTF form is not stored until it is
+ required by a string operation. The string object always stores the number of
+ characters, so if the object is created with a UTF string, we automatically
+ convert it to unicode (as this costs little more than
+
+A Unicode string
+ * is an internationalized string. Conceptually, a Unicode string is an
+ * array of 16-bit quantities organized as a sequence of properly formed
+ * UTF-8 characters. There is a one-to-one map between Unicode and UTF
+ * characters. The Unicode ojbect is opitmized for the case where each UTF
+ * char in a string is only one byte. In this case, we store the value of
+ * numChars, but we don't copy the bytes to the unicodeObj->chars. Before
+ * accessing obj->chars, check if unicodeObj->numChars == obj->length.
+ *
* Copyright (c) 1995-1997 Sun Microsystems, Inc.
* Copyright (c) 1999 by Scriptics Corporation.
*
* See the file "license.terms" for information on usage and redistribution
* of this file, and for a DISCLAIMER OF ALL WARRANTIES.
*
- * RCS: @(#) $Id: tclStringObj.c,v 1.7 1999/06/08 02:59:25 hershey Exp $
+ * RCS: @(#) $Id: tclStringObj.c,v 1.8 1999/06/15 01:16:25 hershey Exp $
*/
#include "tclInt.h"
@@ -24,7 +40,20 @@
* Prototypes for procedures defined later in this file:
*/
-static void ConvertToStringType _ANSI_ARGS_((Tcl_Obj *objPtr));
+static void AppendUnicodeToUnicodeRep _ANSI_ARGS_((
+ Tcl_Obj *objPtr, Tcl_UniChar *unicode,
+ int appendNumChars));
+static void AppendUnicodeToUtfRep _ANSI_ARGS_((
+ Tcl_Obj *objPtr, Tcl_UniChar *unicode,
+ int numChars));
+static void AppendUtfToUnicodeRep _ANSI_ARGS_((Tcl_Obj *objPtr,
+ char *bytes, int numBytes));
+static void AppendUtfToUtfRep _ANSI_ARGS_((Tcl_Obj *objPtr,
+ char *bytes, int numBytes));
+
+static void FillUnicodeRep _ANSI_ARGS_((Tcl_Obj *objPtr));
+
+static void FreeStringInternalRep _ANSI_ARGS_((Tcl_Obj *objPtr));
static void DupStringInternalRep _ANSI_ARGS_((Tcl_Obj *objPtr,
Tcl_Obj *copyPtr));
static int SetStringFromAny _ANSI_ARGS_((Tcl_Interp *interp,
@@ -38,11 +67,46 @@ static void UpdateStringOfString _ANSI_ARGS_((Tcl_Obj *objPtr));
Tcl_ObjType tclStringType = {
"string", /* name */
- (Tcl_FreeInternalRepProc *) NULL, /* freeIntRepProc */
+ FreeStringInternalRep, /* freeIntRepPro */
DupStringInternalRep, /* dupIntRepProc */
UpdateStringOfString, /* updateStringProc */
SetStringFromAny /* setFromAnyProc */
};
+
+/*
+ * The following structure is the internal rep for a String object.
+ * It keeps track of how much memory has been used and how much has been
+ * allocated for the Unicode and UTF string to enable growing and
+ * shrinking of the UTF and Unicode reps of the String object with fewer
+ * mallocs. To optimize string length and indexing operations, this
+ * structure also stores the number of characters (same of UTF and Unicode!)
+ * once that value has been computede.
+ */
+
+typedef struct String {
+ int numChars; /* The number of chars in the string.
+ * -1 means this value has not been
+ * calculated. >= 0 means that there is a
+ * valid Unicode rep, or that the number
+ * of UTF bytes == the number of chars. */
+ size_t allocated; /* The amount of space actually allocated
+ * for the UTF string (minus 1 byte for
+ * the termination char). */
+ size_t uallocated; /* The amount of space actually allocated
+ * for the Unicode string. 0 means the
+ * Unicode string rep is invalid. */
+ Tcl_UniChar unicode[2]; /* The array of Unicode chars. The actual
+ * size of this field depends on the
+ * 'uallocated' field above. */
+} String;
+
+#define STRING_SIZE(len) \
+ ((unsigned) (sizeof(String) + ((len-1) * sizeof(Tcl_UniChar))))
+#define GET_STRING(objPtr) \
+ ((String *) (objPtr)->internalRep.otherValuePtr)
+#define SET_STRING(objPtr, stringPtr) \
+ (objPtr)->internalRep.otherValuePtr = (VOID *) (stringPtr)
+
/*
*----------------------------------------------------------------------
@@ -182,6 +246,327 @@ Tcl_DbNewStringObj(bytes, length, file, line)
#endif /* TCL_MEM_DEBUG */
/*
+ *---------------------------------------------------------------------------
+ *
+ * TclNewUnicodeObj --
+ *
+ * This procedure is creates a new String object and initializes
+ * it from the given Utf String. If the Utf String is the same size
+ * as the Unicode string, don't duplicate the data.
+ *
+ * Results:
+ * The newly created object is returned. This object will have no
+ * initial string representation. The returned object has a ref count
+ * of 0.
+ *
+ * Side effects:
+ * Memory allocated for new object and copy of Unicode argument.
+ *
+ *---------------------------------------------------------------------------
+ */
+
+Tcl_Obj *
+Tcl_NewUnicodeObj(unicode, numChars)
+ Tcl_UniChar *unicode; /* The unicode string used to initialize
+ * the new object. */
+ int numChars; /* Number of characters in the unicode
+ * string. */
+{
+ Tcl_Obj *objPtr;
+ String *stringPtr;
+ int uallocated = (numChars + 1) * sizeof(Tcl_UniChar);
+
+ /*
+ * Create a new obj with an invalid string rep.
+ */
+
+ TclNewObj(objPtr);
+ Tcl_InvalidateStringRep(objPtr);
+ objPtr->typePtr = &tclStringType;
+
+ stringPtr = (String *) ckalloc(STRING_SIZE(uallocated));
+ stringPtr->numChars = numChars;
+ stringPtr->uallocated = uallocated;
+ stringPtr->allocated = 0;
+ memcpy((VOID *) stringPtr->unicode, (VOID *) unicode,
+ (size_t) (numChars * sizeof(Tcl_UniChar)));
+ stringPtr->unicode[numChars] = 0;
+ SET_STRING(objPtr, stringPtr);
+ return objPtr;
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * Tcl_GetCharLength --
+ *
+ * Get the length of the Unicode string from the Tcl object.
+ *
+ * Results:
+ * Pointer to unicode string representing the unicode object.
+ *
+ * Side effects:
+ * Frees old internal rep. Allocates memory for new "String"
+ * internal rep.
+ *
+ *----------------------------------------------------------------------
+ */
+
+int
+Tcl_GetCharLength(objPtr)
+ Tcl_Obj *objPtr; /* The String object to get the num chars of. */
+{
+ String *stringPtr;
+
+ SetStringFromAny(NULL, objPtr);
+ stringPtr = GET_STRING(objPtr);
+
+/* if (objPtr->bytes == NULL) { */
+/* printf("called Tcl_GetCharLength with unicode str.\n"); */
+/* } else { */
+/* printf("called Tcl_GetCharLength with str = %s\n", objPtr->bytes); */
+/* } */
+
+ /*
+ * If numChars is unknown, then calculate the number of characaters
+ * while populating the Unicode string.
+ */
+
+ if (stringPtr->numChars == -1) {
+
+ stringPtr->numChars = Tcl_NumUtfChars(objPtr->bytes, objPtr->length);
+
+ if (stringPtr->numChars == objPtr->length) {
+
+ /*
+ * Since we've just calucalated the number of chars, and all
+ * UTF chars are 1-byte long, we don't need to store the
+ * unicode string.
+ */
+
+ stringPtr->uallocated = 0;
+
+ } else {
+
+ /*
+ * Since we've just calucalated the number of chars, and not
+ * all UTF chars are 1-byte long, go ahead and populate the
+ * unicode string.
+ */
+
+ FillUnicodeRep(objPtr);
+
+ /*
+ * We need to fetch the pointer again because we have just
+ * reallocated the structure to make room for the Unicode data.
+ */
+
+ stringPtr = GET_STRING(objPtr);
+ }
+ }
+ return stringPtr->numChars;
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * Tcl_GetUniChar --
+ *
+ * Get the index'th Unicode character from the String object. The
+ * index is assumed to be in the appropriate range.
+ *
+ * Results:
+ * Returns the index'th Unicode character in the Object.
+ *
+ * Side effects:
+ * Fills unichar with the index'th Unicode character.
+ *
+ *----------------------------------------------------------------------
+ */
+
+Tcl_UniChar
+Tcl_GetUniChar(objPtr, index)
+ Tcl_Obj *objPtr; /* The object to get the Unicode charater from. */
+ int index; /* Get the index'th Unicode character. */
+{
+ Tcl_UniChar unichar;
+ String *stringPtr;
+
+ SetStringFromAny(NULL, objPtr);
+ stringPtr = GET_STRING(objPtr);
+
+/* if (objPtr->bytes == NULL) { */
+/* printf("called Tcl_GetUniChar with unicode str.\n"); */
+/* } else { */
+/* printf("called Tcl_GetUniChar with str = %s\n", objPtr->bytes); */
+/* } */
+
+ if (stringPtr->numChars == -1) {
+
+ /*
+ * We haven't yet calculated the length, so we don't have the
+ * Unicode str. We need to know the number of chars before we
+ * can do indexing.
+ */
+
+ Tcl_GetCharLength(objPtr);
+
+ /*
+ * We need to fetch the pointer again because we may have just
+ * reallocated the structure.
+ */
+
+ stringPtr = GET_STRING(objPtr);
+ }
+ if (stringPtr->uallocated == 0) {
+ char *bytes;
+
+ /*
+ * All of the characters in the Utf string are 1 byte chars,
+ * so we don't store the unicode char. We get the Utf string
+ * and convert the index'th byte to a Unicode character.
+ */
+
+ bytes = Tcl_GetString(objPtr);
+ Tcl_UtfToUniChar(&bytes[index], &unichar);
+ } else {
+ unichar = stringPtr->unicode[index];
+ }
+ return unichar;
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * Tcl_GetUnicode --
+ *
+ * Get the index'th Unicode character from the String object. If
+ * the object is not already a String object, it will be converted
+ * to one. If the String object does not have a Unicode rep, then
+ * one is create from the UTF string format.
+ *
+ * Results:
+ * Returns a pointer to the object's internal Unicode string.
+ *
+ * Side effects:
+ * Converts the object to have the String internal rep.
+ *
+ *----------------------------------------------------------------------
+ */
+
+Tcl_UniChar *
+Tcl_GetUnicode(objPtr)
+ Tcl_Obj *objPtr; /* The object to find the unicode string for. */
+{
+ String *stringPtr;
+
+ SetStringFromAny(NULL, objPtr);
+ stringPtr = GET_STRING(objPtr);
+
+/* if (objPtr->bytes == NULL) { */
+/* printf("called Tcl_GetUnicode with unicode str.\n"); */
+/* } else { */
+/* printf("called Tcl_GetUnicode with str = %s\n", objPtr->bytes); */
+/* } */
+
+ if ((stringPtr->numChars == -1) || (stringPtr->uallocated == 0)) {
+
+ /*
+ * We haven't yet calculated the length, or all of the characters
+ * in the Utf string are 1 byte chars (so we didn't store the
+ * unicode str). Since this function must return a unicode string,
+ * and one has not yet been stored, force the Unicode to be
+ * calculated and stored now.
+ */
+
+ FillUnicodeRep(objPtr);
+
+ /*
+ * We need to fetch the pointer again because we have just
+ * reallocated the structure to make room for the Unicode data.
+ */
+
+ stringPtr = GET_STRING(objPtr);
+ }
+ return stringPtr->unicode;
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * Tcl_GetRange --
+ *
+ * Create a Tcl Object that contains the chars between first and last
+ * of the object indicated by "objPtr". If the object is not already
+ * a String object, convert it to one. The first and last indices
+ * are assumed to be in the appropriate range.
+ *
+ * Results:
+ * Returns a new Tcl Object of the String type.
+ *
+ * Side effects:
+ * Changes the internal rep of "objPtr" to the String type.
+ *
+ *----------------------------------------------------------------------
+ */
+
+Tcl_Obj*
+Tcl_GetRange(objPtr, first, last)
+
+ Tcl_Obj *objPtr; /* The Tcl object to find the range of. */
+ int first; /* First index of the range. */
+ int last; /* Last index of the range. */
+{
+ Tcl_Obj *newObjPtr; /* The Tcl object to find the range of. */
+ String *stringPtr;
+
+ SetStringFromAny(NULL, objPtr);
+ stringPtr = GET_STRING(objPtr);
+
+ if (stringPtr->numChars == -1) {
+
+ /*
+ * We haven't yet calculated the length, so we don't have the
+ * Unicode str. We need to know the number of chars before we
+ * can do indexing.
+ */
+
+ Tcl_GetCharLength(objPtr);
+
+ /*
+ * We need to fetch the pointer again because we may have just
+ * reallocated the structure.
+ */
+
+ stringPtr = GET_STRING(objPtr);
+ }
+
+ if (stringPtr->numChars == objPtr->length) {
+ char *str = Tcl_GetString(objPtr);
+
+ /*
+ * All of the characters in the Utf string are 1 byte chars,
+ * so we don't store the unicode char. Create a new string
+ * object containing the specified range of chars.
+ */
+
+ newObjPtr = Tcl_NewStringObj(&str[first], last-first+1);
+
+ /*
+ * Since we know the new string only has 1-byte chars, we
+ * can set it's numChars field.
+ */
+
+/* stringPtr = GET_STRING(newObjPtr); */
+/* stringPtr->numChars = last-first+1; */
+ } else {
+ newObjPtr = Tcl_NewUnicodeObj(stringPtr->unicode + first,
+ last-first+1);
+ }
+ return newObjPtr;
+}
+
+/*
*----------------------------------------------------------------------
*
* Tcl_SetStringObj --
@@ -237,6 +622,7 @@ Tcl_SetStringObj(objPtr, bytes, length)
length = (bytes? strlen(bytes) : 0);
}
TclInitStringRep(objPtr, bytes, length);
+/* printf("called Tcl_SetStringObj with str = %s\n", objPtr->bytes); */
}
/*
@@ -272,15 +658,23 @@ Tcl_SetObjLength(objPtr, length)
* terminating null byte. */
{
char *new;
+ String *stringPtr;
if (Tcl_IsShared(objPtr)) {
panic("Tcl_SetObjLength called with shared object");
}
- if (objPtr->typePtr != &tclStringType) {
- ConvertToStringType(objPtr);
- }
-
- if ((long)length > objPtr->internalRep.longValue) {
+ SetStringFromAny(NULL, objPtr);
+
+ /*
+ * Invalidate the unicode data.
+ */
+
+ stringPtr = GET_STRING(objPtr);
+ stringPtr->numChars = -1;
+ stringPtr->uallocated = 0;
+
+ if (length > stringPtr->allocated) {
+
/*
* Not enough space in current string. Reallocate the string
* space and free the old string.
@@ -290,11 +684,13 @@ Tcl_SetObjLength(objPtr, length)
if (objPtr->bytes != NULL) {
memcpy((VOID *) new, (VOID *) objPtr->bytes,
(size_t) objPtr->length);
+/* new[objPtr->length] = 0; */
Tcl_InvalidateStringRep(objPtr);
}
objPtr->bytes = new;
- objPtr->internalRep.longValue = (long) length;
+ stringPtr->allocated = length;
}
+
objPtr->length = length;
if ((objPtr->bytes != NULL) && (objPtr->bytes != tclEmptyStringRep)) {
objPtr->bytes[length] = 0;
@@ -302,6 +698,60 @@ Tcl_SetObjLength(objPtr, length)
}
/*
+ *---------------------------------------------------------------------------
+ *
+ * TclSetUnicodeObj --
+ *
+ * Modify an object to hold the Unicode string indicated by "unicode".
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * Memory allocated for new "String" internal rep.
+ *
+ *---------------------------------------------------------------------------
+ */
+
+void
+Tcl_SetUnicodeObj(objPtr, unicode, numChars)
+ Tcl_Obj *objPtr; /* The object to set the string of. */
+ Tcl_UniChar *unicode; /* The unicode string used to initialize
+ * the object. */
+ int numChars; /* Number of characters in the unicode
+ * string. */
+{
+ Tcl_ObjType *typePtr;
+ String *stringPtr;
+ size_t uallocated = (numChars + 1) * sizeof(Tcl_UniChar);
+
+ /*
+ * Free the internal rep if one exists, and invalidate the string rep.
+ */
+
+ typePtr = objPtr->typePtr;
+ if ((typePtr != NULL) && (typePtr->freeIntRepProc) != NULL) {
+ (*typePtr->freeIntRepProc)(objPtr);
+ }
+ objPtr->typePtr = &tclStringType;
+
+ /*
+ * Allocate enough space for the String structure + Unicode string.
+ */
+
+ stringPtr = (String *) ckalloc(STRING_SIZE(uallocated));
+ stringPtr->numChars = numChars;
+ stringPtr->uallocated = uallocated;
+ stringPtr->allocated = 0;
+ memcpy((VOID *) stringPtr->unicode, (VOID *) unicode,
+ (size_t) (numChars * sizeof(Tcl_UniChar)));
+ stringPtr->unicode[numChars] = 0;
+ SET_STRING(objPtr, stringPtr);
+ Tcl_InvalidateStringRep(objPtr);
+ return;
+}
+
+/*
*----------------------------------------------------------------------
*
* Tcl_AppendToObj --
@@ -327,37 +777,106 @@ Tcl_AppendToObj(objPtr, bytes, length)
* "bytes". If < 0, then append all bytes
* up to NULL byte. */
{
- int newLength, oldLength;
+ String *stringPtr;
if (Tcl_IsShared(objPtr)) {
panic("Tcl_AppendToObj called with shared object");
}
- if (objPtr->typePtr != &tclStringType) {
- ConvertToStringType(objPtr);
- }
+
+ SetStringFromAny(NULL, objPtr);
+
if (length < 0) {
- length = (bytes? strlen(bytes) : 0);
+ length = (bytes ? strlen(bytes) : 0);
}
if (length == 0) {
return;
}
- oldLength = objPtr->length;
- newLength = length + oldLength;
- if ((long)newLength > objPtr->internalRep.longValue) {
- /*
- * There isn't currently enough space in the string
- * representation so allocate additional space. In fact,
- * overallocate so that there is room for future growth without
- * having to reallocate again.
- */
- Tcl_SetObjLength(objPtr, 2*newLength);
+ /*
+ * TEMPORARY!!! This is terribly inefficient, but it works, and Don
+ * needs for me to check this stuff in ASAP. -Melissa
+ */
+
+/* printf("called Tcl_AppendToObj with str = %s\n", bytes); */
+ UpdateStringOfString(objPtr);
+ AppendUtfToUtfRep(objPtr, bytes, length);
+ return;
+
+ /*
+ * If objPtr has a valid Unicode rep, then append the Unicode
+ * conversion of "bytes" to the objPtr's Unicode rep, otherwise
+ * append "bytes" to objPtr's string rep.
+ */
+
+ stringPtr = GET_STRING(objPtr);
+ if (stringPtr->allocated > 0) {
+ AppendUtfToUnicodeRep(objPtr, bytes, length);
+
+ stringPtr = GET_STRING(objPtr);
+/* printf(" ended Tcl_AppendToObj with %d unicode chars.\n", */
+/* stringPtr->numChars); */
+ } else {
+ AppendUtfToUtfRep(objPtr, bytes, length);
+/* printf(" ended Tcl_AppendToObj with str = %s\n", objPtr->bytes); */
}
- if (length > 0) {
- memcpy((VOID *) (objPtr->bytes + oldLength), (VOID *) bytes,
- (size_t) length);
- objPtr->length = newLength;
- objPtr->bytes[objPtr->length] = 0;
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * Tcl_AppendUnicodeToObj --
+ *
+ * This procedure appends a Unicode string to an object in the
+ * most efficient manner possible. Length must be >= 0.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * Invalidates the string rep and creates a new Unicode string.
+ *
+ *----------------------------------------------------------------------
+ */
+
+void
+Tcl_AppendUnicodeToObj(objPtr, unicode, length)
+ register Tcl_Obj *objPtr; /* Points to the object to append to. */
+ Tcl_UniChar *unicode; /* The unicode string to append to the
+ * object. */
+ int length; /* Number of chars in "unicode". */
+{
+ String *stringPtr;
+
+ if (Tcl_IsShared(objPtr)) {
+ panic("Tcl_AppendUnicodeToObj called with shared object");
+ }
+
+ if (length == 0) {
+ return;
+ }
+
+ SetStringFromAny(NULL, objPtr);
+
+ /*
+ * TEMPORARY!!! This is terribly inefficient, but it works, and Don
+ * needs for me to check this stuff in ASAP. -Melissa
+ */
+
+ UpdateStringOfString(objPtr);
+ AppendUnicodeToUtfRep(objPtr, unicode, length);
+ return;
+
+ /*
+ * If objPtr has a valid Unicode rep, then append the "unicode"
+ * to the objPtr's Unicode rep, otherwise the UTF conversion of
+ * "unicode" to objPtr's string rep.
+ */
+
+ stringPtr = GET_STRING(objPtr);
+ if (stringPtr->allocated > 0) {
+ AppendUnicodeToUnicodeRep(objPtr, unicode, length);
+ } else {
+ AppendUnicodeToUtfRep(objPtr, unicode, length);
}
}
@@ -367,6 +886,7 @@ Tcl_AppendToObj(objPtr, bytes, length)
* Tcl_AppendObjToObj --
*
* This procedure appends the string rep of one object to another.
+ * "objPtr" cannot be a shared object.
*
* Results:
* None.
@@ -383,7 +903,273 @@ Tcl_AppendObjToObj(objPtr, appendObjPtr)
Tcl_Obj *objPtr; /* Points to the object to append to. */
Tcl_Obj *appendObjPtr; /* Object to append. */
{
- TclAppendObjToUnicodeObj(objPtr, appendObjPtr);
+ String *stringPtr;
+ int length;
+ char *bytes;
+
+ SetStringFromAny(NULL, objPtr);
+
+ /*
+ * TEMPORARY!!! This is terribly inefficient, but it works, and Don
+ * needs for me to check this stuff in ASAP. -Melissa
+ */
+
+ UpdateStringOfString(objPtr);
+ bytes = Tcl_GetStringFromObj(appendObjPtr, &length);
+ AppendUtfToUtfRep(objPtr, bytes, length);
+ return;
+
+ /*
+ * If objPtr has a valid Unicode rep, then get a Unicode string
+ * from appendObjPtr and append it.
+ */
+
+ stringPtr = GET_STRING(objPtr);
+ if (stringPtr->allocated > 0) {
+
+ /*
+ * If appendObjPtr is not of the "String" type, don't convert it.
+ */
+
+ if (appendObjPtr->typePtr == &tclStringType) {
+ stringPtr = GET_STRING(appendObjPtr);
+ if ((stringPtr->numChars == -1)
+ || (stringPtr->uallocated == 0)) {
+
+ /*
+ * If appendObjPtr is a string obj with no valide Unicode
+ * rep, then fill its unicode rep.
+ */
+
+ FillUnicodeRep(appendObjPtr);
+ stringPtr = GET_STRING(appendObjPtr);
+ }
+ AppendUnicodeToUnicodeRep(objPtr, stringPtr->unicode,
+ stringPtr->numChars);
+ } else {
+ bytes = Tcl_GetStringFromObj(appendObjPtr, &length);
+ AppendUtfToUnicodeRep(objPtr, bytes, length);
+ }
+ return;
+ }
+
+ /*
+ * Append to objPtr's UTF string rep.
+ */
+
+ bytes = Tcl_GetStringFromObj(appendObjPtr, &length);
+ AppendUtfToUtfRep(objPtr, bytes, length);
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * AppendUnicodeToUnicodeRep --
+ *
+ * This procedure appends the contents of "unicode" to the Unicode
+ * rep of "objPtr". objPtr must already have a valid Unicode rep.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * objPtr's internal rep is reallocated.
+ *
+ *----------------------------------------------------------------------
+ */
+
+static void
+AppendUnicodeToUnicodeRep(objPtr, unicode, appendNumChars)
+ Tcl_Obj *objPtr; /* Points to the object to append to. */
+ Tcl_UniChar *unicode; /* String to append. */
+ int appendNumChars; /* Number of chars of "unicode" to append. */
+{
+ String *stringPtr;
+ int numChars;
+ size_t newSize;
+
+ if (appendNumChars == 0) {
+ return;
+ }
+
+ SetStringFromAny(NULL, objPtr);
+ stringPtr = GET_STRING(objPtr);
+
+ /*
+ * Make the buffer big enough for the result.
+ */
+
+ numChars = stringPtr->numChars + appendNumChars;
+ newSize = (numChars + 1) * sizeof(Tcl_UniChar);
+
+ if (newSize > stringPtr->uallocated) {
+ stringPtr->uallocated = newSize * 2;
+ stringPtr = (String *) ckrealloc((char*)stringPtr,
+ STRING_SIZE(stringPtr->uallocated));
+ SET_STRING(objPtr, stringPtr);
+ }
+
+ /*
+ * Copy the new string onto the end of the old string, then add the
+ * trailing null.
+ */
+
+ memcpy((VOID*) (stringPtr->unicode + stringPtr->numChars), unicode,
+ appendNumChars * sizeof(Tcl_UniChar));
+ stringPtr->unicode[numChars] = 0;
+ stringPtr->numChars = numChars;
+
+ SET_STRING(objPtr, stringPtr);
+ Tcl_InvalidateStringRep(objPtr);
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * AppendUnicodeToUtfRep --
+ *
+ * This procedure converts the contents of "unicode" to UTF and
+ * appends the UTF to the string rep of "objPtr".
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * objPtr's internal rep is reallocated.
+ *
+ *----------------------------------------------------------------------
+ */
+
+static void
+AppendUnicodeToUtfRep(objPtr, unicode, numChars)
+ Tcl_Obj *objPtr; /* Points to the object to append to. */
+ Tcl_UniChar *unicode; /* String to convert to UTF. */
+ int numChars; /* Number of chars of "unicode" to convert. */
+{
+ Tcl_DString dsPtr;
+ int length = numChars * sizeof(Tcl_UniChar);
+ char *bytes;
+
+ if (numChars == 0) {
+ return;
+ }
+
+ Tcl_DStringInit(&dsPtr);
+ bytes = (char *)Tcl_UniCharToUtfDString(unicode, numChars, &dsPtr);
+ AppendUtfToUtfRep(objPtr, bytes, Tcl_DStringLength(&dsPtr));
+ Tcl_DStringFree(&dsPtr);
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * AppendUtfToUnicodeRep --
+ *
+ * This procedure converts the contents of "bytes" to Unicode and
+ * appends the Unicode to the Unicode rep of "objPtr". objPtr must
+ * already have a valid Unicode rep.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * objPtr's internal rep is reallocated.
+ *
+ *----------------------------------------------------------------------
+ */
+
+static void
+AppendUtfToUnicodeRep(objPtr, bytes, numBytes)
+ Tcl_Obj *objPtr; /* Points to the object to append to. */
+ char *bytes; /* String to convert to Unicode. */
+ int numBytes; /* Number of bytes of "bytes" to convert. */
+{
+ Tcl_DString dsPtr;
+ int numChars;
+ Tcl_UniChar *unicode;
+
+ if (numBytes < 0) {
+ numBytes = (bytes ? strlen(bytes) : 0);
+ }
+ if (numBytes == 0) {
+ return;
+ }
+
+ Tcl_DStringInit(&dsPtr);
+ numChars = Tcl_NumUtfChars(bytes, numBytes);
+ unicode = (Tcl_UniChar *)Tcl_UtfToUniCharDString(bytes, numBytes, &dsPtr);
+ AppendUnicodeToUnicodeRep(objPtr, unicode, numChars);
+ Tcl_DStringFree(&dsPtr);
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * AppendUtfToUtfRep --
+ *
+ * This procedure appends "numBytes" bytes of "bytes" to the UTF string
+ * rep of "objPtr". objPtr must already have a valid String rep.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * objPtr's internal rep is reallocated.
+ *
+ *----------------------------------------------------------------------
+ */
+
+static void
+AppendUtfToUtfRep(objPtr, bytes, numBytes)
+ Tcl_Obj *objPtr; /* Points to the object to append to. */
+ char *bytes; /* String to append. */
+ int numBytes; /* Number of bytes of "bytes" to append. */
+{
+ String *stringPtr;
+ int newLength, oldLength;
+
+ if (numBytes < 0) {
+ numBytes = (bytes ? strlen(bytes) : 0);
+ }
+ if (numBytes == 0) {
+ return;
+ }
+
+ /*
+ * Copy the new string onto the end of the old string, then add the
+ * trailing null.
+ */
+
+ oldLength = objPtr->length;
+ newLength = numBytes + oldLength;
+
+ stringPtr = GET_STRING(objPtr);
+ if (newLength > stringPtr->allocated) {
+
+ /*
+ * There isn't currently enough space in the string
+ * representation so allocate additional space. If the current
+ * string representation isn't empty (i.e. it looks like we're
+ * doing a series of appends) then overallocate the space so
+ * that we won't have to do as much reallocation in the future.
+ */
+
+ Tcl_SetObjLength(objPtr,
+ (oldLength == 0) ? newLength : 2*newLength);
+ } else {
+
+ /*
+ * Invalidate the unicode data.
+ */
+
+ stringPtr->numChars = -1;
+ stringPtr->uallocated = 0;
+ }
+
+ memcpy((VOID *) (objPtr->bytes + oldLength), (VOID *) bytes,
+ (size_t) numBytes);
+ objPtr->bytes[newLength] = 0;
+ objPtr->length = newLength;
}
/*
@@ -409,6 +1195,7 @@ Tcl_AppendStringsToObjVA (objPtr, argList)
Tcl_Obj *objPtr; /* Points to the object to append to. */
va_list argList; /* Variable argument list. */
{
+ String *stringPtr;
va_list tmpArgList;
int newLength, oldLength;
register char *string, *dst;
@@ -416,9 +1203,8 @@ Tcl_AppendStringsToObjVA (objPtr, argList)
if (Tcl_IsShared(objPtr)) {
panic("Tcl_AppendStringsToObj called with shared object");
}
- if (objPtr->typePtr != &tclStringType) {
- ConvertToStringType(objPtr);
- }
+
+ SetStringFromAny(NULL, objPtr);
/*
* Figure out how much space is needed for all the strings, and
@@ -440,7 +1226,9 @@ Tcl_AppendStringsToObjVA (objPtr, argList)
return;
}
- if ((long)newLength > objPtr->internalRep.longValue) {
+ stringPtr = GET_STRING(objPtr);
+ if (newLength > stringPtr->allocated) {
+
/*
* There isn't currently enough space in the string
* representation so allocate additional space. If the current
@@ -514,45 +1302,64 @@ Tcl_AppendStringsToObj TCL_VARARGS_DEF(Tcl_Obj *,arg1)
}
/*
- *----------------------------------------------------------------------
+ *---------------------------------------------------------------------------
*
- * ConvertToStringType --
+ * FillUnicodeRep --
*
- * This procedure converts the internal representation of an object
- * to "expandable string" type.
+ * Populate the Unicode internal rep with the Unicode form of its string
+ * rep. The object must alread have a "String" internal rep.
*
* Results:
* None.
*
* Side effects:
- * Any old internal reputation for objPtr is freed and the
- * internal representation is set to that for an expandable string
- * (the field internalRep.longValue holds 1 less than the allocated
- * length of objPtr's string representation).
+ * Reallocates the String internal rep.
*
- *----------------------------------------------------------------------
+ *---------------------------------------------------------------------------
*/
static void
-ConvertToStringType(objPtr)
- register Tcl_Obj *objPtr; /* Pointer to object. Must have a
- * typePtr that isn't &tclStringType. */
+FillUnicodeRep(objPtr)
+ Tcl_Obj *objPtr; /* The object in which to fill the unicode rep. */
{
- if (objPtr->typePtr != NULL) {
- if (objPtr->bytes == NULL) {
- objPtr->typePtr->updateStringProc(objPtr);
- }
- if (objPtr->typePtr->freeIntRepProc != NULL) {
- objPtr->typePtr->freeIntRepProc(objPtr);
- }
+ String *stringPtr;
+ size_t uallocated;
+ char *src, *srcEnd;
+ Tcl_UniChar *dst;
+ src = objPtr->bytes;
+
+ stringPtr = GET_STRING(objPtr);
+ if (stringPtr->numChars == -1) {
+ stringPtr->numChars = Tcl_NumUtfChars(src, objPtr->length);
}
- objPtr->typePtr = &tclStringType;
- if (objPtr->bytes != NULL) {
- objPtr->internalRep.longValue = (long)objPtr->length;
- } else {
- objPtr->internalRep.longValue = 0;
- objPtr->length = 0;
+
+ uallocated = stringPtr->numChars * sizeof(Tcl_UniChar);
+ if (uallocated > stringPtr->uallocated) {
+
+ /*
+ * If not enought space has been allocated for the unicode rep,
+ * reallocate the internal rep object with double the amount of
+ * space needed, so the unicode string can grow without being
+ * reallocated.
+ */
+
+ uallocated *= 2;
+ stringPtr = (String *) ckrealloc((char*) stringPtr,
+ STRING_SIZE(uallocated));
+ stringPtr->uallocated = uallocated;
+ }
+
+ /*
+ * Convert src to Unicode and store the coverted data in "unicode".
+ */
+
+ srcEnd = src + objPtr->length;
+ for (dst = stringPtr->unicode; src < srcEnd; dst++) {
+ src += Tcl_UtfToUniChar(src, dst);
}
+ *dst = 0;
+
+ SET_STRING(objPtr, stringPtr);
}
/*
@@ -581,13 +1388,40 @@ DupStringInternalRep(srcPtr, copyPtr)
register Tcl_Obj *copyPtr; /* Object with internal rep to set. Must
* not currently have an internal rep.*/
{
+ String *srcStringPtr = GET_STRING(srcPtr);
+ String *copyStringPtr;
+
+ /*
+ * If the src obj is a string of 1-byte Utf chars, then copy the
+ * string rep of the source object and create an "empty" Unicode
+ * internal rep for the new object. Otherwise, copy Unicode
+ * internal rep, and invalidate the string rep of the new object.
+ */
+
+ if (srcStringPtr->numChars == srcPtr->length) {
+ copyStringPtr = (String *) ckalloc(STRING_SIZE(0));
+ copyStringPtr->uallocated = 0;
+ } else {
+ copyStringPtr = (String *) ckalloc(
+ STRING_SIZE(srcStringPtr->uallocated));
+ copyStringPtr->uallocated = srcStringPtr->uallocated;
+
+ memcpy((VOID *) copyStringPtr->unicode,
+ (VOID *) srcStringPtr->unicode,
+ (size_t) srcStringPtr->numChars * sizeof(Tcl_UniChar));
+ copyStringPtr->unicode[srcStringPtr->numChars] = 0;
+ }
+ copyStringPtr->numChars = srcStringPtr->numChars;
+
/*
* Tricky point: the string value was copied by generic object
* management code, so it doesn't contain any extra bytes that
* might exist in the source object.
*/
- copyPtr->internalRep.longValue = (long)copyPtr->length;
+ copyStringPtr->allocated = copyPtr->length;
+
+ SET_STRING(copyPtr, copyStringPtr);
copyPtr->typePtr = &tclStringType;
}
@@ -596,15 +1430,14 @@ DupStringInternalRep(srcPtr, copyPtr)
*
* SetStringFromAny --
*
- * Create an internal representation of type "expandable string"
- * for an object.
+ * Create an internal representation of type "String" for an object.
*
* Results:
* This operation always succeeds and returns TCL_OK.
*
* Side effects:
- * This procedure does nothing; there is no advantage in converting
- * the internal representation now, so we just defer it.
+ * Any old internal reputation for objPtr is freed and the
+ * internal representation is set to "String".
*
*----------------------------------------------------------------------
*/
@@ -614,6 +1447,42 @@ SetStringFromAny(interp, objPtr)
Tcl_Interp *interp; /* Used for error reporting if not NULL. */
Tcl_Obj *objPtr; /* The object to convert. */
{
+ String *stringPtr;
+
+ /*
+ * The Unicode object is opitmized for the case where each UTF char
+ * in a string is only one byte. In this case, we store the value of
+ * numChars, but we don't copy the bytes to the unicodeObj->unicode.
+ */
+
+ if (objPtr->typePtr != &tclStringType) {
+
+ if (objPtr->typePtr != NULL) {
+ if (objPtr->bytes == NULL) {
+ objPtr->typePtr->updateStringProc(objPtr);
+ }
+ if ((objPtr->typePtr->freeIntRepProc) != NULL) {
+ (*objPtr->typePtr->freeIntRepProc)(objPtr);
+ }
+ }
+ objPtr->typePtr = &tclStringType;
+
+ /*
+ * Allocate enough space for the basic String structure.
+ */
+
+ stringPtr = (String *) ckalloc(STRING_SIZE(0));
+ stringPtr->numChars = -1;
+ stringPtr->uallocated = 0;
+
+ if (objPtr->bytes != NULL) {
+ stringPtr->allocated = objPtr->length;
+ objPtr->bytes[objPtr->length] = 0;
+ } else {
+ objPtr->length = 0;
+ }
+ SET_STRING(objPtr, stringPtr);
+ }
return TCL_OK;
}
@@ -623,13 +1492,14 @@ SetStringFromAny(interp, objPtr)
* UpdateStringOfString --
*
* Update the string representation for an object whose internal
- * representation is "expandable string".
+ * representation is "String".
*
* Results:
* None.
*
* Side effects:
- * None.
+ * The object's string may be set by converting its Unicode
+ * represention to UTF format.
*
*----------------------------------------------------------------------
*/
@@ -638,16 +1508,73 @@ static void
UpdateStringOfString(objPtr)
Tcl_Obj *objPtr; /* Object with string rep to update. */
{
- /*
- * The string is almost always valid already, in which case there's
- * nothing for us to do. The only case we have to worry about is if
- * the object is totally null. In this case, set the string rep to
- * an empty string.
- */
+ int i, length, size;
+ Tcl_UniChar *unicode;
+ char dummy[TCL_UTF_MAX];
+ char *dst;
+ String *stringPtr;
- if (objPtr->bytes == NULL) {
- objPtr->bytes = tclEmptyStringRep;
- objPtr->length = 0;
+ stringPtr = GET_STRING(objPtr);
+ if ((objPtr->bytes == NULL) || (stringPtr->allocated == 0)) {
+
+ if (stringPtr->numChars <= 0) {
+
+ /*
+ * If there is no Unicode rep, or the string has 0 chars,
+ * then set the string rep to an empty string.
+ */
+
+ objPtr->bytes = tclEmptyStringRep;
+ objPtr->length = 0;
+ return;
+ }
+
+ unicode = stringPtr->unicode;
+ length = stringPtr->numChars * sizeof(Tcl_UniChar);
+
+ /*
+ * Translate the Unicode string to UTF. "size" will hold the
+ * amount of space the UTF string needs.
+ */
+
+ size = 0;
+ for (i = 0; i < stringPtr->numChars; i++) {
+ size += Tcl_UniCharToUtf((int) unicode[i], dummy);
+ }
+
+ dst = (char *) ckalloc((unsigned) (size + 1));
+ objPtr->bytes = dst;
+ objPtr->length = size;
+ stringPtr->allocated = size;
+
+ for (i = 0; i < stringPtr->numChars; i++) {
+ dst += Tcl_UniCharToUtf(unicode[i], dst);
+ }
+ *dst = '\0';
}
return;
}
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * FreeStringInternalRep --
+ *
+ * Deallocate the storage associated with a String data object's
+ * internal representation.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * Frees memory.
+ *
+ *----------------------------------------------------------------------
+ */
+
+static void
+FreeStringInternalRep(objPtr)
+ Tcl_Obj *objPtr; /* Object with internal rep to free. */
+{
+ ckfree((char *) GET_STRING(objPtr));
+}
diff --git a/generic/tclStubInit.c b/generic/tclStubInit.c
index 9a3b4f3..a6f7e45 100644
--- a/generic/tclStubInit.c
+++ b/generic/tclStubInit.c
@@ -8,7 +8,7 @@
* See the file "license.terms" for information on usage and redistribution
* of this file, and for a DISCLAIMER OF ALL WARRANTIES.
*
- * RCS: @(#) $Id: tclStubInit.c,v 1.17 1999/06/10 04:28:51 stanton Exp $
+ * RCS: @(#) $Id: tclStubInit.c,v 1.18 1999/06/15 01:16:25 hershey Exp $
*/
#include "tclInt.h"
@@ -758,6 +758,13 @@ TclStubs tclStubs = {
Tcl_UniCharIsPunct, /* 375 */
Tcl_RegExpMatchObj, /* 376 */
Tcl_RegExpGetInfo, /* 377 */
+ Tcl_NewUnicodeObj, /* 378 */
+ Tcl_SetUnicodeObj, /* 379 */
+ Tcl_GetCharLength, /* 380 */
+ Tcl_GetUniChar, /* 381 */
+ Tcl_GetUnicode, /* 382 */
+ Tcl_GetRange, /* 383 */
+ Tcl_AppendUnicodeToObj, /* 384 */
};
/* !END!: Do not edit above this line. */
diff --git a/generic/tclUnicodeObj.c b/generic/tclUnicodeObj.c
deleted file mode 100644
index 1809b20..0000000
--- a/generic/tclUnicodeObj.c
+++ /dev/null
@@ -1,882 +0,0 @@
-/*
- * tclUnicodeObj.c --
- *
- * This file contains the implementation of the Unicode internal
- * representation of Tcl objects.
- *
- * Copyright (c) 1999 by Scriptics Corporation.
- *
- * See the file "license.terms" for information on usage and redistribution
- * of this file, and for a DISCLAIMER OF ALL WARRANTIES.
- *
- * RCS: @(#) $Id: tclUnicodeObj.c,v 1.6 1999/06/10 19:14:54 hershey Exp $
- */
-
-#include <math.h>
-#include "tclInt.h"
-#include "tclPort.h"
-
-/*
- * Prototypes for local procedures defined in this file:
- */
-
-static int AllSingleByteChars _ANSI_ARGS_((Tcl_Obj *objPtr));
-static void AppendUniCharStrToObj _ANSI_ARGS_((Tcl_Obj *objPtr,
- Tcl_UniChar *unichars, int numNewChars));
-static void DupUnicodeInternalRep _ANSI_ARGS_((Tcl_Obj *srcPtr,
- Tcl_Obj *copyPtr));
-static void FreeUnicodeInternalRep _ANSI_ARGS_((Tcl_Obj *objPtr));
-static void UpdateStringOfUnicode _ANSI_ARGS_((Tcl_Obj *objPtr));
-static void SetOptUnicodeFromAny _ANSI_ARGS_((Tcl_Obj *objPtr,
- int numChars));
-static void SetFullUnicodeFromAny _ANSI_ARGS_((Tcl_Obj *objPtr,
- char *src, int numBytes, int numChars));
-static int SetUnicodeFromAny _ANSI_ARGS_((Tcl_Interp *interp,
- Tcl_Obj *objPtr));
-
-/*
- * The following object type represents a Unicode string. A Unicode string
- * is an internationalized string. Conceptually, a Unicode string is an
- * array of 16-bit quantities organized as a sequence of properly formed
- * UTF-8 characters. There is a one-to-one map between Unicode and UTF
- * characters. The Unicode ojbect is opitmized for the case where each UTF
- * char in a string is only one byte. In this case, we store the value of
- * numChars, but we don't copy the bytes to the unicodeObj->chars. Before
- * accessing obj->chars, check if unicodeObj->numChars == obj->length.
- */
-
-Tcl_ObjType tclUnicodeType = {
- "unicode",
- FreeUnicodeInternalRep,
- DupUnicodeInternalRep,
- UpdateStringOfUnicode,
- SetUnicodeFromAny
-};
-
-/*
- * The following structure is the internal rep for a Unicode object.
- * Keeps track of how much memory has been used and how much has been
- * allocated for the Unicode to enable growing and shrinking of the
- * Unicode object with fewer mallocs.
- */
-
-typedef struct Unicode {
- int numChars; /* The number of chars in the unicode
- * string. */
- size_t allocated; /* The amount of space actually allocated. */
- Tcl_UniChar chars[2]; /* The array of chars. The actual size of
- * this field depends on the 'allocated' field
- * above. */
-} Unicode;
-
-#define UNICODE_SIZE(len) \
- ((unsigned) (sizeof(Unicode) - (sizeof(Tcl_UniChar)*2) + (len)))
-#define GET_UNICODE(objPtr) \
- ((Unicode *) (objPtr)->internalRep.otherValuePtr)
-#define SET_UNICODE(objPtr, unicodePtr) \
- (objPtr)->internalRep.otherValuePtr = (VOID *) (unicodePtr)
-
-
-/*
- *----------------------------------------------------------------------
- *
- * TclGetUnicodeFromObj --
- *
- * Get the index'th Unicode character from the Unicode object. If
- * the object is not already a Unicode object, an attempt will be
- * made to convert it to one. The index is assumed to be in the
- * appropriate range.
- *
- * Results:
- * Returns a pointer to the object's internal unicode string.
- *
- * Side effects:
- * Converts the object to have the Unicode internal rep.
- *
- *----------------------------------------------------------------------
- */
-
-Tcl_UniChar *
-TclGetUnicodeFromObj(objPtr)
- Tcl_Obj *objPtr; /* The object to find the unicode string for. */
-{
- Unicode *unicodePtr;
- int numBytes;
- char *src;
-
- SetUnicodeFromAny(NULL, objPtr);
- unicodePtr = GET_UNICODE(objPtr);
-
- if (unicodePtr->allocated == 0) {
-
- /*
- * If all of the characters in the Utf string are 1 byte chars,
- * we don't normally store the unicode str. Since this
- * function must return a unicode string, and one has not yet
- * been stored, force the Unicode to be calculated and stored
- * now.
- */
-
- src = Tcl_GetStringFromObj(objPtr, &numBytes);
- SetFullUnicodeFromAny(objPtr, src, numBytes, unicodePtr->numChars);
-
- /*
- * We need to fetch the pointer again because we have just
- * reallocated the structure to make room for the Unicode data.
- */
-
- unicodePtr = GET_UNICODE(objPtr);
- }
- return unicodePtr->chars;
-}
-
-/*
- *----------------------------------------------------------------------
- *
- * TclGetUnicodeLengthFromObj --
- *
- * Get the length of the Unicode string from the Tcl object. If
- * the object is not already a Unicode object, an attempt will be
- * made to convert it to one.
- *
- * Results:
- * Pointer to unicode string representing the unicode object.
- *
- * Side effects:
- * Frees old internal rep. Allocates memory for new internal rep.
- *
- *----------------------------------------------------------------------
- */
-
-int
-TclGetUnicodeLengthFromObj(objPtr)
- Tcl_Obj *objPtr; /* The Unicode object. */
-{
- int length;
- Unicode *unicodePtr;
-
- SetUnicodeFromAny(NULL, objPtr);
- unicodePtr = GET_UNICODE(objPtr);
-
- length = unicodePtr->numChars;
- return length;
-}
-
-/*
- *----------------------------------------------------------------------
- *
- * TclGetUniCharFromObj --
- *
- * Get the index'th Unicode character from the Unicode object. If
- * the object is not already a Unicode object, an attempt will be
- * made to convert it to one. The index is assumed to be in the
- * appropriate range.
- *
- * Results:
- * Returns the index'th Unicode character in the Object.
- *
- * Side effects:
- * Fills unichar with the index'th Unicode character.
- *
- *----------------------------------------------------------------------
- */
-
-Tcl_UniChar
-TclGetUniCharFromObj(objPtr, index)
- Tcl_Obj *objPtr; /* The Unicode object. */
- int index; /* Get the index'th character. */
-{
- Tcl_UniChar unichar;
- Unicode *unicodePtr;
- int length;
-
- SetUnicodeFromAny(NULL, objPtr);
- unicodePtr = GET_UNICODE(objPtr);
- length = objPtr->length;
-
- if (AllSingleByteChars(objPtr)) {
- int length;
- char *str;
-
- /*
- * All of the characters in the Utf string are 1 byte chars,
- * so we don't store the unicode char. We get the Utf string
- * and convert the index'th byte to a Unicode character.
- */
-
- str = Tcl_GetStringFromObj(objPtr, &length);
- Tcl_UtfToUniChar(&str[index], &unichar);
- } else {
- unichar = unicodePtr->chars[index];
- }
- return unichar;
-}
-
-/*
- *----------------------------------------------------------------------
- *
- * TclGetRangeFromObj --
- *
- * Create a Tcl Object that contains the chars between first and last
- * of the object indicated by "objPtr". If the object is not already
- * a Unicode object, an attempt will be made to convert it to one.
- * The first and last indices are assumed to be in the appropriate
- * range.
- *
- * Results:
- * Returns a new Tcl Object of either "string" or "unicode" type,
- * containing the range of chars.
- *
- * Side effects:
- * Changes the internal rep of "objPtr" to unicode.
- *
- *----------------------------------------------------------------------
- */
-
-Tcl_Obj*
-TclGetRangeFromObj(objPtr, first, last)
-
- Tcl_Obj *objPtr; /* The Tcl object to find the range of. */
- int first; /* First index of the range. */
- int last; /* Last index of the range. */
-{
- Tcl_Obj *newObjPtr; /* The Tcl object to find the range of. */
- Unicode *unicodePtr;
- int length;
-
- SetUnicodeFromAny(NULL, objPtr);
- unicodePtr = GET_UNICODE(objPtr);
- length = objPtr->length;
-
- if (unicodePtr->numChars != length) {
- newObjPtr = TclNewUnicodeObj(unicodePtr->chars + first, last-first+1);
- } else {
- int length;
- char *str;
-
- /*
- * All of the characters in the Utf string are 1 byte chars,
- * so we don't store the unicode char. Create a new string
- * object containing the specified range of chars.
- */
-
- str = Tcl_GetStringFromObj(objPtr, &length);
- newObjPtr = Tcl_NewStringObj(&str[first], last-first+1);
- }
- return newObjPtr;
-}
-
-/*
- *----------------------------------------------------------------------
- *
- * TclAppendObjToUnicodeObj --
- *
- * This procedure appends the contents of "srcObjPtr" to the Unicode
- * object "destPtr".
- *
- * Results:
- * None.
- *
- * Side effects:
- * If srcObjPtr doesn't have an internal rep, then it is given a
- * Unicode internal rep.
- *
- *----------------------------------------------------------------------
- */
-
-Tcl_Obj *
-TclAppendObjToUnicodeObj(targetObjPtr, srcObjPtr)
- register Tcl_Obj *targetObjPtr; /* Points to the object to
- * append to. */
- register Tcl_Obj *srcObjPtr; /* Points to the object to
- * append from. */
-{
- int numBytes, numChars;
- Tcl_Obj *resultObjPtr;
- char *utfSrcStr;
- Tcl_UniChar *unicharSrcStr;
- Unicode *unicodePtr;
- Tcl_DString dsPtr;
-
- /*
- * Duplicate the target if it is shared.
- * Change the result's internal rep to Unicode object.
- */
-
- if (Tcl_IsShared(targetObjPtr)) {
- resultObjPtr = Tcl_DuplicateObj(targetObjPtr);
- } else {
- resultObjPtr = targetObjPtr;
- }
- SetUnicodeFromAny(NULL, resultObjPtr);
-
- /*
- * Case where target chars are 1 byte long:
- * If src obj is of "string" or null type, then convert it to "unicode"
- * type. Src objs of other types (such as int) are left in tact to keep
- * them from shimmering between types. If the src obj is a unichar obj,
- * and all src chars are also 1 byte long, the src string is appended to
- * the target "unicode" obj, and the target obj maintains its "optimized"
- * status.
- */
-
- if (AllSingleByteChars(resultObjPtr)) {
-
- int length;
- char *stringRep;
-
- if (srcObjPtr->typePtr == &tclStringType
- || srcObjPtr->typePtr == NULL) {
- SetUnicodeFromAny(NULL, srcObjPtr);
- }
-
- stringRep = Tcl_GetStringFromObj(srcObjPtr, &length);
- Tcl_AppendToObj(resultObjPtr, stringRep, length);
-
- if ((srcObjPtr->typePtr == &tclUnicodeType)
- && (AllSingleByteChars(srcObjPtr))) {
- SetOptUnicodeFromAny(resultObjPtr, resultObjPtr->length);
- }
- return resultObjPtr;
- }
-
- /*
- * Extract a unicode string from "unicode" or "string" type objects.
- * Extract the utf string from non-unicode objects, and convert the
- * utf string to unichar string locally.
- * If the src obj is a "string" obj, convert it to "unicode" type.
- * Src objs of other types (such as int) are left in tact to keep
- * them from shimmering between types.
- */
-
- Tcl_DStringInit(&dsPtr);
- if (srcObjPtr->typePtr == &tclStringType || srcObjPtr->typePtr == NULL) {
- SetUnicodeFromAny(NULL, srcObjPtr);
- }
- if (srcObjPtr->typePtr == &tclUnicodeType) {
- if (AllSingleByteChars(srcObjPtr)) {
-
- unicodePtr = GET_UNICODE(srcObjPtr);
- numChars = unicodePtr->numChars;
-
- utfSrcStr = Tcl_GetStringFromObj(srcObjPtr, &numBytes);
- unicharSrcStr = (Tcl_UniChar *)Tcl_UtfToUniCharDString(utfSrcStr,
- numBytes, &dsPtr);
- } else {
- unicodePtr = GET_UNICODE(srcObjPtr);
- numChars = unicodePtr->numChars;
- unicharSrcStr = unicodePtr->chars;
- }
- } else {
- utfSrcStr = Tcl_GetStringFromObj(srcObjPtr, &numBytes);
- numChars = Tcl_NumUtfChars(utfSrcStr, numBytes);
- unicharSrcStr = (Tcl_UniChar *)Tcl_UtfToUniCharDString(utfSrcStr,
- numBytes, &dsPtr);
- }
- if (numChars == 0) {
- return resultObjPtr;
- }
-
- /*
- * Append the unichar src string to the result object.
- */
-
- AppendUniCharStrToObj(resultObjPtr, unicharSrcStr, numChars);
- Tcl_DStringFree(&dsPtr);
- return resultObjPtr;
-}
-
-/*
- *----------------------------------------------------------------------
- *
- * AppendUniCharStrToObj --
- *
- * This procedure appends the contents of "srcObjPtr" to the
- * Unicode object "objPtr".
- *
- * Results:
- * None.
- *
- * Side effects:
- * If srcObjPtr doesn't have an internal rep, then it is given a
- * Unicode internal rep.
- *
- *----------------------------------------------------------------------
- */
-
-static void
-AppendUniCharStrToObj(objPtr, unichars, numNewChars)
- register Tcl_Obj *objPtr; /* Points to the object to append to. */
- Tcl_UniChar *unichars; /* The unicode string to append to the
- * object. */
- int numNewChars; /* Number of chars in "unichars". */
-{
- Unicode *unicodePtr;
- int numChars;
- size_t numBytes;
-
- SetUnicodeFromAny(NULL, objPtr);
- unicodePtr = GET_UNICODE(objPtr);
-
- numChars = numNewChars + unicodePtr->numChars;
- numBytes = (numChars + 1) * sizeof(Tcl_UniChar);
-
- if (unicodePtr->allocated < numBytes) {
- int allocatedBytes = numBytes * 2;
-
- /*
- * There isn't currently enough space in the Unicode
- * representation so allocate additional space. In fact,
- * overallocate so that there is room for future growth without
- * having to reallocate again.
- */
-
- unicodePtr = (Unicode *) ckrealloc((char*) unicodePtr,
- UNICODE_SIZE(allocatedBytes));
- unicodePtr->allocated = allocatedBytes;
- unicodePtr = SET_UNICODE(objPtr, unicodePtr);
- }
- memcpy((VOID *) (unicodePtr->chars + unicodePtr->numChars),
- (VOID *) unichars, (size_t) numNewChars * sizeof(Tcl_UniChar));
- unicodePtr->chars[numChars] = 0;
- unicodePtr->numChars = numChars;
-
- /*
- * Invalidate the StringRep.
- */
-
- Tcl_InvalidateStringRep(objPtr);
-}
-
-/*
- *----------------------------------------------------------------------
- *
- * TclAppendUnicodeToObj --
- *
- * This procedure appends a Unicode string to an object in the
- * most efficient manner possible.
- *
- * Results:
- * None.
- *
- * Side effects:
- * Invalidates the string rep and creates a new Unicode string.
- *
- *----------------------------------------------------------------------
- */
-
-void
-TclAppendUnicodeToObj(objPtr, unichars, length)
- register Tcl_Obj *objPtr; /* Points to the object to append to. */
- Tcl_UniChar *unichars; /* The unicode string to append to the
- * object. */
- int length; /* Number of chars in "unichars". */
-{
- Unicode *unicodePtr;
- int numChars, i;
- size_t newSize;
- char *src;
- Tcl_UniChar *dst;
-
- if (Tcl_IsShared(objPtr)) {
- panic("TclAppendUnicodeToObj called with shared object");
- }
-
- SetUnicodeFromAny(NULL, objPtr);
- unicodePtr = GET_UNICODE(objPtr);
-
- /*
- * Make the buffer big enough for the result.
- */
-
- numChars = unicodePtr->numChars + length;
- newSize = (numChars + 1) * sizeof(Tcl_UniChar);
-
- if (newSize > unicodePtr->allocated) {
- int allocated = newSize * 2;
-
- unicodePtr = (Unicode *) ckrealloc((char*)unicodePtr,
- UNICODE_SIZE(allocated));
-
- if (unicodePtr->allocated == 0) {
- /*
- * If the original string was not in Unicode form, add it to the
- * beginning of the buffer.
- */
-
- src = objPtr->bytes;
- dst = unicodePtr->chars;
- for (i = 0; i < unicodePtr->numChars; i++) {
- src += Tcl_UtfToUniChar(src, dst++);
- }
- }
- unicodePtr->allocated = allocated;
- }
-
- /*
- * Copy the new string onto the end of the old string, then add the
- * trailing null.
- */
-
- memcpy((VOID*) (unicodePtr->chars + unicodePtr->numChars), unichars,
- length * sizeof(Tcl_UniChar));
- unicodePtr->numChars = numChars;
- unicodePtr->chars[numChars] = 0;
-
- SET_UNICODE(objPtr, unicodePtr);
-
- Tcl_InvalidateStringRep(objPtr);
-}
-
-/*
- *---------------------------------------------------------------------------
- *
- * TclNewUnicodeObj --
- *
- * This procedure is creates a new Unicode object and initializes
- * it from the given Utf String. If the Utf String is the same size
- * as the Unicode string, don't duplicate the data.
- *
- * Results:
- * The newly created object is returned. This object will have no
- * initial string representation. The returned object has a ref count
- * of 0.
- *
- * Side effects:
- * Memory allocated for new object and copy of Unicode argument.
- *
- *---------------------------------------------------------------------------
- */
-
-Tcl_Obj *
-TclNewUnicodeObj(unichars, numChars)
- Tcl_UniChar *unichars; /* The unicode string used to initialize
- * the new object. */
- int numChars; /* Number of characters in the unicode
- * string. */
-{
- Tcl_Obj *objPtr;
- Unicode *unicodePtr;
- int numBytes, allocated;
-
- numBytes = numChars * sizeof(Tcl_UniChar);
-
- /*
- * Allocate extra space for the null character
- */
-
- allocated = numBytes + sizeof(Tcl_UniChar);
-
- TclNewObj(objPtr);
- objPtr->bytes = NULL;
- objPtr->typePtr = &tclUnicodeType;
-
- unicodePtr = (Unicode *) ckalloc(UNICODE_SIZE(allocated));
- unicodePtr->numChars = numChars;
- unicodePtr->allocated = allocated;
- memcpy((VOID *) unicodePtr->chars, (VOID *) unichars, (size_t) numBytes);
- unicodePtr->chars[numChars] = 0;
- SET_UNICODE(objPtr, unicodePtr);
- return objPtr;
-}
-
-/*
- *---------------------------------------------------------------------------
- *
- * TclAllSingleByteChars --
- *
- * Initialize the internal representation of a Unicode Tcl_Obj
- * to a copy of the internal representation of an existing Unicode
- * object.
- *
- * Results:
- * None.
- *
- * Side effects:
- * Allocates memory.
- *
- *---------------------------------------------------------------------------
- */
-
-static int
-AllSingleByteChars(objPtr)
- Tcl_Obj *objPtr; /* Object whose char lengths to check. */
-{
- Unicode *unicodePtr;
- int numBytes, numChars;
-
- unicodePtr = GET_UNICODE(objPtr);
- numChars = unicodePtr->numChars;
- numBytes = objPtr->length;
-
- if (numChars == numBytes) {
- return 1;
- } else {
- return 0;
- }
-}
-
-/*
- *---------------------------------------------------------------------------
- *
- * DupUnicodeInternalRep --
- *
- * Initialize the internal representation of a Unicode Tcl_Obj
- * to a copy of the internal representation of an existing Unicode
- * object.
- *
- * Results:
- * None.
- *
- * Side effects:
- * Allocates memory.
- *
- *---------------------------------------------------------------------------
- */
-
-static void
-DupUnicodeInternalRep(srcPtr, copyPtr)
- Tcl_Obj *srcPtr; /* Object with internal rep to copy. */
- Tcl_Obj *copyPtr; /* Object with internal rep to set. */
-{
- Unicode *srcUnicodePtr = GET_UNICODE(srcPtr);
- Unicode *copyUnicodePtr;
-
- /*
- * If the src obj is a string of 1-byte Utf chars, then copy the
- * string rep of the source object and create an "empty" Unicode
- * internal rep for the new object. Otherwise, copy Unicode
- * internal rep, and invalidate the string rep of the new object.
- */
-
- if (srcUnicodePtr->numChars == srcPtr->length) {
- copyUnicodePtr = (Unicode *) ckalloc(sizeof(Unicode));
- copyUnicodePtr->allocated = 0;
- } else {
- int allocated = srcUnicodePtr->allocated;
-
- copyUnicodePtr = (Unicode *) ckalloc(UNICODE_SIZE(allocated));
-
- copyUnicodePtr->allocated = allocated;
- memcpy((VOID *) copyUnicodePtr->chars,
- (VOID *) srcUnicodePtr->chars,
- (size_t) (srcUnicodePtr->numChars + 1) * sizeof(Tcl_UniChar));
- }
- copyUnicodePtr->numChars = srcUnicodePtr->numChars;
- SET_UNICODE(copyPtr, copyUnicodePtr);
- copyPtr->typePtr = &tclUnicodeType;
-}
-
-/*
- *---------------------------------------------------------------------------
- *
- * UpdateStringOfUnicode --
- *
- * Update the string representation for a Unicode data object.
- * Note: This procedure does not invalidate an existing old string rep
- * so storage will be lost if this has not already been done.
- *
- * Results:
- * None.
- *
- * Side effects:
- * The object's string is set to a valid string that results from
- * the Unicode-to-string conversion.
- *
- * The object becomes a string object -- the internal rep is
- * discarded and the typePtr becomes NULL.
- *
- *---------------------------------------------------------------------------
- */
-
-static void
-UpdateStringOfUnicode(objPtr)
- Tcl_Obj *objPtr; /* Unicode object whose string rep to
- * update. */
-{
- int i, length, size;
- Tcl_UniChar *src;
- char dummy[TCL_UTF_MAX];
- char *dst;
- Unicode *unicodePtr;
-
- unicodePtr = GET_UNICODE(objPtr);
- src = unicodePtr->chars;
- length = unicodePtr->numChars * sizeof(Tcl_UniChar);
-
- /*
- * How much space will string rep need?
- */
-
- size = 0;
- for (i = 0; i < unicodePtr->numChars; i++) {
- size += Tcl_UniCharToUtf((int) src[i], dummy);
- }
-
- dst = (char *) ckalloc((unsigned) (size + 1));
- objPtr->bytes = dst;
- objPtr->length = size;
-
- for (i = 0; i < unicodePtr->numChars; i++) {
- dst += Tcl_UniCharToUtf(src[i], dst);
- }
- *dst = '\0';
-}
-
-/*
- *---------------------------------------------------------------------------
- *
- * SetOptUnicodeFromAny --
- *
- * Generate the optimized Unicode internal rep from the string rep.
- *
- * Results:
- * None.
- *
- * Side effects:
- * The Unicode ojbect is opitmized for the case where each UTF char in
- * a string is only one byte. In this case, we store the value of
- * numChars, but we don't copy the bytes to the unicodeObj->chars.
- * Before accessing obj->chars, check if all chars are 1 byte long.
- *
- *---------------------------------------------------------------------------
- */
-
-static void
-SetOptUnicodeFromAny(objPtr, numChars)
- Tcl_Obj *objPtr; /* The object to convert to type Unicode. */
- int numChars;
-{
- Tcl_ObjType *typePtr;
- Unicode *unicodePtr;
-
- typePtr = objPtr->typePtr;
- if ((typePtr != NULL) && (typePtr->freeIntRepProc) != NULL) {
- (*typePtr->freeIntRepProc)(objPtr);
- }
- objPtr->typePtr = &tclUnicodeType;
-
- /*
- * Allocate enough space for the basic Unicode structure.
- */
-
- unicodePtr = (Unicode *) ckalloc(sizeof(Unicode));
- unicodePtr->numChars = numChars;
- unicodePtr->allocated = 0;
- SET_UNICODE(objPtr, unicodePtr);
-}
-
-/*
- *---------------------------------------------------------------------------
- *
- * SetFullUnicodeFromAny --
- *
- * Generate the full (non-optimized) Unicode internal rep from the
- * string rep.
- *
- * Results:
- * None.
- *
- * Side effects:
- * The Unicode internal rep will contain a copy of the string "src" in
- * unicode format.
- *
- *---------------------------------------------------------------------------
- */
-
-static void
-SetFullUnicodeFromAny(objPtr, src, numBytes, numChars)
- Tcl_Obj *objPtr; /* The object to convert to type Unicode. */
- char *src;
- int numBytes;
- int numChars;
-{
- Tcl_ObjType *typePtr;
- Unicode *unicodePtr;
- char *srcEnd;
- Tcl_UniChar *dst;
- size_t length = (numChars + 1) * sizeof(Tcl_UniChar);
-
- unicodePtr = (Unicode *) ckalloc(UNICODE_SIZE(length));
- srcEnd = src + numBytes;
-
- for (dst = unicodePtr->chars; src < srcEnd; dst++) {
- src += Tcl_UtfToUniChar(src, dst);
- }
- *dst = 0;
-
- unicodePtr->numChars = numChars;
- unicodePtr->allocated = length;
-
- typePtr = objPtr->typePtr;
- if ((typePtr != NULL) && (typePtr->freeIntRepProc) != NULL) {
- (*typePtr->freeIntRepProc)(objPtr);
- }
- objPtr->typePtr = &tclUnicodeType;
- SET_UNICODE(objPtr, unicodePtr);
-}
-
-/*
- *---------------------------------------------------------------------------
- *
- * SetUnicodeFromAny --
- *
- * Generate the Unicode internal rep from the string rep.
- *
- * Results:
- * The return value is always TCL_OK.
- *
- * Side effects:
- * A Unicode object is stored as the internal rep of objPtr. The Unicode
- * object is opitmized for the case where each UTF char in a string is
- * only one byte. In this case, we store the value of numChars, but we
- * don't copy the bytes to the unicodeObj->chars. Before accessing
- * obj->chars, check if all chars are 1 byte long.
- *
- *---------------------------------------------------------------------------
- */
-
-static int
-SetUnicodeFromAny(interp, objPtr)
- Tcl_Interp *interp; /* Not used. */
- Tcl_Obj *objPtr; /* The object to convert to type Unicode. */
-{
- int numBytes, numChars;
- char *src;
-
- if (objPtr->typePtr != &tclUnicodeType) {
- src = Tcl_GetStringFromObj(objPtr, &numBytes);
-
- numChars = Tcl_NumUtfChars(src, numBytes);
- if (numChars == numBytes) {
- SetOptUnicodeFromAny(objPtr, numChars);
- } else {
- SetFullUnicodeFromAny(objPtr, src, numBytes, numChars);
- }
- }
- return TCL_OK;
-}
-
-/*
- *----------------------------------------------------------------------
- *
- * FreeUnicodeInternalRep --
- *
- * Deallocate the storage associated with a Unicode data object's
- * internal representation.
- *
- * Results:
- * None.
- *
- * Side effects:
- * Frees memory.
- *
- *----------------------------------------------------------------------
- */
-
-static void
-FreeUnicodeInternalRep(objPtr)
- Tcl_Obj *objPtr; /* Object with internal rep to free. */
-{
- ckfree((char *) GET_UNICODE(objPtr));
-}