* tests/string.test:

* generic/tclVar.c (Tcl_SetVar2Ex): * generic/tclStringObj.c (Tcl_AppendObjToObj): * generic/tclCmdMZ.c (Tcl_StringObjCmd): optimized the string index, string length, string range, and append command in cases where the object's internal rep is a bytearray. Objects with other internal reps are converted to have the new unicode internal rep. * unix/Makefile.in: * win/Makefile.in: * win/Makefile.vc: * tests/unicode.test: * generic/tclInt.h: * generic/tclObj.c: * generic/tclUnicodeObj.c: added a new object type to store the unicode representation of a string. * generic/tclTestObj.c: added the objtype option to the testobj command. This option returns the name of the type of internal rep an object has.
author: hershey <hershey> 1999-06-08 02:59:23 (GMT)
committer: hershey <hershey> 1999-06-08 02:59:23 (GMT)
commit: 0e53e351cd3c0bdf51b84e459262c47f913c9a97 (patch)
tree: 87cee8e23f1c9f621f583c5d97e3e62979935fa9 /generic
parent: b2759d9c544b22071eca46475d110812304e8faa (diff)
download: tcl-0e53e351cd3c0bdf51b84e459262c47f913c9a97.zip
tcl-0e53e351cd3c0bdf51b84e459262c47f913c9a97.tar.gz
tcl-0e53e351cd3c0bdf51b84e459262c47f913c9a97.tar.bz2
7 files changed, 907 insertions, 57 deletions
diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c
index 19b9ece..ebea22b 100644
--- a/generic/tclCmdMZ.c
+++ b/generic/tclCmdMZ.c
@@ -13,7 +13,7 @@
  * See the file "license.terms" for information on usage and redistribution
  * of this file, and for a DISCLAIMER OF ALL WARRANTIES.
  *
- * RCS: @(#) $Id: tclCmdMZ.c,v 1.12 1999/06/03 18:43:30 stanton Exp $
+ * RCS: @(#) $Id: tclCmdMZ.c,v 1.13 1999/06/08 02:59:23 hershey Exp $
  */
 
 #include "tclInt.h"
@@ -1009,32 +1009,47 @@ Tcl_StringObjCmd(dummy, interp, objc, objv)
 	}
 	case STR_INDEX: {
 	    int index;
+	    char buf[TCL_UTF_MAX];
+	    Tcl_UniChar unichar;
 
 	    if (objc != 4) {
 	        Tcl_WrongNumArgs(interp, 2, objv, "string charIndex");
 		return TCL_ERROR;
 	    }
-	    string1 = Tcl_GetStringFromObj(objv[2], &length1);
-	    /*
-	     * establish what 'end' really means
-	     */
-	    length2 = Tcl_NumUtfChars(string1, length1);
-	    if (TclGetIntForIndex(interp, objv[3], length2 - 1,
-				  &index) != TCL_OK) {
-	      return TCL_ERROR;
-	    }
+
 	    /*
-	     * index must be between 0 and the UTF length to be valid
+	     * If we have a ByteArray object, index the byte array itself
+	     * since it contains one byte per character.  Otherwise, use
+	     * the Unicode string rep to get the index'th char.  An index
+	     * that is out of range leaves the result untouched.
 	     */
-	    if ((index >= 0) && (index < length2)) {
-		if (length1 == length2) {
-		    /* no unicode chars */
-		    Tcl_SetStringObj(resultPtr, string1+index, 1);
-		} else {
-		    char buf[TCL_UTF_MAX];
 
-		    length2 = Tcl_UniCharToUtf(Tcl_UniCharAtIndex(string1,
-								  index), buf);
+	    if (objv[2]->typePtr == &tclByteArrayType) {
+		string1 = Tcl_GetByteArrayFromObj(objv[2], &length1);
+		if (TclGetIntForIndex(interp, objv[3], length1 - 1,
+			&index) != TCL_OK) {
+		    return TCL_ERROR;
+		}
+		if ((index >= 0) && (index < length1)) {
+		    Tcl_SetStringObj(resultPtr, &string1[index], 1);
+		}
+	    } else {
+		string1 = Tcl_GetStringFromObj(objv[2], &length1);
+		
+		/*
+		 * convert to Unicode internal rep to calculate what
+		 * 'end' really means.
+		 */
+
+		length2 = TclGetUnicodeLengthFromObj(objv[2]);
+    
+		if (TclGetIntForIndex(interp, objv[3], length2 - 1,
+			&index) != TCL_OK) {
+		    return TCL_ERROR;
+		}
+		if ((index >= 0) && (index < length2)) {
+		    unichar = TclGetUniCharFromObj(objv[2], index);
+		    length2 = Tcl_UniCharToUtf((int)unichar, buf);
 		    Tcl_SetStringObj(resultPtr, buf, length2);
 		}
 	    }
@@ -1400,16 +1415,16 @@ Tcl_StringObjCmd(dummy, interp, objc, objv)
 		/*
 		 * If we have a ByteArray object, avoid recomputing the
 		 * string since the byte array contains one byte per
-		 * character. 
+		 * character.  Otherwise, use the Unicode string rep to
+		 * calculate the length.
 		 */
 
 		if (objv[2]->typePtr == &tclByteArrayType) {
 		    (void) Tcl_GetByteArrayFromObj(objv[2], &length1);
 		    Tcl_SetIntObj(resultPtr, length1);
 		} else {
-		    string1 = Tcl_GetStringFromObj(objv[2], &length1);
-		    Tcl_SetIntObj(resultPtr, Tcl_NumUtfChars(string1,
-			    length1));
+		    Tcl_SetIntObj(resultPtr,
+			    TclGetUnicodeLengthFromObj(objv[2]));
 		}
 	    }
 	    break;
@@ -1550,28 +1565,64 @@ Tcl_StringObjCmd(dummy, interp, objc, objv)
 		return TCL_ERROR;
 	    }
 
-	    string1 = Tcl_GetStringFromObj(objv[2], &length1);
-	    length1 = Tcl_NumUtfChars(string1, length1) - 1;
-	    if (TclGetIntForIndex(interp, objv[3], length1,
-		    &first) != TCL_OK) {
-		return TCL_ERROR;
-	    }
-	    if (TclGetIntForIndex(interp, objv[4], length1,
-		    &last) != TCL_OK) {
-		return TCL_ERROR;
-	    }
-	    if (first < 0) {
-		first = 0;
-	    }
-	    if (last >= length1) {
-		last = length1;
-	    }
-	    if (last >= first) {
-		char *start, *end;
+	    /*
+	     * If we have a ByteArray object, avoid indexing in the
+	     * Utf string since the byte array contains one byte per
+	     * character.  Otherwise, use the Unicode string rep to
+	     * get the range.
+	     */
 
-		start = Tcl_UtfAtIndex(string1, first);
-		end = Tcl_UtfAtIndex(start, last - first + 1);
-	        Tcl_SetStringObj(resultPtr, start, end - start);
+	    if (objv[2]->typePtr == &tclByteArrayType) {
+
+		string1 = Tcl_GetByteArrayFromObj(objv[2], &length1);
+
+		if (TclGetIntForIndex(interp, objv[3], length1 - 1,
+			&first) != TCL_OK) {
+		    return TCL_ERROR;
+		}
+		if (TclGetIntForIndex(interp, objv[4], length1 - 1,
+			&last) != TCL_OK) {
+		    return TCL_ERROR;
+		}
+		if (first < 0) {
+		    first = 0;
+		}
+		if (last >= length1 - 1) {
+		    last = length1 - 1;
+		}
+		if (last >= first) {
+		    int numBytes = last - first + 1;
+		    resultPtr = Tcl_NewByteArrayObj(&string1[first], numBytes);
+		    Tcl_SetObjResult(interp, resultPtr);
+		}
+	    } else {
+		string1 = Tcl_GetStringFromObj(objv[2], &length1);
+		
+		/*
+		 * Convert to Unicode internal rep.  length2 is the index
+		 * of the last char (not byte), i.e. what 'end' refers to.
+		 */
+
+		length2 = TclGetUnicodeLengthFromObj(objv[2]) - 1;
+    
+		if (TclGetIntForIndex(interp, objv[3], length2,
+			&first) != TCL_OK) {
+		    return TCL_ERROR;
+		}
+		if (TclGetIntForIndex(interp, objv[4], length2,
+			&last) != TCL_OK) {
+		    return TCL_ERROR;
+		}
+		if (first < 0) {
+		    first = 0;
+		}
+		if (last >= length2) {
+		    last = length2;
+		}
+		if (last >= first) {
+		    resultPtr = TclGetRangeFromObj(objv[2], first, last);
+		    Tcl_SetObjResult(interp, resultPtr);
+		}
 	    }
 	    break;
 	}
diff --git a/generic/tclInt.h b/generic/tclInt.h
index 68614bc..ed9002d 100644
--- a/generic/tclInt.h
+++ b/generic/tclInt.h
@@ -11,7 +11,7 @@
  * See the file "license.terms" for information on usage and redistribution
  * of this file, and for a DISCLAIMER OF ALL WARRANTIES.
  *
- * RCS: @(#) $Id: tclInt.h,v 1.29 1999/05/13 01:50:32 stanton Exp $
+ * RCS: @(#) $Id: tclInt.h,v 1.30 1999/06/08 02:59:24 hershey Exp $
  */
 
 #ifndef _TCLINT
@@ -1509,6 +1509,7 @@ extern Tcl_ObjType	tclIntType;
 extern Tcl_ObjType	tclListType;
 extern Tcl_ObjType	tclProcBodyType;
 extern Tcl_ObjType	tclStringType;
+extern Tcl_ObjType	tclUnicodeType;
 
 /*
  * The head of the list of free Tcl objects, and the total number of Tcl
@@ -1542,6 +1543,9 @@ EXTERN int		TclAccess _ANSI_ARGS_((CONST char *path,
 EXTERN int		TclAccessDeleteProc _ANSI_ARGS_((TclAccessProc_ *proc));
 EXTERN int		TclAccessInsertProc _ANSI_ARGS_((TclAccessProc_ *proc));
 EXTERN void		TclAllocateFreeObjects _ANSI_ARGS_((void));
+EXTERN Tcl_Obj *	TclAppendObjToUnicodeObj _ANSI_ARGS_((
+	    		    register Tcl_Obj *targetObjPtr,
+	    		    register Tcl_Obj *srcObjPtr));
 EXTERN int		TclArraySet _ANSI_ARGS_((Tcl_Interp *interp,
 			    Tcl_Obj *arrayNameObj, Tcl_Obj *arrayElemObj));
 EXTERN int		TclCleanupChildren _ANSI_ARGS_((Tcl_Interp *interp,
@@ -1634,6 +1638,12 @@ EXTERN int		TclGetOpenMode _ANSI_ARGS_((Tcl_Interp *interp,
 			    char *string, int *seekFlagPtr));
 EXTERN Tcl_Command	TclGetOriginalCommand _ANSI_ARGS_((
 			    Tcl_Command command));
+EXTERN Tcl_Obj*         TclGetRangeFromObj _ANSI_ARGS_((Tcl_Obj *objPtr,
+			    int first, int last));
+EXTERN Tcl_UniChar      TclGetUniCharFromObj _ANSI_ARGS_((Tcl_Obj *objPtr,
+			    int index));
+EXTERN int		TclGetUnicodeLengthFromObj _ANSI_ARGS_((
+			    Tcl_Obj *objPtr));
 EXTERN int		TclGlob _ANSI_ARGS_((Tcl_Interp *interp,
 			    char *pattern, int noComplain));
 EXTERN int		TclGlobalInvoke _ANSI_ARGS_((Tcl_Interp *interp,
diff --git a/generic/tclObj.c b/generic/tclObj.c
index f1858f8..423df28 100644
--- a/generic/tclObj.c
+++ b/generic/tclObj.c
@@ -5,11 +5,12 @@
  * 	many Tcl commands.
  *
  * Copyright (c) 1995-1997 Sun Microsystems, Inc.
+ * Copyright (c) 1999 by Scriptics Corporation.
  *
  * See the file "license.terms" for information on usage and redistribution
  * of this file, and for a DISCLAIMER OF ALL WARRANTIES.
  *
- * RCS: @(#) $Id: tclObj.c,v 1.7 1999/05/28 23:02:33 stanton Exp $
+ * RCS: @(#) $Id: tclObj.c,v 1.8 1999/06/08 02:59:25 hershey Exp $
  */
 
 #include "tclInt.h"
@@ -137,6 +138,7 @@ TclInitObjSubsystem()
     Tcl_RegisterObjType(&tclListType);
     Tcl_RegisterObjType(&tclByteCodeType);
     Tcl_RegisterObjType(&tclProcBodyType);
+    Tcl_RegisterObjType(&tclUnicodeType);
 
 #ifdef TCL_COMPILE_STATS
     Tcl_MutexLock(&tclObjMutex);
diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c
index ea0cbd7..c70bcb9 100644
--- a/generic/tclStringObj.c
+++ b/generic/tclStringObj.c
@@ -10,11 +10,12 @@
  *	representation are called "expandable string objects".
  *
  * Copyright (c) 1995-1997 Sun Microsystems, Inc.
+ * Copyright (c) 1999 by Scriptics Corporation.
  *
  * See the file "license.terms" for information on usage and redistribution
  * of this file, and for a DISCLAIMER OF ALL WARRANTIES.
  *
- * RCS: @(#) $Id: tclStringObj.c,v 1.6 1999/05/07 20:07:35 stanton Exp $
+ * RCS: @(#) $Id: tclStringObj.c,v 1.7 1999/06/08 02:59:25 hershey Exp $
  */
 
 #include "tclInt.h"
@@ -382,11 +383,7 @@ Tcl_AppendObjToObj(objPtr, appendObjPtr)
     Tcl_Obj *objPtr;		/* Points to the object to append to. */
     Tcl_Obj *appendObjPtr;	/* Object to append. */
 {
-    int length;
-    char *stringRep;
-
-    stringRep = Tcl_GetStringFromObj(appendObjPtr, &length);
-    Tcl_AppendToObj(objPtr, stringRep, length);
+    TclAppendObjToUnicodeObj(objPtr, appendObjPtr);
 }
 
 /*
diff --git a/generic/tclTestObj.c b/generic/tclTestObj.c
index d604c5b..533b967 100644
--- a/generic/tclTestObj.c
+++ b/generic/tclTestObj.c
@@ -7,11 +7,12 @@
  *	applications; they're only used for testing.
  *
  * Copyright (c) 1995-1998 Sun Microsystems, Inc.
+ * Copyright (c) 1999 by Scriptics Corporation.
  *
  * See the file "license.terms" for information on usage and redistribution
  * of this file, and for a DISCLAIMER OF ALL WARRANTIES.
  *
- * RCS: @(#) $Id: tclTestObj.c,v 1.3 1999/04/16 00:46:54 stanton Exp $
+ * RCS: @(#) $Id: tclTestObj.c,v 1.4 1999/06/08 02:59:26 hershey Exp $
  */
 
 #include "tclInt.h"
@@ -774,6 +775,23 @@ TestobjCmd(clientData, interp, objc, objv)
         }
         SetVarToObj(varIndex, Tcl_NewObj());
 	Tcl_SetObjResult(interp, varPtr[varIndex]);
+    } else if (strcmp(subCmd, "objtype") == 0) {
+	char *typeName;
+
+	/*
+	 * return an object containing the name of the argument's type
+	 * of internal rep.  If none exists, return "none".
+	 */
+	
+        if (objc != 3) {
+            goto wrongNumArgs;
+        }
+	if (objv[2]->typePtr == NULL) {
+	    Tcl_SetObjResult(interp, Tcl_NewStringObj("none", -1));
+	} else {
+	    typeName = objv[2]->typePtr->name;
+	    Tcl_SetObjResult(interp, Tcl_NewStringObj(typeName, -1));
+	}
     } else if (strcmp(subCmd, "refcount") == 0) {
 	char buf[TCL_INTEGER_SPACE];
 
@@ -810,7 +828,8 @@ TestobjCmd(clientData, interp, objc, objv)
         if (objc != 2) {
             goto wrongNumArgs;
         }
-	if (Tcl_AppendAllObjTypes(interp, Tcl_GetObjResult(interp)) != TCL_OK) {
+	if (Tcl_AppendAllObjTypes(interp,
+		Tcl_GetObjResult(interp)) != TCL_OK) {
 	    return TCL_ERROR;
 	}
     } else {
@@ -818,7 +837,7 @@ TestobjCmd(clientData, interp, objc, objv)
 		"bad option \"",
 		Tcl_GetString(objv[1]),
 		"\": must be assign, convert, duplicate, freeallvars, ",
-		"newobj, objcount, refcount, type, or types",
+		"newobj, objcount, objtype, refcount, type, or types",
 		(char *) NULL);
 	return TCL_ERROR;
     }
diff --git a/generic/tclUnicodeObj.c b/generic/tclUnicodeObj.c
new file mode 100644
index 0000000..869b8c7
--- /dev/null
+++ b/generic/tclUnicodeObj.c
@@ -0,0 +1,771 @@
+/* 
+ * tclUnicodeObj.c --
+ *
+ *	This file contains the implementation of the Unicode internal
+ *	representation of Tcl objects.
+ *
+ * Copyright (c) 1999 by Scriptics Corporation.
+ *
+ * See the file "license.terms" for information on usage and redistribution
+ * of this file, and for a DISCLAIMER OF ALL WARRANTIES.
+ *
+ * RCS: @(#) $Id: tclUnicodeObj.c,v 1.2 1999/06/08 02:59:27 hershey Exp $
+ */
+
+#include <math.h>
+#include "tclInt.h"
+#include "tclPort.h"
+
+/*
+ * Prototypes for local procedures defined in this file:
+ */
+
+static void		DupUnicodeInternalRep _ANSI_ARGS_((Tcl_Obj *srcPtr,
+			    Tcl_Obj *copyPtr));
+static void		FreeUnicodeInternalRep _ANSI_ARGS_((Tcl_Obj *objPtr));
+static void		UpdateStringOfUnicode _ANSI_ARGS_((Tcl_Obj *objPtr));
+static int		SetUnicodeFromAny _ANSI_ARGS_((Tcl_Interp *interp,
+			    Tcl_Obj *objPtr));
+
+static int		AllSingleByteChars _ANSI_ARGS_((Tcl_Obj *objPtr));
+static void		TclAppendUniCharStrToObj _ANSI_ARGS_((
+	    		    register Tcl_Obj *objPtr, Tcl_UniChar *unichars,
+			    int numChars));
+static Tcl_Obj *	TclNewUnicodeObj _ANSI_ARGS_((Tcl_UniChar *unichars,
+			    int numChars));
+static void		SetOptUnicodeFromAny _ANSI_ARGS_((Tcl_Obj *objPtr,
+			    int numChars));
+
+/*
+ * The following object type represents a Unicode string.  A Unicode string
+ * is an internationalized string.  Conceptually, a Unicode string is an
+ * array of 16-bit quantities organized as a sequence of properly formed
+ * UTF-8 characters.  There is a one-to-one map between Unicode and UTF
+ * characters.  The Unicode object is optimized for the case where each UTF
+ * char in a string is only one byte.  In this case, we store the value of
+ * numChars, but we don't copy the bytes to the unicodeObj->chars.  Before
+ * accessing obj->chars, check if unicodeObj->numChars == obj->length.
+ */
+
+Tcl_ObjType tclUnicodeType = {
+    "unicode",
+    FreeUnicodeInternalRep,
+    DupUnicodeInternalRep,
+    UpdateStringOfUnicode,
+    SetUnicodeFromAny
+};
+
+/*
+ * The following structure is the internal rep for a Unicode object.
+ * Keeps track of how much memory has been used and how much has been
+ * allocated for the Unicode to enable growing and shrinking of the
+ * Unicode object with fewer mallocs.  
+ */
+
+typedef struct Unicode {
+    int numChars;		/* The number of chars in the unicode
+				 * string. */
+    int used;			/* The number of bytes used in the unicode
+				 * string. */
+    int allocated;		/* The amount of space actually allocated
+				 * minus 1 byte. */
+    unsigned char chars[4];	/* The array of chars.  The actual size of
+				 * this field depends on the 'allocated' field
+				 * above. */
+} Unicode;
+
+#define UNICODE_SIZE(len)	\
+		((unsigned) (sizeof(Unicode) - 4 + (len)))
+#define GET_UNICODE(objPtr) \
+		((Unicode *) (objPtr)->internalRep.otherValuePtr)
+#define SET_UNICODE(objPtr, unicodePtr) \
+		(objPtr)->internalRep.otherValuePtr = (VOID *) (unicodePtr)
+
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * TclGetUnicodeLengthFromObj --
+ *
+ *	Get the length of the Unicode string from the Tcl object.  If
+ *	the object is not already a Unicode object, an attempt will be
+ *	made to convert it to one.
+ *
+ * Results:
+ *	The number of characters in the Unicode string.
+ *
+ * Side effects:
+ *	Frees old internal rep.  Allocates memory for new internal rep.
+ *
+ *----------------------------------------------------------------------
+ */
+
+int
+TclGetUnicodeLengthFromObj(objPtr)
+    Tcl_Obj *objPtr;		/* The Unicode object. */
+{
+    int length;
+    Unicode *unicodePtr;
+    
+    SetUnicodeFromAny(NULL, objPtr);
+    unicodePtr = GET_UNICODE(objPtr);
+
+    length = unicodePtr->numChars;
+    return length;
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * TclGetUniCharFromObj --
+ *
+ *	Get the index'th Unicode character from the Unicode object.  If
+ *	the object is not already a Unicode object, an attempt will be
+ *	made to convert it to one.  The index is assumed to be in the
+ *	appropriate range.
+ *
+ * Results:
+ *	Returns the index'th Unicode character in the Object.
+ *
+ * Side effects:
+ *	Fills unichar with the index'th Unicode character.
+ *
+ *----------------------------------------------------------------------
+ */
+
+Tcl_UniChar
+TclGetUniCharFromObj(objPtr, index)
+    Tcl_Obj *objPtr;		/* The Unicode object. */
+    int index;			/* Get the index'th character. */
+{
+    Tcl_UniChar *unicharPtr, unichar;
+    Unicode *unicodePtr;
+    int length;
+    
+    SetUnicodeFromAny(NULL, objPtr);
+    unicodePtr = GET_UNICODE(objPtr);
+    length = objPtr->length;
+    
+    if (AllSingleByteChars(objPtr)) {
+	int length;
+	char *str;
+
+	/*
+	 * All of the characters in the Utf string are 1 byte chars,
+	 * so we don't store the unicode char.  We get the Utf string
+	 * and convert the index'th byte to a Unicode character.
+	 */
+	
+	str = Tcl_GetStringFromObj(objPtr, &length);
+	Tcl_UtfToUniChar(&str[index], &unichar);	
+    } else {
+	unicharPtr = (Tcl_UniChar *)unicodePtr->chars;
+	unichar = unicharPtr[index];
+    }
+    return unichar;
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * TclGetRangeFromObj --
+ *
+ *	Create a Tcl Object that contains the chars between first and
+ *	last of the object indicated by "objPtr".  If the object is not
+ *	already a Unicode object, an attempt will be made to convert it
+ *	to one.  The first and last indices are assumed to be in the
+ *	appropriate range.
+ *
+ * Results:
+ *	Returns a new Tcl Object of either "string" or "unicode" type,
+ *	containing the range of chars.
+ *
+ * Side effects:
+ *	Changes the internal rep of "objPtr" to unicode.
+ *
+ *----------------------------------------------------------------------
+ */
+
+Tcl_Obj*
+TclGetRangeFromObj(objPtr, first, last)
+    Tcl_Obj *objPtr;		/* The Tcl object to find the range of. */
+    int first;			/* First index of the range. */
+    int last;			/* Last index of the range. */
+{
+    Tcl_Obj *newObjPtr;		/* The new object holding the range. */
+    Tcl_UniChar *unicharPtr;
+    Unicode *unicodePtr;
+    
+    SetUnicodeFromAny(NULL, objPtr);
+    unicodePtr = GET_UNICODE(objPtr);
+    
+    if (!AllSingleByteChars(objPtr)) {
+	/*
+	 * The unichars are stored, so copy the range into a new object.
+	 */
+	unicharPtr = (Tcl_UniChar *)unicodePtr->chars;
+	newObjPtr = TclNewUnicodeObj(&unicharPtr[first], last-first+1);
+    } else {
+	int length;
+	char *str;
+
+	/*
+	 * All of the characters in the Utf string are 1 byte chars,
+	 * so we don't store the unicode char.  Create a new string
+	 * object containing the specified range of chars.
+	 */
+	
+	str = Tcl_GetStringFromObj(objPtr, &length);
+	newObjPtr = Tcl_NewStringObj(&str[first], last-first+1);	
+    }
+    return newObjPtr;
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * TclAppendObjToUnicodeObj --
+ *
+ *	This procedure appends the contents of "srcObjPtr" to the Unicode
+ *	object "targetObjPtr".
+ *
+ * Results:
+ *	Returns targetObjPtr, or a duplicate of it if it was shared.
+ *
+ * Side effects:
+ *	If srcObjPtr doesn't have an internal rep, then it is given a
+ *	Unicode internal rep.
+ *
+ *----------------------------------------------------------------------
+ */
+
+Tcl_Obj *
+TclAppendObjToUnicodeObj(targetObjPtr, srcObjPtr)
+    register Tcl_Obj *targetObjPtr;	/* Points to the object to
+					 * append to. */
+    register Tcl_Obj *srcObjPtr;	/* Points to the object to
+					 * append from. */
+{
+    int numBytes, numChars;
+    Tcl_Obj *resultObjPtr;
+    char *utfSrcStr;
+    Tcl_UniChar *unicharSrcStr;
+    Unicode *unicodePtr;
+    Tcl_DString dsPtr;
+    
+    /*
+     * Duplicate the target if it is shared.
+     * Change the result's internal rep to Unicode object.
+     */
+    
+    if (Tcl_IsShared(targetObjPtr)) {
+	resultObjPtr = Tcl_DuplicateObj(targetObjPtr);
+    } else {
+	resultObjPtr = targetObjPtr;
+    }
+    SetUnicodeFromAny(NULL, resultObjPtr);
+
+    /*
+     * Case where target chars are 1 byte long:
+     * If src obj is of "string" or null type, then convert it to "unicode"
+     * type.  Src objs of other types (such as int) are left intact to keep
+     * them from shimmering between types.  If the src obj is a unichar obj,
+     * and all src chars are also 1 byte long, the src string is appended to
+     * the target "unicode" obj, and the target obj maintains its "optimized"
+     * status.
+     */
+
+    if (AllSingleByteChars(resultObjPtr)) {
+
+	int length;
+	char *stringRep;
+
+	if (srcObjPtr->typePtr == &tclStringType
+		|| srcObjPtr->typePtr == NULL) {
+	    SetUnicodeFromAny(NULL, srcObjPtr);
+	}
+
+	stringRep = Tcl_GetStringFromObj(srcObjPtr, &length);
+	Tcl_AppendToObj(resultObjPtr, stringRep, length);
+
+	if ((srcObjPtr->typePtr == &tclUnicodeType)
+		&& (AllSingleByteChars(srcObjPtr))) {
+	    SetOptUnicodeFromAny(resultObjPtr, resultObjPtr->length);
+	}
+	return resultObjPtr;
+    }
+
+    /*
+     * Extract a unicode string from "unicode" or "string" type objects.
+     * Extract the utf string from non-unicode objects, and convert the
+     * utf string to unichar string locally.
+     * If the src obj is a "string" obj, convert it to "unicode" type.
+     * Src objs of other types (such as int) are left intact to keep
+     * them from shimmering between types.
+     */
+
+    Tcl_DStringInit(&dsPtr);
+    if (srcObjPtr->typePtr == &tclStringType || srcObjPtr->typePtr == NULL) {
+	SetUnicodeFromAny(NULL, srcObjPtr);
+    }
+    if (srcObjPtr->typePtr == &tclUnicodeType) {
+	if (AllSingleByteChars(srcObjPtr)) {
+
+	    unicodePtr = GET_UNICODE(srcObjPtr);
+	    numChars = unicodePtr->numChars;
+
+	    utfSrcStr = Tcl_GetStringFromObj(srcObjPtr, &numBytes);
+	    unicharSrcStr = (Tcl_UniChar *)Tcl_UtfToUniCharDString(utfSrcStr,
+		    numBytes, &dsPtr);
+	} else {
+	    unicodePtr = GET_UNICODE(srcObjPtr);
+	    numChars = unicodePtr->numChars;
+	    unicharSrcStr = (Tcl_UniChar *)unicodePtr->chars;
+	}
+    } else {
+	utfSrcStr = Tcl_GetStringFromObj(srcObjPtr, &numBytes);
+	numChars = Tcl_NumUtfChars(utfSrcStr, numBytes);
+	unicharSrcStr = (Tcl_UniChar *)Tcl_UtfToUniCharDString(utfSrcStr,
+		numBytes, &dsPtr);
+    }
+
+    /*
+     * Append the unichar src string to the result object.  An empty
+     * src falls through so that the DString is always released.
+     */
+
+    if (numChars != 0) {
+	TclAppendUniCharStrToObj(resultObjPtr, unicharSrcStr, numChars);
+    }
+    Tcl_DStringFree(&dsPtr);
+    return resultObjPtr;
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * TclAppendUniCharStrToObj --
+ *
+ *	This procedure appends the "numNewChars" Unicode characters in
+ *	"unichars" to the Unicode object "objPtr".
+ *
+ * Results:
+ *	None.
+ *
+ * Side effects:
+ *	Invalidates the string rep of objPtr.  The Unicode internal rep
+ *	may be reallocated to make room for the new characters.
+ *
+ *----------------------------------------------------------------------
+ */
+
+void
+TclAppendUniCharStrToObj(objPtr, unichars, numNewChars)
+    register Tcl_Obj *objPtr;	/* Points to the object to append to. */
+    Tcl_UniChar *unichars;	/* The unicode string to append to the
+			         * object. */
+    int numNewChars;		/* Number of chars in "unichars". */
+{
+    Unicode *unicodePtr;
+    int usedBytes, numNewBytes, totalNumBytes, totalNumChars;
+
+    /*
+     * Invalidate the StringRep.
+     */
+
+    Tcl_InvalidateStringRep(objPtr);
+
+    unicodePtr = GET_UNICODE(objPtr);
+    
+    usedBytes = unicodePtr->used;
+    totalNumChars = numNewChars + unicodePtr->numChars;
+    totalNumBytes = totalNumChars * sizeof(Tcl_UniChar);
+    numNewBytes = numNewChars * sizeof(Tcl_UniChar);
+    
+    if (unicodePtr->allocated < totalNumBytes) {
+	int allocatedBytes = totalNumBytes * 2;
+    
+	/*
+	 * There isn't currently enough space in the Unicode
+	 * representation so allocate additional space.  In fact,
+	 * overallocate so that there is room for future growth without
+	 * having to reallocate again.  The new chars are copied in once,
+	 * below.  Note that ckrealloc may move the buffer, so "unichars"
+	 * must not point into objPtr's own internal rep.
+	 */
+
+	unicodePtr = (Unicode *) ckrealloc((char *) unicodePtr,
+		UNICODE_SIZE(allocatedBytes));
+
+	unicodePtr->allocated = allocatedBytes;	
+	SET_UNICODE(objPtr, unicodePtr);
+    }
+    
+    memcpy((VOID *) (unicodePtr->chars + usedBytes),
+	    (VOID *) unichars, (size_t) numNewBytes);
+    unicodePtr->used = totalNumBytes;
+    unicodePtr->numChars = totalNumChars;
+}
+
+/*
+ *---------------------------------------------------------------------------
+ *
+ * TclNewUnicodeObj --
+ *
+ *	This procedure creates a new Unicode object and initializes it
+ *	from a copy of the given Unicode string.  The characters are
+ *	always copied into the new object's internal rep.
+ *
+ * Results:
+ *	The newly created object is returned.  This object will have no
+ *	initial string representation.  The returned object has a ref count
+ *	of 0.
+ *
+ * Side effects:
+ *	Memory allocated for new object and copy of Unicode argument.
+ *
+ *---------------------------------------------------------------------------
+ */
+
+Tcl_Obj *
+TclNewUnicodeObj(unichars, numChars)
+    Tcl_UniChar *unichars;	/* The unicode string used to initialize
+				 * the new object. */
+    int numChars;		/* Number of characters in the unicode
+				 * string. */
+{
+    Tcl_Obj *objPtr;
+    Unicode *unicodePtr;
+    int numBytes;
+
+    numBytes = numChars * sizeof(Tcl_UniChar);
+    
+    TclNewObj(objPtr);
+    objPtr->bytes = NULL;
+    objPtr->typePtr = &tclUnicodeType;
+
+    unicodePtr = (Unicode *) ckalloc(UNICODE_SIZE(numBytes));
+    unicodePtr->used = numBytes;
+    unicodePtr->numChars = numChars;
+    unicodePtr->allocated = numBytes;
+    memcpy((VOID *) unicodePtr->chars, (VOID *) unichars, (size_t) numBytes);
+    SET_UNICODE(objPtr, unicodePtr);
+    return objPtr;
+}
+
+/*
+ *---------------------------------------------------------------------------
+ *
+ * AllSingleByteChars --
+ *
+ *	Determine whether every character of a Unicode object occupies
+ *	a single byte in its Utf string rep, by comparing the number of
+ *	chars in the internal rep with the length of the string rep.
+ *
+ * Results:
+ *	Returns 1 if the two counts are equal, 0 otherwise.
+ *
+ * Side effects:
+ *	None.
+ *
+ *---------------------------------------------------------------------------
+ */
+
+static int
+AllSingleByteChars(objPtr)
+    Tcl_Obj *objPtr;		/* Object whose char lengths to check. */
+{
+    Unicode *unicodePtr;
+    int numBytes, numChars;
+
+    unicodePtr = GET_UNICODE(objPtr);
+    numChars = unicodePtr->numChars;
+    numBytes = objPtr->length;
+
+    if (numChars == numBytes) {
+	return 1;
+    } else {
+	return 0;
+    }
+}
+
+/*
+ *---------------------------------------------------------------------------
+ *
+ * DupUnicodeInternalRep --
+ *
+ *	Initialize the internal representation of a Unicode Tcl_Obj
+ *	to a copy of the internal representation of an existing Unicode
+ *	object. 
+ *
+ * Results:
+ *	None.
+ *
+ * Side effects:
+ *	Allocates memory.
+ *
+ *---------------------------------------------------------------------------
+ */
+
+static void
+DupUnicodeInternalRep(srcPtr, copyPtr)
+    Tcl_Obj *srcPtr;		/* Object with internal rep to copy. */
+    Tcl_Obj *copyPtr;		/* Object with internal rep to set. */
+{
+    Unicode *srcUnicodePtr = GET_UNICODE(srcPtr);
+    Unicode *copyUnicodePtr;
+    
+    /*
+     * If the src obj is a string of 1-byte Utf chars, create an "empty"
+     * Unicode internal rep for the new object, since no unichars are
+     * stored in that case.  Otherwise, copy the Unicode internal rep.
+     */
+    
+    if (AllSingleByteChars(srcPtr)) {
+	copyUnicodePtr = (Unicode *) ckalloc(UNICODE_SIZE(4));
+	copyUnicodePtr->used = 0;
+	copyUnicodePtr->allocated = 4;
+    } else {
+	int used = srcUnicodePtr->used;
+	int allocated = srcUnicodePtr->allocated;
+
+	copyUnicodePtr = (Unicode *) ckalloc(UNICODE_SIZE(allocated));
+	copyUnicodePtr->used = used;
+	copyUnicodePtr->allocated = allocated;
+	memcpy((VOID *) copyUnicodePtr->chars,
+		(VOID *) srcUnicodePtr->chars, (size_t) used);
+    }
+    copyUnicodePtr->numChars = srcUnicodePtr->numChars;
+    /* Tell the copy about its internal rep and its type; the dup proc
+     * is assumed to own typePtr (TODO confirm in Tcl_DuplicateObj). */
+    SET_UNICODE(copyPtr, copyUnicodePtr);
+    copyPtr->typePtr = &tclUnicodeType;
+}
+
+/*
+ *---------------------------------------------------------------------------
+ *
+ * TclSetUnicodeObj --
+ *
+ *	Modify an object to be a Unicode object and to have the specified
+ *	unicode string as its value.
+ *
+ * Results:
+ *	None.
+ *
+ * Side effects:
+ *	The object's old string rep and internal rep is freed.
+ *	Memory allocated for copy of unicode argument.
+ *
+ *----------------------------------------------------------------------
+ */
+
+void
+TclSetUnicodeObj(objPtr, chars, length)
+    Tcl_Obj *objPtr;		/* Object to initialize as a Unicode obj. */
+    unsigned char *chars;	/* The unicode string to use as the new
+				 * value. */
+    int length;			/* Length in bytes of the unicode string,
+				 * which must be >= 0. */
+{
+    Tcl_ObjType *typePtr;
+    Unicode *unicodePtr;
+
+    if (Tcl_IsShared(objPtr)) {
+	panic("TclSetUnicodeObj called with shared object");
+    }
+    typePtr = objPtr->typePtr;
+    if ((typePtr != NULL) && (typePtr->freeIntRepProc != NULL)) {
+	(*typePtr->freeIntRepProc)(objPtr);
+    }
+    Tcl_InvalidateStringRep(objPtr);
+
+    unicodePtr = (Unicode *) ckalloc(UNICODE_SIZE(length));
+    unicodePtr->used = length;
+    unicodePtr->allocated = length;
+    unicodePtr->numChars = length / (int) sizeof(Tcl_UniChar);
+    memcpy((VOID *) unicodePtr->chars, (VOID *) chars, (size_t) length);
+    objPtr->typePtr = &tclUnicodeType;
+    SET_UNICODE(objPtr, unicodePtr);
+}
+
+/*
+ *---------------------------------------------------------------------------
+ *
+ * UpdateStringOfUnicode --
+ *
+ *	Update the string representation for a Unicode data object.
+ *	Note: This procedure does not invalidate an existing old string rep
+ *	so storage will be lost if this has not already been done. 
+ *
+ * Results:
+ *	None.
+ *
+ * Side effects:
+ *	The object's string is set to a valid string that results from
+ *	the Unicode-to-string conversion.
+ *
+ *	The internal rep and the typePtr of the object are left
+ *	unchanged.
+ *
+ *---------------------------------------------------------------------------
+ */
+
+static void
+UpdateStringOfUnicode(objPtr)
+    Tcl_Obj *objPtr;		/* Unicode object whose string rep to
+				 * update. */
+{
+    int i, length, size;
+    Tcl_UniChar *src;
+    char dummy[TCL_UTF_MAX];
+    char *dst;
+    Unicode *unicodePtr;
+
+    unicodePtr = GET_UNICODE(objPtr);
+    src = (Tcl_UniChar *) unicodePtr->chars;
+    length = unicodePtr->used;
+
+    /*
+     * How much space will string rep need?
+     */
+     
+    size = 0;
+    for (i = 0; i < unicodePtr->numChars; i++) {
+	size += Tcl_UniCharToUtf((int) src[i], dummy);
+    }
+
+    dst = (char *) ckalloc((unsigned) (size + 1));
+    objPtr->bytes = dst;
+    objPtr->length = size;
+
+    for (i = 0; i < unicodePtr->numChars; i++) {
+	dst += Tcl_UniCharToUtf(src[i], dst);
+    }
+    *dst = '\0';
+}
+
+/*
+ *---------------------------------------------------------------------------
+ *
+ * SetOptUnicodeFromAny --
+ *
+ *	Store an "optimized" Unicode internal rep holding only numChars.
+ *
+ * Results:
+ *	None.
+ *
+ * Side effects:
+ *	A Unicode object is stored as the internal rep of objPtr.  The Unicode
+ * object is optimized for the case where each UTF char in a string is only
+ * one byte.  In this case, we store the value of numChars, but we don't copy
+ * the bytes to the unicodeObj->chars.  Before accessing obj->chars, check if
+ * all chars are 1 byte long.
+ *
+ *---------------------------------------------------------------------------
+ */
+
+static void
+SetOptUnicodeFromAny(objPtr, numChars)
+    Tcl_Obj *objPtr;		/* The object to convert to type Unicode. */
+    int numChars;
+{
+    Tcl_ObjType *typePtr;
+    Unicode *unicodePtr;
+    
+    unicodePtr = (Unicode *) ckalloc(UNICODE_SIZE(4));
+    unicodePtr->numChars = numChars;
+
+    typePtr = objPtr->typePtr;
+    if ((typePtr != NULL) && (typePtr->freeIntRepProc) != NULL) {
+	(*typePtr->freeIntRepProc)(objPtr);
+    }
+    objPtr->typePtr = &tclUnicodeType;
+    SET_UNICODE(objPtr, unicodePtr);
+}
+
+/*
+ *---------------------------------------------------------------------------
+ *
+ * SetUnicodeFromAny --
+ *
+ *	Generate the Unicode internal rep from the string rep.
+ *
+ * Results:
+ *	The return value is always TCL_OK.
+ *
+ * Side effects:
+ *	A Unicode object is stored as the internal rep of objPtr.  The Unicode
+ * object is optimized for the case where each UTF char in a string is only
+ * one byte.  In this case, we store the value of numChars, but we don't copy
+ * the bytes to the unicodeObj->chars.  Before accessing obj->chars, check if
+ * all chars are 1 byte long.
+ *
+ *---------------------------------------------------------------------------
+ */
+
+static int
+SetUnicodeFromAny(interp, objPtr)
+    Tcl_Interp *interp;		/* Not used. */
+    Tcl_Obj *objPtr;		/* The object to convert to type Unicode. */
+{
+    Tcl_ObjType *typePtr;
+    int numBytes, numChars;
+    char *src, *srcEnd;
+    Unicode *unicodePtr;
+    unsigned char *dst;
+    
+    typePtr = objPtr->typePtr;
+    if (typePtr != &tclUnicodeType) {
+	src = Tcl_GetStringFromObj(objPtr, &numBytes);
+
+	numChars = Tcl_NumUtfChars(src, numBytes);
+	if (numChars == numBytes) {
+	    SetOptUnicodeFromAny(objPtr, numChars);
+	} else {
+	    unicodePtr = (Unicode *) ckalloc(UNICODE_SIZE(numChars
+		    * sizeof(Tcl_UniChar)));
+	    srcEnd = src + numBytes;
+	
+	    for (dst = unicodePtr->chars; src < srcEnd;
+		 dst += sizeof(Tcl_UniChar)) {
+		src += Tcl_UtfToUniChar(src, (Tcl_UniChar *) dst);
+	    }
+
+	    unicodePtr->used = numChars * sizeof(Tcl_UniChar);
+	    unicodePtr->numChars = numChars;
+	    unicodePtr->allocated = numChars * sizeof(Tcl_UniChar);	
+
+	    if ((typePtr != NULL) && (typePtr->freeIntRepProc) != NULL) {
+		(*typePtr->freeIntRepProc)(objPtr);
+	    }
+	    objPtr->typePtr = &tclUnicodeType;
+	    SET_UNICODE(objPtr, unicodePtr);
+	}
+    }
+    return TCL_OK;
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * FreeUnicodeInternalRep --
+ *
+ *	Deallocate the storage associated with a Unicode data object's
+ *	internal representation.
+ *
+ * Results:
+ *	None.
+ *
+ * Side effects:
+ *	Frees memory. 
+ *
+ *----------------------------------------------------------------------
+ */
+
+static void
+FreeUnicodeInternalRep(objPtr)
+    Tcl_Obj *objPtr;		/* Object with internal rep to free. */
+{
+    ckfree((char *) GET_UNICODE(objPtr));
+}
diff --git a/generic/tclVar.c b/generic/tclVar.c
index 03b7757..f2df52e 100644
--- a/generic/tclVar.c
+++ b/generic/tclVar.c
@@ -14,7 +14,7 @@
  * See the file "license.terms" for information on usage and redistribution
  * of this file, and for a DISCLAIMER OF ALL WARRANTIES.
  *
- * RCS: @(#) $Id: tclVar.c,v 1.8 1999/04/16 00:46:55 stanton Exp $
+ * RCS: @(#) $Id: tclVar.c,v 1.9 1999/06/08 02:59:27 hershey Exp $
  */
 
 #include "tclInt.h"
@@ -1291,7 +1291,7 @@ Tcl_SetVar2Ex(interp, part1, part2, newValuePtr, flags)
 		    oldValuePtr = varPtr->value.objPtr;
 		    Tcl_IncrRefCount(oldValuePtr); /* since var is ref */
 		}
-		Tcl_AppendToObj(oldValuePtr, bytes, length);
+		Tcl_AppendObjToObj(oldValuePtr, newValuePtr);
 	    }
 	}
     } else {
author	hershey <hershey>	1999-06-08 02:59:23 (GMT)
committer	hershey <hershey>	1999-06-08 02:59:23 (GMT)
commit	0e53e351cd3c0bdf51b84e459262c47f913c9a97 (patch)
tree	87cee8e23f1c9f621f583c5d97e3e62979935fa9 /generic
parent	b2759d9c544b22071eca46475d110812304e8faa (diff)
download	tcl-0e53e351cd3c0bdf51b84e459262c47f913c9a97.zip tcl-0e53e351cd3c0bdf51b84e459262c47f913c9a97.tar.gz tcl-0e53e351cd3c0bdf51b84e459262c47f913c9a97.tar.bz2