From 0e53e351cd3c0bdf51b84e459262c47f913c9a97 Mon Sep 17 00:00:00 2001 From: hershey Date: Tue, 8 Jun 1999 02:59:23 +0000 Subject: * tests/string.test: * generic/tclVar.c (Tcl_SetVar2Ex): * generic/tclStringObj.c (Tcl_AppendObjToObj): * generic/tclCmdMZ.c (Tcl_StringObjCmd): optimized the string index, string length, string range, and append command in cases where the object's internal rep is a bytearray. Objects with other internal reps are converted to have the new unicode internal rep. * unix/Makefile.in: * win/Makefile.in: * win/Makefile.vc: * tests/unicode.test: * generic/tclInt.h: * generic/tclObj.c: * generic/tclUnicodeObj.c: added a new object type to store the unicode representation of a string. * generic/tclTestObj.c: added the objtype option to the testobj command. This option returns the name of the type of internal rep an object has. --- ChangeLog | 24 ++ generic/tclCmdMZ.c | 139 ++++++--- generic/tclInt.h | 12 +- generic/tclObj.c | 4 +- generic/tclStringObj.c | 9 +- generic/tclTestObj.c | 25 +- generic/tclUnicodeObj.c | 771 ++++++++++++++++++++++++++++++++++++++++++++++++ generic/tclVar.c | 4 +- tests/string.test | 31 +- tests/unicode.test | 204 +++++++++++++ unix/Makefile.in | 9 +- win/Makefile.in | 3 +- win/makefile.vc | 3 +- 13 files changed, 1176 insertions(+), 62 deletions(-) create mode 100644 generic/tclUnicodeObj.c create mode 100644 tests/unicode.test diff --git a/ChangeLog b/ChangeLog index 9ae342f..4ec2485 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,27 @@ +1999-06-07 Melissa Hirschl + + * tests/string.test: + * generic/tclVar.c (Tcl_SetVar2Ex): + * generic/tclStringObj.c (Tcl_AppendObjToObj): + * generic/tclCmdMZ.c (Tcl_StringObjCmd): optimized the string + index, string length, string range, and append command in cases + where the object's internal rep is a bytearray. Objects with + other internal reps are converted to have the new unicode internal + rep. + + * unix/Makefile.in: + * win/Makefile.in: + * win/Makefile.vc: + * tests/unicode.test: + * generic/tclInt.h: + * generic/tclObj.c: + * generic/tclUnicodeObj.c: added a new object type to store the + unicode representation of a string. + + * generic/tclTestObj.c: added the objtype option to the testobj + command. This option returns the name of the type of internal rep + an object has. + 1999-06-04 * win/configure.in: diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c index 19b9ece..ebea22b 100644 --- a/generic/tclCmdMZ.c +++ b/generic/tclCmdMZ.c @@ -13,7 +13,7 @@ * See the file "license.terms" for information on usage and redistribution * of this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclCmdMZ.c,v 1.12 1999/06/03 18:43:30 stanton Exp $ + * RCS: @(#) $Id: tclCmdMZ.c,v 1.13 1999/06/08 02:59:23 hershey Exp $ */ #include "tclInt.h" @@ -1009,32 +1009,47 @@ Tcl_StringObjCmd(dummy, interp, objc, objv) } case STR_INDEX: { int index; + char buf[TCL_UTF_MAX]; + Tcl_UniChar unichar; if (objc != 4) { Tcl_WrongNumArgs(interp, 2, objv, "string charIndex"); return TCL_ERROR; } - string1 = Tcl_GetStringFromObj(objv[2], &length1); - /* - * establish what 'end' really means - */ - length2 = Tcl_NumUtfChars(string1, length1); - if (TclGetIntForIndex(interp, objv[3], length2 - 1, - &index) != TCL_OK) { - return TCL_ERROR; - } + /* - * index must be between 0 and the UTF length to be valid + * If we have a ByteArray object, avoid indexing in the + * Utf string since the byte array contains one byte per + * character. Otherwise, use the Unicode string rep to + * get the index'th char. */ - if ((index >= 0) && (index < length2)) { - if (length1 == length2) { - /* no unicode chars */ - Tcl_SetStringObj(resultPtr, string1+index, 1); - } else { - char buf[TCL_UTF_MAX]; - length2 = Tcl_UniCharToUtf(Tcl_UniCharAtIndex(string1, - index), buf); + if (objv[2]->typePtr == &tclByteArrayType) { + + string1 = Tcl_GetByteArrayFromObj(objv[2], &length1); + + if (TclGetIntForIndex(interp, objv[3], length1 - 1, + &index) != TCL_OK) { + return TCL_ERROR; + } + Tcl_SetStringObj(resultPtr, &string1[index], 1); + } else { + string1 = Tcl_GetStringFromObj(objv[2], &length1); + + /* + * convert to Unicode internal rep to calulate what + * 'end' really means. + */ + + length2 = TclGetUnicodeLengthFromObj(objv[2]); + + if (TclGetIntForIndex(interp, objv[3], length2 - 1, + &index) != TCL_OK) { + return TCL_ERROR; + } + if ((index >= 0) && (index < length2)) { + unichar = TclGetUniCharFromObj(objv[2], index); + length2 = Tcl_UniCharToUtf((int)unichar, buf); Tcl_SetStringObj(resultPtr, buf, length2); } } @@ -1400,16 +1415,16 @@ Tcl_StringObjCmd(dummy, interp, objc, objv) /* * If we have a ByteArray object, avoid recomputing the * string since the byte array contains one byte per - * character. + * character. Otherwise, use the Unicode string rep to + * calculate the length. */ if (objv[2]->typePtr == &tclByteArrayType) { (void) Tcl_GetByteArrayFromObj(objv[2], &length1); Tcl_SetIntObj(resultPtr, length1); } else { - string1 = Tcl_GetStringFromObj(objv[2], &length1); - Tcl_SetIntObj(resultPtr, Tcl_NumUtfChars(string1, - length1)); + Tcl_SetIntObj(resultPtr, + TclGetUnicodeLengthFromObj(objv[2])); } } break; @@ -1550,28 +1565,64 @@ Tcl_StringObjCmd(dummy, interp, objc, objv) return TCL_ERROR; } - string1 = Tcl_GetStringFromObj(objv[2], &length1); - length1 = Tcl_NumUtfChars(string1, length1) - 1; - if (TclGetIntForIndex(interp, objv[3], length1, - &first) != TCL_OK) { - return TCL_ERROR; - } - if (TclGetIntForIndex(interp, objv[4], length1, - &last) != TCL_OK) { - return TCL_ERROR; - } - if (first < 0) { - first = 0; - } - if (last >= length1) { - last = length1; - } - if (last >= first) { - char *start, *end; + /* + * If we have a ByteArray object, avoid indexing in the + * Utf string since the byte array contains one byte per + * character. Otherwise, use the Unicode string rep to + * get the range. + */ - start = Tcl_UtfAtIndex(string1, first); - end = Tcl_UtfAtIndex(start, last - first + 1); - Tcl_SetStringObj(resultPtr, start, end - start); + if (objv[2]->typePtr == &tclByteArrayType) { + + string1 = Tcl_GetByteArrayFromObj(objv[2], &length1); + + if (TclGetIntForIndex(interp, objv[3], length1 - 1, + &first) != TCL_OK) { + return TCL_ERROR; + } + if (TclGetIntForIndex(interp, objv[4], length1 - 1, + &last) != TCL_OK) { + return TCL_ERROR; + } + if (first < 0) { + first = 0; + } + if (last >= length1 - 1) { + last = length1 - 1; + } + if (last >= first) { + int numBytes = last - first + 1; + resultPtr = Tcl_NewByteArrayObj(&string1[first], numBytes); + Tcl_SetObjResult(interp, resultPtr); + } + } else { + string1 = Tcl_GetStringFromObj(objv[2], &length1); + + /* + * Convert to Unicode internal rep to calulate length and + * create a result object. + */ + + length2 = TclGetUnicodeLengthFromObj(objv[2]) - 1; + + if (TclGetIntForIndex(interp, objv[3], length2, + &first) != TCL_OK) { + return TCL_ERROR; + } + if (TclGetIntForIndex(interp, objv[4], length2, + &last) != TCL_OK) { + return TCL_ERROR; + } + if (first < 0) { + first = 0; + } + if (last >= length1 - 1) { + last = length1 - 1; + } + if (last >= first) { + resultPtr = TclGetRangeFromObj(objv[2], first, last); + Tcl_SetObjResult(interp, resultPtr); + } } break; } diff --git a/generic/tclInt.h b/generic/tclInt.h index 68614bc..ed9002d 100644 --- a/generic/tclInt.h +++ b/generic/tclInt.h @@ -11,7 +11,7 @@ * See the file "license.terms" for information on usage and redistribution * of this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclInt.h,v 1.29 1999/05/13 01:50:32 stanton Exp $ + * RCS: @(#) $Id: tclInt.h,v 1.30 1999/06/08 02:59:24 hershey Exp $ */ #ifndef _TCLINT @@ -1509,6 +1509,7 @@ extern Tcl_ObjType tclIntType; extern Tcl_ObjType tclListType; extern Tcl_ObjType tclProcBodyType; extern Tcl_ObjType tclStringType; +extern Tcl_ObjType tclUnicodeType; /* * The head of the list of free Tcl objects, and the total number of Tcl @@ -1542,6 +1543,9 @@ EXTERN int TclAccess _ANSI_ARGS_((CONST char *path, EXTERN int TclAccessDeleteProc _ANSI_ARGS_((TclAccessProc_ *proc)); EXTERN int TclAccessInsertProc _ANSI_ARGS_((TclAccessProc_ *proc)); EXTERN void TclAllocateFreeObjects _ANSI_ARGS_((void)); +EXTERN Tcl_Obj * TclAppendObjToUnicodeObj _ANSI_ARGS_(( + register Tcl_Obj *targetObjPtr, + register Tcl_Obj *srcObjPtr)); EXTERN int TclArraySet _ANSI_ARGS_((Tcl_Interp *interp, Tcl_Obj *arrayNameObj, Tcl_Obj *arrayElemObj)); EXTERN int TclCleanupChildren _ANSI_ARGS_((Tcl_Interp *interp, @@ -1634,6 +1638,12 @@ EXTERN int TclGetOpenMode _ANSI_ARGS_((Tcl_Interp *interp, char *string, int *seekFlagPtr)); EXTERN Tcl_Command TclGetOriginalCommand _ANSI_ARGS_(( Tcl_Command command)); +EXTERN Tcl_Obj* TclGetRangeFromObj _ANSI_ARGS_((Tcl_Obj *objPtr, + int first, int last)); +EXTERN Tcl_UniChar TclGetUniCharFromObj _ANSI_ARGS_((Tcl_Obj *objPtr, + int index)); +EXTERN int TclGetUnicodeLengthFromObj _ANSI_ARGS_(( + Tcl_Obj *objPtr)); EXTERN int TclGlob _ANSI_ARGS_((Tcl_Interp *interp, char *pattern, int noComplain)); EXTERN int TclGlobalInvoke _ANSI_ARGS_((Tcl_Interp *interp, diff --git a/generic/tclObj.c b/generic/tclObj.c index f1858f8..423df28 100644 --- a/generic/tclObj.c +++ b/generic/tclObj.c @@ -5,11 +5,12 @@ * many Tcl commands. * * Copyright (c) 1995-1997 Sun Microsystems, Inc. + * Copyright (c) 1999 by Scriptics Corporation. * * See the file "license.terms" for information on usage and redistribution * of this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclObj.c,v 1.7 1999/05/28 23:02:33 stanton Exp $ + * RCS: @(#) $Id: tclObj.c,v 1.8 1999/06/08 02:59:25 hershey Exp $ */ #include "tclInt.h" @@ -137,6 +138,7 @@ TclInitObjSubsystem() Tcl_RegisterObjType(&tclListType); Tcl_RegisterObjType(&tclByteCodeType); Tcl_RegisterObjType(&tclProcBodyType); + Tcl_RegisterObjType(&tclUnicodeType); #ifdef TCL_COMPILE_STATS Tcl_MutexLock(&tclObjMutex); diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c index ea0cbd7..c70bcb9 100644 --- a/generic/tclStringObj.c +++ b/generic/tclStringObj.c @@ -10,11 +10,12 @@ * representation are called "expandable string objects". * * Copyright (c) 1995-1997 Sun Microsystems, Inc. + * Copyright (c) 1999 by Scriptics Corporation. * * See the file "license.terms" for information on usage and redistribution * of this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclStringObj.c,v 1.6 1999/05/07 20:07:35 stanton Exp $ + * RCS: @(#) $Id: tclStringObj.c,v 1.7 1999/06/08 02:59:25 hershey Exp $ */ #include "tclInt.h" @@ -382,11 +383,7 @@ Tcl_AppendObjToObj(objPtr, appendObjPtr) Tcl_Obj *objPtr; /* Points to the object to append to. */ Tcl_Obj *appendObjPtr; /* Object to append. */ { - int length; - char *stringRep; - - stringRep = Tcl_GetStringFromObj(appendObjPtr, &length); - Tcl_AppendToObj(objPtr, stringRep, length); + TclAppendObjToUnicodeObj(objPtr, appendObjPtr); } /* diff --git a/generic/tclTestObj.c b/generic/tclTestObj.c index d604c5b..533b967 100644 --- a/generic/tclTestObj.c +++ b/generic/tclTestObj.c @@ -7,11 +7,12 @@ * applications; they're only used for testing. * * Copyright (c) 1995-1998 Sun Microsystems, Inc. + * Copyright (c) 1999 by Scriptics Corporation. * * See the file "license.terms" for information on usage and redistribution * of this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclTestObj.c,v 1.3 1999/04/16 00:46:54 stanton Exp $ + * RCS: @(#) $Id: tclTestObj.c,v 1.4 1999/06/08 02:59:26 hershey Exp $ */ #include "tclInt.h" @@ -774,6 +775,23 @@ TestobjCmd(clientData, interp, objc, objv) } SetVarToObj(varIndex, Tcl_NewObj()); Tcl_SetObjResult(interp, varPtr[varIndex]); + } else if (strcmp(subCmd, "objtype") == 0) { + char *typeName; + + /* + * return an object containing the name of the argument's type + * of internal rep. If none exists, return "none". + */ + + if (objc != 3) { + goto wrongNumArgs; + } + if (objv[2]->typePtr == NULL) { + Tcl_SetObjResult(interp, Tcl_NewStringObj("none", -1)); + } else { + typeName = objv[2]->typePtr->name; + Tcl_SetObjResult(interp, Tcl_NewStringObj(typeName, -1)); + } } else if (strcmp(subCmd, "refcount") == 0) { char buf[TCL_INTEGER_SPACE]; @@ -810,7 +828,8 @@ TestobjCmd(clientData, interp, objc, objv) if (objc != 2) { goto wrongNumArgs; } - if (Tcl_AppendAllObjTypes(interp, Tcl_GetObjResult(interp)) != TCL_OK) { + if (Tcl_AppendAllObjTypes(interp, + Tcl_GetObjResult(interp)) != TCL_OK) { return TCL_ERROR; } } else { @@ -818,7 +837,7 @@ TestobjCmd(clientData, interp, objc, objv) "bad option \"", Tcl_GetString(objv[1]), "\": must be assign, convert, duplicate, freeallvars, ", - "newobj, objcount, refcount, type, or types", + "newobj, objcount, objtype, refcount, type, or types", (char *) NULL); return TCL_ERROR; } diff --git a/generic/tclUnicodeObj.c b/generic/tclUnicodeObj.c new file mode 100644 index 0000000..869b8c7 --- /dev/null +++ b/generic/tclUnicodeObj.c @@ -0,0 +1,771 @@ +/* + * tclUnicodeObj.c -- + * + * This file contains the implementation of the Unicode internal + * representation of Tcl objects. + * + * Copyright (c) 1999 by Scriptics Corporation. + * + * See the file "license.terms" for information on usage and redistribution + * of this file, and for a DISCLAIMER OF ALL WARRANTIES. + * + * RCS: @(#) $Id: tclUnicodeObj.c,v 1.2 1999/06/08 02:59:27 hershey Exp $ + */ + +#include +#include "tclInt.h" +#include "tclPort.h" + +/* + * Prototypes for local procedures defined in this file: + */ + +static void DupUnicodeInternalRep _ANSI_ARGS_((Tcl_Obj *srcPtr, + Tcl_Obj *copyPtr)); +static void FreeUnicodeInternalRep _ANSI_ARGS_((Tcl_Obj *objPtr)); +static void UpdateStringOfUnicode _ANSI_ARGS_((Tcl_Obj *objPtr)); +static int SetUnicodeFromAny _ANSI_ARGS_((Tcl_Interp *interp, + Tcl_Obj *objPtr)); + +static int AllSingleByteChars _ANSI_ARGS_((Tcl_Obj *objPtr)); +static void TclAppendUniCharStrToObj _ANSI_ARGS_(( + register Tcl_Obj *objPtr, Tcl_UniChar *unichars, + int numChars)); +static Tcl_Obj * TclNewUnicodeObj _ANSI_ARGS_((Tcl_UniChar *unichars, + int numChars)); +static void SetOptUnicodeFromAny _ANSI_ARGS_((Tcl_Obj *objPtr, + int numChars)); + +/* + * The following object type represents a Unicode string. A Unicode string + * is an internationalized string. Conceptually, a Unicode string is an + * array of 16-bit quantities organized as a sequence of properly formed + * UTF-8 characters. There is a one-to-one map between Unicode and UTF + * characters. The Unicode ojbect is opitmized for the case where each UTF + * char in a string is only one byte. In this case, we store the value of + * numChars, but we don't copy the bytes to the unicodeObj->chars. Before + * accessing obj->chars, check if unicodeObj->numChars == obj->length. + */ + +Tcl_ObjType tclUnicodeType = { + "unicode", + FreeUnicodeInternalRep, + DupUnicodeInternalRep, + UpdateStringOfUnicode, + SetUnicodeFromAny +}; + +/* + * The following structure is the internal rep for a Unicode object. + * Keeps track of how much memory has been used and how much has been + * allocated for the Unicode to enable growing and shrinking of the + * Unicode object with fewer mallocs. + */ + +typedef struct Unicode { + int numChars; /* The number of chars in the unicode + * string. */ + int used; /* The number of bytes used in the unicode + * string. */ + int allocated; /* The amount of space actually allocated + * minus 1 byte. */ + unsigned char chars[4]; /* The array of chars. The actual size of + * this field depends on the 'allocated' field + * above. */ +} Unicode; + +#define UNICODE_SIZE(len) \ + ((unsigned) (sizeof(Unicode) - 4 + (len))) +#define GET_UNICODE(objPtr) \ + ((Unicode *) (objPtr)->internalRep.otherValuePtr) +#define SET_UNICODE(objPtr, unicodePtr) \ + (objPtr)->internalRep.otherValuePtr = (VOID *) (unicodePtr) + + +/* + *---------------------------------------------------------------------- + * + * TclGetUnicodeLengthFromObj -- + * + * Get the length of the Unicode string from the Tcl object. If + * the object is not already a Unicode object, an attempt will be + * made to convert it to one. + * + * Results: + * Pointer to unicode string representing the unicode object. + * + * Side effects: + * Frees old internal rep. Allocates memory for new internal rep. + * + *---------------------------------------------------------------------- + */ + +int +TclGetUnicodeLengthFromObj(objPtr) + Tcl_Obj *objPtr; /* The Unicode object. */ +{ + int length; + Unicode *unicodePtr; + + SetUnicodeFromAny(NULL, objPtr); + unicodePtr = GET_UNICODE(objPtr); + + length = unicodePtr->numChars; + return length; +} + +/* + *---------------------------------------------------------------------- + * + * TclGetUniCharFromObj -- + * + * Get the index'th Unicode character from the Unicode object. If + * the object is not already a Unicode object, an attempt will be + * made to convert it to one. The index is assumed to be in the + * appropriate range. + * + * Results: + * Returns the index'th Unicode character in the Object. + * + * Side effects: + * Fills unichar with the index'th Unicode character. + * + *---------------------------------------------------------------------- + */ + +Tcl_UniChar +TclGetUniCharFromObj(objPtr, index) + Tcl_Obj *objPtr; /* The Unicode object. */ + int index; /* Get the index'th character. */ +{ + Tcl_UniChar *unicharPtr, unichar; + Unicode *unicodePtr; + int length; + + SetUnicodeFromAny(NULL, objPtr); + unicodePtr = GET_UNICODE(objPtr); + length = objPtr->length; + + if (AllSingleByteChars(objPtr)) { + int length; + char *str; + + /* + * All of the characters in the Utf string are 1 byte chars, + * so we don't store the unicode char. We get the Utf string + * and convert the index'th byte to a Unicode character. + */ + + str = Tcl_GetStringFromObj(objPtr, &length); + Tcl_UtfToUniChar(&str[index], &unichar); + } else { + unicharPtr = (Tcl_UniChar *)unicodePtr->chars; + unichar = unicharPtr[index]; + } + return unichar; +} + +/* + *---------------------------------------------------------------------- + * + * TclGetRangeFromObj -- + * + * Create a Tcl Object that contains the chars between first and + * last of the object indicated by "objPtr". If the object is not + * already a Unicode object, an attempt will be made to convert it + * to one. The first and last indices are assumed to be in the + * appropriate range. + * + * Results: + * Returns a new Tcl Object of either "string" or "unicode" type, + * containing the range of chars. + * + * Side effects: + * Changes the internal rep of "objPtr" to unicode. + * + *---------------------------------------------------------------------- + */ + +Tcl_Obj* +TclGetRangeFromObj(objPtr, first, last) + + Tcl_Obj *objPtr; /* The Tcl object to find the range of. */ + int first; /* First index of the range. */ + int last; /* Last index of the range. */ +{ + Tcl_Obj *newObjPtr; /* The Tcl object to find the range of. */ + Tcl_UniChar *unicharPtr; + Unicode *unicodePtr; + int length; + + SetUnicodeFromAny(NULL, objPtr); + unicodePtr = GET_UNICODE(objPtr); + length = objPtr->length; + + if (unicodePtr->numChars != length) { + unicharPtr = (Tcl_UniChar *)unicodePtr->chars; + newObjPtr = TclNewUnicodeObj(&unicharPtr[first], last-first+1); + } else { + int length; + char *str; + + /* + * All of the characters in the Utf string are 1 byte chars, + * so we don't store the unicode char. Create a new string + * object containing the specified range of chars. + */ + + str = Tcl_GetStringFromObj(objPtr, &length); + newObjPtr = Tcl_NewStringObj(&str[first], last-first+1); + } + return newObjPtr; +} + +/* + *---------------------------------------------------------------------- + * + * TclAppendObjToUnicodeObj -- + * + * This procedure appends the contest of "srcObjPtr" to the Unicode + * object "destPtr". + * + * Results: + * None. + * + * Side effects: + * If srcObjPtr doesn't have an internal rep, then it is given a + * Unicode internal rep. + * + *---------------------------------------------------------------------- + */ + +Tcl_Obj * +TclAppendObjToUnicodeObj(targetObjPtr, srcObjPtr) + register Tcl_Obj *targetObjPtr; /* Points to the object to + * append to. */ + register Tcl_Obj *srcObjPtr; /* Points to the object to + * append from. */ +{ + int numBytes, numChars; + Tcl_Obj *resultObjPtr; + char *utfSrcStr; + Tcl_UniChar *unicharSrcStr; + Unicode *unicodePtr; + Tcl_DString dsPtr; + + /* + * Duplicate the target if it is shared. + * Change the result's internal rep to Unicode object. + */ + + if (Tcl_IsShared(targetObjPtr)) { + resultObjPtr = Tcl_DuplicateObj(targetObjPtr); + } else { + resultObjPtr = targetObjPtr; + } + SetUnicodeFromAny(NULL, resultObjPtr); + + /* + * Case where target chars are 1 byte long: + * If src obj is of "string" or null type, then convert it to "unicode" + * type. Src objs of other types (such as int) are left in tact to keep + * them from shimmering between types. If the src obj is a unichar obj, + * and all src chars are also 1 byte long, the src string is appended to + * the target "unicode" obj, and the target obj maintains its "optimized" + * status. + */ + + if (AllSingleByteChars(resultObjPtr)) { + + int length; + char *stringRep; + + if (srcObjPtr->typePtr == &tclStringType + || srcObjPtr->typePtr == NULL) { + SetUnicodeFromAny(NULL, srcObjPtr); + } + + stringRep = Tcl_GetStringFromObj(srcObjPtr, &length); + Tcl_AppendToObj(resultObjPtr, stringRep, length); + + if ((srcObjPtr->typePtr == &tclUnicodeType) + && (AllSingleByteChars(srcObjPtr))) { + SetOptUnicodeFromAny(resultObjPtr, resultObjPtr->length); + } + return resultObjPtr; + } + + /* + * Extract a unicode string from "unicode" or "string" type objects. + * Extract the utf string from non-unicode objects, and convert the + * utf string to unichar string locally. + * If the src obj is a "string" obj, convert it to "unicode" type. + * Src objs of other types (such as int) are left in tact to keep + * them from shimmering between types. + */ + + Tcl_DStringInit(&dsPtr); + if (srcObjPtr->typePtr == &tclStringType || srcObjPtr->typePtr == NULL) { + SetUnicodeFromAny(NULL, srcObjPtr); + } + if (srcObjPtr->typePtr == &tclUnicodeType) { + if (AllSingleByteChars(srcObjPtr)) { + + unicodePtr = GET_UNICODE(srcObjPtr); + numChars = unicodePtr->numChars; + + utfSrcStr = Tcl_GetStringFromObj(srcObjPtr, &numBytes); + unicharSrcStr = (Tcl_UniChar *)Tcl_UtfToUniCharDString(utfSrcStr, + numBytes, &dsPtr); + } else { + unicodePtr = GET_UNICODE(srcObjPtr); + numChars = unicodePtr->numChars; + unicharSrcStr = (Tcl_UniChar *)unicodePtr->chars; + } + } else { + utfSrcStr = Tcl_GetStringFromObj(srcObjPtr, &numBytes); + numChars = Tcl_NumUtfChars(utfSrcStr, numBytes); + unicharSrcStr = (Tcl_UniChar *)Tcl_UtfToUniCharDString(utfSrcStr, + numBytes, &dsPtr); + } + if (numChars == 0) { + return resultObjPtr; + } + + /* + * Append the unichar src string to the result object. + */ + + TclAppendUniCharStrToObj(resultObjPtr, unicharSrcStr, numChars); + Tcl_DStringFree(&dsPtr); + return resultObjPtr; +} + +/* + *---------------------------------------------------------------------- + * + * TclAppendUniCharStrToObj -- + * + * This procedure appends the contents of "srcObjPtr" to the + * Unicode object "objPtr". + * + * Results: + * None. + * + * Side effects: + * If srcObjPtr doesn't have an internal rep, then it is given a + * Unicode internal rep. + * + *---------------------------------------------------------------------- + */ + +void +TclAppendUniCharStrToObj(objPtr, unichars, numNewChars) + register Tcl_Obj *objPtr; /* Points to the object to append to. */ + Tcl_UniChar *unichars; /* The unicode string to append to the + * object. */ + int numNewChars; /* Number of chars in "unichars". */ +{ + Unicode *unicodePtr; + int usedBytes, numNewBytes, totalNumBytes, totalNumChars; + + /* + * Invalidate the StringRep. + */ + + Tcl_InvalidateStringRep(objPtr); + + unicodePtr = GET_UNICODE(objPtr); + + usedBytes = unicodePtr->used; + totalNumChars = numNewChars + unicodePtr->numChars; + totalNumBytes = totalNumChars * sizeof(Tcl_UniChar); + numNewBytes = numNewChars * sizeof(Tcl_UniChar); + + if (unicodePtr->allocated < totalNumBytes) { + int allocatedBytes = totalNumBytes * 2; + + /* + * There isn't currently enough space in the Unicode + * representation so allocate additional space. In fact, + * overallocate so that there is room for future growth without + * having to reallocate again. + */ + + unicodePtr = (Unicode *) ckrealloc(unicodePtr, + UNICODE_SIZE(allocatedBytes)); + memcpy((VOID *) (unicodePtr->chars + usedBytes), + (VOID *) unichars, (size_t) numNewBytes); + + unicodePtr->allocated = allocatedBytes; + unicodePtr = SET_UNICODE(objPtr, unicodePtr); + } + + memcpy((VOID *) (unicodePtr->chars + usedBytes), + (VOID *) unichars, (size_t) numNewBytes); + unicodePtr->used = totalNumBytes; + unicodePtr->numChars = totalNumChars; +} + +/* + *--------------------------------------------------------------------------- + * + * TclNewUnicodeObj -- + * + * This procedure is creates a new Unicode object and initializes + * it from the given Utf String. If the Utf String is the same size + * as the Unicode string, don't duplicate the data. + * + * Results: + * The newly created object is returned. This object will have no + * initial string representation. The returned object has a ref count + * of 0. + * + * Side effects: + * Memory allocated for new object and copy of Unicode argument. + * + *--------------------------------------------------------------------------- + */ + +Tcl_Obj * +TclNewUnicodeObj(unichars, numChars) + Tcl_UniChar *unichars; /* The unicode string used to initialize + * the new object. */ + int numChars; /* Number of characters in the unicode + * string. */ +{ + Tcl_Obj *objPtr; + Unicode *unicodePtr; + int numBytes; + + numBytes = numChars * sizeof(Tcl_UniChar); + + TclNewObj(objPtr); + objPtr->bytes = NULL; + objPtr->typePtr = &tclUnicodeType; + + unicodePtr = (Unicode *) ckalloc(UNICODE_SIZE(numBytes)); + unicodePtr->used = numBytes; + unicodePtr->numChars = numChars; + unicodePtr->allocated = numBytes; + memcpy((VOID *) unicodePtr->chars, (VOID *) unichars, (size_t) numBytes); + SET_UNICODE(objPtr, unicodePtr); + return objPtr; +} + +/* + *--------------------------------------------------------------------------- + * + * TclAllSingleByteChars -- + * + * Initialize the internal representation of a Unicode Tcl_Obj + * to a copy of the internal representation of an existing Unicode + * object. + * + * Results: + * None. + * + * Side effects: + * Allocates memory. + * + *--------------------------------------------------------------------------- + */ + +static int +AllSingleByteChars(objPtr) + Tcl_Obj *objPtr; /* Object whose char lengths to check. */ +{ + Unicode *unicodePtr; + int numBytes, numChars; + + unicodePtr = GET_UNICODE(objPtr); + numChars = unicodePtr->numChars; + numBytes = objPtr->length; + + if (numChars == numBytes) { + return 1; + } else { + return 0; + } +} + +/* + *--------------------------------------------------------------------------- + * + * DupUnicodeInternalRep -- + * + * Initialize the internal representation of a Unicode Tcl_Obj + * to a copy of the internal representation of an existing Unicode + * object. + * + * Results: + * None. + * + * Side effects: + * Allocates memory. + * + *--------------------------------------------------------------------------- + */ + +static void +DupUnicodeInternalRep(srcPtr, copyPtr) + Tcl_Obj *srcPtr; /* Object with internal rep to copy. */ + Tcl_Obj *copyPtr; /* Object with internal rep to set. */ +{ + Unicode *srcUnicodePtr = GET_UNICODE(srcPtr); + Unicode *copyUnicodePtr; /*GET_UNICODE(copyPtr);*/ + + /* + * If the src obj is a string of 1-byte Utf chars, then copy the + * string rep of the source object and create an "empty" Unicode + * internal rep for the new object. Otherwise, copy Unicode + * internal rep, and invalidate the string rep of the new object. + */ + + if (AllSingleByteChars(srcPtr)) { + copyUnicodePtr = (Unicode *) ckalloc(UNICODE_SIZE(4)); + } else { + int used = srcUnicodePtr->used; + int allocated = srcUnicodePtr->allocated; + Tcl_UniChar *unichars; + + unichars = (Tcl_UniChar *)srcUnicodePtr->chars; + + copyUnicodePtr = (Unicode *) ckalloc(UNICODE_SIZE(allocated)); + + copyUnicodePtr->used = used; + copyUnicodePtr->allocated = allocated; + memcpy((VOID *) copyUnicodePtr->chars, + (VOID *) srcUnicodePtr->chars, (size_t) used); + } + copyUnicodePtr->numChars = srcUnicodePtr->numChars; + SET_UNICODE(copyPtr, copyUnicodePtr); +} + +/* + *--------------------------------------------------------------------------- + * + * TclSetUnicodeObj -- + * + * Modify an object to be a Unicode object and to have the specified + * unicode string as its value. + * + * Results: + * None. + * + * Side effects: + * The object's old string rep and internal rep is freed. + * Memory allocated for copy of unicode argument. + * + *---------------------------------------------------------------------- + */ + +void +TclSetUnicodeObj(objPtr, chars, length) + Tcl_Obj *objPtr; /* Object to initialize as a Unicode obj. */ + unsigned char *chars; /* The unicode string to use as the new + * value. */ + int length; /* Length of the unicode string, which must + * be >= 0. */ +{ + Tcl_ObjType *typePtr; + Unicode *unicodePtr; + + if (Tcl_IsShared(objPtr)) { + panic("TclSetUnicodeObj called with shared object"); + } + typePtr = objPtr->typePtr; + if ((typePtr != NULL) && (typePtr->freeIntRepProc != NULL)) { + (*typePtr->freeIntRepProc)(objPtr); + } + Tcl_InvalidateStringRep(objPtr); + + unicodePtr = (Unicode *) ckalloc(UNICODE_SIZE(length)); + unicodePtr->used = length; + unicodePtr->allocated = length; + memcpy((VOID *) unicodePtr->chars, (VOID *) chars, (size_t) length); + + objPtr->typePtr = &tclUnicodeType; + SET_UNICODE(objPtr, unicodePtr); +} + +/* + *--------------------------------------------------------------------------- + * + * UpdateStringOfUnicode -- + * + * Update the string representation for a Unicode data object. + * Note: This procedure does not invalidate an existing old string rep + * so storage will be lost if this has not already been done. + * + * Results: + * None. + * + * Side effects: + * The object's string is set to a valid string that results from + * the Unicode-to-string conversion. + * + * The object becomes a string object -- the internal rep is + * discarded and the typePtr becomes NULL. + * + *--------------------------------------------------------------------------- + */ + +static void +UpdateStringOfUnicode(objPtr) + Tcl_Obj *objPtr; /* Unicode object whose string rep to + * update. */ +{ + int i, length, size; + Tcl_UniChar *src; + char dummy[TCL_UTF_MAX]; + char *dst; + Unicode *unicodePtr; + + unicodePtr = GET_UNICODE(objPtr); + src = (Tcl_UniChar *) unicodePtr->chars; + length = unicodePtr->used; + + /* + * How much space will string rep need? + */ + + size = 0; + for (i = 0; i < unicodePtr->numChars; i++) { + size += Tcl_UniCharToUtf((int) src[i], dummy); + } + + dst = (char *) ckalloc((unsigned) (size + 1)); + objPtr->bytes = dst; + objPtr->length = size; + + for (i = 0; i < unicodePtr->numChars; i++) { + dst += Tcl_UniCharToUtf(src[i], dst); + } + *dst = '\0'; +} + +/* + *--------------------------------------------------------------------------- + * + * SetOptUnicodeFromAny -- + * + * Generate the Unicode internal rep from the string rep. + * + * Results: + * The return value is always TCL_OK. + * + * Side effects: + * A Unicode object is stored as the internal rep of objPtr. The Unicode + * ojbect is opitmized for the case where each UTF char in a string is only + * one byte. In this case, we store the value of numChars, but we don't copy + * the bytes to the unicodeObj->chars. Before accessing obj->chars, check if + * all chars are 1 byte long. + * + *--------------------------------------------------------------------------- + */ + +static void +SetOptUnicodeFromAny(objPtr, numChars) + Tcl_Obj *objPtr; /* The object to convert to type Unicode. */ + int numChars; +{ + Tcl_ObjType *typePtr; + Unicode *unicodePtr; + + unicodePtr = (Unicode *) ckalloc(UNICODE_SIZE(4)); + unicodePtr->numChars = numChars; + + typePtr = objPtr->typePtr; + if ((typePtr != NULL) && (typePtr->freeIntRepProc) != NULL) { + (*typePtr->freeIntRepProc)(objPtr); + } + objPtr->typePtr = &tclUnicodeType; + SET_UNICODE(objPtr, unicodePtr); +} + +/* + *--------------------------------------------------------------------------- + * + * SetUnicodeFromAny -- + * + * Generate the Unicode internal rep from the string rep. + * + * Results: + * The return value is always TCL_OK. + * + * Side effects: + * A Unicode object is stored as the internal rep of objPtr. The Unicode + * ojbect is opitmized for the case where each UTF char in a string is only + * one byte. In this case, we store the value of numChars, but we don't copy + * the bytes to the unicodeObj->chars. Before accessing obj->chars, check if + * all chars are 1 byte long. + * + *--------------------------------------------------------------------------- + */ + +static int +SetUnicodeFromAny(interp, objPtr) + Tcl_Interp *interp; /* Not used. */ + Tcl_Obj *objPtr; /* The object to convert to type Unicode. */ +{ + Tcl_ObjType *typePtr; + int numBytes, numChars; + char *src, *srcEnd; + Unicode *unicodePtr; + unsigned char *dst; + + typePtr = objPtr->typePtr; + if (typePtr != &tclUnicodeType) { + src = Tcl_GetStringFromObj(objPtr, &numBytes); + + numChars = Tcl_NumUtfChars(src, numBytes); + if (numChars == numBytes) { + SetOptUnicodeFromAny(objPtr, numChars); + } else { + unicodePtr = (Unicode *) ckalloc(UNICODE_SIZE(numChars + * sizeof(Tcl_UniChar))); + srcEnd = src + numBytes; + + for (dst = unicodePtr->chars; src < srcEnd; + dst += sizeof(Tcl_UniChar)) { + src += Tcl_UtfToUniChar(src, (Tcl_UniChar *) dst); + } + + unicodePtr->used = numChars * sizeof(Tcl_UniChar); + unicodePtr->numChars = numChars; + unicodePtr->allocated = numChars * sizeof(Tcl_UniChar); + + if ((typePtr != NULL) && (typePtr->freeIntRepProc) != NULL) { + (*typePtr->freeIntRepProc)(objPtr); + } + objPtr->typePtr = &tclUnicodeType; + SET_UNICODE(objPtr, unicodePtr); + } + } + return TCL_OK; +} + +/* + *---------------------------------------------------------------------- + * + * FreeUnicodeInternalRep -- + * + * Deallocate the storage associated with a Unicode data object's + * internal representation. + * + * Results: + * None. + * + * Side effects: + * Frees memory. + * + *---------------------------------------------------------------------- + */ + +static void +FreeUnicodeInternalRep(objPtr) + Tcl_Obj *objPtr; /* Object with internal rep to free. */ +{ + ckfree((char *) GET_UNICODE(objPtr)); +} diff --git a/generic/tclVar.c b/generic/tclVar.c index 03b7757..f2df52e 100644 --- a/generic/tclVar.c +++ b/generic/tclVar.c @@ -14,7 +14,7 @@ * See the file "license.terms" for information on usage and redistribution * of this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclVar.c,v 1.8 1999/04/16 00:46:55 stanton Exp $ + * RCS: @(#) $Id: tclVar.c,v 1.9 1999/06/08 02:59:27 hershey Exp $ */ #include "tclInt.h" @@ -1291,7 +1291,7 @@ Tcl_SetVar2Ex(interp, part1, part2, newValuePtr, flags) oldValuePtr = varPtr->value.objPtr; Tcl_IncrRefCount(oldValuePtr); /* since var is ref */ } - Tcl_AppendToObj(oldValuePtr, bytes, length); + Tcl_AppendObjToObj(oldValuePtr, newValuePtr); } } } else { diff --git a/tests/string.test b/tests/string.test index 01ad4bf..235dba8 100644 --- a/tests/string.test +++ b/tests/string.test @@ -11,12 +11,17 @@ # See the file "license.terms" for information on usage and redistribution # of this file, and for a DISCLAIMER OF ALL WARRANTIES. # -# RCS: @(#) $Id: string.test,v 1.11 1999/06/03 18:43:30 stanton Exp $ +# RCS: @(#) $Id: string.test,v 1.12 1999/06/08 02:59:28 hershey Exp $ if {[lsearch [namespace children] ::tcltest] == -1} { source [file join [pwd] [file dirname [info script]] defs.tcl] } +# Some tests require the testobj command + +set ::tcltest::testConfig(testobj) \ + [expr {[info commands testobj] != {}}] + test string-1.1 {error conditions} { list [catch {string gorp a b} msg] $msg } {1 {bad option "gorp": must be bytelength, compare, equal, first, index, is, last, length, map, match, range, repeat, replace, tolower, toupper, totitle, trim, trimleft, trimright, wordend, or wordstart}} @@ -226,6 +231,18 @@ test string-5.11 {string index, unicode} { test string-5.12 {string index, unicode over char length, under byte length} { string index \334\374\334\374 6 } {} +test string-5.13 {string index, bytearray object} { + string index [binary format a5 fuz] 0 +} f +test string-5.14 {string index, bytearray object} { + string index [binary format I* {0x50515253 0x52}] 3 +} S +test string-5.15 {string index, bytearray object} { + set b [binary format I* {0x50515253 0x52}] + set i1 [string index $b end-6] + set i2 [string index $b 1] + string compare $i1 $i2 +} 0 test string-6.1 {string is, too few args} { list [catch {string is} msg] $msg @@ -585,6 +602,12 @@ test string-9.4 {string length} { test string-9.5 {string length, unicode} { string le "abcd\u7266" } 5 +test string-9.6 {string length, bytearray object} { + string length [binary format a5 foo] +} 5 +test string-9.7 {string length, bytearray object} { + string length [binary format I* {0x50515253 0x52}] +} 8 test string-10.1 {string map, too few args} { list [catch {string map} msg] $msg @@ -798,6 +821,12 @@ test string-12.17 {string range, unicode} { test string-12.18 {string range, unicode} { string range ab\u7266cdefghijklmnop 2 3 } \u7266c +test string-12.19 {string range, bytearray object} { + set b [binary format I* {0x50515253 0x52}] + set r1 [string range $b 1 end-1] + set r2 [string range $b 1 6] + string compare $r1 $r2 +} 0 test string-13.1 {string repeat} { list [catch {string repeat} msg] $msg diff --git a/tests/unicode.test b/tests/unicode.test new file mode 100644 index 0000000..6ee91c8 --- /dev/null +++ b/tests/unicode.test @@ -0,0 +1,204 @@ +# This file tests the tclUnicode.c file. +# +# This file contains a collection of tests for one or more of the Tcl +# built-in commands. Sourcing this file into Tcl runs the tests and +# generates output for errors. No output means no errors were found. +# +# Copyright (c) 1999 by Scriptics Corporation. +# +# See the file "license.terms" for information on usage and redistribution +# of this file, and for a DISCLAIMER OF ALL WARRANTIES. +# +# RCS: @(#) $Id: unicode.test,v 1.2 1999/06/08 02:59:30 hershey Exp $ + +if {[lsearch [namespace children] ::tcltest] == -1} { + source [file join [pwd] [file dirname [info script]] defs.tcl] +} + +# Some tests require the testobj command + +set ::tcltest::testConfig(testobj) \ + [expr {[info commands testobj] != {}}] + +test unicode-1.1 {TclGetUniCharFromObj with byte-size chars} { + string index "abcdefghi" 0 +} "a" +test unicode-1.2 {TclGetUniCharFromObj with byte-size chars} { + string index "abcdefghi" 3 +} "d" +test unicode-1.3 {TclGetUniCharFromObj with byte-size chars} { + string index "abcdefghi" end +} "i" +test unicode-1.4 {TclGetUniCharFromObj with mixed width chars} { + string index "ïa¿b®c®¿dï" 0 +} "ï" +test unicode-1.5 {TclGetUniCharFromObj} { + string index "ïa¿b®c®¿dï" 4 +} "®" +test unicode-1.6 {TclGetUniCharFromObj} { + string index "ïa¿b®cï¿d®" end +} "®" + +test unicode-2.1 {TclGetUnicodeLengthFromObj with byte-size chars} { + string length "" +} 0 +test unicode-2.2 {TclGetUnicodeLengthFromObj with byte-size chars} { + string length "a" +} 1 +test unicode-2.3 {TclGetUnicodeLengthFromObj with byte-size chars} { + string length "abcdef" +} 6 +test unicode-2.4 {TclGetUnicodeLengthFromObj with mixed width chars} { + string length "®" +} 1 +test unicode-2.5 {TclGetUnicodeLengthFromObj with mixed width chars} { + string length "○○" +} 6 +test unicode-2.6 {TclGetUnicodeLengthFromObj with mixed width chars} { + string length "ïa¿b®cï¿d®" +} 10 + +test unicode-3.1 {TclGetRangeFromObj with all byte-size chars} {testobj} { + set x "abcdef" + list [testobj objtype $x] [set y [string range $x 1 end-1]] \ + [testobj objtype $x] [testobj objtype $y] +} {none bcde unicode none} + +test unicode-3.2 {TclGetRangeFromObj with some mixed width chars} {testobj} { + set x "abcïïdef" + list [testobj objtype $x] [set y [string range $x 1 end-1]] \ + [testobj objtype $x] [testobj objtype $y] +} {none bcïïde unicode unicode} + +test unicode-4.1 {UpdateStringOfUnicode} {testobj} { + set x 2345 + list [string index $x end] [testobj objtype $x] [incr x] \ + [testobj objtype $x] +} {5 unicode 2346 int} + +test unicode-5.1 {SetUnicodeFromAny called with non-unicode obj} {testobj} { + set x 2345 + list [incr x] [testobj objtype $x] [string index $x end] \ + [testobj objtype $x] +} {2346 int 6 unicode} + +test unicode-5.2 {SetUnicodeFromAny called with unicode obj} {testobj} { + set x "abcdef" + list [string length $x] [testobj objtype $x] \ + [string length $x] [testobj objtype $x] +} {6 unicode 6 unicode} + +test unicode-6.1 {DupUnicodeInternalRep, mixed width chars} {testobj} { + set x abcï¿®ghi + string length $x + set y $x + list [testobj objtype $x] [testobj objtype $y] [append x "®¿ï"] \ + [set y] [testobj objtype $x] [testobj objtype $y] +} {unicode unicode abcï¿®ghi®¿ï abcï¿®ghi unicode unicode} + +test unicode-6.2 {DupUnicodeInternalRep, mixed width chars} {testobj} { + set x abcï¿®ghi + set y $x + string length $x + list [testobj objtype $x] [testobj objtype $y] [append x "®¿ï"] \ + [set y] [testobj objtype $x] [testobj objtype $y] +} {unicode unicode abcï¿®ghi®¿ï abcï¿®ghi unicode unicode} + +test unicode-6.3 {DupUnicodeInternalRep, all byte-size chars} {testobj} { + set x abcdefghi + string length $x + set y $x + list [testobj objtype $x] [testobj objtype $y] [append x jkl] \ + [set y] [testobj objtype $x] [testobj objtype $y] +} {unicode unicode abcdefghijkl abcdefghi unicode unicode} + +test unicode-6.4 {DupUnicodeInternalRep, all byte-size chars} {testobj} { + set x abcdefghi + set y $x + string length $x + list [testobj objtype $x] [testobj objtype $y] [append x jkl] \ + [set y] [testobj objtype $x] [testobj objtype $y] +} {unicode unicode abcdefghijkl abcdefghi unicode unicode} + +test unicode-7.1 {TclAppendObjToUnicodeObj, mixed src & dest} {testobj} { + set x abcï¿®ghi + set y ®¿ï + string length $x + list [testobj objtype $x] [testobj objtype $y] [append x $y] \ + [set y] [testobj objtype $x] [testobj objtype $y] +} {unicode none abcï¿®ghi®¿ï ®¿ï unicode unicode} + +test unicode-7.2 {TclAppendObjToUnicodeObj, mixed src & dest} {testobj} { + set x abcï¿®ghi + string length $x + list [testobj objtype $x] [append x $x] [testobj objtype $x] \ + [append x $x] [testobj objtype $x] +} {unicode abcï¿®ghiabcï¿®ghi unicode\ +abcï¿®ghiabcï¿®ghiabcï¿®ghiabcï¿®ghi\ +unicode} + +test unicode-7.3 {TclAppendObjToUnicodeObj, mixed src & 1-byte dest} {testobj} { + set x abcdefghi + set y ®¿ï + string length $x + list [testobj objtype $x] [testobj objtype $y] [append x $y] \ + [set y] [testobj objtype $x] [testobj objtype $y] +} {unicode none abcdefghi®¿ï ®¿ï string unicode} + +test unicode-7.4 {TclAppendObjToUnicodeObj, 1-byte src & dest} {testobj} { + set x abcdefghi + set y jkl + string length $x + list [testobj objtype $x] [testobj objtype $y] [append x $y] \ + [set y] [testobj objtype $x] [testobj objtype $y] +} {unicode none abcdefghijkl jkl unicode unicode} + +test unicode-7.5 {TclAppendObjToUnicodeObj, 1-byte src & dest} {testobj} { + set x abcdefghi + string length $x + list [testobj objtype $x] [append x $x] [testobj objtype $x] \ + [append x $x] [testobj objtype $x] +} {unicode abcdefghiabcdefghi unicode abcdefghiabcdefghiabcdefghiabcdefghi\ +unicode} + +test unicode-7.6 {TclAppendObjToUnicodeObj, 1-byte src & mixed dest} {testobj} { + set x abcï¿®ghi + set y jkl + string length $x + list [testobj objtype $x] [testobj objtype $y] [append x $y] \ + [set y] [testobj objtype $x] [testobj objtype $y] +} {unicode none abcï¿®ghijkl jkl unicode unicode} + +test unicode-7.7 {TclAppendObjToUnicodeObj, integer src & dest} {testobj} { + set x [expr {4 * 5}] + set y [expr {4 + 5}] + list [testobj objtype $x] [testobj objtype $y] [append x $y] \ + [testobj objtype $x] [append x $y] [testobj objtype $x] \ + [testobj objtype $y] +} {int int 209 string 2099 string int} + +test unicode-7.8 {TclAppendObjToUnicodeObj, integer src & dest} {testobj} { + set x [expr {4 * 5}] + list [testobj objtype $x] [append x $x] [testobj objtype $x] \ + [append x $x] [testobj objtype $x] +} {int 2020 string 20202020 unicode} + +test unicode-7.9 {TclAppendObjToUnicodeObj, integer src & 1-byte dest} {testobj} { + set x abcdefghi + set y [expr {4 + 5}] + string length $x + list [testobj objtype $x] [testobj objtype $y] [append x $y] \ + [set y] [testobj objtype $x] [testobj objtype $y] +} {unicode int abcdefghi9 9 string int} + +test unicode-7.10 {TclAppendObjToUnicodeObj, integer src & mixed dest} {testobj} { + set x abcï¿®ghi + set y [expr {4 + 5}] + string length $x + list [testobj objtype $x] [testobj objtype $y] [append x $y] \ + [set y] [testobj objtype $x] [testobj objtype $y] +} {unicode int abcï¿®ghi9 9 unicode int} + +# cleanup +::tcltest::cleanupTests +return diff --git a/unix/Makefile.in b/unix/Makefile.in index a0f8e1e..379e477 100644 --- a/unix/Makefile.in +++ b/unix/Makefile.in @@ -5,7 +5,7 @@ # "autoconf" program (constructs like "@foo@" will get replaced in the # actual Makefile. # -# RCS: @(#) $Id: Makefile.in,v 1.27 1999/06/02 22:05:55 surles Exp $ +# RCS: @(#) $Id: Makefile.in,v 1.28 1999/06/08 02:59:30 hershey Exp $ # Current Tcl version; used in various names. @@ -270,7 +270,8 @@ GENERIC_OBJS = regcomp.o regexec.o regfree.o regerror.o tclAlloc.o \ tclObj.o tclPanic.o tclParse.o tclParseExpr.o tclPipe.o \ tclPkg.o tclPosixStr.o tclPreserve.o tclProc.o tclRegexp.o \ tclResolve.o tclResult.o tclScan.o tclStringObj.o tclThread.o \ - tclStubInit.o tclStubLib.o tclTimer.o tclUtf.o tclUtil.o tclVar.o + tclStubInit.o tclStubLib.o tclTimer.o tclUnicodeObj.o tclUtf.o \ + tclUtil.o tclVar.o STUB_LIB_OBJS = tclStubLib.o ${COMPAT_OBJS} @@ -351,6 +352,7 @@ GENERIC_SRCS = \ $(GENERIC_DIR)/tclTestProcBodyObj.c \ $(GENERIC_DIR)/tclThread.c \ $(GENERIC_DIR)/tclTimer.c \ + $(GENERIC_DIR)/tclUnicodeObj.c \ $(GENERIC_DIR)/tclUtil.c \ $(GENERIC_DIR)/tclVar.c @@ -889,6 +891,9 @@ tclThread.o: $(GENERIC_DIR)/tclThread.c tclThreadTest.o: $(GENERIC_DIR)/tclThreadTest.c $(CC) -c $(CC_SWITCHES) $(GENERIC_DIR)/tclThreadTest.c +tclUnicodeObj.o: $(GENERIC_DIR)/tclUnicodeObj.c + $(CC) -c $(CC_SWITCHES) $(GENERIC_DIR)/tclUnicodeObj.c + tclUnixChan.o: $(UNIX_DIR)/tclUnixChan.c $(CC) -c $(CC_SWITCHES) $(UNIX_DIR)/tclUnixChan.c diff --git a/win/Makefile.in b/win/Makefile.in index bdb25b7..20e587c 100644 --- a/win/Makefile.in +++ b/win/Makefile.in @@ -5,7 +5,7 @@ # "autoconf" program (constructs like "@foo@" will get replaced in the # actual Makefile. # -# RCS: @(#) $Id: Makefile.in,v 1.3 1999/06/05 00:18:12 stanton Exp $ +# RCS: @(#) $Id: Makefile.in,v 1.4 1999/06/08 02:59:31 hershey Exp $ VERSION = @TCL_VERSION@ @@ -209,6 +209,7 @@ GENERIC_OBJS = \ tclStubLib.$(OBJEXT) \ tclThread.$(OBJEXT) \ tclTimer.$(OBJEXT) \ + tclUnicodeObj.$(OBJEXT) \ tclUtf.$(OBJEXT) \ tclUtil.$(OBJEXT) \ tclVar.$(OBJEXT) diff --git a/win/makefile.vc b/win/makefile.vc index cce6320..06926c7 100644 --- a/win/makefile.vc +++ b/win/makefile.vc @@ -6,7 +6,7 @@ # Copyright (c) 1995-1996 Sun Microsystems, Inc. # Copyright (c) 1998-1999 by Scriptics Corporation. # -# RCS: @(#) $Id: makefile.vc,v 1.34 1999/05/07 23:40:37 stanton Exp $ +# RCS: @(#) $Id: makefile.vc,v 1.35 1999/06/08 02:59:31 hershey Exp $ # Does not depend on the presence of any environment variables in # order to compile tcl; all needed information is derived from @@ -186,6 +186,7 @@ TCLOBJS = \ $(TMPDIR)\tclStubLib.obj \ $(TMPDIR)\tclThread.obj \ $(TMPDIR)\tclTimer.obj \ + $(TMPDIR)\tclUnicodeObj.obj \ $(TMPDIR)\tclUtf.obj \ $(TMPDIR)\tclUtil.obj \ $(TMPDIR)\tclVar.obj \ -- cgit v0.12