From 73d440a8ed4e3ef4fd1c30ce5708061a261396dc Mon Sep 17 00:00:00 2001 From: hershey Date: Tue, 15 Jun 1999 01:16:21 +0000 Subject: Merged String and Unicode object types. Added new functions to the puplic API: Tcl_NewUnicodeObj, Tcl_SetUnicodeObj, Tcl_GetUnicode, Tcl_GetUniChar, Tcl_GetCharLength, Tcl_GetRange, Tcl_AppendUnicodeToObj. Note: some stringObj tests are still failing--the teststringobj command still needs to be updated. --- ChangeLog | 13 + doc/StringObj.3 | 151 +++++-- generic/tcl.decls | 26 +- generic/tclCmdMZ.c | 32 +- generic/tclDecls.h | 57 ++- generic/tclInt.h | 18 +- generic/tclObj.c | 3 +- generic/tclRegexp.c | 6 +- generic/tclStringObj.c | 1077 +++++++++++++++++++++++++++++++++++++++++++---- generic/tclStubInit.c | 9 +- generic/tclUnicodeObj.c | 882 -------------------------------------- tests/stringObj.test | 181 +++++++- tests/unicode.test | 204 --------- unix/Makefile.in | 12 +- win/Makefile.in | 3 +- win/makefile.vc | 3 +- 16 files changed, 1431 insertions(+), 1246 deletions(-) delete mode 100644 generic/tclUnicodeObj.c delete mode 100644 tests/unicode.test diff --git a/ChangeLog b/ChangeLog index 20ce77a..bed88a4 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,16 @@ +1999-06-14 Melissa Hirschl + + * doc/StringObj.3 + * test/stringObj.test + * unix/Makefile.in + * win/Makefile.in + * win/makefile.vc + * generic/tclStringObj.c: + Merged String and Unicode object types. Added new functions to + the puplic API: Tcl_NewUnicodeObj, Tcl_SetUnicodeObj, + Tcl_GetUnicode, Tcl_GetUniChar, Tcl_GetCharLength, Tcl_GetRange, + Tcl_AppendUnicodeToObj. + 1999-06-09 * generic/tclUnicodeObj.c: Lots of cleanup and simplification. diff --git a/doc/StringObj.3 b/doc/StringObj.3 index 46fe959..d195362 100644 --- a/doc/StringObj.3 +++ b/doc/StringObj.3 @@ -4,38 +4,71 @@ '\" See the file "license.terms" for information on usage and redistribution '\" of this file, and for a DISCLAIMER OF ALL WARRANTIES. '\" -'\" RCS: @(#) $Id: StringObj.3,v 1.4 1999/04/16 00:46:33 stanton Exp $ +'\" RCS: @(#) $Id: StringObj.3,v 1.5 1999/06/15 01:16:21 hershey Exp $ '\" .so man.macros .TH Tcl_StringObj 3 8.1 Tcl "Tcl Library Procedures" .BS .SH NAME -Tcl_NewStringObj, Tcl_SetStringObj, Tcl_GetStringFromObj, Tcl_GetString, Tcl_AppendToObj, Tcl_AppendStringsToObj, Tcl_AppendStringsToObjVA, Tcl_AppendObjToObj, Tcl_SetObjLength, Tcl_ConcatObj \- manipulate Tcl objects as strings +Tcl_NewStringObj, Tcl_NewUnicodeObj, Tcl_SetStringObj, Tcl_SetUnicodeObj, Tcl_GetStringFromObj, Tcl_GetString, Tcl_GetUnicode, Tcl_GetUniChar, Tcl_GetCharLength, Tcl_GetRange, Tcl_AppendToObj, Tcl_AppendUnicodeToObj, Tcl_AppendStringsToObj, Tcl_AppendStringsToObjVA, Tcl_AppendObjToObj, Tcl_SetObjLength, Tcl_ConcatObj \- manipulate Tcl objects as strings .SH SYNOPSIS .nf \fB#include \fR .sp Tcl_Obj * \fBTcl_NewStringObj\fR(\fIbytes, length\fR) +.VS 8.1.2 .sp +Tcl_Obj * +\fBTcl_NewUnicodeObj\fR(\fIunicode, numChars\fR) +.VE +.sp +void \fBTcl_SetStringObj\fR(\fIobjPtr, bytes, length\fR) +.VS 8.1.2 +.sp +void +\fBTcl_SetUnicodeObj\fR(\fIobjPtr, unicode, numChars\fR) +.VE .sp char * \fBTcl_GetStringFromObj\fR(\fIobjPtr, lengthPtr\fR) .sp char * \fBTcl_GetString\fR(\fIobjPtr\fR) +.VS 8.1.2 +.sp +Tcl_UniChar * +\fBTcl_GetUnicode\fR(\fIobjPtr\fR) .sp +Tcl_UniChar +\fBTcl_GetUniChar\fR(\fIobjPtr, index\fR) +.sp +int +\fBTcl_GetCharLength\fR(\fIobjPtr\fR) +.sp +Tcl_Obj * +\fBTcl_GetRange\fR(\fIobjPtr, first, last\fR) +.VE +.sp +void \fBTcl_AppendToObj\fR(\fIobjPtr, bytes, length\fR) -.VS +.VS 8.1.2 .sp -\fBTcl_AppendObjToObj\fR(\fIobjPtr, appendObjPtr\fR) +void +\fBTcl_AppendUnicodeToObj\fR(\fIobjPtr, unicode, numChars\fR) .VE .sp +void +\fBTcl_AppendObjToObj\fR(\fIobjPtr, appendObjPtr\fR) +.sp +void \fBTcl_AppendStringsToObj\fR(\fIobjPtr, string, string, ... \fB(char *) NULL\fR) .sp +void \fBTcl_AppendStringsToObjVA\fR(\fIobjPtr, argList\fR) .sp +void \fBTcl_SetObjLength\fR(\fIobjPtr, newLength\fR) .sp Tcl_Obj * @@ -51,12 +84,29 @@ unless \fIlength\fR is negative. The number of bytes to copy from \fIbytes\fR when initializing, setting, or appending to a string object. If negative, all bytes up to the first null are used. +.AP Tcl_UniChar *unicode in +Points to the first byte of an array of Unicode characters +used to set or append to a string object. +This byte array may contain embedded null characters +unless \fInumChars\fR is negative. +.VS 8.1.2 +.AP int numChars in +The number of Unicode characters to copy from \fIunicode\fR when +initializing, setting, or appending to a string object. +If negative, all characters up to the first null character are used. +.AP int index in +The index of the Unicode character to return. +.AP int first in +The index of the first Unicode character in the Unicode range to be +returned as a new object. +.AP int last in +The index of the last Unicode character in the Unicode range to be +returned as a new object. +.VE .AP Tcl_Obj *objPtr in/out Points to an object to manipulate. -.VS .AP Tcl_Obj *appendObjPtr in The object to append to \fIobjPtr\fR in \fBTcl_AppendObjToObj\fR. -.VE .AP int *lengthPtr out If non-NULL, the location where \fBTcl_GetStringFromObj\fR will store the the length of an object's string representation. @@ -82,39 +132,74 @@ of the object to store additional information to make the string manipulations more efficient. In particular, they make a series of append operations efficient by allocating extra storage space for the string so that it doesn't have to be copied for each append. +.VS 8.1.2 +Also, indexing and length computations are optimized because the +Unicode string representation is calculated and cached as needed. +.VE .PP +.VS 8.1.2 \fBTcl_NewStringObj\fR and \fBTcl_SetStringObj\fR create a new object -or modify an existing object to hold a copy of -the string given by \fIbytes\fR and \fIlength\fR. -\fBTcl_NewStringObj\fR returns a pointer to a newly created object -with reference count zero. -Both procedures set the object to hold a copy of the specified string. -\fBTcl_SetStringObj\fR frees any old string representation -as well as any old internal representation of the object. +or modify an existing object to hold a copy of the string given by +\fIbytes\fR and \fIlength\fR. \fBTcl_NewUnicodeObj\fR and +\fBTcl_SetUnicodeObj\fR create a new object or modify an existing +object to hold a copy of the Unicode string given by \fIunicode\fR and +\fInumChars\fR. \fBTcl_NewStringObj\fR and \fBTcl_NewUnicodeObj\fR +return a pointer to a newly created object with reference count zero. +All four procedures set the object to hold a copy of the specified +string. \fBTcl_SetStringObj\fR and \fBTcl_SetUnicodeObj\fR free any +old string representation as well as any old internal representation +of the object. +.VE +.PP +\fBTcl_GetStringFromObj\fR and \fBTcl_GetString\fR return an object's +string representation. This is given by the returned byte pointer and +(for \fBTcl_GetStringFromObj\fR) length, which is stored in +\fIlengthPtr\fR if it is non-NULL. If the object's UTF string +representation is invalid (its byte pointer is NULL), the string +representation is regenerated from the object's internal +representation. The storage referenced by the returned byte pointer +is owned by the object manager and should not be modified by the +caller. The procedure \fBTcl_GetString\fR is used in the common case +where the caller does not need the length of the string +representation. .PP -\fBTcl_GetStringFromObj\fR and \fBTcl_GetString\fR -return an object's string representation. -This is given by the returned byte pointer -and (for \fBTcl_GetStringFromObj\fR) length, -which is stored in \fIlengthPtr\fR if it is non-NULL. -If the object's string representation is invalid -(its byte pointer is NULL), -the string representation is regenerated from the -object's internal representation. -The storage referenced by the returned byte pointer -is owned by the object manager and should not be modified by the caller. -The procedure \fBTcl_GetString\fR is used in the common case -where the caller does not need the length of the string representation. +.VS 8.1.2 +\fBTcl_GetUnicode\fR returns an object's value as a Unicode string. +\fBTcl_GetUniChar\fR returns the \fIindex\fR'th character in the +object's Unicode representation. +.PP +\fBTcl_GetRange\fR returns a newly created object comprised of the +characters between \fIfirst\fR and \fIlast\fR (inclusive) in the +object's Unicode representation. If the object's Unicode +representation is invalid, the Unicode representation is regenerated +from the object's string representation. +.PP +\fBTcl_GetCharLength\fR returns the number of characters (as opposed +to bytes) in the string object. .PP \fBTcl_AppendToObj\fR appends the data given by \fIbytes\fR and -\fIlength\fR to the object specified by \fIobjPtr\fR. It does this -in a way that handles repeated calls relatively efficiently (it -overallocates the string space to avoid repeated reallocations -and copies of object's string value). -.VS +\fIlength\fR to the string representation of the object specified by +\fIobjPtr\fR. If the object has an invalid string representation, +then an attempt is made to convert \fIbytes\fR is to the Unicode +format. If the conversion is successful, then the converted form of +\fIbytes\fR is appended to the object's Unicode representation. +Otherwise, the object's Unicode representation is invalidated and +converted to the UTF format, and \fIbytes\fR is appended to the +object's new string representation. +.PP +\fBTcl_AppendUnicodeToObj\fR appends the Unicode string given by +\fIunicode\fR and \fInumChars\fR to the object specified by +\fIobjPtr\fR. If the object has an invalid Unicode representation, +then \fIunicode\fR is converted to the UTF format and appended to the +object's string representation. Appends are optimized to handle +repeated appends relatively efficiently (it overallocates the string +or Unicode space to avoid repeated reallocations and copies of +object's string value). .PP \fBTcl_AppendObjToObj\fR is similar to \fBTcl_AppendToObj\fR, but it -appends the string value of \fIappendObjPtr\fR to \fIobjPtr\fR. +appends the string or Unicode value (whichever exists and is best +suited to be appended to \fIobjPtr\fR) of \fIappendObjPtr\fR to +\fIobjPtr\fR. .VE .PP \fBTcl_AppendStringsToObj\fR is similar to \fBTcl_AppendToObj\fR @@ -158,4 +243,4 @@ Tcl_NewObj, Tcl_IncrRefCount, Tcl_DecrRefCount .SH KEYWORDS append, internal representation, object, object type, string object, -string type, string representation, concat, concatenate +string type, string representation, concat, concatenate, unicode diff --git a/generic/tcl.decls b/generic/tcl.decls index 8447520..8b9d46d 100644 --- a/generic/tcl.decls +++ b/generic/tcl.decls @@ -10,7 +10,7 @@ # See the file "license.terms" for information on usage and redistribution # of this file, and for a DISCLAIMER OF ALL WARRANTIES. # -# RCS: @(#) $Id: tcl.decls,v 1.15 1999/06/10 04:28:49 stanton Exp $ +# RCS: @(#) $Id: tcl.decls,v 1.16 1999/06/15 01:16:21 hershey Exp $ library tcl @@ -1284,7 +1284,29 @@ declare 376 generic { declare 377 generic { void Tcl_RegExpGetInfo(Tcl_RegExp regexp, Tcl_RegExpInfo *infoPtr) } - +declare 378 generic { + Tcl_Obj * Tcl_NewUnicodeObj(Tcl_UniChar *unicode, int numChars) +} +declare 379 generic { + void Tcl_SetUnicodeObj(Tcl_Obj *objPtr, Tcl_UniChar *unicode, \ + int numChars) +} +declare 380 generic { + int Tcl_GetCharLength (Tcl_Obj *objPtr) +} +declare 381 generic { + Tcl_UniChar Tcl_GetUniChar (Tcl_Obj *objPtr, int index) +} +declare 382 generic { + Tcl_UniChar * Tcl_GetUnicode (Tcl_Obj *objPtr) +} +declare 383 generic { + Tcl_Obj * Tcl_GetRange (Tcl_Obj *objPtr, int first, int last) +} +declare 384 generic { + void Tcl_AppendUnicodeToObj (register Tcl_Obj *objPtr, \ + Tcl_UniChar *unicode, int length) +} ############################################################################## diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c index 4f20815..3746cfc 100644 --- a/generic/tclCmdMZ.c +++ b/generic/tclCmdMZ.c @@ -13,7 +13,7 @@ * See the file "license.terms" for information on usage and redistribution * of this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclCmdMZ.c,v 1.14 1999/06/10 04:28:50 stanton Exp $ + * RCS: @(#) $Id: tclCmdMZ.c,v 1.15 1999/06/15 01:16:22 hershey Exp $ */ #include "tclInt.h" @@ -274,7 +274,7 @@ Tcl_RegexpObjCmd(dummy, interp, objc, objv) newPtr = Tcl_NewListObj(2, objs); } else { if (i <= info.nsubs) { - newPtr = TclGetRangeFromObj(objPtr, info.matches[i].start, + newPtr = Tcl_GetRange(objPtr, info.matches[i].start, info.matches[i].end - 1); } else { newPtr = Tcl_NewObj(); @@ -385,8 +385,8 @@ Tcl_RegsubObjCmd(dummy, interp, objc, objv) Tcl_IncrRefCount(resultPtr); objPtr = objv[1]; - wlen = TclGetUnicodeLengthFromObj(objPtr); - wstring = TclGetUnicodeFromObj(objPtr); + wlen = Tcl_GetCharLength(objPtr); + wstring = Tcl_GetUnicode(objPtr); subspec = Tcl_GetString(objv[2]); varPtr = objv[3]; @@ -430,7 +430,7 @@ Tcl_RegsubObjCmd(dummy, interp, objc, objv) Tcl_RegExpGetInfo(regExpr, &info); start = info.matches[0].start; end = info.matches[0].end; - TclAppendUnicodeToObj(resultPtr, wstring + offset, start); + Tcl_AppendUnicodeToObj(resultPtr, wstring + offset, start); /* * Append the subSpec argument to the variable, making appropriate @@ -468,7 +468,7 @@ Tcl_RegsubObjCmd(dummy, interp, objc, objv) subStart = info.matches[index].start; subEnd = info.matches[index].end; if ((subStart >= 0) && (subEnd >= 0)) { - TclAppendUnicodeToObj(resultPtr, wstring + offset + subStart, + Tcl_AppendUnicodeToObj(resultPtr, wstring + offset + subStart, subEnd - subStart); } if (*src == '\\') { @@ -485,7 +485,7 @@ Tcl_RegsubObjCmd(dummy, interp, objc, objv) * in order to prevent infinite loops. */ - TclAppendUnicodeToObj(resultPtr, wstring + offset, 1); + Tcl_AppendUnicodeToObj(resultPtr, wstring + offset, 1); offset++; } offset += end; @@ -500,7 +500,7 @@ Tcl_RegsubObjCmd(dummy, interp, objc, objv) */ if ((offset < wlen) || (numMatches == 0)) { - TclAppendUnicodeToObj(resultPtr, wstring + offset, wlen - offset); + Tcl_AppendUnicodeToObj(resultPtr, wstring + offset, wlen - offset); } if (Tcl_ObjSetVar2(interp, varPtr, NULL, resultPtr, 0) == NULL) { Tcl_AppendResult(interp, "couldn't set variable \"", @@ -973,8 +973,11 @@ Tcl_StringObjCmd(dummy, interp, objc, objv) if (length2 == utflen) { /* no unicode chars */ string2 += start; + length2 -= start; } else { - string2 = Tcl_UtfAtIndex(string2, start); + char *s = Tcl_UtfAtIndex(string2, start); + length2 -= s - string2; + string2 = s; } } } @@ -1049,14 +1052,14 @@ Tcl_StringObjCmd(dummy, interp, objc, objv) * 'end' really means. */ - length2 = TclGetUnicodeLengthFromObj(objv[2]); + length2 = Tcl_GetCharLength(objv[2]); if (TclGetIntForIndex(interp, objv[3], length2 - 1, &index) != TCL_OK) { return TCL_ERROR; } if ((index >= 0) && (index < length2)) { - unichar = TclGetUniCharFromObj(objv[2], index); + unichar = Tcl_GetUniChar(objv[2], index); length2 = Tcl_UniCharToUtf((int)unichar, buf); Tcl_SetStringObj(resultPtr, buf, length2); } @@ -1432,7 +1435,7 @@ Tcl_StringObjCmd(dummy, interp, objc, objv) Tcl_SetIntObj(resultPtr, length1); } else { Tcl_SetIntObj(resultPtr, - TclGetUnicodeLengthFromObj(objv[2])); + Tcl_GetCharLength(objv[2])); } } break; @@ -1611,7 +1614,7 @@ Tcl_StringObjCmd(dummy, interp, objc, objv) * create a result object. */ - length2 = TclGetUnicodeLengthFromObj(objv[2]) - 1; + length2 = Tcl_GetCharLength(objv[2]) - 1; if (TclGetIntForIndex(interp, objv[3], length2, &first) != TCL_OK) { @@ -1628,7 +1631,7 @@ Tcl_StringObjCmd(dummy, interp, objc, objv) last = length1 - 1; } if (last >= first) { - resultPtr = TclGetRangeFromObj(objv[2], first, last); + resultPtr = Tcl_GetRange(objv[2], first, last); Tcl_SetObjResult(interp, resultPtr); } } @@ -1761,6 +1764,7 @@ Tcl_StringObjCmd(dummy, interp, objc, objv) Tcl_SetStringObj(resultPtr, string1, start - string1); Tcl_AppendToObj(resultPtr, string2, length2); Tcl_AppendToObj(resultPtr, end, -1); + ckfree(string2); } break; diff --git a/generic/tclDecls.h b/generic/tclDecls.h index 2a7ac93..8cec11a 100644 --- a/generic/tclDecls.h +++ b/generic/tclDecls.h @@ -8,7 +8,7 @@ * See the file "license.terms" for information on usage and redistribution * of this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclDecls.h,v 1.15 1999/06/10 04:28:50 stanton Exp $ + * RCS: @(#) $Id: tclDecls.h,v 1.16 1999/06/15 01:16:22 hershey Exp $ */ #ifndef _TCLDECLS @@ -1175,6 +1175,26 @@ EXTERN int Tcl_RegExpMatchObj _ANSI_ARGS_((Tcl_Interp * interp, /* 377 */ EXTERN void Tcl_RegExpGetInfo _ANSI_ARGS_((Tcl_RegExp regexp, Tcl_RegExpInfo * infoPtr)); +/* 378 */ +EXTERN Tcl_Obj * Tcl_NewUnicodeObj _ANSI_ARGS_((Tcl_UniChar * unicode, + int numChars)); +/* 379 */ +EXTERN void Tcl_SetUnicodeObj _ANSI_ARGS_((Tcl_Obj * objPtr, + Tcl_UniChar * unicode, int numChars)); +/* 380 */ +EXTERN int Tcl_GetCharLength _ANSI_ARGS_((Tcl_Obj * objPtr)); +/* 381 */ +EXTERN Tcl_UniChar Tcl_GetUniChar _ANSI_ARGS_((Tcl_Obj * objPtr, + int index)); +/* 382 */ +EXTERN Tcl_UniChar * Tcl_GetUnicode _ANSI_ARGS_((Tcl_Obj * objPtr)); +/* 383 */ +EXTERN Tcl_Obj * Tcl_GetRange _ANSI_ARGS_((Tcl_Obj * objPtr, + int first, int last)); +/* 384 */ +EXTERN void Tcl_AppendUnicodeToObj _ANSI_ARGS_(( + register Tcl_Obj * objPtr, + Tcl_UniChar * unicode, int length)); typedef struct TclStubHooks { struct TclPlatStubs *tclPlatStubs; @@ -1620,6 +1640,13 @@ typedef struct TclStubs { int (*tcl_UniCharIsPunct) _ANSI_ARGS_((int ch)); /* 375 */ int (*tcl_RegExpMatchObj) _ANSI_ARGS_((Tcl_Interp * interp, Tcl_RegExp regexp, Tcl_Obj * objPtr, int offset, int nmatches, int flags)); /* 376 */ void (*tcl_RegExpGetInfo) _ANSI_ARGS_((Tcl_RegExp regexp, Tcl_RegExpInfo * infoPtr)); /* 377 */ + Tcl_Obj * (*tcl_NewUnicodeObj) _ANSI_ARGS_((Tcl_UniChar * unicode, int numChars)); /* 378 */ + void (*tcl_SetUnicodeObj) _ANSI_ARGS_((Tcl_Obj * objPtr, Tcl_UniChar * unicode, int numChars)); /* 379 */ + int (*tcl_GetCharLength) _ANSI_ARGS_((Tcl_Obj * objPtr)); /* 380 */ + Tcl_UniChar (*tcl_GetUniChar) _ANSI_ARGS_((Tcl_Obj * objPtr, int index)); /* 381 */ + Tcl_UniChar * (*tcl_GetUnicode) _ANSI_ARGS_((Tcl_Obj * objPtr)); /* 382 */ + Tcl_Obj * (*tcl_GetRange) _ANSI_ARGS_((Tcl_Obj * objPtr, int first, int last)); /* 383 */ + void (*tcl_AppendUnicodeToObj) _ANSI_ARGS_((register Tcl_Obj * objPtr, Tcl_UniChar * unicode, int length)); /* 384 */ } TclStubs; #ifdef __cplusplus @@ -3168,6 +3195,34 @@ extern TclStubs *tclStubsPtr; #define Tcl_RegExpGetInfo \ (tclStubsPtr->tcl_RegExpGetInfo) /* 377 */ #endif +#ifndef Tcl_NewUnicodeObj +#define Tcl_NewUnicodeObj \ + (tclStubsPtr->tcl_NewUnicodeObj) /* 378 */ +#endif +#ifndef Tcl_SetUnicodeObj +#define Tcl_SetUnicodeObj \ + (tclStubsPtr->tcl_SetUnicodeObj) /* 379 */ +#endif +#ifndef Tcl_GetCharLength +#define Tcl_GetCharLength \ + (tclStubsPtr->tcl_GetCharLength) /* 380 */ +#endif +#ifndef Tcl_GetUniChar +#define Tcl_GetUniChar \ + (tclStubsPtr->tcl_GetUniChar) /* 381 */ +#endif +#ifndef Tcl_GetUnicode +#define Tcl_GetUnicode \ + (tclStubsPtr->tcl_GetUnicode) /* 382 */ +#endif +#ifndef Tcl_GetRange +#define Tcl_GetRange \ + (tclStubsPtr->tcl_GetRange) /* 383 */ +#endif +#ifndef Tcl_AppendUnicodeToObj +#define Tcl_AppendUnicodeToObj \ + (tclStubsPtr->tcl_AppendUnicodeToObj) /* 384 */ +#endif #endif /* defined(USE_TCL_STUBS) && !defined(USE_TCL_STUB_PROCS) */ diff --git a/generic/tclInt.h b/generic/tclInt.h index d30d439..cfa587b 100644 --- a/generic/tclInt.h +++ b/generic/tclInt.h @@ -11,7 +11,7 @@ * See the file "license.terms" for information on usage and redistribution * of this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclInt.h,v 1.32 1999/06/10 04:28:51 stanton Exp $ + * RCS: @(#) $Id: tclInt.h,v 1.33 1999/06/15 01:16:23 hershey Exp $ */ #ifndef _TCLINT @@ -1509,7 +1509,6 @@ extern Tcl_ObjType tclIntType; extern Tcl_ObjType tclListType; extern Tcl_ObjType tclProcBodyType; extern Tcl_ObjType tclStringType; -extern Tcl_ObjType tclUnicodeType; /* * The head of the list of free Tcl objects, and the total number of Tcl @@ -1543,12 +1542,6 @@ EXTERN int TclAccess _ANSI_ARGS_((CONST char *path, EXTERN int TclAccessDeleteProc _ANSI_ARGS_((TclAccessProc_ *proc)); EXTERN int TclAccessInsertProc _ANSI_ARGS_((TclAccessProc_ *proc)); EXTERN void TclAllocateFreeObjects _ANSI_ARGS_((void)); -EXTERN Tcl_Obj * TclAppendObjToUnicodeObj _ANSI_ARGS_(( - register Tcl_Obj *targetObjPtr, - register Tcl_Obj *srcObjPtr)); -EXTERN void TclAppendUnicodeToObj _ANSI_ARGS_(( - register Tcl_Obj *objPtr, Tcl_UniChar *unichars, - int length)); EXTERN int TclArraySet _ANSI_ARGS_((Tcl_Interp *interp, Tcl_Obj *arrayNameObj, Tcl_Obj *arrayElemObj)); EXTERN int TclCleanupChildren _ANSI_ARGS_((Tcl_Interp *interp, @@ -1641,13 +1634,6 @@ EXTERN int TclGetOpenMode _ANSI_ARGS_((Tcl_Interp *interp, char *string, int *seekFlagPtr)); EXTERN Tcl_Command TclGetOriginalCommand _ANSI_ARGS_(( Tcl_Command command)); -EXTERN Tcl_Obj* TclGetRangeFromObj _ANSI_ARGS_((Tcl_Obj *objPtr, - int first, int last)); -EXTERN Tcl_UniChar TclGetUniCharFromObj _ANSI_ARGS_((Tcl_Obj *objPtr, - int index)); -EXTERN Tcl_UniChar * TclGetUnicodeFromObj _ANSI_ARGS_((Tcl_Obj *objPtr)); -EXTERN int TclGetUnicodeLengthFromObj _ANSI_ARGS_(( - Tcl_Obj *objPtr)); EXTERN int TclGlob _ANSI_ARGS_((Tcl_Interp *interp, char *pattern, int noComplain)); EXTERN int TclGlobalInvoke _ANSI_ARGS_((Tcl_Interp *interp, @@ -1695,8 +1681,6 @@ EXTERN Var * TclLookupVar _ANSI_ARGS_((Tcl_Interp *interp, EXTERN int TclMathInProgress _ANSI_ARGS_((void)); EXTERN int TclNeedSpace _ANSI_ARGS_((char *start, char *end)); EXTERN Tcl_Obj * TclNewProcBodyObj _ANSI_ARGS_((Proc *procPtr)); -EXTERN Tcl_Obj * TclNewUnicodeObj _ANSI_ARGS_((Tcl_UniChar *unichars, - int numChars)); EXTERN int TclObjCommandComplete _ANSI_ARGS_((Tcl_Obj *cmdPtr)); EXTERN int TclObjInterpProc _ANSI_ARGS_((ClientData clientData, Tcl_Interp *interp, int objc, diff --git a/generic/tclObj.c b/generic/tclObj.c index 423df28..67be178 100644 --- a/generic/tclObj.c +++ b/generic/tclObj.c @@ -10,7 +10,7 @@ * See the file "license.terms" for information on usage and redistribution * of this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclObj.c,v 1.8 1999/06/08 02:59:25 hershey Exp $ + * RCS: @(#) $Id: tclObj.c,v 1.9 1999/06/15 01:16:23 hershey Exp $ */ #include "tclInt.h" @@ -138,7 +138,6 @@ TclInitObjSubsystem() Tcl_RegisterObjType(&tclListType); Tcl_RegisterObjType(&tclByteCodeType); Tcl_RegisterObjType(&tclProcBodyType); - Tcl_RegisterObjType(&tclUnicodeType); #ifdef TCL_COMPILE_STATS Tcl_MutexLock(&tclObjMutex); diff --git a/generic/tclRegexp.c b/generic/tclRegexp.c index 7590c8d..3e28224 100644 --- a/generic/tclRegexp.c +++ b/generic/tclRegexp.c @@ -10,7 +10,7 @@ * See the file "license.terms" for information on usage and redistribution * of this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclRegexp.c,v 1.7 1999/06/10 04:28:51 stanton Exp $ + * RCS: @(#) $Id: tclRegexp.c,v 1.8 1999/06/15 01:16:24 hershey Exp $ */ #include "tclInt.h" @@ -429,8 +429,8 @@ Tcl_RegExpMatchObj(interp, re, objPtr, offset, nmatches, flags) Tcl_IncrRefCount(objPtr); - udata = TclGetUnicodeFromObj(objPtr); - length = TclGetUnicodeLengthFromObj(objPtr); + udata = Tcl_GetUnicode(objPtr); + length = Tcl_GetCharLength(objPtr); /* * Save the target object so we can extract strings from it later. diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c index c70bcb9..8dc6e90 100644 --- a/generic/tclStringObj.c +++ b/generic/tclStringObj.c @@ -9,13 +9,29 @@ * track of the extra space. Objects with this internal * representation are called "expandable string objects". * + * Since some string operations work with UTF strings and others require Unicode + format, the string obeject type stores one or both formats. If the object is + created with a Unicode string, then UTF form is not stored until it is + required by a string operation. The string object always stores the number of + characters, so if the object is created with a UTF string, we automatically + convert it to unicode (as this costs little more than + +A Unicode string + * is an internationalized string. Conceptually, a Unicode string is an + * array of 16-bit quantities organized as a sequence of properly formed + * UTF-8 characters. There is a one-to-one map between Unicode and UTF + * characters. The Unicode ojbect is opitmized for the case where each UTF + * char in a string is only one byte. In this case, we store the value of + * numChars, but we don't copy the bytes to the unicodeObj->chars. Before + * accessing obj->chars, check if unicodeObj->numChars == obj->length. + * * Copyright (c) 1995-1997 Sun Microsystems, Inc. * Copyright (c) 1999 by Scriptics Corporation. * * See the file "license.terms" for information on usage and redistribution * of this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclStringObj.c,v 1.7 1999/06/08 02:59:25 hershey Exp $ + * RCS: @(#) $Id: tclStringObj.c,v 1.8 1999/06/15 01:16:25 hershey Exp $ */ #include "tclInt.h" @@ -24,7 +40,20 @@ * Prototypes for procedures defined later in this file: */ -static void ConvertToStringType _ANSI_ARGS_((Tcl_Obj *objPtr)); +static void AppendUnicodeToUnicodeRep _ANSI_ARGS_(( + Tcl_Obj *objPtr, Tcl_UniChar *unicode, + int appendNumChars)); +static void AppendUnicodeToUtfRep _ANSI_ARGS_(( + Tcl_Obj *objPtr, Tcl_UniChar *unicode, + int numChars)); +static void AppendUtfToUnicodeRep _ANSI_ARGS_((Tcl_Obj *objPtr, + char *bytes, int numBytes)); +static void AppendUtfToUtfRep _ANSI_ARGS_((Tcl_Obj *objPtr, + char *bytes, int numBytes)); + +static void FillUnicodeRep _ANSI_ARGS_((Tcl_Obj *objPtr)); + +static void FreeStringInternalRep _ANSI_ARGS_((Tcl_Obj *objPtr)); static void DupStringInternalRep _ANSI_ARGS_((Tcl_Obj *objPtr, Tcl_Obj *copyPtr)); static int SetStringFromAny _ANSI_ARGS_((Tcl_Interp *interp, @@ -38,11 +67,46 @@ static void UpdateStringOfString _ANSI_ARGS_((Tcl_Obj *objPtr)); Tcl_ObjType tclStringType = { "string", /* name */ - (Tcl_FreeInternalRepProc *) NULL, /* freeIntRepProc */ + FreeStringInternalRep, /* freeIntRepPro */ DupStringInternalRep, /* dupIntRepProc */ UpdateStringOfString, /* updateStringProc */ SetStringFromAny /* setFromAnyProc */ }; + +/* + * The following structure is the internal rep for a String object. + * It keeps track of how much memory has been used and how much has been + * allocated for the Unicode and UTF string to enable growing and + * shrinking of the UTF and Unicode reps of the String object with fewer + * mallocs. To optimize string length and indexing operations, this + * structure also stores the number of characters (same of UTF and Unicode!) + * once that value has been computede. + */ + +typedef struct String { + int numChars; /* The number of chars in the string. + * -1 means this value has not been + * calculated. >= 0 means that there is a + * valid Unicode rep, or that the number + * of UTF bytes == the number of chars. */ + size_t allocated; /* The amount of space actually allocated + * for the UTF string (minus 1 byte for + * the termination char). */ + size_t uallocated; /* The amount of space actually allocated + * for the Unicode string. 0 means the + * Unicode string rep is invalid. */ + Tcl_UniChar unicode[2]; /* The array of Unicode chars. The actual + * size of this field depends on the + * 'uallocated' field above. */ +} String; + +#define STRING_SIZE(len) \ + ((unsigned) (sizeof(String) + ((len-1) * sizeof(Tcl_UniChar)))) +#define GET_STRING(objPtr) \ + ((String *) (objPtr)->internalRep.otherValuePtr) +#define SET_STRING(objPtr, stringPtr) \ + (objPtr)->internalRep.otherValuePtr = (VOID *) (stringPtr) + /* *---------------------------------------------------------------------- @@ -182,6 +246,327 @@ Tcl_DbNewStringObj(bytes, length, file, line) #endif /* TCL_MEM_DEBUG */ /* + *--------------------------------------------------------------------------- + * + * TclNewUnicodeObj -- + * + * This procedure is creates a new String object and initializes + * it from the given Utf String. If the Utf String is the same size + * as the Unicode string, don't duplicate the data. + * + * Results: + * The newly created object is returned. This object will have no + * initial string representation. The returned object has a ref count + * of 0. + * + * Side effects: + * Memory allocated for new object and copy of Unicode argument. + * + *--------------------------------------------------------------------------- + */ + +Tcl_Obj * +Tcl_NewUnicodeObj(unicode, numChars) + Tcl_UniChar *unicode; /* The unicode string used to initialize + * the new object. */ + int numChars; /* Number of characters in the unicode + * string. */ +{ + Tcl_Obj *objPtr; + String *stringPtr; + int uallocated = (numChars + 1) * sizeof(Tcl_UniChar); + + /* + * Create a new obj with an invalid string rep. + */ + + TclNewObj(objPtr); + Tcl_InvalidateStringRep(objPtr); + objPtr->typePtr = &tclStringType; + + stringPtr = (String *) ckalloc(STRING_SIZE(uallocated)); + stringPtr->numChars = numChars; + stringPtr->uallocated = uallocated; + stringPtr->allocated = 0; + memcpy((VOID *) stringPtr->unicode, (VOID *) unicode, + (size_t) (numChars * sizeof(Tcl_UniChar))); + stringPtr->unicode[numChars] = 0; + SET_STRING(objPtr, stringPtr); + return objPtr; +} + +/* + *---------------------------------------------------------------------- + * + * Tcl_GetCharLength -- + * + * Get the length of the Unicode string from the Tcl object. + * + * Results: + * Pointer to unicode string representing the unicode object. + * + * Side effects: + * Frees old internal rep. Allocates memory for new "String" + * internal rep. + * + *---------------------------------------------------------------------- + */ + +int +Tcl_GetCharLength(objPtr) + Tcl_Obj *objPtr; /* The String object to get the num chars of. */ +{ + String *stringPtr; + + SetStringFromAny(NULL, objPtr); + stringPtr = GET_STRING(objPtr); + +/* if (objPtr->bytes == NULL) { */ +/* printf("called Tcl_GetCharLength with unicode str.\n"); */ +/* } else { */ +/* printf("called Tcl_GetCharLength with str = %s\n", objPtr->bytes); */ +/* } */ + + /* + * If numChars is unknown, then calculate the number of characaters + * while populating the Unicode string. + */ + + if (stringPtr->numChars == -1) { + + stringPtr->numChars = Tcl_NumUtfChars(objPtr->bytes, objPtr->length); + + if (stringPtr->numChars == objPtr->length) { + + /* + * Since we've just calucalated the number of chars, and all + * UTF chars are 1-byte long, we don't need to store the + * unicode string. + */ + + stringPtr->uallocated = 0; + + } else { + + /* + * Since we've just calucalated the number of chars, and not + * all UTF chars are 1-byte long, go ahead and populate the + * unicode string. + */ + + FillUnicodeRep(objPtr); + + /* + * We need to fetch the pointer again because we have just + * reallocated the structure to make room for the Unicode data. + */ + + stringPtr = GET_STRING(objPtr); + } + } + return stringPtr->numChars; +} + +/* + *---------------------------------------------------------------------- + * + * Tcl_GetUniChar -- + * + * Get the index'th Unicode character from the String object. The + * index is assumed to be in the appropriate range. + * + * Results: + * Returns the index'th Unicode character in the Object. + * + * Side effects: + * Fills unichar with the index'th Unicode character. + * + *---------------------------------------------------------------------- + */ + +Tcl_UniChar +Tcl_GetUniChar(objPtr, index) + Tcl_Obj *objPtr; /* The object to get the Unicode charater from. */ + int index; /* Get the index'th Unicode character. */ +{ + Tcl_UniChar unichar; + String *stringPtr; + + SetStringFromAny(NULL, objPtr); + stringPtr = GET_STRING(objPtr); + +/* if (objPtr->bytes == NULL) { */ +/* printf("called Tcl_GetUniChar with unicode str.\n"); */ +/* } else { */ +/* printf("called Tcl_GetUniChar with str = %s\n", objPtr->bytes); */ +/* } */ + + if (stringPtr->numChars == -1) { + + /* + * We haven't yet calculated the length, so we don't have the + * Unicode str. We need to know the number of chars before we + * can do indexing. + */ + + Tcl_GetCharLength(objPtr); + + /* + * We need to fetch the pointer again because we may have just + * reallocated the structure. + */ + + stringPtr = GET_STRING(objPtr); + } + if (stringPtr->uallocated == 0) { + char *bytes; + + /* + * All of the characters in the Utf string are 1 byte chars, + * so we don't store the unicode char. We get the Utf string + * and convert the index'th byte to a Unicode character. + */ + + bytes = Tcl_GetString(objPtr); + Tcl_UtfToUniChar(&bytes[index], &unichar); + } else { + unichar = stringPtr->unicode[index]; + } + return unichar; +} + +/* + *---------------------------------------------------------------------- + * + * Tcl_GetUnicode -- + * + * Get the index'th Unicode character from the String object. If + * the object is not already a String object, it will be converted + * to one. If the String object does not have a Unicode rep, then + * one is create from the UTF string format. + * + * Results: + * Returns a pointer to the object's internal Unicode string. + * + * Side effects: + * Converts the object to have the String internal rep. + * + *---------------------------------------------------------------------- + */ + +Tcl_UniChar * +Tcl_GetUnicode(objPtr) + Tcl_Obj *objPtr; /* The object to find the unicode string for. */ +{ + String *stringPtr; + + SetStringFromAny(NULL, objPtr); + stringPtr = GET_STRING(objPtr); + +/* if (objPtr->bytes == NULL) { */ +/* printf("called Tcl_GetUnicode with unicode str.\n"); */ +/* } else { */ +/* printf("called Tcl_GetUnicode with str = %s\n", objPtr->bytes); */ +/* } */ + + if ((stringPtr->numChars == -1) || (stringPtr->uallocated == 0)) { + + /* + * We haven't yet calculated the length, or all of the characters + * in the Utf string are 1 byte chars (so we didn't store the + * unicode str). Since this function must return a unicode string, + * and one has not yet been stored, force the Unicode to be + * calculated and stored now. + */ + + FillUnicodeRep(objPtr); + + /* + * We need to fetch the pointer again because we have just + * reallocated the structure to make room for the Unicode data. + */ + + stringPtr = GET_STRING(objPtr); + } + return stringPtr->unicode; +} + +/* + *---------------------------------------------------------------------- + * + * Tcl_GetRange -- + * + * Create a Tcl Object that contains the chars between first and last + * of the object indicated by "objPtr". If the object is not already + * a String object, convert it to one. The first and last indices + * are assumed to be in the appropriate range. + * + * Results: + * Returns a new Tcl Object of the String type. + * + * Side effects: + * Changes the internal rep of "objPtr" to the String type. + * + *---------------------------------------------------------------------- + */ + +Tcl_Obj* +Tcl_GetRange(objPtr, first, last) + + Tcl_Obj *objPtr; /* The Tcl object to find the range of. */ + int first; /* First index of the range. */ + int last; /* Last index of the range. */ +{ + Tcl_Obj *newObjPtr; /* The Tcl object to find the range of. */ + String *stringPtr; + + SetStringFromAny(NULL, objPtr); + stringPtr = GET_STRING(objPtr); + + if (stringPtr->numChars == -1) { + + /* + * We haven't yet calculated the length, so we don't have the + * Unicode str. We need to know the number of chars before we + * can do indexing. + */ + + Tcl_GetCharLength(objPtr); + + /* + * We need to fetch the pointer again because we may have just + * reallocated the structure. + */ + + stringPtr = GET_STRING(objPtr); + } + + if (stringPtr->numChars == objPtr->length) { + char *str = Tcl_GetString(objPtr); + + /* + * All of the characters in the Utf string are 1 byte chars, + * so we don't store the unicode char. Create a new string + * object containing the specified range of chars. + */ + + newObjPtr = Tcl_NewStringObj(&str[first], last-first+1); + + /* + * Since we know the new string only has 1-byte chars, we + * can set it's numChars field. + */ + +/* stringPtr = GET_STRING(newObjPtr); */ +/* stringPtr->numChars = last-first+1; */ + } else { + newObjPtr = Tcl_NewUnicodeObj(stringPtr->unicode + first, + last-first+1); + } + return newObjPtr; +} + +/* *---------------------------------------------------------------------- * * Tcl_SetStringObj -- @@ -237,6 +622,7 @@ Tcl_SetStringObj(objPtr, bytes, length) length = (bytes? strlen(bytes) : 0); } TclInitStringRep(objPtr, bytes, length); +/* printf("called Tcl_SetStringObj with str = %s\n", objPtr->bytes); */ } /* @@ -272,15 +658,23 @@ Tcl_SetObjLength(objPtr, length) * terminating null byte. */ { char *new; + String *stringPtr; if (Tcl_IsShared(objPtr)) { panic("Tcl_SetObjLength called with shared object"); } - if (objPtr->typePtr != &tclStringType) { - ConvertToStringType(objPtr); - } - - if ((long)length > objPtr->internalRep.longValue) { + SetStringFromAny(NULL, objPtr); + + /* + * Invalidate the unicode data. + */ + + stringPtr = GET_STRING(objPtr); + stringPtr->numChars = -1; + stringPtr->uallocated = 0; + + if (length > stringPtr->allocated) { + /* * Not enough space in current string. Reallocate the string * space and free the old string. @@ -290,11 +684,13 @@ Tcl_SetObjLength(objPtr, length) if (objPtr->bytes != NULL) { memcpy((VOID *) new, (VOID *) objPtr->bytes, (size_t) objPtr->length); +/* new[objPtr->length] = 0; */ Tcl_InvalidateStringRep(objPtr); } objPtr->bytes = new; - objPtr->internalRep.longValue = (long) length; + stringPtr->allocated = length; } + objPtr->length = length; if ((objPtr->bytes != NULL) && (objPtr->bytes != tclEmptyStringRep)) { objPtr->bytes[length] = 0; @@ -302,6 +698,60 @@ Tcl_SetObjLength(objPtr, length) } /* + *--------------------------------------------------------------------------- + * + * TclSetUnicodeObj -- + * + * Modify an object to hold the Unicode string indicated by "unicode". + * + * Results: + * None. + * + * Side effects: + * Memory allocated for new "String" internal rep. + * + *--------------------------------------------------------------------------- + */ + +void +Tcl_SetUnicodeObj(objPtr, unicode, numChars) + Tcl_Obj *objPtr; /* The object to set the string of. */ + Tcl_UniChar *unicode; /* The unicode string used to initialize + * the object. */ + int numChars; /* Number of characters in the unicode + * string. */ +{ + Tcl_ObjType *typePtr; + String *stringPtr; + size_t uallocated = (numChars + 1) * sizeof(Tcl_UniChar); + + /* + * Free the internal rep if one exists, and invalidate the string rep. + */ + + typePtr = objPtr->typePtr; + if ((typePtr != NULL) && (typePtr->freeIntRepProc) != NULL) { + (*typePtr->freeIntRepProc)(objPtr); + } + objPtr->typePtr = &tclStringType; + + /* + * Allocate enough space for the String structure + Unicode string. + */ + + stringPtr = (String *) ckalloc(STRING_SIZE(uallocated)); + stringPtr->numChars = numChars; + stringPtr->uallocated = uallocated; + stringPtr->allocated = 0; + memcpy((VOID *) stringPtr->unicode, (VOID *) unicode, + (size_t) (numChars * sizeof(Tcl_UniChar))); + stringPtr->unicode[numChars] = 0; + SET_STRING(objPtr, stringPtr); + Tcl_InvalidateStringRep(objPtr); + return; +} + +/* *---------------------------------------------------------------------- * * Tcl_AppendToObj -- @@ -327,37 +777,106 @@ Tcl_AppendToObj(objPtr, bytes, length) * "bytes". If < 0, then append all bytes * up to NULL byte. */ { - int newLength, oldLength; + String *stringPtr; if (Tcl_IsShared(objPtr)) { panic("Tcl_AppendToObj called with shared object"); } - if (objPtr->typePtr != &tclStringType) { - ConvertToStringType(objPtr); - } + + SetStringFromAny(NULL, objPtr); + if (length < 0) { - length = (bytes? strlen(bytes) : 0); + length = (bytes ? strlen(bytes) : 0); } if (length == 0) { return; } - oldLength = objPtr->length; - newLength = length + oldLength; - if ((long)newLength > objPtr->internalRep.longValue) { - /* - * There isn't currently enough space in the string - * representation so allocate additional space. In fact, - * overallocate so that there is room for future growth without - * having to reallocate again. - */ - Tcl_SetObjLength(objPtr, 2*newLength); + /* + * TEMPORARY!!! This is terribly inefficient, but it works, and Don + * needs for me to check this stuff in ASAP. -Melissa + */ + +/* printf("called Tcl_AppendToObj with str = %s\n", bytes); */ + UpdateStringOfString(objPtr); + AppendUtfToUtfRep(objPtr, bytes, length); + return; + + /* + * If objPtr has a valid Unicode rep, then append the Unicode + * conversion of "bytes" to the objPtr's Unicode rep, otherwise + * append "bytes" to objPtr's string rep. + */ + + stringPtr = GET_STRING(objPtr); + if (stringPtr->allocated > 0) { + AppendUtfToUnicodeRep(objPtr, bytes, length); + + stringPtr = GET_STRING(objPtr); +/* printf(" ended Tcl_AppendToObj with %d unicode chars.\n", */ +/* stringPtr->numChars); */ + } else { + AppendUtfToUtfRep(objPtr, bytes, length); +/* printf(" ended Tcl_AppendToObj with str = %s\n", objPtr->bytes); */ } - if (length > 0) { - memcpy((VOID *) (objPtr->bytes + oldLength), (VOID *) bytes, - (size_t) length); - objPtr->length = newLength; - objPtr->bytes[objPtr->length] = 0; +} + +/* + *---------------------------------------------------------------------- + * + * Tcl_AppendUnicodeToObj -- + * + * This procedure appends a Unicode string to an object in the + * most efficient manner possible. Length must be >= 0. + * + * Results: + * None. + * + * Side effects: + * Invalidates the string rep and creates a new Unicode string. + * + *---------------------------------------------------------------------- + */ + +void +Tcl_AppendUnicodeToObj(objPtr, unicode, length) + register Tcl_Obj *objPtr; /* Points to the object to append to. */ + Tcl_UniChar *unicode; /* The unicode string to append to the + * object. */ + int length; /* Number of chars in "unicode". */ +{ + String *stringPtr; + + if (Tcl_IsShared(objPtr)) { + panic("Tcl_AppendUnicodeToObj called with shared object"); + } + + if (length == 0) { + return; + } + + SetStringFromAny(NULL, objPtr); + + /* + * TEMPORARY!!! This is terribly inefficient, but it works, and Don + * needs for me to check this stuff in ASAP. -Melissa + */ + + UpdateStringOfString(objPtr); + AppendUnicodeToUtfRep(objPtr, unicode, length); + return; + + /* + * If objPtr has a valid Unicode rep, then append the "unicode" + * to the objPtr's Unicode rep, otherwise the UTF conversion of + * "unicode" to objPtr's string rep. + */ + + stringPtr = GET_STRING(objPtr); + if (stringPtr->allocated > 0) { + AppendUnicodeToUnicodeRep(objPtr, unicode, length); + } else { + AppendUnicodeToUtfRep(objPtr, unicode, length); } } @@ -367,6 +886,7 @@ Tcl_AppendToObj(objPtr, bytes, length) * Tcl_AppendObjToObj -- * * This procedure appends the string rep of one object to another. + * "objPtr" cannot be a shared object. * * Results: * None. @@ -383,7 +903,273 @@ Tcl_AppendObjToObj(objPtr, appendObjPtr) Tcl_Obj *objPtr; /* Points to the object to append to. */ Tcl_Obj *appendObjPtr; /* Object to append. */ { - TclAppendObjToUnicodeObj(objPtr, appendObjPtr); + String *stringPtr; + int length; + char *bytes; + + SetStringFromAny(NULL, objPtr); + + /* + * TEMPORARY!!! This is terribly inefficient, but it works, and Don + * needs for me to check this stuff in ASAP. -Melissa + */ + + UpdateStringOfString(objPtr); + bytes = Tcl_GetStringFromObj(appendObjPtr, &length); + AppendUtfToUtfRep(objPtr, bytes, length); + return; + + /* + * If objPtr has a valid Unicode rep, then get a Unicode string + * from appendObjPtr and append it. + */ + + stringPtr = GET_STRING(objPtr); + if (stringPtr->allocated > 0) { + + /* + * If appendObjPtr is not of the "String" type, don't convert it. + */ + + if (appendObjPtr->typePtr == &tclStringType) { + stringPtr = GET_STRING(appendObjPtr); + if ((stringPtr->numChars == -1) + || (stringPtr->uallocated == 0)) { + + /* + * If appendObjPtr is a string obj with no valide Unicode + * rep, then fill its unicode rep. + */ + + FillUnicodeRep(appendObjPtr); + stringPtr = GET_STRING(appendObjPtr); + } + AppendUnicodeToUnicodeRep(objPtr, stringPtr->unicode, + stringPtr->numChars); + } else { + bytes = Tcl_GetStringFromObj(appendObjPtr, &length); + AppendUtfToUnicodeRep(objPtr, bytes, length); + } + return; + } + + /* + * Append to objPtr's UTF string rep. + */ + + bytes = Tcl_GetStringFromObj(appendObjPtr, &length); + AppendUtfToUtfRep(objPtr, bytes, length); +} + +/* + *---------------------------------------------------------------------- + * + * AppendUnicodeToUnicodeRep -- + * + * This procedure appends the contents of "unicode" to the Unicode + * rep of "objPtr". objPtr must already have a valid Unicode rep. + * + * Results: + * None. + * + * Side effects: + * objPtr's internal rep is reallocated. + * + *---------------------------------------------------------------------- + */ + +static void +AppendUnicodeToUnicodeRep(objPtr, unicode, appendNumChars) + Tcl_Obj *objPtr; /* Points to the object to append to. */ + Tcl_UniChar *unicode; /* String to append. */ + int appendNumChars; /* Number of chars of "unicode" to append. */ +{ + String *stringPtr; + int numChars; + size_t newSize; + + if (appendNumChars == 0) { + return; + } + + SetStringFromAny(NULL, objPtr); + stringPtr = GET_STRING(objPtr); + + /* + * Make the buffer big enough for the result. + */ + + numChars = stringPtr->numChars + appendNumChars; + newSize = (numChars + 1) * sizeof(Tcl_UniChar); + + if (newSize > stringPtr->uallocated) { + stringPtr->uallocated = newSize * 2; + stringPtr = (String *) ckrealloc((char*)stringPtr, + STRING_SIZE(stringPtr->uallocated)); + SET_STRING(objPtr, stringPtr); + } + + /* + * Copy the new string onto the end of the old string, then add the + * trailing null. + */ + + memcpy((VOID*) (stringPtr->unicode + stringPtr->numChars), unicode, + appendNumChars * sizeof(Tcl_UniChar)); + stringPtr->unicode[numChars] = 0; + stringPtr->numChars = numChars; + + SET_STRING(objPtr, stringPtr); + Tcl_InvalidateStringRep(objPtr); +} + +/* + *---------------------------------------------------------------------- + * + * AppendUnicodeToUtfRep -- + * + * This procedure converts the contents of "unicode" to UTF and + * appends the UTF to the string rep of "objPtr". + * + * Results: + * None. + * + * Side effects: + * objPtr's internal rep is reallocated. + * + *---------------------------------------------------------------------- + */ + +static void +AppendUnicodeToUtfRep(objPtr, unicode, numChars) + Tcl_Obj *objPtr; /* Points to the object to append to. */ + Tcl_UniChar *unicode; /* String to convert to UTF. */ + int numChars; /* Number of chars of "unicode" to convert. */ +{ + Tcl_DString dsPtr; + int length = numChars * sizeof(Tcl_UniChar); + char *bytes; + + if (numChars == 0) { + return; + } + + Tcl_DStringInit(&dsPtr); + bytes = (char *)Tcl_UniCharToUtfDString(unicode, numChars, &dsPtr); + AppendUtfToUtfRep(objPtr, bytes, Tcl_DStringLength(&dsPtr)); + Tcl_DStringFree(&dsPtr); +} + +/* + *---------------------------------------------------------------------- + * + * AppendUtfToUnicodeRep -- + * + * This procedure converts the contents of "bytes" to Unicode and + * appends the Unicode to the Unicode rep of "objPtr". objPtr must + * already have a valid Unicode rep. + * + * Results: + * None. + * + * Side effects: + * objPtr's internal rep is reallocated. + * + *---------------------------------------------------------------------- + */ + +static void +AppendUtfToUnicodeRep(objPtr, bytes, numBytes) + Tcl_Obj *objPtr; /* Points to the object to append to. */ + char *bytes; /* String to convert to Unicode. */ + int numBytes; /* Number of bytes of "bytes" to convert. */ +{ + Tcl_DString dsPtr; + int numChars; + Tcl_UniChar *unicode; + + if (numBytes < 0) { + numBytes = (bytes ? strlen(bytes) : 0); + } + if (numBytes == 0) { + return; + } + + Tcl_DStringInit(&dsPtr); + numChars = Tcl_NumUtfChars(bytes, numBytes); + unicode = (Tcl_UniChar *)Tcl_UtfToUniCharDString(bytes, numBytes, &dsPtr); + AppendUnicodeToUnicodeRep(objPtr, unicode, numChars); + Tcl_DStringFree(&dsPtr); +} + +/* + *---------------------------------------------------------------------- + * + * AppendUtfToUtfRep -- + * + * This procedure appends "numBytes" bytes of "bytes" to the UTF string + * rep of "objPtr". objPtr must already have a valid String rep. + * + * Results: + * None. + * + * Side effects: + * objPtr's internal rep is reallocated. + * + *---------------------------------------------------------------------- + */ + +static void +AppendUtfToUtfRep(objPtr, bytes, numBytes) + Tcl_Obj *objPtr; /* Points to the object to append to. */ + char *bytes; /* String to append. */ + int numBytes; /* Number of bytes of "bytes" to append. */ +{ + String *stringPtr; + int newLength, oldLength; + + if (numBytes < 0) { + numBytes = (bytes ? strlen(bytes) : 0); + } + if (numBytes == 0) { + return; + } + + /* + * Copy the new string onto the end of the old string, then add the + * trailing null. + */ + + oldLength = objPtr->length; + newLength = numBytes + oldLength; + + stringPtr = GET_STRING(objPtr); + if (newLength > stringPtr->allocated) { + + /* + * There isn't currently enough space in the string + * representation so allocate additional space. If the current + * string representation isn't empty (i.e. it looks like we're + * doing a series of appends) then overallocate the space so + * that we won't have to do as much reallocation in the future. + */ + + Tcl_SetObjLength(objPtr, + (oldLength == 0) ? newLength : 2*newLength); + } else { + + /* + * Invalidate the unicode data. + */ + + stringPtr->numChars = -1; + stringPtr->uallocated = 0; + } + + memcpy((VOID *) (objPtr->bytes + oldLength), (VOID *) bytes, + (size_t) numBytes); + objPtr->bytes[newLength] = 0; + objPtr->length = newLength; } /* @@ -409,6 +1195,7 @@ Tcl_AppendStringsToObjVA (objPtr, argList) Tcl_Obj *objPtr; /* Points to the object to append to. */ va_list argList; /* Variable argument list. */ { + String *stringPtr; va_list tmpArgList; int newLength, oldLength; register char *string, *dst; @@ -416,9 +1203,8 @@ Tcl_AppendStringsToObjVA (objPtr, argList) if (Tcl_IsShared(objPtr)) { panic("Tcl_AppendStringsToObj called with shared object"); } - if (objPtr->typePtr != &tclStringType) { - ConvertToStringType(objPtr); - } + + SetStringFromAny(NULL, objPtr); /* * Figure out how much space is needed for all the strings, and @@ -440,7 +1226,9 @@ Tcl_AppendStringsToObjVA (objPtr, argList) return; } - if ((long)newLength > objPtr->internalRep.longValue) { + stringPtr = GET_STRING(objPtr); + if (newLength > stringPtr->allocated) { + /* * There isn't currently enough space in the string * representation so allocate additional space. If the current @@ -514,45 +1302,64 @@ Tcl_AppendStringsToObj TCL_VARARGS_DEF(Tcl_Obj *,arg1) } /* - *---------------------------------------------------------------------- + *--------------------------------------------------------------------------- * - * ConvertToStringType -- + * FillUnicodeRep -- * - * This procedure converts the internal representation of an object - * to "expandable string" type. + * Populate the Unicode internal rep with the Unicode form of its string + * rep. The object must alread have a "String" internal rep. * * Results: * None. * * Side effects: - * Any old internal reputation for objPtr is freed and the - * internal representation is set to that for an expandable string - * (the field internalRep.longValue holds 1 less than the allocated - * length of objPtr's string representation). + * Reallocates the String internal rep. * - *---------------------------------------------------------------------- + *--------------------------------------------------------------------------- */ static void -ConvertToStringType(objPtr) - register Tcl_Obj *objPtr; /* Pointer to object. Must have a - * typePtr that isn't &tclStringType. */ +FillUnicodeRep(objPtr) + Tcl_Obj *objPtr; /* The object in which to fill the unicode rep. */ { - if (objPtr->typePtr != NULL) { - if (objPtr->bytes == NULL) { - objPtr->typePtr->updateStringProc(objPtr); - } - if (objPtr->typePtr->freeIntRepProc != NULL) { - objPtr->typePtr->freeIntRepProc(objPtr); - } + String *stringPtr; + size_t uallocated; + char *src, *srcEnd; + Tcl_UniChar *dst; + src = objPtr->bytes; + + stringPtr = GET_STRING(objPtr); + if (stringPtr->numChars == -1) { + stringPtr->numChars = Tcl_NumUtfChars(src, objPtr->length); } - objPtr->typePtr = &tclStringType; - if (objPtr->bytes != NULL) { - objPtr->internalRep.longValue = (long)objPtr->length; - } else { - objPtr->internalRep.longValue = 0; - objPtr->length = 0; + + uallocated = stringPtr->numChars * sizeof(Tcl_UniChar); + if (uallocated > stringPtr->uallocated) { + + /* + * If not enought space has been allocated for the unicode rep, + * reallocate the internal rep object with double the amount of + * space needed, so the unicode string can grow without being + * reallocated. + */ + + uallocated *= 2; + stringPtr = (String *) ckrealloc((char*) stringPtr, + STRING_SIZE(uallocated)); + stringPtr->uallocated = uallocated; + } + + /* + * Convert src to Unicode and store the coverted data in "unicode". + */ + + srcEnd = src + objPtr->length; + for (dst = stringPtr->unicode; src < srcEnd; dst++) { + src += Tcl_UtfToUniChar(src, dst); } + *dst = 0; + + SET_STRING(objPtr, stringPtr); } /* @@ -581,13 +1388,40 @@ DupStringInternalRep(srcPtr, copyPtr) register Tcl_Obj *copyPtr; /* Object with internal rep to set. Must * not currently have an internal rep.*/ { + String *srcStringPtr = GET_STRING(srcPtr); + String *copyStringPtr; + + /* + * If the src obj is a string of 1-byte Utf chars, then copy the + * string rep of the source object and create an "empty" Unicode + * internal rep for the new object. Otherwise, copy Unicode + * internal rep, and invalidate the string rep of the new object. + */ + + if (srcStringPtr->numChars == srcPtr->length) { + copyStringPtr = (String *) ckalloc(STRING_SIZE(0)); + copyStringPtr->uallocated = 0; + } else { + copyStringPtr = (String *) ckalloc( + STRING_SIZE(srcStringPtr->uallocated)); + copyStringPtr->uallocated = srcStringPtr->uallocated; + + memcpy((VOID *) copyStringPtr->unicode, + (VOID *) srcStringPtr->unicode, + (size_t) srcStringPtr->numChars * sizeof(Tcl_UniChar)); + copyStringPtr->unicode[srcStringPtr->numChars] = 0; + } + copyStringPtr->numChars = srcStringPtr->numChars; + /* * Tricky point: the string value was copied by generic object * management code, so it doesn't contain any extra bytes that * might exist in the source object. */ - copyPtr->internalRep.longValue = (long)copyPtr->length; + copyStringPtr->allocated = copyPtr->length; + + SET_STRING(copyPtr, copyStringPtr); copyPtr->typePtr = &tclStringType; } @@ -596,15 +1430,14 @@ DupStringInternalRep(srcPtr, copyPtr) * * SetStringFromAny -- * - * Create an internal representation of type "expandable string" - * for an object. + * Create an internal representation of type "String" for an object. * * Results: * This operation always succeeds and returns TCL_OK. * * Side effects: - * This procedure does nothing; there is no advantage in converting - * the internal representation now, so we just defer it. + * Any old internal reputation for objPtr is freed and the + * internal representation is set to "String". * *---------------------------------------------------------------------- */ @@ -614,6 +1447,42 @@ SetStringFromAny(interp, objPtr) Tcl_Interp *interp; /* Used for error reporting if not NULL. */ Tcl_Obj *objPtr; /* The object to convert. */ { + String *stringPtr; + + /* + * The Unicode object is opitmized for the case where each UTF char + * in a string is only one byte. In this case, we store the value of + * numChars, but we don't copy the bytes to the unicodeObj->unicode. + */ + + if (objPtr->typePtr != &tclStringType) { + + if (objPtr->typePtr != NULL) { + if (objPtr->bytes == NULL) { + objPtr->typePtr->updateStringProc(objPtr); + } + if ((objPtr->typePtr->freeIntRepProc) != NULL) { + (*objPtr->typePtr->freeIntRepProc)(objPtr); + } + } + objPtr->typePtr = &tclStringType; + + /* + * Allocate enough space for the basic String structure. + */ + + stringPtr = (String *) ckalloc(STRING_SIZE(0)); + stringPtr->numChars = -1; + stringPtr->uallocated = 0; + + if (objPtr->bytes != NULL) { + stringPtr->allocated = objPtr->length; + objPtr->bytes[objPtr->length] = 0; + } else { + objPtr->length = 0; + } + SET_STRING(objPtr, stringPtr); + } return TCL_OK; } @@ -623,13 +1492,14 @@ SetStringFromAny(interp, objPtr) * UpdateStringOfString -- * * Update the string representation for an object whose internal - * representation is "expandable string". + * representation is "String". * * Results: * None. * * Side effects: - * None. + * The object's string may be set by converting its Unicode + * represention to UTF format. * *---------------------------------------------------------------------- */ @@ -638,16 +1508,73 @@ static void UpdateStringOfString(objPtr) Tcl_Obj *objPtr; /* Object with string rep to update. */ { - /* - * The string is almost always valid already, in which case there's - * nothing for us to do. The only case we have to worry about is if - * the object is totally null. In this case, set the string rep to - * an empty string. - */ + int i, length, size; + Tcl_UniChar *unicode; + char dummy[TCL_UTF_MAX]; + char *dst; + String *stringPtr; - if (objPtr->bytes == NULL) { - objPtr->bytes = tclEmptyStringRep; - objPtr->length = 0; + stringPtr = GET_STRING(objPtr); + if ((objPtr->bytes == NULL) || (stringPtr->allocated == 0)) { + + if (stringPtr->numChars <= 0) { + + /* + * If there is no Unicode rep, or the string has 0 chars, + * then set the string rep to an empty string. + */ + + objPtr->bytes = tclEmptyStringRep; + objPtr->length = 0; + return; + } + + unicode = stringPtr->unicode; + length = stringPtr->numChars * sizeof(Tcl_UniChar); + + /* + * Translate the Unicode string to UTF. "size" will hold the + * amount of space the UTF string needs. + */ + + size = 0; + for (i = 0; i < stringPtr->numChars; i++) { + size += Tcl_UniCharToUtf((int) unicode[i], dummy); + } + + dst = (char *) ckalloc((unsigned) (size + 1)); + objPtr->bytes = dst; + objPtr->length = size; + stringPtr->allocated = size; + + for (i = 0; i < stringPtr->numChars; i++) { + dst += Tcl_UniCharToUtf(unicode[i], dst); + } + *dst = '\0'; } return; } + +/* + *---------------------------------------------------------------------- + * + * FreeStringInternalRep -- + * + * Deallocate the storage associated with a String data object's + * internal representation. + * + * Results: + * None. + * + * Side effects: + * Frees memory. + * + *---------------------------------------------------------------------- + */ + +static void +FreeStringInternalRep(objPtr) + Tcl_Obj *objPtr; /* Object with internal rep to free. */ +{ + ckfree((char *) GET_STRING(objPtr)); +} diff --git a/generic/tclStubInit.c b/generic/tclStubInit.c index 9a3b4f3..a6f7e45 100644 --- a/generic/tclStubInit.c +++ b/generic/tclStubInit.c @@ -8,7 +8,7 @@ * See the file "license.terms" for information on usage and redistribution * of this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclStubInit.c,v 1.17 1999/06/10 04:28:51 stanton Exp $ + * RCS: @(#) $Id: tclStubInit.c,v 1.18 1999/06/15 01:16:25 hershey Exp $ */ #include "tclInt.h" @@ -758,6 +758,13 @@ TclStubs tclStubs = { Tcl_UniCharIsPunct, /* 375 */ Tcl_RegExpMatchObj, /* 376 */ Tcl_RegExpGetInfo, /* 377 */ + Tcl_NewUnicodeObj, /* 378 */ + Tcl_SetUnicodeObj, /* 379 */ + Tcl_GetCharLength, /* 380 */ + Tcl_GetUniChar, /* 381 */ + Tcl_GetUnicode, /* 382 */ + Tcl_GetRange, /* 383 */ + Tcl_AppendUnicodeToObj, /* 384 */ }; /* !END!: Do not edit above this line. */ diff --git a/generic/tclUnicodeObj.c b/generic/tclUnicodeObj.c deleted file mode 100644 index 1809b20..0000000 --- a/generic/tclUnicodeObj.c +++ /dev/null @@ -1,882 +0,0 @@ -/* - * tclUnicodeObj.c -- - * - * This file contains the implementation of the Unicode internal - * representation of Tcl objects. - * - * Copyright (c) 1999 by Scriptics Corporation. - * - * See the file "license.terms" for information on usage and redistribution - * of this file, and for a DISCLAIMER OF ALL WARRANTIES. - * - * RCS: @(#) $Id: tclUnicodeObj.c,v 1.6 1999/06/10 19:14:54 hershey Exp $ - */ - -#include -#include "tclInt.h" -#include "tclPort.h" - -/* - * Prototypes for local procedures defined in this file: - */ - -static int AllSingleByteChars _ANSI_ARGS_((Tcl_Obj *objPtr)); -static void AppendUniCharStrToObj _ANSI_ARGS_((Tcl_Obj *objPtr, - Tcl_UniChar *unichars, int numNewChars)); -static void DupUnicodeInternalRep _ANSI_ARGS_((Tcl_Obj *srcPtr, - Tcl_Obj *copyPtr)); -static void FreeUnicodeInternalRep _ANSI_ARGS_((Tcl_Obj *objPtr)); -static void UpdateStringOfUnicode _ANSI_ARGS_((Tcl_Obj *objPtr)); -static void SetOptUnicodeFromAny _ANSI_ARGS_((Tcl_Obj *objPtr, - int numChars)); -static void SetFullUnicodeFromAny _ANSI_ARGS_((Tcl_Obj *objPtr, - char *src, int numBytes, int numChars)); -static int SetUnicodeFromAny _ANSI_ARGS_((Tcl_Interp *interp, - Tcl_Obj *objPtr)); - -/* - * The following object type represents a Unicode string. A Unicode string - * is an internationalized string. Conceptually, a Unicode string is an - * array of 16-bit quantities organized as a sequence of properly formed - * UTF-8 characters. There is a one-to-one map between Unicode and UTF - * characters. The Unicode ojbect is opitmized for the case where each UTF - * char in a string is only one byte. In this case, we store the value of - * numChars, but we don't copy the bytes to the unicodeObj->chars. Before - * accessing obj->chars, check if unicodeObj->numChars == obj->length. - */ - -Tcl_ObjType tclUnicodeType = { - "unicode", - FreeUnicodeInternalRep, - DupUnicodeInternalRep, - UpdateStringOfUnicode, - SetUnicodeFromAny -}; - -/* - * The following structure is the internal rep for a Unicode object. - * Keeps track of how much memory has been used and how much has been - * allocated for the Unicode to enable growing and shrinking of the - * Unicode object with fewer mallocs. - */ - -typedef struct Unicode { - int numChars; /* The number of chars in the unicode - * string. */ - size_t allocated; /* The amount of space actually allocated. */ - Tcl_UniChar chars[2]; /* The array of chars. The actual size of - * this field depends on the 'allocated' field - * above. */ -} Unicode; - -#define UNICODE_SIZE(len) \ - ((unsigned) (sizeof(Unicode) - (sizeof(Tcl_UniChar)*2) + (len))) -#define GET_UNICODE(objPtr) \ - ((Unicode *) (objPtr)->internalRep.otherValuePtr) -#define SET_UNICODE(objPtr, unicodePtr) \ - (objPtr)->internalRep.otherValuePtr = (VOID *) (unicodePtr) - - -/* - *---------------------------------------------------------------------- - * - * TclGetUnicodeFromObj -- - * - * Get the index'th Unicode character from the Unicode object. If - * the object is not already a Unicode object, an attempt will be - * made to convert it to one. The index is assumed to be in the - * appropriate range. - * - * Results: - * Returns a pointer to the object's internal unicode string. - * - * Side effects: - * Converts the object to have the Unicode internal rep. - * - *---------------------------------------------------------------------- - */ - -Tcl_UniChar * -TclGetUnicodeFromObj(objPtr) - Tcl_Obj *objPtr; /* The object to find the unicode string for. */ -{ - Unicode *unicodePtr; - int numBytes; - char *src; - - SetUnicodeFromAny(NULL, objPtr); - unicodePtr = GET_UNICODE(objPtr); - - if (unicodePtr->allocated == 0) { - - /* - * If all of the characters in the Utf string are 1 byte chars, - * we don't normally store the unicode str. Since this - * function must return a unicode string, and one has not yet - * been stored, force the Unicode to be calculated and stored - * now. - */ - - src = Tcl_GetStringFromObj(objPtr, &numBytes); - SetFullUnicodeFromAny(objPtr, src, numBytes, unicodePtr->numChars); - - /* - * We need to fetch the pointer again because we have just - * reallocated the structure to make room for the Unicode data. - */ - - unicodePtr = GET_UNICODE(objPtr); - } - return unicodePtr->chars; -} - -/* - *---------------------------------------------------------------------- - * - * TclGetUnicodeLengthFromObj -- - * - * Get the length of the Unicode string from the Tcl object. If - * the object is not already a Unicode object, an attempt will be - * made to convert it to one. - * - * Results: - * Pointer to unicode string representing the unicode object. - * - * Side effects: - * Frees old internal rep. Allocates memory for new internal rep. - * - *---------------------------------------------------------------------- - */ - -int -TclGetUnicodeLengthFromObj(objPtr) - Tcl_Obj *objPtr; /* The Unicode object. */ -{ - int length; - Unicode *unicodePtr; - - SetUnicodeFromAny(NULL, objPtr); - unicodePtr = GET_UNICODE(objPtr); - - length = unicodePtr->numChars; - return length; -} - -/* - *---------------------------------------------------------------------- - * - * TclGetUniCharFromObj -- - * - * Get the index'th Unicode character from the Unicode object. If - * the object is not already a Unicode object, an attempt will be - * made to convert it to one. The index is assumed to be in the - * appropriate range. - * - * Results: - * Returns the index'th Unicode character in the Object. - * - * Side effects: - * Fills unichar with the index'th Unicode character. - * - *---------------------------------------------------------------------- - */ - -Tcl_UniChar -TclGetUniCharFromObj(objPtr, index) - Tcl_Obj *objPtr; /* The Unicode object. */ - int index; /* Get the index'th character. */ -{ - Tcl_UniChar unichar; - Unicode *unicodePtr; - int length; - - SetUnicodeFromAny(NULL, objPtr); - unicodePtr = GET_UNICODE(objPtr); - length = objPtr->length; - - if (AllSingleByteChars(objPtr)) { - int length; - char *str; - - /* - * All of the characters in the Utf string are 1 byte chars, - * so we don't store the unicode char. We get the Utf string - * and convert the index'th byte to a Unicode character. - */ - - str = Tcl_GetStringFromObj(objPtr, &length); - Tcl_UtfToUniChar(&str[index], &unichar); - } else { - unichar = unicodePtr->chars[index]; - } - return unichar; -} - -/* - *---------------------------------------------------------------------- - * - * TclGetRangeFromObj -- - * - * Create a Tcl Object that contains the chars between first and last - * of the object indicated by "objPtr". If the object is not already - * a Unicode object, an attempt will be made to convert it to one. - * The first and last indices are assumed to be in the appropriate - * range. - * - * Results: - * Returns a new Tcl Object of either "string" or "unicode" type, - * containing the range of chars. - * - * Side effects: - * Changes the internal rep of "objPtr" to unicode. - * - *---------------------------------------------------------------------- - */ - -Tcl_Obj* -TclGetRangeFromObj(objPtr, first, last) - - Tcl_Obj *objPtr; /* The Tcl object to find the range of. */ - int first; /* First index of the range. */ - int last; /* Last index of the range. */ -{ - Tcl_Obj *newObjPtr; /* The Tcl object to find the range of. */ - Unicode *unicodePtr; - int length; - - SetUnicodeFromAny(NULL, objPtr); - unicodePtr = GET_UNICODE(objPtr); - length = objPtr->length; - - if (unicodePtr->numChars != length) { - newObjPtr = TclNewUnicodeObj(unicodePtr->chars + first, last-first+1); - } else { - int length; - char *str; - - /* - * All of the characters in the Utf string are 1 byte chars, - * so we don't store the unicode char. Create a new string - * object containing the specified range of chars. - */ - - str = Tcl_GetStringFromObj(objPtr, &length); - newObjPtr = Tcl_NewStringObj(&str[first], last-first+1); - } - return newObjPtr; -} - -/* - *---------------------------------------------------------------------- - * - * TclAppendObjToUnicodeObj -- - * - * This procedure appends the contents of "srcObjPtr" to the Unicode - * object "destPtr". - * - * Results: - * None. - * - * Side effects: - * If srcObjPtr doesn't have an internal rep, then it is given a - * Unicode internal rep. - * - *---------------------------------------------------------------------- - */ - -Tcl_Obj * -TclAppendObjToUnicodeObj(targetObjPtr, srcObjPtr) - register Tcl_Obj *targetObjPtr; /* Points to the object to - * append to. */ - register Tcl_Obj *srcObjPtr; /* Points to the object to - * append from. */ -{ - int numBytes, numChars; - Tcl_Obj *resultObjPtr; - char *utfSrcStr; - Tcl_UniChar *unicharSrcStr; - Unicode *unicodePtr; - Tcl_DString dsPtr; - - /* - * Duplicate the target if it is shared. - * Change the result's internal rep to Unicode object. - */ - - if (Tcl_IsShared(targetObjPtr)) { - resultObjPtr = Tcl_DuplicateObj(targetObjPtr); - } else { - resultObjPtr = targetObjPtr; - } - SetUnicodeFromAny(NULL, resultObjPtr); - - /* - * Case where target chars are 1 byte long: - * If src obj is of "string" or null type, then convert it to "unicode" - * type. Src objs of other types (such as int) are left in tact to keep - * them from shimmering between types. If the src obj is a unichar obj, - * and all src chars are also 1 byte long, the src string is appended to - * the target "unicode" obj, and the target obj maintains its "optimized" - * status. - */ - - if (AllSingleByteChars(resultObjPtr)) { - - int length; - char *stringRep; - - if (srcObjPtr->typePtr == &tclStringType - || srcObjPtr->typePtr == NULL) { - SetUnicodeFromAny(NULL, srcObjPtr); - } - - stringRep = Tcl_GetStringFromObj(srcObjPtr, &length); - Tcl_AppendToObj(resultObjPtr, stringRep, length); - - if ((srcObjPtr->typePtr == &tclUnicodeType) - && (AllSingleByteChars(srcObjPtr))) { - SetOptUnicodeFromAny(resultObjPtr, resultObjPtr->length); - } - return resultObjPtr; - } - - /* - * Extract a unicode string from "unicode" or "string" type objects. - * Extract the utf string from non-unicode objects, and convert the - * utf string to unichar string locally. - * If the src obj is a "string" obj, convert it to "unicode" type. - * Src objs of other types (such as int) are left in tact to keep - * them from shimmering between types. - */ - - Tcl_DStringInit(&dsPtr); - if (srcObjPtr->typePtr == &tclStringType || srcObjPtr->typePtr == NULL) { - SetUnicodeFromAny(NULL, srcObjPtr); - } - if (srcObjPtr->typePtr == &tclUnicodeType) { - if (AllSingleByteChars(srcObjPtr)) { - - unicodePtr = GET_UNICODE(srcObjPtr); - numChars = unicodePtr->numChars; - - utfSrcStr = Tcl_GetStringFromObj(srcObjPtr, &numBytes); - unicharSrcStr = (Tcl_UniChar *)Tcl_UtfToUniCharDString(utfSrcStr, - numBytes, &dsPtr); - } else { - unicodePtr = GET_UNICODE(srcObjPtr); - numChars = unicodePtr->numChars; - unicharSrcStr = unicodePtr->chars; - } - } else { - utfSrcStr = Tcl_GetStringFromObj(srcObjPtr, &numBytes); - numChars = Tcl_NumUtfChars(utfSrcStr, numBytes); - unicharSrcStr = (Tcl_UniChar *)Tcl_UtfToUniCharDString(utfSrcStr, - numBytes, &dsPtr); - } - if (numChars == 0) { - return resultObjPtr; - } - - /* - * Append the unichar src string to the result object. - */ - - AppendUniCharStrToObj(resultObjPtr, unicharSrcStr, numChars); - Tcl_DStringFree(&dsPtr); - return resultObjPtr; -} - -/* - *---------------------------------------------------------------------- - * - * AppendUniCharStrToObj -- - * - * This procedure appends the contents of "srcObjPtr" to the - * Unicode object "objPtr". - * - * Results: - * None. - * - * Side effects: - * If srcObjPtr doesn't have an internal rep, then it is given a - * Unicode internal rep. - * - *---------------------------------------------------------------------- - */ - -static void -AppendUniCharStrToObj(objPtr, unichars, numNewChars) - register Tcl_Obj *objPtr; /* Points to the object to append to. */ - Tcl_UniChar *unichars; /* The unicode string to append to the - * object. */ - int numNewChars; /* Number of chars in "unichars". */ -{ - Unicode *unicodePtr; - int numChars; - size_t numBytes; - - SetUnicodeFromAny(NULL, objPtr); - unicodePtr = GET_UNICODE(objPtr); - - numChars = numNewChars + unicodePtr->numChars; - numBytes = (numChars + 1) * sizeof(Tcl_UniChar); - - if (unicodePtr->allocated < numBytes) { - int allocatedBytes = numBytes * 2; - - /* - * There isn't currently enough space in the Unicode - * representation so allocate additional space. In fact, - * overallocate so that there is room for future growth without - * having to reallocate again. - */ - - unicodePtr = (Unicode *) ckrealloc((char*) unicodePtr, - UNICODE_SIZE(allocatedBytes)); - unicodePtr->allocated = allocatedBytes; - unicodePtr = SET_UNICODE(objPtr, unicodePtr); - } - memcpy((VOID *) (unicodePtr->chars + unicodePtr->numChars), - (VOID *) unichars, (size_t) numNewChars * sizeof(Tcl_UniChar)); - unicodePtr->chars[numChars] = 0; - unicodePtr->numChars = numChars; - - /* - * Invalidate the StringRep. - */ - - Tcl_InvalidateStringRep(objPtr); -} - -/* - *---------------------------------------------------------------------- - * - * TclAppendUnicodeToObj -- - * - * This procedure appends a Unicode string to an object in the - * most efficient manner possible. - * - * Results: - * None. - * - * Side effects: - * Invalidates the string rep and creates a new Unicode string. - * - *---------------------------------------------------------------------- - */ - -void -TclAppendUnicodeToObj(objPtr, unichars, length) - register Tcl_Obj *objPtr; /* Points to the object to append to. */ - Tcl_UniChar *unichars; /* The unicode string to append to the - * object. */ - int length; /* Number of chars in "unichars". */ -{ - Unicode *unicodePtr; - int numChars, i; - size_t newSize; - char *src; - Tcl_UniChar *dst; - - if (Tcl_IsShared(objPtr)) { - panic("TclAppendUnicodeToObj called with shared object"); - } - - SetUnicodeFromAny(NULL, objPtr); - unicodePtr = GET_UNICODE(objPtr); - - /* - * Make the buffer big enough for the result. - */ - - numChars = unicodePtr->numChars + length; - newSize = (numChars + 1) * sizeof(Tcl_UniChar); - - if (newSize > unicodePtr->allocated) { - int allocated = newSize * 2; - - unicodePtr = (Unicode *) ckrealloc((char*)unicodePtr, - UNICODE_SIZE(allocated)); - - if (unicodePtr->allocated == 0) { - /* - * If the original string was not in Unicode form, add it to the - * beginning of the buffer. - */ - - src = objPtr->bytes; - dst = unicodePtr->chars; - for (i = 0; i < unicodePtr->numChars; i++) { - src += Tcl_UtfToUniChar(src, dst++); - } - } - unicodePtr->allocated = allocated; - } - - /* - * Copy the new string onto the end of the old string, then add the - * trailing null. - */ - - memcpy((VOID*) (unicodePtr->chars + unicodePtr->numChars), unichars, - length * sizeof(Tcl_UniChar)); - unicodePtr->numChars = numChars; - unicodePtr->chars[numChars] = 0; - - SET_UNICODE(objPtr, unicodePtr); - - Tcl_InvalidateStringRep(objPtr); -} - -/* - *--------------------------------------------------------------------------- - * - * TclNewUnicodeObj -- - * - * This procedure is creates a new Unicode object and initializes - * it from the given Utf String. If the Utf String is the same size - * as the Unicode string, don't duplicate the data. - * - * Results: - * The newly created object is returned. This object will have no - * initial string representation. The returned object has a ref count - * of 0. - * - * Side effects: - * Memory allocated for new object and copy of Unicode argument. - * - *--------------------------------------------------------------------------- - */ - -Tcl_Obj * -TclNewUnicodeObj(unichars, numChars) - Tcl_UniChar *unichars; /* The unicode string used to initialize - * the new object. */ - int numChars; /* Number of characters in the unicode - * string. */ -{ - Tcl_Obj *objPtr; - Unicode *unicodePtr; - int numBytes, allocated; - - numBytes = numChars * sizeof(Tcl_UniChar); - - /* - * Allocate extra space for the null character - */ - - allocated = numBytes + sizeof(Tcl_UniChar); - - TclNewObj(objPtr); - objPtr->bytes = NULL; - objPtr->typePtr = &tclUnicodeType; - - unicodePtr = (Unicode *) ckalloc(UNICODE_SIZE(allocated)); - unicodePtr->numChars = numChars; - unicodePtr->allocated = allocated; - memcpy((VOID *) unicodePtr->chars, (VOID *) unichars, (size_t) numBytes); - unicodePtr->chars[numChars] = 0; - SET_UNICODE(objPtr, unicodePtr); - return objPtr; -} - -/* - *--------------------------------------------------------------------------- - * - * TclAllSingleByteChars -- - * - * Initialize the internal representation of a Unicode Tcl_Obj - * to a copy of the internal representation of an existing Unicode - * object. - * - * Results: - * None. - * - * Side effects: - * Allocates memory. - * - *--------------------------------------------------------------------------- - */ - -static int -AllSingleByteChars(objPtr) - Tcl_Obj *objPtr; /* Object whose char lengths to check. */ -{ - Unicode *unicodePtr; - int numBytes, numChars; - - unicodePtr = GET_UNICODE(objPtr); - numChars = unicodePtr->numChars; - numBytes = objPtr->length; - - if (numChars == numBytes) { - return 1; - } else { - return 0; - } -} - -/* - *--------------------------------------------------------------------------- - * - * DupUnicodeInternalRep -- - * - * Initialize the internal representation of a Unicode Tcl_Obj - * to a copy of the internal representation of an existing Unicode - * object. - * - * Results: - * None. - * - * Side effects: - * Allocates memory. - * - *--------------------------------------------------------------------------- - */ - -static void -DupUnicodeInternalRep(srcPtr, copyPtr) - Tcl_Obj *srcPtr; /* Object with internal rep to copy. */ - Tcl_Obj *copyPtr; /* Object with internal rep to set. */ -{ - Unicode *srcUnicodePtr = GET_UNICODE(srcPtr); - Unicode *copyUnicodePtr; - - /* - * If the src obj is a string of 1-byte Utf chars, then copy the - * string rep of the source object and create an "empty" Unicode - * internal rep for the new object. Otherwise, copy Unicode - * internal rep, and invalidate the string rep of the new object. - */ - - if (srcUnicodePtr->numChars == srcPtr->length) { - copyUnicodePtr = (Unicode *) ckalloc(sizeof(Unicode)); - copyUnicodePtr->allocated = 0; - } else { - int allocated = srcUnicodePtr->allocated; - - copyUnicodePtr = (Unicode *) ckalloc(UNICODE_SIZE(allocated)); - - copyUnicodePtr->allocated = allocated; - memcpy((VOID *) copyUnicodePtr->chars, - (VOID *) srcUnicodePtr->chars, - (size_t) (srcUnicodePtr->numChars + 1) * sizeof(Tcl_UniChar)); - } - copyUnicodePtr->numChars = srcUnicodePtr->numChars; - SET_UNICODE(copyPtr, copyUnicodePtr); - copyPtr->typePtr = &tclUnicodeType; -} - -/* - *--------------------------------------------------------------------------- - * - * UpdateStringOfUnicode -- - * - * Update the string representation for a Unicode data object. - * Note: This procedure does not invalidate an existing old string rep - * so storage will be lost if this has not already been done. - * - * Results: - * None. - * - * Side effects: - * The object's string is set to a valid string that results from - * the Unicode-to-string conversion. - * - * The object becomes a string object -- the internal rep is - * discarded and the typePtr becomes NULL. - * - *--------------------------------------------------------------------------- - */ - -static void -UpdateStringOfUnicode(objPtr) - Tcl_Obj *objPtr; /* Unicode object whose string rep to - * update. */ -{ - int i, length, size; - Tcl_UniChar *src; - char dummy[TCL_UTF_MAX]; - char *dst; - Unicode *unicodePtr; - - unicodePtr = GET_UNICODE(objPtr); - src = unicodePtr->chars; - length = unicodePtr->numChars * sizeof(Tcl_UniChar); - - /* - * How much space will string rep need? - */ - - size = 0; - for (i = 0; i < unicodePtr->numChars; i++) { - size += Tcl_UniCharToUtf((int) src[i], dummy); - } - - dst = (char *) ckalloc((unsigned) (size + 1)); - objPtr->bytes = dst; - objPtr->length = size; - - for (i = 0; i < unicodePtr->numChars; i++) { - dst += Tcl_UniCharToUtf(src[i], dst); - } - *dst = '\0'; -} - -/* - *--------------------------------------------------------------------------- - * - * SetOptUnicodeFromAny -- - * - * Generate the optimized Unicode internal rep from the string rep. - * - * Results: - * None. - * - * Side effects: - * The Unicode ojbect is opitmized for the case where each UTF char in - * a string is only one byte. In this case, we store the value of - * numChars, but we don't copy the bytes to the unicodeObj->chars. - * Before accessing obj->chars, check if all chars are 1 byte long. - * - *--------------------------------------------------------------------------- - */ - -static void -SetOptUnicodeFromAny(objPtr, numChars) - Tcl_Obj *objPtr; /* The object to convert to type Unicode. */ - int numChars; -{ - Tcl_ObjType *typePtr; - Unicode *unicodePtr; - - typePtr = objPtr->typePtr; - if ((typePtr != NULL) && (typePtr->freeIntRepProc) != NULL) { - (*typePtr->freeIntRepProc)(objPtr); - } - objPtr->typePtr = &tclUnicodeType; - - /* - * Allocate enough space for the basic Unicode structure. - */ - - unicodePtr = (Unicode *) ckalloc(sizeof(Unicode)); - unicodePtr->numChars = numChars; - unicodePtr->allocated = 0; - SET_UNICODE(objPtr, unicodePtr); -} - -/* - *--------------------------------------------------------------------------- - * - * SetFullUnicodeFromAny -- - * - * Generate the full (non-optimized) Unicode internal rep from the - * string rep. - * - * Results: - * None. - * - * Side effects: - * The Unicode internal rep will contain a copy of the string "src" in - * unicode format. - * - *--------------------------------------------------------------------------- - */ - -static void -SetFullUnicodeFromAny(objPtr, src, numBytes, numChars) - Tcl_Obj *objPtr; /* The object to convert to type Unicode. */ - char *src; - int numBytes; - int numChars; -{ - Tcl_ObjType *typePtr; - Unicode *unicodePtr; - char *srcEnd; - Tcl_UniChar *dst; - size_t length = (numChars + 1) * sizeof(Tcl_UniChar); - - unicodePtr = (Unicode *) ckalloc(UNICODE_SIZE(length)); - srcEnd = src + numBytes; - - for (dst = unicodePtr->chars; src < srcEnd; dst++) { - src += Tcl_UtfToUniChar(src, dst); - } - *dst = 0; - - unicodePtr->numChars = numChars; - unicodePtr->allocated = length; - - typePtr = objPtr->typePtr; - if ((typePtr != NULL) && (typePtr->freeIntRepProc) != NULL) { - (*typePtr->freeIntRepProc)(objPtr); - } - objPtr->typePtr = &tclUnicodeType; - SET_UNICODE(objPtr, unicodePtr); -} - -/* - *--------------------------------------------------------------------------- - * - * SetUnicodeFromAny -- - * - * Generate the Unicode internal rep from the string rep. - * - * Results: - * The return value is always TCL_OK. - * - * Side effects: - * A Unicode object is stored as the internal rep of objPtr. The Unicode - * object is opitmized for the case where each UTF char in a string is - * only one byte. In this case, we store the value of numChars, but we - * don't copy the bytes to the unicodeObj->chars. Before accessing - * obj->chars, check if all chars are 1 byte long. - * - *--------------------------------------------------------------------------- - */ - -static int -SetUnicodeFromAny(interp, objPtr) - Tcl_Interp *interp; /* Not used. */ - Tcl_Obj *objPtr; /* The object to convert to type Unicode. */ -{ - int numBytes, numChars; - char *src; - - if (objPtr->typePtr != &tclUnicodeType) { - src = Tcl_GetStringFromObj(objPtr, &numBytes); - - numChars = Tcl_NumUtfChars(src, numBytes); - if (numChars == numBytes) { - SetOptUnicodeFromAny(objPtr, numChars); - } else { - SetFullUnicodeFromAny(objPtr, src, numBytes, numChars); - } - } - return TCL_OK; -} - -/* - *---------------------------------------------------------------------- - * - * FreeUnicodeInternalRep -- - * - * Deallocate the storage associated with a Unicode data object's - * internal representation. - * - * Results: - * None. - * - * Side effects: - * Frees memory. - * - *---------------------------------------------------------------------- - */ - -static void -FreeUnicodeInternalRep(objPtr) - Tcl_Obj *objPtr; /* Object with internal rep to free. */ -{ - ckfree((char *) GET_UNICODE(objPtr)); -} diff --git a/tests/stringObj.test b/tests/stringObj.test index 33100e5..9b72e94 100644 --- a/tests/stringObj.test +++ b/tests/stringObj.test @@ -12,7 +12,7 @@ # See the file "license.terms" for information on usage and redistribution # of this file, and for a DISCLAIMER OF ALL WARRANTIES. # -# RCS: @(#) $Id: stringObj.test,v 1.3 1999/04/16 00:47:35 stanton Exp $ +# RCS: @(#) $Id: stringObj.test,v 1.4 1999/06/15 01:16:27 hershey Exp $ if {[lsearch [namespace children] ::tcltest] == -1} { source [file join [pwd] [file dirname [info script]] defs.tcl] @@ -190,6 +190,185 @@ test stringObj-8.1 {DupStringInternalRep procedure} { [teststringobj get 2] } {5 10 5 5 abcde} +test unicode-1.1 {TclGetUniCharFromObj with byte-size chars} { + string index "abcdefghi" 0 +} "a" +test unicode-1.2 {TclGetUniCharFromObj with byte-size chars} { + string index "abcdefghi" 3 +} "d" +test unicode-1.3 {TclGetUniCharFromObj with byte-size chars} { + string index "abcdefghi" end +} "i" +test unicode-1.4 {TclGetUniCharFromObj with mixed width chars} { + string index "ïa¿b®c®¿dï" 0 +} "ï" +test unicode-1.5 {TclGetUniCharFromObj} { + string index "ïa¿b®c®¿dï" 4 +} "®" +test unicode-1.6 {TclGetUniCharFromObj} { + string index "ïa¿b®cï¿d®" end +} "®" + +test unicode-2.1 {TclGetUnicodeLengthFromObj with byte-size chars} { + string length "" +} 0 +test unicode-2.2 {TclGetUnicodeLengthFromObj with byte-size chars} { + string length "a" +} 1 +test unicode-2.3 {TclGetUnicodeLengthFromObj with byte-size chars} { + string length "abcdef" +} 6 +test unicode-2.4 {TclGetUnicodeLengthFromObj with mixed width chars} { + string length "®" +} 1 +test unicode-2.5 {TclGetUnicodeLengthFromObj with mixed width chars} { + string length "○○" +} 6 +test unicode-2.6 {TclGetUnicodeLengthFromObj with mixed width chars} { + string length "ïa¿b®cï¿d®" +} 10 + +test unicode-3.1 {TclGetRangeFromObj with all byte-size chars} {testobj} { + set x "abcdef" + list [testobj objtype $x] [set y [string range $x 1 end-1]] \ + [testobj objtype $x] [testobj objtype $y] +} {none bcde string none} + +test unicode-3.2 {TclGetRangeFromObj with some mixed width chars} {testobj} { + set x "abcïïdef" + list [testobj objtype $x] [set y [string range $x 1 end-1]] \ + [testobj objtype $x] [testobj objtype $y] +} {none bcïïde string string} + +test unicode-4.1 {UpdateStringOfUnicode} {testobj} { + set x 2345 + list [string index $x end] [testobj objtype $x] [incr x] \ + [testobj objtype $x] +} {5 string 2346 int} + +test unicode-5.1 {SetUnicodeFromAny called with non-unicode obj} {testobj} { + set x 2345 + list [incr x] [testobj objtype $x] [string index $x end] \ + [testobj objtype $x] +} {2346 int 6 string} + +test unicode-5.2 {SetUnicodeFromAny called with unicode obj} {testobj} { + set x "abcdef" + list [string length $x] [testobj objtype $x] \ + [string length $x] [testobj objtype $x] +} {6 string 6 string} + +test unicode-6.1 {DupUnicodeInternalRep, mixed width chars} {testobj} { + set x abcï¿®ghi + string length $x + set y $x + list [testobj objtype $x] [testobj objtype $y] [append x "®¿ï"] \ + [set y] [testobj objtype $x] [testobj objtype $y] +} {string string abcï¿®ghi®¿ï abcï¿®ghi string string} + +test unicode-6.2 {DupUnicodeInternalRep, mixed width chars} {testobj} { + set x abcï¿®ghi + set y $x + string length $x + list [testobj objtype $x] [testobj objtype $y] [append x "®¿ï"] \ + [set y] [testobj objtype $x] [testobj objtype $y] +} {string string abcï¿®ghi®¿ï abcï¿®ghi string string} + +test unicode-6.3 {DupUnicodeInternalRep, all byte-size chars} {testobj} { + set x abcdefghi + string length $x + set y $x + list [testobj objtype $x] [testobj objtype $y] [append x jkl] \ + [set y] [testobj objtype $x] [testobj objtype $y] +} {string string abcdefghijkl abcdefghi string string} + +test unicode-6.4 {DupUnicodeInternalRep, all byte-size chars} {testobj} { + set x abcdefghi + set y $x + string length $x + list [testobj objtype $x] [testobj objtype $y] [append x jkl] \ + [set y] [testobj objtype $x] [testobj objtype $y] +} {string string abcdefghijkl abcdefghi string string} + +test unicode-7.1 {TclAppendObjToUnicodeObj, mixed src & dest} {testobj} { + set x abcï¿®ghi + set y ®¿ï + string length $x + list [testobj objtype $x] [testobj objtype $y] [append x $y] \ + [set y] [testobj objtype $x] [testobj objtype $y] +} {string none abcï¿®ghi®¿ï ®¿ï string string} + +test unicode-7.2 {TclAppendObjToUnicodeObj, mixed src & dest} {testobj} { + set x abcï¿®ghi + string length $x + list [testobj objtype $x] [append x $x] [testobj objtype $x] \ + [append x $x] [testobj objtype $x] +} {string abcï¿®ghiabcï¿®ghi string\ +abcï¿®ghiabcï¿®ghiabcï¿®ghiabcï¿®ghi\ +string} + +test unicode-7.3 {TclAppendObjToUnicodeObj, mixed src & 1-byte dest} {testobj} { + set x abcdefghi + set y ®¿ï + string length $x + list [testobj objtype $x] [testobj objtype $y] [append x $y] \ + [set y] [testobj objtype $x] [testobj objtype $y] +} {string none abcdefghi®¿ï ®¿ï string string} + +test unicode-7.4 {TclAppendObjToUnicodeObj, 1-byte src & dest} {testobj} { + set x abcdefghi + set y jkl + string length $x + list [testobj objtype $x] [testobj objtype $y] [append x $y] \ + [set y] [testobj objtype $x] [testobj objtype $y] +} {string none abcdefghijkl jkl string string} + +test unicode-7.5 {TclAppendObjToUnicodeObj, 1-byte src & dest} {testobj} { + set x abcdefghi + string length $x + list [testobj objtype $x] [append x $x] [testobj objtype $x] \ + [append x $x] [testobj objtype $x] +} {string abcdefghiabcdefghi string abcdefghiabcdefghiabcdefghiabcdefghi\ +string} + +test unicode-7.6 {TclAppendObjToUnicodeObj, 1-byte src & mixed dest} {testobj} { + set x abcï¿®ghi + set y jkl + string length $x + list [testobj objtype $x] [testobj objtype $y] [append x $y] \ + [set y] [testobj objtype $x] [testobj objtype $y] +} {string none abcï¿®ghijkl jkl string string} + +test unicode-7.7 {TclAppendObjToUnicodeObj, integer src & dest} {testobj} { + set x [expr {4 * 5}] + set y [expr {4 + 5}] + list [testobj objtype $x] [testobj objtype $y] [append x $y] \ + [testobj objtype $x] [append x $y] [testobj objtype $x] \ + [testobj objtype $y] +} {int int 209 string 2099 string int} + +test unicode-7.8 {TclAppendObjToUnicodeObj, integer src & dest} {testobj} { + set x [expr {4 * 5}] + list [testobj objtype $x] [append x $x] [testobj objtype $x] \ + [append x $x] [testobj objtype $x] +} {int 2020 string 20202020 string} + +test unicode-7.9 {TclAppendObjToUnicodeObj, integer src & 1-byte dest} {testobj} { + set x abcdefghi + set y [expr {4 + 5}] + string length $x + list [testobj objtype $x] [testobj objtype $y] [append x $y] \ + [set y] [testobj objtype $x] [testobj objtype $y] +} {string int abcdefghi9 9 string int} + +test unicode-7.10 {TclAppendObjToUnicodeObj, integer src & mixed dest} {testobj} { + set x abcï¿®ghi + set y [expr {4 + 5}] + string length $x + list [testobj objtype $x] [testobj objtype $y] [append x $y] \ + [set y] [testobj objtype $x] [testobj objtype $y] +} {string int abcï¿®ghi9 9 string int} + testobj freeallvars # cleanup diff --git a/tests/unicode.test b/tests/unicode.test deleted file mode 100644 index 6ee91c8..0000000 --- a/tests/unicode.test +++ /dev/null @@ -1,204 +0,0 @@ -# This file tests the tclUnicode.c file. -# -# This file contains a collection of tests for one or more of the Tcl -# built-in commands. Sourcing this file into Tcl runs the tests and -# generates output for errors. No output means no errors were found. -# -# Copyright (c) 1999 by Scriptics Corporation. -# -# See the file "license.terms" for information on usage and redistribution -# of this file, and for a DISCLAIMER OF ALL WARRANTIES. -# -# RCS: @(#) $Id: unicode.test,v 1.2 1999/06/08 02:59:30 hershey Exp $ - -if {[lsearch [namespace children] ::tcltest] == -1} { - source [file join [pwd] [file dirname [info script]] defs.tcl] -} - -# Some tests require the testobj command - -set ::tcltest::testConfig(testobj) \ - [expr {[info commands testobj] != {}}] - -test unicode-1.1 {TclGetUniCharFromObj with byte-size chars} { - string index "abcdefghi" 0 -} "a" -test unicode-1.2 {TclGetUniCharFromObj with byte-size chars} { - string index "abcdefghi" 3 -} "d" -test unicode-1.3 {TclGetUniCharFromObj with byte-size chars} { - string index "abcdefghi" end -} "i" -test unicode-1.4 {TclGetUniCharFromObj with mixed width chars} { - string index "ïa¿b®c®¿dï" 0 -} "ï" -test unicode-1.5 {TclGetUniCharFromObj} { - string index "ïa¿b®c®¿dï" 4 -} "®" -test unicode-1.6 {TclGetUniCharFromObj} { - string index "ïa¿b®cï¿d®" end -} "®" - -test unicode-2.1 {TclGetUnicodeLengthFromObj with byte-size chars} { - string length "" -} 0 -test unicode-2.2 {TclGetUnicodeLengthFromObj with byte-size chars} { - string length "a" -} 1 -test unicode-2.3 {TclGetUnicodeLengthFromObj with byte-size chars} { - string length "abcdef" -} 6 -test unicode-2.4 {TclGetUnicodeLengthFromObj with mixed width chars} { - string length "®" -} 1 -test unicode-2.5 {TclGetUnicodeLengthFromObj with mixed width chars} { - string length "○○" -} 6 -test unicode-2.6 {TclGetUnicodeLengthFromObj with mixed width chars} { - string length "ïa¿b®cï¿d®" -} 10 - -test unicode-3.1 {TclGetRangeFromObj with all byte-size chars} {testobj} { - set x "abcdef" - list [testobj objtype $x] [set y [string range $x 1 end-1]] \ - [testobj objtype $x] [testobj objtype $y] -} {none bcde unicode none} - -test unicode-3.2 {TclGetRangeFromObj with some mixed width chars} {testobj} { - set x "abcïïdef" - list [testobj objtype $x] [set y [string range $x 1 end-1]] \ - [testobj objtype $x] [testobj objtype $y] -} {none bcïïde unicode unicode} - -test unicode-4.1 {UpdateStringOfUnicode} {testobj} { - set x 2345 - list [string index $x end] [testobj objtype $x] [incr x] \ - [testobj objtype $x] -} {5 unicode 2346 int} - -test unicode-5.1 {SetUnicodeFromAny called with non-unicode obj} {testobj} { - set x 2345 - list [incr x] [testobj objtype $x] [string index $x end] \ - [testobj objtype $x] -} {2346 int 6 unicode} - -test unicode-5.2 {SetUnicodeFromAny called with unicode obj} {testobj} { - set x "abcdef" - list [string length $x] [testobj objtype $x] \ - [string length $x] [testobj objtype $x] -} {6 unicode 6 unicode} - -test unicode-6.1 {DupUnicodeInternalRep, mixed width chars} {testobj} { - set x abcï¿®ghi - string length $x - set y $x - list [testobj objtype $x] [testobj objtype $y] [append x "®¿ï"] \ - [set y] [testobj objtype $x] [testobj objtype $y] -} {unicode unicode abcï¿®ghi®¿ï abcï¿®ghi unicode unicode} - -test unicode-6.2 {DupUnicodeInternalRep, mixed width chars} {testobj} { - set x abcï¿®ghi - set y $x - string length $x - list [testobj objtype $x] [testobj objtype $y] [append x "®¿ï"] \ - [set y] [testobj objtype $x] [testobj objtype $y] -} {unicode unicode abcï¿®ghi®¿ï abcï¿®ghi unicode unicode} - -test unicode-6.3 {DupUnicodeInternalRep, all byte-size chars} {testobj} { - set x abcdefghi - string length $x - set y $x - list [testobj objtype $x] [testobj objtype $y] [append x jkl] \ - [set y] [testobj objtype $x] [testobj objtype $y] -} {unicode unicode abcdefghijkl abcdefghi unicode unicode} - -test unicode-6.4 {DupUnicodeInternalRep, all byte-size chars} {testobj} { - set x abcdefghi - set y $x - string length $x - list [testobj objtype $x] [testobj objtype $y] [append x jkl] \ - [set y] [testobj objtype $x] [testobj objtype $y] -} {unicode unicode abcdefghijkl abcdefghi unicode unicode} - -test unicode-7.1 {TclAppendObjToUnicodeObj, mixed src & dest} {testobj} { - set x abcï¿®ghi - set y ®¿ï - string length $x - list [testobj objtype $x] [testobj objtype $y] [append x $y] \ - [set y] [testobj objtype $x] [testobj objtype $y] -} {unicode none abcï¿®ghi®¿ï ®¿ï unicode unicode} - -test unicode-7.2 {TclAppendObjToUnicodeObj, mixed src & dest} {testobj} { - set x abcï¿®ghi - string length $x - list [testobj objtype $x] [append x $x] [testobj objtype $x] \ - [append x $x] [testobj objtype $x] -} {unicode abcï¿®ghiabcï¿®ghi unicode\ -abcï¿®ghiabcï¿®ghiabcï¿®ghiabcï¿®ghi\ -unicode} - -test unicode-7.3 {TclAppendObjToUnicodeObj, mixed src & 1-byte dest} {testobj} { - set x abcdefghi - set y ®¿ï - string length $x - list [testobj objtype $x] [testobj objtype $y] [append x $y] \ - [set y] [testobj objtype $x] [testobj objtype $y] -} {unicode none abcdefghi®¿ï ®¿ï string unicode} - -test unicode-7.4 {TclAppendObjToUnicodeObj, 1-byte src & dest} {testobj} { - set x abcdefghi - set y jkl - string length $x - list [testobj objtype $x] [testobj objtype $y] [append x $y] \ - [set y] [testobj objtype $x] [testobj objtype $y] -} {unicode none abcdefghijkl jkl unicode unicode} - -test unicode-7.5 {TclAppendObjToUnicodeObj, 1-byte src & dest} {testobj} { - set x abcdefghi - string length $x - list [testobj objtype $x] [append x $x] [testobj objtype $x] \ - [append x $x] [testobj objtype $x] -} {unicode abcdefghiabcdefghi unicode abcdefghiabcdefghiabcdefghiabcdefghi\ -unicode} - -test unicode-7.6 {TclAppendObjToUnicodeObj, 1-byte src & mixed dest} {testobj} { - set x abcï¿®ghi - set y jkl - string length $x - list [testobj objtype $x] [testobj objtype $y] [append x $y] \ - [set y] [testobj objtype $x] [testobj objtype $y] -} {unicode none abcï¿®ghijkl jkl unicode unicode} - -test unicode-7.7 {TclAppendObjToUnicodeObj, integer src & dest} {testobj} { - set x [expr {4 * 5}] - set y [expr {4 + 5}] - list [testobj objtype $x] [testobj objtype $y] [append x $y] \ - [testobj objtype $x] [append x $y] [testobj objtype $x] \ - [testobj objtype $y] -} {int int 209 string 2099 string int} - -test unicode-7.8 {TclAppendObjToUnicodeObj, integer src & dest} {testobj} { - set x [expr {4 * 5}] - list [testobj objtype $x] [append x $x] [testobj objtype $x] \ - [append x $x] [testobj objtype $x] -} {int 2020 string 20202020 unicode} - -test unicode-7.9 {TclAppendObjToUnicodeObj, integer src & 1-byte dest} {testobj} { - set x abcdefghi - set y [expr {4 + 5}] - string length $x - list [testobj objtype $x] [testobj objtype $y] [append x $y] \ - [set y] [testobj objtype $x] [testobj objtype $y] -} {unicode int abcdefghi9 9 string int} - -test unicode-7.10 {TclAppendObjToUnicodeObj, integer src & mixed dest} {testobj} { - set x abcï¿®ghi - set y [expr {4 + 5}] - string length $x - list [testobj objtype $x] [testobj objtype $y] [append x $y] \ - [set y] [testobj objtype $x] [testobj objtype $y] -} {unicode int abcï¿®ghi9 9 unicode int} - -# cleanup -::tcltest::cleanupTests -return diff --git a/unix/Makefile.in b/unix/Makefile.in index b4a7b55..a6fda07 100644 --- a/unix/Makefile.in +++ b/unix/Makefile.in @@ -5,7 +5,9 @@ # "autoconf" program (constructs like "@foo@" will get replaced in the # actual Makefile. # -# RCS: @(#) $Id: Makefile.in,v 1.29 1999/06/11 01:53:03 surles Exp $ +# RCS: @(#) $Id: Makefile.in,v 1.30 1999/06/15 01:16:28 hershey Exp $ + +# PURIFY = # Current Tcl version; used in various names. @@ -223,6 +225,7 @@ DLTEST_DIR = @TCL_SRC_DIR@/unix/dltest # Must be absolute to so the corresponding tcltest's tcl_library is absolute. TCL_BUILDTIME_LIBRARY= @TCL_SRC_DIR@/library +#CC = purify @CC@ CC = @CC@ #---------------------------------------------------------------- @@ -269,8 +272,7 @@ GENERIC_OBJS = regcomp.o regexec.o regfree.o regerror.o tclAlloc.o \ tclObj.o tclPanic.o tclParse.o tclParseExpr.o tclPipe.o \ tclPkg.o tclPosixStr.o tclPreserve.o tclProc.o tclRegexp.o \ tclResolve.o tclResult.o tclScan.o tclStringObj.o tclThread.o \ - tclStubInit.o tclStubLib.o tclTimer.o tclUnicodeObj.o tclUtf.o \ - tclUtil.o tclVar.o + tclStubInit.o tclStubLib.o tclTimer.o tclUtf.o tclUtil.o tclVar.o STUB_LIB_OBJS = tclStubLib.o ${COMPAT_OBJS} @@ -351,7 +353,6 @@ GENERIC_SRCS = \ $(GENERIC_DIR)/tclTestProcBodyObj.c \ $(GENERIC_DIR)/tclThread.c \ $(GENERIC_DIR)/tclTimer.c \ - $(GENERIC_DIR)/tclUnicodeObj.c \ $(GENERIC_DIR)/tclUtil.c \ $(GENERIC_DIR)/tclVar.c @@ -890,9 +891,6 @@ tclThread.o: $(GENERIC_DIR)/tclThread.c tclThreadTest.o: $(GENERIC_DIR)/tclThreadTest.c $(CC) -c $(CC_SWITCHES) $(GENERIC_DIR)/tclThreadTest.c -tclUnicodeObj.o: $(GENERIC_DIR)/tclUnicodeObj.c - $(CC) -c $(CC_SWITCHES) $(GENERIC_DIR)/tclUnicodeObj.c - tclUnixChan.o: $(UNIX_DIR)/tclUnixChan.c $(CC) -c $(CC_SWITCHES) $(UNIX_DIR)/tclUnixChan.c diff --git a/win/Makefile.in b/win/Makefile.in index bb778d9..25f297d 100644 --- a/win/Makefile.in +++ b/win/Makefile.in @@ -5,7 +5,7 @@ # "autoconf" program (constructs like "@foo@" will get replaced in the # actual Makefile. # -# RCS: @(#) $Id: Makefile.in,v 1.6 1999/06/11 02:06:27 surles Exp $ +# RCS: @(#) $Id: Makefile.in,v 1.7 1999/06/15 01:16:29 hershey Exp $ VERSION = @TCL_VERSION@ @@ -224,7 +224,6 @@ GENERIC_OBJS = \ tclStubLib.$(OBJEXT) \ tclThread.$(OBJEXT) \ tclTimer.$(OBJEXT) \ - tclUnicodeObj.$(OBJEXT) \ tclUtf.$(OBJEXT) \ tclUtil.$(OBJEXT) \ tclVar.$(OBJEXT) diff --git a/win/makefile.vc b/win/makefile.vc index 06926c7..644d758 100644 --- a/win/makefile.vc +++ b/win/makefile.vc @@ -6,7 +6,7 @@ # Copyright (c) 1995-1996 Sun Microsystems, Inc. # Copyright (c) 1998-1999 by Scriptics Corporation. # -# RCS: @(#) $Id: makefile.vc,v 1.35 1999/06/08 02:59:31 hershey Exp $ +# RCS: @(#) $Id: makefile.vc,v 1.36 1999/06/15 01:16:29 hershey Exp $ # Does not depend on the presence of any environment variables in # order to compile tcl; all needed information is derived from @@ -186,7 +186,6 @@ TCLOBJS = \ $(TMPDIR)\tclStubLib.obj \ $(TMPDIR)\tclThread.obj \ $(TMPDIR)\tclTimer.obj \ - $(TMPDIR)\tclUnicodeObj.obj \ $(TMPDIR)\tclUtf.obj \ $(TMPDIR)\tclUtil.obj \ $(TMPDIR)\tclVar.obj \ -- cgit v0.12