diff options
author | hershey <hershey> | 1999-06-15 01:16:21 (GMT) |
---|---|---|
committer | hershey <hershey> | 1999-06-15 01:16:21 (GMT) |
commit | 73d440a8ed4e3ef4fd1c30ce5708061a261396dc (patch) | |
tree | dc505f5e222817db5fe70f5b9eb5d01607656d12 /generic | |
parent | 9710a67498395708dbbe9b7e401cec3c6227b394 (diff) | |
download | tcl-73d440a8ed4e3ef4fd1c30ce5708061a261396dc.zip tcl-73d440a8ed4e3ef4fd1c30ce5708061a261396dc.tar.gz tcl-73d440a8ed4e3ef4fd1c30ce5708061a261396dc.tar.bz2 |
Merged String and Unicode object types. Added new functions to
the puplic API: Tcl_NewUnicodeObj, Tcl_SetUnicodeObj,
Tcl_GetUnicode, Tcl_GetUniChar, Tcl_GetCharLength, Tcl_GetRange,
Tcl_AppendUnicodeToObj.
Note: some stringObj tests are still failing--the teststringobj command
still needs to be updated.
Diffstat (limited to 'generic')
-rw-r--r-- | generic/tcl.decls | 26 | ||||
-rw-r--r-- | generic/tclCmdMZ.c | 32 | ||||
-rw-r--r-- | generic/tclDecls.h | 57 | ||||
-rw-r--r-- | generic/tclInt.h | 18 | ||||
-rw-r--r-- | generic/tclObj.c | 3 | ||||
-rw-r--r-- | generic/tclRegexp.c | 6 | ||||
-rw-r--r-- | generic/tclStringObj.c | 1077 | ||||
-rw-r--r-- | generic/tclStubInit.c | 9 | ||||
-rw-r--r-- | generic/tclUnicodeObj.c | 882 |
9 files changed, 1113 insertions, 997 deletions
diff --git a/generic/tcl.decls b/generic/tcl.decls index 8447520..8b9d46d 100644 --- a/generic/tcl.decls +++ b/generic/tcl.decls @@ -10,7 +10,7 @@ # See the file "license.terms" for information on usage and redistribution # of this file, and for a DISCLAIMER OF ALL WARRANTIES. # -# RCS: @(#) $Id: tcl.decls,v 1.15 1999/06/10 04:28:49 stanton Exp $ +# RCS: @(#) $Id: tcl.decls,v 1.16 1999/06/15 01:16:21 hershey Exp $ library tcl @@ -1284,7 +1284,29 @@ declare 376 generic { declare 377 generic { void Tcl_RegExpGetInfo(Tcl_RegExp regexp, Tcl_RegExpInfo *infoPtr) } - +declare 378 generic { + Tcl_Obj * Tcl_NewUnicodeObj(Tcl_UniChar *unicode, int numChars) +} +declare 379 generic { + void Tcl_SetUnicodeObj(Tcl_Obj *objPtr, Tcl_UniChar *unicode, \ + int numChars) +} +declare 380 generic { + int Tcl_GetCharLength (Tcl_Obj *objPtr) +} +declare 381 generic { + Tcl_UniChar Tcl_GetUniChar (Tcl_Obj *objPtr, int index) +} +declare 382 generic { + Tcl_UniChar * Tcl_GetUnicode (Tcl_Obj *objPtr) +} +declare 383 generic { + Tcl_Obj * Tcl_GetRange (Tcl_Obj *objPtr, int first, int last) +} +declare 384 generic { + void Tcl_AppendUnicodeToObj (register Tcl_Obj *objPtr, \ + Tcl_UniChar *unicode, int length) +} ############################################################################## diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c index 4f20815..3746cfc 100644 --- a/generic/tclCmdMZ.c +++ b/generic/tclCmdMZ.c @@ -13,7 +13,7 @@ * See the file "license.terms" for information on usage and redistribution * of this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclCmdMZ.c,v 1.14 1999/06/10 04:28:50 stanton Exp $ + * RCS: @(#) $Id: tclCmdMZ.c,v 1.15 1999/06/15 01:16:22 hershey Exp $ */ #include "tclInt.h" @@ -274,7 +274,7 @@ Tcl_RegexpObjCmd(dummy, interp, objc, objv) newPtr = Tcl_NewListObj(2, objs); } else { if (i <= info.nsubs) { - newPtr = TclGetRangeFromObj(objPtr, info.matches[i].start, + newPtr = Tcl_GetRange(objPtr, info.matches[i].start, info.matches[i].end - 1); } else { newPtr = Tcl_NewObj(); @@ -385,8 +385,8 @@ Tcl_RegsubObjCmd(dummy, interp, objc, objv) Tcl_IncrRefCount(resultPtr); objPtr = objv[1]; - wlen = TclGetUnicodeLengthFromObj(objPtr); - wstring = TclGetUnicodeFromObj(objPtr); + wlen = Tcl_GetCharLength(objPtr); + wstring = Tcl_GetUnicode(objPtr); subspec = Tcl_GetString(objv[2]); varPtr = objv[3]; @@ -430,7 +430,7 @@ Tcl_RegsubObjCmd(dummy, interp, objc, objv) Tcl_RegExpGetInfo(regExpr, &info); start = info.matches[0].start; end = info.matches[0].end; - TclAppendUnicodeToObj(resultPtr, wstring + offset, start); + Tcl_AppendUnicodeToObj(resultPtr, wstring + offset, start); /* * Append the subSpec argument to the variable, making appropriate @@ -468,7 +468,7 @@ Tcl_RegsubObjCmd(dummy, interp, objc, objv) subStart = info.matches[index].start; subEnd = info.matches[index].end; if ((subStart >= 0) && (subEnd >= 0)) { - TclAppendUnicodeToObj(resultPtr, wstring + offset + subStart, + Tcl_AppendUnicodeToObj(resultPtr, wstring + offset + subStart, subEnd - subStart); } if (*src == '\\') { @@ -485,7 +485,7 @@ Tcl_RegsubObjCmd(dummy, interp, objc, objv) * in order to prevent infinite loops. */ - TclAppendUnicodeToObj(resultPtr, wstring + offset, 1); + Tcl_AppendUnicodeToObj(resultPtr, wstring + offset, 1); offset++; } offset += end; @@ -500,7 +500,7 @@ Tcl_RegsubObjCmd(dummy, interp, objc, objv) */ if ((offset < wlen) || (numMatches == 0)) { - TclAppendUnicodeToObj(resultPtr, wstring + offset, wlen - offset); + Tcl_AppendUnicodeToObj(resultPtr, wstring + offset, wlen - offset); } if (Tcl_ObjSetVar2(interp, varPtr, NULL, resultPtr, 0) == NULL) { Tcl_AppendResult(interp, "couldn't set variable \"", @@ -973,8 +973,11 @@ Tcl_StringObjCmd(dummy, interp, objc, objv) if (length2 == utflen) { /* no unicode chars */ string2 += start; + length2 -= start; } else { - string2 = Tcl_UtfAtIndex(string2, start); + char *s = Tcl_UtfAtIndex(string2, start); + length2 -= s - string2; + string2 = s; } } } @@ -1049,14 +1052,14 @@ Tcl_StringObjCmd(dummy, interp, objc, objv) * 'end' really means. */ - length2 = TclGetUnicodeLengthFromObj(objv[2]); + length2 = Tcl_GetCharLength(objv[2]); if (TclGetIntForIndex(interp, objv[3], length2 - 1, &index) != TCL_OK) { return TCL_ERROR; } if ((index >= 0) && (index < length2)) { - unichar = TclGetUniCharFromObj(objv[2], index); + unichar = Tcl_GetUniChar(objv[2], index); length2 = Tcl_UniCharToUtf((int)unichar, buf); Tcl_SetStringObj(resultPtr, buf, length2); } @@ -1432,7 +1435,7 @@ Tcl_StringObjCmd(dummy, interp, objc, objv) Tcl_SetIntObj(resultPtr, length1); } else { Tcl_SetIntObj(resultPtr, - TclGetUnicodeLengthFromObj(objv[2])); + Tcl_GetCharLength(objv[2])); } } break; @@ -1611,7 +1614,7 @@ Tcl_StringObjCmd(dummy, interp, objc, objv) * create a result object. */ - length2 = TclGetUnicodeLengthFromObj(objv[2]) - 1; + length2 = Tcl_GetCharLength(objv[2]) - 1; if (TclGetIntForIndex(interp, objv[3], length2, &first) != TCL_OK) { @@ -1628,7 +1631,7 @@ Tcl_StringObjCmd(dummy, interp, objc, objv) last = length1 - 1; } if (last >= first) { - resultPtr = TclGetRangeFromObj(objv[2], first, last); + resultPtr = Tcl_GetRange(objv[2], first, last); Tcl_SetObjResult(interp, resultPtr); } } @@ -1761,6 +1764,7 @@ Tcl_StringObjCmd(dummy, interp, objc, objv) Tcl_SetStringObj(resultPtr, string1, start - string1); Tcl_AppendToObj(resultPtr, string2, length2); Tcl_AppendToObj(resultPtr, end, -1); + ckfree(string2); } break; diff --git a/generic/tclDecls.h b/generic/tclDecls.h index 2a7ac93..8cec11a 100644 --- a/generic/tclDecls.h +++ b/generic/tclDecls.h @@ -8,7 +8,7 @@ * See the file "license.terms" for information on usage and redistribution * of this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclDecls.h,v 1.15 1999/06/10 04:28:50 stanton Exp $ + * RCS: @(#) $Id: tclDecls.h,v 1.16 1999/06/15 01:16:22 hershey Exp $ */ #ifndef _TCLDECLS @@ -1175,6 +1175,26 @@ EXTERN int Tcl_RegExpMatchObj _ANSI_ARGS_((Tcl_Interp * interp, /* 377 */ EXTERN void Tcl_RegExpGetInfo _ANSI_ARGS_((Tcl_RegExp regexp, Tcl_RegExpInfo * infoPtr)); +/* 378 */ +EXTERN Tcl_Obj * Tcl_NewUnicodeObj _ANSI_ARGS_((Tcl_UniChar * unicode, + int numChars)); +/* 379 */ +EXTERN void Tcl_SetUnicodeObj _ANSI_ARGS_((Tcl_Obj * objPtr, + Tcl_UniChar * unicode, int numChars)); +/* 380 */ +EXTERN int Tcl_GetCharLength _ANSI_ARGS_((Tcl_Obj * objPtr)); +/* 381 */ +EXTERN Tcl_UniChar Tcl_GetUniChar _ANSI_ARGS_((Tcl_Obj * objPtr, + int index)); +/* 382 */ +EXTERN Tcl_UniChar * Tcl_GetUnicode _ANSI_ARGS_((Tcl_Obj * objPtr)); +/* 383 */ +EXTERN Tcl_Obj * Tcl_GetRange _ANSI_ARGS_((Tcl_Obj * objPtr, + int first, int last)); +/* 384 */ +EXTERN void Tcl_AppendUnicodeToObj _ANSI_ARGS_(( + register Tcl_Obj * objPtr, + Tcl_UniChar * unicode, int length)); typedef struct TclStubHooks { struct TclPlatStubs *tclPlatStubs; @@ -1620,6 +1640,13 @@ typedef struct TclStubs { int (*tcl_UniCharIsPunct) _ANSI_ARGS_((int ch)); /* 375 */ int (*tcl_RegExpMatchObj) _ANSI_ARGS_((Tcl_Interp * interp, Tcl_RegExp regexp, Tcl_Obj * objPtr, int offset, int nmatches, int flags)); /* 376 */ void (*tcl_RegExpGetInfo) _ANSI_ARGS_((Tcl_RegExp regexp, Tcl_RegExpInfo * infoPtr)); /* 377 */ + Tcl_Obj * (*tcl_NewUnicodeObj) _ANSI_ARGS_((Tcl_UniChar * unicode, int numChars)); /* 378 */ + void (*tcl_SetUnicodeObj) _ANSI_ARGS_((Tcl_Obj * objPtr, Tcl_UniChar * unicode, int numChars)); /* 379 */ + int (*tcl_GetCharLength) _ANSI_ARGS_((Tcl_Obj * objPtr)); /* 380 */ + Tcl_UniChar (*tcl_GetUniChar) _ANSI_ARGS_((Tcl_Obj * objPtr, int index)); /* 381 */ + Tcl_UniChar * (*tcl_GetUnicode) _ANSI_ARGS_((Tcl_Obj * objPtr)); /* 382 */ + Tcl_Obj * (*tcl_GetRange) _ANSI_ARGS_((Tcl_Obj * objPtr, int first, int last)); /* 383 */ + void (*tcl_AppendUnicodeToObj) _ANSI_ARGS_((register Tcl_Obj * objPtr, Tcl_UniChar * unicode, int length)); /* 384 */ } TclStubs; #ifdef __cplusplus @@ -3168,6 +3195,34 @@ extern TclStubs *tclStubsPtr; #define Tcl_RegExpGetInfo \ (tclStubsPtr->tcl_RegExpGetInfo) /* 377 */ #endif +#ifndef Tcl_NewUnicodeObj +#define Tcl_NewUnicodeObj \ + (tclStubsPtr->tcl_NewUnicodeObj) /* 378 */ +#endif +#ifndef Tcl_SetUnicodeObj +#define Tcl_SetUnicodeObj \ + (tclStubsPtr->tcl_SetUnicodeObj) /* 379 */ +#endif +#ifndef Tcl_GetCharLength +#define Tcl_GetCharLength \ + (tclStubsPtr->tcl_GetCharLength) /* 380 */ +#endif +#ifndef Tcl_GetUniChar +#define Tcl_GetUniChar \ + (tclStubsPtr->tcl_GetUniChar) /* 381 */ +#endif +#ifndef Tcl_GetUnicode +#define Tcl_GetUnicode \ + (tclStubsPtr->tcl_GetUnicode) /* 382 */ +#endif +#ifndef Tcl_GetRange +#define Tcl_GetRange \ + (tclStubsPtr->tcl_GetRange) /* 383 */ +#endif +#ifndef Tcl_AppendUnicodeToObj +#define Tcl_AppendUnicodeToObj \ + (tclStubsPtr->tcl_AppendUnicodeToObj) /* 384 */ +#endif #endif /* defined(USE_TCL_STUBS) && !defined(USE_TCL_STUB_PROCS) */ diff --git a/generic/tclInt.h b/generic/tclInt.h index d30d439..cfa587b 100644 --- a/generic/tclInt.h +++ b/generic/tclInt.h @@ -11,7 +11,7 @@ * See the file "license.terms" for information on usage and redistribution * of this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclInt.h,v 1.32 1999/06/10 04:28:51 stanton Exp $ + * RCS: @(#) $Id: tclInt.h,v 1.33 1999/06/15 01:16:23 hershey Exp $ */ #ifndef _TCLINT @@ -1509,7 +1509,6 @@ extern Tcl_ObjType tclIntType; extern Tcl_ObjType tclListType; extern Tcl_ObjType tclProcBodyType; extern Tcl_ObjType tclStringType; -extern Tcl_ObjType tclUnicodeType; /* * The head of the list of free Tcl objects, and the total number of Tcl @@ -1543,12 +1542,6 @@ EXTERN int TclAccess _ANSI_ARGS_((CONST char *path, EXTERN int TclAccessDeleteProc _ANSI_ARGS_((TclAccessProc_ *proc)); EXTERN int TclAccessInsertProc _ANSI_ARGS_((TclAccessProc_ *proc)); EXTERN void TclAllocateFreeObjects _ANSI_ARGS_((void)); -EXTERN Tcl_Obj * TclAppendObjToUnicodeObj _ANSI_ARGS_(( - register Tcl_Obj *targetObjPtr, - register Tcl_Obj *srcObjPtr)); -EXTERN void TclAppendUnicodeToObj _ANSI_ARGS_(( - register Tcl_Obj *objPtr, Tcl_UniChar *unichars, - int length)); EXTERN int TclArraySet _ANSI_ARGS_((Tcl_Interp *interp, Tcl_Obj *arrayNameObj, Tcl_Obj *arrayElemObj)); EXTERN int TclCleanupChildren _ANSI_ARGS_((Tcl_Interp *interp, @@ -1641,13 +1634,6 @@ EXTERN int TclGetOpenMode _ANSI_ARGS_((Tcl_Interp *interp, char *string, int *seekFlagPtr)); EXTERN Tcl_Command TclGetOriginalCommand _ANSI_ARGS_(( Tcl_Command command)); -EXTERN Tcl_Obj* TclGetRangeFromObj _ANSI_ARGS_((Tcl_Obj *objPtr, - int first, int last)); -EXTERN Tcl_UniChar TclGetUniCharFromObj _ANSI_ARGS_((Tcl_Obj *objPtr, - int index)); -EXTERN Tcl_UniChar * TclGetUnicodeFromObj _ANSI_ARGS_((Tcl_Obj *objPtr)); -EXTERN int TclGetUnicodeLengthFromObj _ANSI_ARGS_(( - Tcl_Obj *objPtr)); EXTERN int TclGlob _ANSI_ARGS_((Tcl_Interp *interp, char *pattern, int noComplain)); EXTERN int TclGlobalInvoke _ANSI_ARGS_((Tcl_Interp *interp, @@ -1695,8 +1681,6 @@ EXTERN Var * TclLookupVar _ANSI_ARGS_((Tcl_Interp *interp, EXTERN int TclMathInProgress _ANSI_ARGS_((void)); EXTERN int TclNeedSpace _ANSI_ARGS_((char *start, char *end)); EXTERN Tcl_Obj * TclNewProcBodyObj _ANSI_ARGS_((Proc *procPtr)); -EXTERN Tcl_Obj * TclNewUnicodeObj _ANSI_ARGS_((Tcl_UniChar *unichars, - int numChars)); EXTERN int TclObjCommandComplete _ANSI_ARGS_((Tcl_Obj *cmdPtr)); EXTERN int TclObjInterpProc _ANSI_ARGS_((ClientData clientData, Tcl_Interp *interp, int objc, diff --git a/generic/tclObj.c b/generic/tclObj.c index 423df28..67be178 100644 --- a/generic/tclObj.c +++ b/generic/tclObj.c @@ -10,7 +10,7 @@ * See the file "license.terms" for information on usage and redistribution * of this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclObj.c,v 1.8 1999/06/08 02:59:25 hershey Exp $ + * RCS: @(#) $Id: tclObj.c,v 1.9 1999/06/15 01:16:23 hershey Exp $ */ #include "tclInt.h" @@ -138,7 +138,6 @@ TclInitObjSubsystem() Tcl_RegisterObjType(&tclListType); Tcl_RegisterObjType(&tclByteCodeType); Tcl_RegisterObjType(&tclProcBodyType); - Tcl_RegisterObjType(&tclUnicodeType); #ifdef TCL_COMPILE_STATS Tcl_MutexLock(&tclObjMutex); diff --git a/generic/tclRegexp.c b/generic/tclRegexp.c index 7590c8d..3e28224 100644 --- a/generic/tclRegexp.c +++ b/generic/tclRegexp.c @@ -10,7 +10,7 @@ * See the file "license.terms" for information on usage and redistribution * of this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclRegexp.c,v 1.7 1999/06/10 04:28:51 stanton Exp $ + * RCS: @(#) $Id: tclRegexp.c,v 1.8 1999/06/15 01:16:24 hershey Exp $ */ #include "tclInt.h" @@ -429,8 +429,8 @@ Tcl_RegExpMatchObj(interp, re, objPtr, offset, nmatches, flags) Tcl_IncrRefCount(objPtr); - udata = TclGetUnicodeFromObj(objPtr); - length = TclGetUnicodeLengthFromObj(objPtr); + udata = Tcl_GetUnicode(objPtr); + length = Tcl_GetCharLength(objPtr); /* * Save the target object so we can extract strings from it later. diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c index c70bcb9..8dc6e90 100644 --- a/generic/tclStringObj.c +++ b/generic/tclStringObj.c @@ -9,13 +9,29 @@ * track of the extra space. Objects with this internal * representation are called "expandable string objects". * + * Since some string operations work with UTF strings and others require Unicode + format, the string obeject type stores one or both formats. If the object is + created with a Unicode string, then UTF form is not stored until it is + required by a string operation. The string object always stores the number of + characters, so if the object is created with a UTF string, we automatically + convert it to unicode (as this costs little more than + +A Unicode string + * is an internationalized string. Conceptually, a Unicode string is an + * array of 16-bit quantities organized as a sequence of properly formed + * UTF-8 characters. There is a one-to-one map between Unicode and UTF + * characters. The Unicode ojbect is opitmized for the case where each UTF + * char in a string is only one byte. In this case, we store the value of + * numChars, but we don't copy the bytes to the unicodeObj->chars. Before + * accessing obj->chars, check if unicodeObj->numChars == obj->length. + * * Copyright (c) 1995-1997 Sun Microsystems, Inc. * Copyright (c) 1999 by Scriptics Corporation. * * See the file "license.terms" for information on usage and redistribution * of this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclStringObj.c,v 1.7 1999/06/08 02:59:25 hershey Exp $ + * RCS: @(#) $Id: tclStringObj.c,v 1.8 1999/06/15 01:16:25 hershey Exp $ */ #include "tclInt.h" @@ -24,7 +40,20 @@ * Prototypes for procedures defined later in this file: */ -static void ConvertToStringType _ANSI_ARGS_((Tcl_Obj *objPtr)); +static void AppendUnicodeToUnicodeRep _ANSI_ARGS_(( + Tcl_Obj *objPtr, Tcl_UniChar *unicode, + int appendNumChars)); +static void AppendUnicodeToUtfRep _ANSI_ARGS_(( + Tcl_Obj *objPtr, Tcl_UniChar *unicode, + int numChars)); +static void AppendUtfToUnicodeRep _ANSI_ARGS_((Tcl_Obj *objPtr, + char *bytes, int numBytes)); +static void AppendUtfToUtfRep _ANSI_ARGS_((Tcl_Obj *objPtr, + char *bytes, int numBytes)); + +static void FillUnicodeRep _ANSI_ARGS_((Tcl_Obj *objPtr)); + +static void FreeStringInternalRep _ANSI_ARGS_((Tcl_Obj *objPtr)); static void DupStringInternalRep _ANSI_ARGS_((Tcl_Obj *objPtr, Tcl_Obj *copyPtr)); static int SetStringFromAny _ANSI_ARGS_((Tcl_Interp *interp, @@ -38,11 +67,46 @@ static void UpdateStringOfString _ANSI_ARGS_((Tcl_Obj *objPtr)); Tcl_ObjType tclStringType = { "string", /* name */ - (Tcl_FreeInternalRepProc *) NULL, /* freeIntRepProc */ + FreeStringInternalRep, /* freeIntRepPro */ DupStringInternalRep, /* dupIntRepProc */ UpdateStringOfString, /* updateStringProc */ SetStringFromAny /* setFromAnyProc */ }; + +/* + * The following structure is the internal rep for a String object. + * It keeps track of how much memory has been used and how much has been + * allocated for the Unicode and UTF string to enable growing and + * shrinking of the UTF and Unicode reps of the String object with fewer + * mallocs. To optimize string length and indexing operations, this + * structure also stores the number of characters (same of UTF and Unicode!) + * once that value has been computede. + */ + +typedef struct String { + int numChars; /* The number of chars in the string. + * -1 means this value has not been + * calculated. >= 0 means that there is a + * valid Unicode rep, or that the number + * of UTF bytes == the number of chars. */ + size_t allocated; /* The amount of space actually allocated + * for the UTF string (minus 1 byte for + * the termination char). */ + size_t uallocated; /* The amount of space actually allocated + * for the Unicode string. 0 means the + * Unicode string rep is invalid. */ + Tcl_UniChar unicode[2]; /* The array of Unicode chars. The actual + * size of this field depends on the + * 'uallocated' field above. */ +} String; + +#define STRING_SIZE(len) \ + ((unsigned) (sizeof(String) + ((len-1) * sizeof(Tcl_UniChar)))) +#define GET_STRING(objPtr) \ + ((String *) (objPtr)->internalRep.otherValuePtr) +#define SET_STRING(objPtr, stringPtr) \ + (objPtr)->internalRep.otherValuePtr = (VOID *) (stringPtr) + /* *---------------------------------------------------------------------- @@ -182,6 +246,327 @@ Tcl_DbNewStringObj(bytes, length, file, line) #endif /* TCL_MEM_DEBUG */ /* + *--------------------------------------------------------------------------- + * + * TclNewUnicodeObj -- + * + * This procedure is creates a new String object and initializes + * it from the given Utf String. If the Utf String is the same size + * as the Unicode string, don't duplicate the data. + * + * Results: + * The newly created object is returned. This object will have no + * initial string representation. The returned object has a ref count + * of 0. + * + * Side effects: + * Memory allocated for new object and copy of Unicode argument. + * + *--------------------------------------------------------------------------- + */ + +Tcl_Obj * +Tcl_NewUnicodeObj(unicode, numChars) + Tcl_UniChar *unicode; /* The unicode string used to initialize + * the new object. */ + int numChars; /* Number of characters in the unicode + * string. */ +{ + Tcl_Obj *objPtr; + String *stringPtr; + int uallocated = (numChars + 1) * sizeof(Tcl_UniChar); + + /* + * Create a new obj with an invalid string rep. + */ + + TclNewObj(objPtr); + Tcl_InvalidateStringRep(objPtr); + objPtr->typePtr = &tclStringType; + + stringPtr = (String *) ckalloc(STRING_SIZE(uallocated)); + stringPtr->numChars = numChars; + stringPtr->uallocated = uallocated; + stringPtr->allocated = 0; + memcpy((VOID *) stringPtr->unicode, (VOID *) unicode, + (size_t) (numChars * sizeof(Tcl_UniChar))); + stringPtr->unicode[numChars] = 0; + SET_STRING(objPtr, stringPtr); + return objPtr; +} + +/* + *---------------------------------------------------------------------- + * + * Tcl_GetCharLength -- + * + * Get the length of the Unicode string from the Tcl object. + * + * Results: + * Pointer to unicode string representing the unicode object. + * + * Side effects: + * Frees old internal rep. Allocates memory for new "String" + * internal rep. + * + *---------------------------------------------------------------------- + */ + +int +Tcl_GetCharLength(objPtr) + Tcl_Obj *objPtr; /* The String object to get the num chars of. */ +{ + String *stringPtr; + + SetStringFromAny(NULL, objPtr); + stringPtr = GET_STRING(objPtr); + +/* if (objPtr->bytes == NULL) { */ +/* printf("called Tcl_GetCharLength with unicode str.\n"); */ +/* } else { */ +/* printf("called Tcl_GetCharLength with str = %s\n", objPtr->bytes); */ +/* } */ + + /* + * If numChars is unknown, then calculate the number of characaters + * while populating the Unicode string. + */ + + if (stringPtr->numChars == -1) { + + stringPtr->numChars = Tcl_NumUtfChars(objPtr->bytes, objPtr->length); + + if (stringPtr->numChars == objPtr->length) { + + /* + * Since we've just calucalated the number of chars, and all + * UTF chars are 1-byte long, we don't need to store the + * unicode string. + */ + + stringPtr->uallocated = 0; + + } else { + + /* + * Since we've just calucalated the number of chars, and not + * all UTF chars are 1-byte long, go ahead and populate the + * unicode string. + */ + + FillUnicodeRep(objPtr); + + /* + * We need to fetch the pointer again because we have just + * reallocated the structure to make room for the Unicode data. + */ + + stringPtr = GET_STRING(objPtr); + } + } + return stringPtr->numChars; +} + +/* + *---------------------------------------------------------------------- + * + * Tcl_GetUniChar -- + * + * Get the index'th Unicode character from the String object. The + * index is assumed to be in the appropriate range. + * + * Results: + * Returns the index'th Unicode character in the Object. + * + * Side effects: + * Fills unichar with the index'th Unicode character. + * + *---------------------------------------------------------------------- + */ + +Tcl_UniChar +Tcl_GetUniChar(objPtr, index) + Tcl_Obj *objPtr; /* The object to get the Unicode charater from. */ + int index; /* Get the index'th Unicode character. */ +{ + Tcl_UniChar unichar; + String *stringPtr; + + SetStringFromAny(NULL, objPtr); + stringPtr = GET_STRING(objPtr); + +/* if (objPtr->bytes == NULL) { */ +/* printf("called Tcl_GetUniChar with unicode str.\n"); */ +/* } else { */ +/* printf("called Tcl_GetUniChar with str = %s\n", objPtr->bytes); */ +/* } */ + + if (stringPtr->numChars == -1) { + + /* + * We haven't yet calculated the length, so we don't have the + * Unicode str. We need to know the number of chars before we + * can do indexing. + */ + + Tcl_GetCharLength(objPtr); + + /* + * We need to fetch the pointer again because we may have just + * reallocated the structure. + */ + + stringPtr = GET_STRING(objPtr); + } + if (stringPtr->uallocated == 0) { + char *bytes; + + /* + * All of the characters in the Utf string are 1 byte chars, + * so we don't store the unicode char. We get the Utf string + * and convert the index'th byte to a Unicode character. + */ + + bytes = Tcl_GetString(objPtr); + Tcl_UtfToUniChar(&bytes[index], &unichar); + } else { + unichar = stringPtr->unicode[index]; + } + return unichar; +} + +/* + *---------------------------------------------------------------------- + * + * Tcl_GetUnicode -- + * + * Get the index'th Unicode character from the String object. If + * the object is not already a String object, it will be converted + * to one. If the String object does not have a Unicode rep, then + * one is create from the UTF string format. + * + * Results: + * Returns a pointer to the object's internal Unicode string. + * + * Side effects: + * Converts the object to have the String internal rep. + * + *---------------------------------------------------------------------- + */ + +Tcl_UniChar * +Tcl_GetUnicode(objPtr) + Tcl_Obj *objPtr; /* The object to find the unicode string for. */ +{ + String *stringPtr; + + SetStringFromAny(NULL, objPtr); + stringPtr = GET_STRING(objPtr); + +/* if (objPtr->bytes == NULL) { */ +/* printf("called Tcl_GetUnicode with unicode str.\n"); */ +/* } else { */ +/* printf("called Tcl_GetUnicode with str = %s\n", objPtr->bytes); */ +/* } */ + + if ((stringPtr->numChars == -1) || (stringPtr->uallocated == 0)) { + + /* + * We haven't yet calculated the length, or all of the characters + * in the Utf string are 1 byte chars (so we didn't store the + * unicode str). Since this function must return a unicode string, + * and one has not yet been stored, force the Unicode to be + * calculated and stored now. + */ + + FillUnicodeRep(objPtr); + + /* + * We need to fetch the pointer again because we have just + * reallocated the structure to make room for the Unicode data. + */ + + stringPtr = GET_STRING(objPtr); + } + return stringPtr->unicode; +} + +/* + *---------------------------------------------------------------------- + * + * Tcl_GetRange -- + * + * Create a Tcl Object that contains the chars between first and last + * of the object indicated by "objPtr". If the object is not already + * a String object, convert it to one. The first and last indices + * are assumed to be in the appropriate range. + * + * Results: + * Returns a new Tcl Object of the String type. + * + * Side effects: + * Changes the internal rep of "objPtr" to the String type. + * + *---------------------------------------------------------------------- + */ + +Tcl_Obj* +Tcl_GetRange(objPtr, first, last) + + Tcl_Obj *objPtr; /* The Tcl object to find the range of. */ + int first; /* First index of the range. */ + int last; /* Last index of the range. */ +{ + Tcl_Obj *newObjPtr; /* The Tcl object to find the range of. */ + String *stringPtr; + + SetStringFromAny(NULL, objPtr); + stringPtr = GET_STRING(objPtr); + + if (stringPtr->numChars == -1) { + + /* + * We haven't yet calculated the length, so we don't have the + * Unicode str. We need to know the number of chars before we + * can do indexing. + */ + + Tcl_GetCharLength(objPtr); + + /* + * We need to fetch the pointer again because we may have just + * reallocated the structure. + */ + + stringPtr = GET_STRING(objPtr); + } + + if (stringPtr->numChars == objPtr->length) { + char *str = Tcl_GetString(objPtr); + + /* + * All of the characters in the Utf string are 1 byte chars, + * so we don't store the unicode char. Create a new string + * object containing the specified range of chars. + */ + + newObjPtr = Tcl_NewStringObj(&str[first], last-first+1); + + /* + * Since we know the new string only has 1-byte chars, we + * can set it's numChars field. + */ + +/* stringPtr = GET_STRING(newObjPtr); */ +/* stringPtr->numChars = last-first+1; */ + } else { + newObjPtr = Tcl_NewUnicodeObj(stringPtr->unicode + first, + last-first+1); + } + return newObjPtr; +} + +/* *---------------------------------------------------------------------- * * Tcl_SetStringObj -- @@ -237,6 +622,7 @@ Tcl_SetStringObj(objPtr, bytes, length) length = (bytes? strlen(bytes) : 0); } TclInitStringRep(objPtr, bytes, length); +/* printf("called Tcl_SetStringObj with str = %s\n", objPtr->bytes); */ } /* @@ -272,15 +658,23 @@ Tcl_SetObjLength(objPtr, length) * terminating null byte. */ { char *new; + String *stringPtr; if (Tcl_IsShared(objPtr)) { panic("Tcl_SetObjLength called with shared object"); } - if (objPtr->typePtr != &tclStringType) { - ConvertToStringType(objPtr); - } - - if ((long)length > objPtr->internalRep.longValue) { + SetStringFromAny(NULL, objPtr); + + /* + * Invalidate the unicode data. + */ + + stringPtr = GET_STRING(objPtr); + stringPtr->numChars = -1; + stringPtr->uallocated = 0; + + if (length > stringPtr->allocated) { + /* * Not enough space in current string. Reallocate the string * space and free the old string. @@ -290,11 +684,13 @@ Tcl_SetObjLength(objPtr, length) if (objPtr->bytes != NULL) { memcpy((VOID *) new, (VOID *) objPtr->bytes, (size_t) objPtr->length); +/* new[objPtr->length] = 0; */ Tcl_InvalidateStringRep(objPtr); } objPtr->bytes = new; - objPtr->internalRep.longValue = (long) length; + stringPtr->allocated = length; } + objPtr->length = length; if ((objPtr->bytes != NULL) && (objPtr->bytes != tclEmptyStringRep)) { objPtr->bytes[length] = 0; @@ -302,6 +698,60 @@ Tcl_SetObjLength(objPtr, length) } /* + *--------------------------------------------------------------------------- + * + * TclSetUnicodeObj -- + * + * Modify an object to hold the Unicode string indicated by "unicode". + * + * Results: + * None. + * + * Side effects: + * Memory allocated for new "String" internal rep. + * + *--------------------------------------------------------------------------- + */ + +void +Tcl_SetUnicodeObj(objPtr, unicode, numChars) + Tcl_Obj *objPtr; /* The object to set the string of. */ + Tcl_UniChar *unicode; /* The unicode string used to initialize + * the object. */ + int numChars; /* Number of characters in the unicode + * string. */ +{ + Tcl_ObjType *typePtr; + String *stringPtr; + size_t uallocated = (numChars + 1) * sizeof(Tcl_UniChar); + + /* + * Free the internal rep if one exists, and invalidate the string rep. + */ + + typePtr = objPtr->typePtr; + if ((typePtr != NULL) && (typePtr->freeIntRepProc) != NULL) { + (*typePtr->freeIntRepProc)(objPtr); + } + objPtr->typePtr = &tclStringType; + + /* + * Allocate enough space for the String structure + Unicode string. + */ + + stringPtr = (String *) ckalloc(STRING_SIZE(uallocated)); + stringPtr->numChars = numChars; + stringPtr->uallocated = uallocated; + stringPtr->allocated = 0; + memcpy((VOID *) stringPtr->unicode, (VOID *) unicode, + (size_t) (numChars * sizeof(Tcl_UniChar))); + stringPtr->unicode[numChars] = 0; + SET_STRING(objPtr, stringPtr); + Tcl_InvalidateStringRep(objPtr); + return; +} + +/* *---------------------------------------------------------------------- * * Tcl_AppendToObj -- @@ -327,37 +777,106 @@ Tcl_AppendToObj(objPtr, bytes, length) * "bytes". If < 0, then append all bytes * up to NULL byte. */ { - int newLength, oldLength; + String *stringPtr; if (Tcl_IsShared(objPtr)) { panic("Tcl_AppendToObj called with shared object"); } - if (objPtr->typePtr != &tclStringType) { - ConvertToStringType(objPtr); - } + + SetStringFromAny(NULL, objPtr); + if (length < 0) { - length = (bytes? strlen(bytes) : 0); + length = (bytes ? strlen(bytes) : 0); } if (length == 0) { return; } - oldLength = objPtr->length; - newLength = length + oldLength; - if ((long)newLength > objPtr->internalRep.longValue) { - /* - * There isn't currently enough space in the string - * representation so allocate additional space. In fact, - * overallocate so that there is room for future growth without - * having to reallocate again. - */ - Tcl_SetObjLength(objPtr, 2*newLength); + /* + * TEMPORARY!!! This is terribly inefficient, but it works, and Don + * needs for me to check this stuff in ASAP. -Melissa + */ + +/* printf("called Tcl_AppendToObj with str = %s\n", bytes); */ + UpdateStringOfString(objPtr); + AppendUtfToUtfRep(objPtr, bytes, length); + return; + + /* + * If objPtr has a valid Unicode rep, then append the Unicode + * conversion of "bytes" to the objPtr's Unicode rep, otherwise + * append "bytes" to objPtr's string rep. + */ + + stringPtr = GET_STRING(objPtr); + if (stringPtr->allocated > 0) { + AppendUtfToUnicodeRep(objPtr, bytes, length); + + stringPtr = GET_STRING(objPtr); +/* printf(" ended Tcl_AppendToObj with %d unicode chars.\n", */ +/* stringPtr->numChars); */ + } else { + AppendUtfToUtfRep(objPtr, bytes, length); +/* printf(" ended Tcl_AppendToObj with str = %s\n", objPtr->bytes); */ } - if (length > 0) { - memcpy((VOID *) (objPtr->bytes + oldLength), (VOID *) bytes, - (size_t) length); - objPtr->length = newLength; - objPtr->bytes[objPtr->length] = 0; +} + +/* + *---------------------------------------------------------------------- + * + * Tcl_AppendUnicodeToObj -- + * + * This procedure appends a Unicode string to an object in the + * most efficient manner possible. Length must be >= 0. + * + * Results: + * None. + * + * Side effects: + * Invalidates the string rep and creates a new Unicode string. + * + *---------------------------------------------------------------------- + */ + +void +Tcl_AppendUnicodeToObj(objPtr, unicode, length) + register Tcl_Obj *objPtr; /* Points to the object to append to. */ + Tcl_UniChar *unicode; /* The unicode string to append to the + * object. */ + int length; /* Number of chars in "unicode". */ +{ + String *stringPtr; + + if (Tcl_IsShared(objPtr)) { + panic("Tcl_AppendUnicodeToObj called with shared object"); + } + + if (length == 0) { + return; + } + + SetStringFromAny(NULL, objPtr); + + /* + * TEMPORARY!!! This is terribly inefficient, but it works, and Don + * needs for me to check this stuff in ASAP. -Melissa + */ + + UpdateStringOfString(objPtr); + AppendUnicodeToUtfRep(objPtr, unicode, length); + return; + + /* + * If objPtr has a valid Unicode rep, then append the "unicode" + * to the objPtr's Unicode rep, otherwise the UTF conversion of + * "unicode" to objPtr's string rep. + */ + + stringPtr = GET_STRING(objPtr); + if (stringPtr->allocated > 0) { + AppendUnicodeToUnicodeRep(objPtr, unicode, length); + } else { + AppendUnicodeToUtfRep(objPtr, unicode, length); } } @@ -367,6 +886,7 @@ Tcl_AppendToObj(objPtr, bytes, length) * Tcl_AppendObjToObj -- * * This procedure appends the string rep of one object to another. + * "objPtr" cannot be a shared object. * * Results: * None. @@ -383,7 +903,273 @@ Tcl_AppendObjToObj(objPtr, appendObjPtr) Tcl_Obj *objPtr; /* Points to the object to append to. */ Tcl_Obj *appendObjPtr; /* Object to append. */ { - TclAppendObjToUnicodeObj(objPtr, appendObjPtr); + String *stringPtr; + int length; + char *bytes; + + SetStringFromAny(NULL, objPtr); + + /* + * TEMPORARY!!! This is terribly inefficient, but it works, and Don + * needs for me to check this stuff in ASAP. -Melissa + */ + + UpdateStringOfString(objPtr); + bytes = Tcl_GetStringFromObj(appendObjPtr, &length); + AppendUtfToUtfRep(objPtr, bytes, length); + return; + + /* + * If objPtr has a valid Unicode rep, then get a Unicode string + * from appendObjPtr and append it. + */ + + stringPtr = GET_STRING(objPtr); + if (stringPtr->allocated > 0) { + + /* + * If appendObjPtr is not of the "String" type, don't convert it. + */ + + if (appendObjPtr->typePtr == &tclStringType) { + stringPtr = GET_STRING(appendObjPtr); + if ((stringPtr->numChars == -1) + || (stringPtr->uallocated == 0)) { + + /* + * If appendObjPtr is a string obj with no valide Unicode + * rep, then fill its unicode rep. + */ + + FillUnicodeRep(appendObjPtr); + stringPtr = GET_STRING(appendObjPtr); + } + AppendUnicodeToUnicodeRep(objPtr, stringPtr->unicode, + stringPtr->numChars); + } else { + bytes = Tcl_GetStringFromObj(appendObjPtr, &length); + AppendUtfToUnicodeRep(objPtr, bytes, length); + } + return; + } + + /* + * Append to objPtr's UTF string rep. + */ + + bytes = Tcl_GetStringFromObj(appendObjPtr, &length); + AppendUtfToUtfRep(objPtr, bytes, length); +} + +/* + *---------------------------------------------------------------------- + * + * AppendUnicodeToUnicodeRep -- + * + * This procedure appends the contents of "unicode" to the Unicode + * rep of "objPtr". objPtr must already have a valid Unicode rep. + * + * Results: + * None. + * + * Side effects: + * objPtr's internal rep is reallocated. + * + *---------------------------------------------------------------------- + */ + +static void +AppendUnicodeToUnicodeRep(objPtr, unicode, appendNumChars) + Tcl_Obj *objPtr; /* Points to the object to append to. */ + Tcl_UniChar *unicode; /* String to append. */ + int appendNumChars; /* Number of chars of "unicode" to append. */ +{ + String *stringPtr; + int numChars; + size_t newSize; + + if (appendNumChars == 0) { + return; + } + + SetStringFromAny(NULL, objPtr); + stringPtr = GET_STRING(objPtr); + + /* + * Make the buffer big enough for the result. + */ + + numChars = stringPtr->numChars + appendNumChars; + newSize = (numChars + 1) * sizeof(Tcl_UniChar); + + if (newSize > stringPtr->uallocated) { + stringPtr->uallocated = newSize * 2; + stringPtr = (String *) ckrealloc((char*)stringPtr, + STRING_SIZE(stringPtr->uallocated)); + SET_STRING(objPtr, stringPtr); + } + + /* + * Copy the new string onto the end of the old string, then add the + * trailing null. + */ + + memcpy((VOID*) (stringPtr->unicode + stringPtr->numChars), unicode, + appendNumChars * sizeof(Tcl_UniChar)); + stringPtr->unicode[numChars] = 0; + stringPtr->numChars = numChars; + + SET_STRING(objPtr, stringPtr); + Tcl_InvalidateStringRep(objPtr); +} + +/* + *---------------------------------------------------------------------- + * + * AppendUnicodeToUtfRep -- + * + * This procedure converts the contents of "unicode" to UTF and + * appends the UTF to the string rep of "objPtr". + * + * Results: + * None. + * + * Side effects: + * objPtr's internal rep is reallocated. + * + *---------------------------------------------------------------------- + */ + +static void +AppendUnicodeToUtfRep(objPtr, unicode, numChars) + Tcl_Obj *objPtr; /* Points to the object to append to. */ + Tcl_UniChar *unicode; /* String to convert to UTF. */ + int numChars; /* Number of chars of "unicode" to convert. */ +{ + Tcl_DString dsPtr; + int length = numChars * sizeof(Tcl_UniChar); + char *bytes; + + if (numChars == 0) { + return; + } + + Tcl_DStringInit(&dsPtr); + bytes = (char *)Tcl_UniCharToUtfDString(unicode, numChars, &dsPtr); + AppendUtfToUtfRep(objPtr, bytes, Tcl_DStringLength(&dsPtr)); + Tcl_DStringFree(&dsPtr); +} + +/* + *---------------------------------------------------------------------- + * + * AppendUtfToUnicodeRep -- + * + * This procedure converts the contents of "bytes" to Unicode and + * appends the Unicode to the Unicode rep of "objPtr". objPtr must + * already have a valid Unicode rep. + * + * Results: + * None. + * + * Side effects: + * objPtr's internal rep is reallocated. + * + *---------------------------------------------------------------------- + */ + +static void +AppendUtfToUnicodeRep(objPtr, bytes, numBytes) + Tcl_Obj *objPtr; /* Points to the object to append to. */ + char *bytes; /* String to convert to Unicode. */ + int numBytes; /* Number of bytes of "bytes" to convert. */ +{ + Tcl_DString dsPtr; + int numChars; + Tcl_UniChar *unicode; + + if (numBytes < 0) { + numBytes = (bytes ? strlen(bytes) : 0); + } + if (numBytes == 0) { + return; + } + + Tcl_DStringInit(&dsPtr); + numChars = Tcl_NumUtfChars(bytes, numBytes); + unicode = (Tcl_UniChar *)Tcl_UtfToUniCharDString(bytes, numBytes, &dsPtr); + AppendUnicodeToUnicodeRep(objPtr, unicode, numChars); + Tcl_DStringFree(&dsPtr); +} + +/* + *---------------------------------------------------------------------- + * + * AppendUtfToUtfRep -- + * + * This procedure appends "numBytes" bytes of "bytes" to the UTF string + * rep of "objPtr". objPtr must already have a valid String rep. + * + * Results: + * None. + * + * Side effects: + * objPtr's internal rep is reallocated. + * + *---------------------------------------------------------------------- + */ + +static void +AppendUtfToUtfRep(objPtr, bytes, numBytes) + Tcl_Obj *objPtr; /* Points to the object to append to. */ + char *bytes; /* String to append. */ + int numBytes; /* Number of bytes of "bytes" to append. */ +{ + String *stringPtr; + int newLength, oldLength; + + if (numBytes < 0) { + numBytes = (bytes ? strlen(bytes) : 0); + } + if (numBytes == 0) { + return; + } + + /* + * Copy the new string onto the end of the old string, then add the + * trailing null. + */ + + oldLength = objPtr->length; + newLength = numBytes + oldLength; + + stringPtr = GET_STRING(objPtr); + if (newLength > stringPtr->allocated) { + + /* + * There isn't currently enough space in the string + * representation so allocate additional space. If the current + * string representation isn't empty (i.e. it looks like we're + * doing a series of appends) then overallocate the space so + * that we won't have to do as much reallocation in the future. + */ + + Tcl_SetObjLength(objPtr, + (oldLength == 0) ? newLength : 2*newLength); + } else { + + /* + * Invalidate the unicode data. + */ + + stringPtr->numChars = -1; + stringPtr->uallocated = 0; + } + + memcpy((VOID *) (objPtr->bytes + oldLength), (VOID *) bytes, + (size_t) numBytes); + objPtr->bytes[newLength] = 0; + objPtr->length = newLength; } /* @@ -409,6 +1195,7 @@ Tcl_AppendStringsToObjVA (objPtr, argList) Tcl_Obj *objPtr; /* Points to the object to append to. */ va_list argList; /* Variable argument list. */ { + String *stringPtr; va_list tmpArgList; int newLength, oldLength; register char *string, *dst; @@ -416,9 +1203,8 @@ Tcl_AppendStringsToObjVA (objPtr, argList) if (Tcl_IsShared(objPtr)) { panic("Tcl_AppendStringsToObj called with shared object"); } - if (objPtr->typePtr != &tclStringType) { - ConvertToStringType(objPtr); - } + + SetStringFromAny(NULL, objPtr); /* * Figure out how much space is needed for all the strings, and @@ -440,7 +1226,9 @@ Tcl_AppendStringsToObjVA (objPtr, argList) return; } - if ((long)newLength > objPtr->internalRep.longValue) { + stringPtr = GET_STRING(objPtr); + if (newLength > stringPtr->allocated) { + /* * There isn't currently enough space in the string * representation so allocate additional space. If the current @@ -514,45 +1302,64 @@ Tcl_AppendStringsToObj TCL_VARARGS_DEF(Tcl_Obj *,arg1) } /* - *---------------------------------------------------------------------- + *--------------------------------------------------------------------------- * - * ConvertToStringType -- + * FillUnicodeRep -- * - * This procedure converts the internal representation of an object - * to "expandable string" type. + * Populate the Unicode internal rep with the Unicode form of its string + * rep. The object must alread have a "String" internal rep. * * Results: * None. * * Side effects: - * Any old internal reputation for objPtr is freed and the - * internal representation is set to that for an expandable string - * (the field internalRep.longValue holds 1 less than the allocated - * length of objPtr's string representation). + * Reallocates the String internal rep. * - *---------------------------------------------------------------------- + *--------------------------------------------------------------------------- */ static void -ConvertToStringType(objPtr) - register Tcl_Obj *objPtr; /* Pointer to object. Must have a - * typePtr that isn't &tclStringType. */ +FillUnicodeRep(objPtr) + Tcl_Obj *objPtr; /* The object in which to fill the unicode rep. */ { - if (objPtr->typePtr != NULL) { - if (objPtr->bytes == NULL) { - objPtr->typePtr->updateStringProc(objPtr); - } - if (objPtr->typePtr->freeIntRepProc != NULL) { - objPtr->typePtr->freeIntRepProc(objPtr); - } + String *stringPtr; + size_t uallocated; + char *src, *srcEnd; + Tcl_UniChar *dst; + src = objPtr->bytes; + + stringPtr = GET_STRING(objPtr); + if (stringPtr->numChars == -1) { + stringPtr->numChars = Tcl_NumUtfChars(src, objPtr->length); } - objPtr->typePtr = &tclStringType; - if (objPtr->bytes != NULL) { - objPtr->internalRep.longValue = (long)objPtr->length; - } else { - objPtr->internalRep.longValue = 0; - objPtr->length = 0; + + uallocated = stringPtr->numChars * sizeof(Tcl_UniChar); + if (uallocated > stringPtr->uallocated) { + + /* + * If not enought space has been allocated for the unicode rep, + * reallocate the internal rep object with double the amount of + * space needed, so the unicode string can grow without being + * reallocated. + */ + + uallocated *= 2; + stringPtr = (String *) ckrealloc((char*) stringPtr, + STRING_SIZE(uallocated)); + stringPtr->uallocated = uallocated; + } + + /* + * Convert src to Unicode and store the coverted data in "unicode". + */ + + srcEnd = src + objPtr->length; + for (dst = stringPtr->unicode; src < srcEnd; dst++) { + src += Tcl_UtfToUniChar(src, dst); } + *dst = 0; + + SET_STRING(objPtr, stringPtr); } /* @@ -581,13 +1388,40 @@ DupStringInternalRep(srcPtr, copyPtr) register Tcl_Obj *copyPtr; /* Object with internal rep to set. Must * not currently have an internal rep.*/ { + String *srcStringPtr = GET_STRING(srcPtr); + String *copyStringPtr; + + /* + * If the src obj is a string of 1-byte Utf chars, then copy the + * string rep of the source object and create an "empty" Unicode + * internal rep for the new object. Otherwise, copy Unicode + * internal rep, and invalidate the string rep of the new object. + */ + + if (srcStringPtr->numChars == srcPtr->length) { + copyStringPtr = (String *) ckalloc(STRING_SIZE(0)); + copyStringPtr->uallocated = 0; + } else { + copyStringPtr = (String *) ckalloc( + STRING_SIZE(srcStringPtr->uallocated)); + copyStringPtr->uallocated = srcStringPtr->uallocated; + + memcpy((VOID *) copyStringPtr->unicode, + (VOID *) srcStringPtr->unicode, + (size_t) srcStringPtr->numChars * sizeof(Tcl_UniChar)); + copyStringPtr->unicode[srcStringPtr->numChars] = 0; + } + copyStringPtr->numChars = srcStringPtr->numChars; + /* * Tricky point: the string value was copied by generic object * management code, so it doesn't contain any extra bytes that * might exist in the source object. */ - copyPtr->internalRep.longValue = (long)copyPtr->length; + copyStringPtr->allocated = copyPtr->length; + + SET_STRING(copyPtr, copyStringPtr); copyPtr->typePtr = &tclStringType; } @@ -596,15 +1430,14 @@ DupStringInternalRep(srcPtr, copyPtr) * * SetStringFromAny -- * - * Create an internal representation of type "expandable string" - * for an object. + * Create an internal representation of type "String" for an object. * * Results: * This operation always succeeds and returns TCL_OK. * * Side effects: - * This procedure does nothing; there is no advantage in converting - * the internal representation now, so we just defer it. + * Any old internal reputation for objPtr is freed and the + * internal representation is set to "String". * *---------------------------------------------------------------------- */ @@ -614,6 +1447,42 @@ SetStringFromAny(interp, objPtr) Tcl_Interp *interp; /* Used for error reporting if not NULL. */ Tcl_Obj *objPtr; /* The object to convert. */ { + String *stringPtr; + + /* + * The Unicode object is opitmized for the case where each UTF char + * in a string is only one byte. In this case, we store the value of + * numChars, but we don't copy the bytes to the unicodeObj->unicode. + */ + + if (objPtr->typePtr != &tclStringType) { + + if (objPtr->typePtr != NULL) { + if (objPtr->bytes == NULL) { + objPtr->typePtr->updateStringProc(objPtr); + } + if ((objPtr->typePtr->freeIntRepProc) != NULL) { + (*objPtr->typePtr->freeIntRepProc)(objPtr); + } + } + objPtr->typePtr = &tclStringType; + + /* + * Allocate enough space for the basic String structure. + */ + + stringPtr = (String *) ckalloc(STRING_SIZE(0)); + stringPtr->numChars = -1; + stringPtr->uallocated = 0; + + if (objPtr->bytes != NULL) { + stringPtr->allocated = objPtr->length; + objPtr->bytes[objPtr->length] = 0; + } else { + objPtr->length = 0; + } + SET_STRING(objPtr, stringPtr); + } return TCL_OK; } @@ -623,13 +1492,14 @@ SetStringFromAny(interp, objPtr) * UpdateStringOfString -- * * Update the string representation for an object whose internal - * representation is "expandable string". + * representation is "String". * * Results: * None. * * Side effects: - * None. + * The object's string may be set by converting its Unicode + * represention to UTF format. * *---------------------------------------------------------------------- */ @@ -638,16 +1508,73 @@ static void UpdateStringOfString(objPtr) Tcl_Obj *objPtr; /* Object with string rep to update. */ { - /* - * The string is almost always valid already, in which case there's - * nothing for us to do. The only case we have to worry about is if - * the object is totally null. In this case, set the string rep to - * an empty string. - */ + int i, length, size; + Tcl_UniChar *unicode; + char dummy[TCL_UTF_MAX]; + char *dst; + String *stringPtr; - if (objPtr->bytes == NULL) { - objPtr->bytes = tclEmptyStringRep; - objPtr->length = 0; + stringPtr = GET_STRING(objPtr); + if ((objPtr->bytes == NULL) || (stringPtr->allocated == 0)) { + + if (stringPtr->numChars <= 0) { + + /* + * If there is no Unicode rep, or the string has 0 chars, + * then set the string rep to an empty string. + */ + + objPtr->bytes = tclEmptyStringRep; + objPtr->length = 0; + return; + } + + unicode = stringPtr->unicode; + length = stringPtr->numChars * sizeof(Tcl_UniChar); + + /* + * Translate the Unicode string to UTF. "size" will hold the + * amount of space the UTF string needs. + */ + + size = 0; + for (i = 0; i < stringPtr->numChars; i++) { + size += Tcl_UniCharToUtf((int) unicode[i], dummy); + } + + dst = (char *) ckalloc((unsigned) (size + 1)); + objPtr->bytes = dst; + objPtr->length = size; + stringPtr->allocated = size; + + for (i = 0; i < stringPtr->numChars; i++) { + dst += Tcl_UniCharToUtf(unicode[i], dst); + } + *dst = '\0'; } return; } + +/* + *---------------------------------------------------------------------- + * + * FreeStringInternalRep -- + * + * Deallocate the storage associated with a String data object's + * internal representation. + * + * Results: + * None. + * + * Side effects: + * Frees memory. + * + *---------------------------------------------------------------------- + */ + +static void +FreeStringInternalRep(objPtr) + Tcl_Obj *objPtr; /* Object with internal rep to free. */ +{ + ckfree((char *) GET_STRING(objPtr)); +} diff --git a/generic/tclStubInit.c b/generic/tclStubInit.c index 9a3b4f3..a6f7e45 100644 --- a/generic/tclStubInit.c +++ b/generic/tclStubInit.c @@ -8,7 +8,7 @@ * See the file "license.terms" for information on usage and redistribution * of this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclStubInit.c,v 1.17 1999/06/10 04:28:51 stanton Exp $ + * RCS: @(#) $Id: tclStubInit.c,v 1.18 1999/06/15 01:16:25 hershey Exp $ */ #include "tclInt.h" @@ -758,6 +758,13 @@ TclStubs tclStubs = { Tcl_UniCharIsPunct, /* 375 */ Tcl_RegExpMatchObj, /* 376 */ Tcl_RegExpGetInfo, /* 377 */ + Tcl_NewUnicodeObj, /* 378 */ + Tcl_SetUnicodeObj, /* 379 */ + Tcl_GetCharLength, /* 380 */ + Tcl_GetUniChar, /* 381 */ + Tcl_GetUnicode, /* 382 */ + Tcl_GetRange, /* 383 */ + Tcl_AppendUnicodeToObj, /* 384 */ }; /* !END!: Do not edit above this line. */ diff --git a/generic/tclUnicodeObj.c b/generic/tclUnicodeObj.c deleted file mode 100644 index 1809b20..0000000 --- a/generic/tclUnicodeObj.c +++ /dev/null @@ -1,882 +0,0 @@ -/* - * tclUnicodeObj.c -- - * - * This file contains the implementation of the Unicode internal - * representation of Tcl objects. - * - * Copyright (c) 1999 by Scriptics Corporation. - * - * See the file "license.terms" for information on usage and redistribution - * of this file, and for a DISCLAIMER OF ALL WARRANTIES. - * - * RCS: @(#) $Id: tclUnicodeObj.c,v 1.6 1999/06/10 19:14:54 hershey Exp $ - */ - -#include <math.h> -#include "tclInt.h" -#include "tclPort.h" - -/* - * Prototypes for local procedures defined in this file: - */ - -static int AllSingleByteChars _ANSI_ARGS_((Tcl_Obj *objPtr)); -static void AppendUniCharStrToObj _ANSI_ARGS_((Tcl_Obj *objPtr, - Tcl_UniChar *unichars, int numNewChars)); -static void DupUnicodeInternalRep _ANSI_ARGS_((Tcl_Obj *srcPtr, - Tcl_Obj *copyPtr)); -static void FreeUnicodeInternalRep _ANSI_ARGS_((Tcl_Obj *objPtr)); -static void UpdateStringOfUnicode _ANSI_ARGS_((Tcl_Obj *objPtr)); -static void SetOptUnicodeFromAny _ANSI_ARGS_((Tcl_Obj *objPtr, - int numChars)); -static void SetFullUnicodeFromAny _ANSI_ARGS_((Tcl_Obj *objPtr, - char *src, int numBytes, int numChars)); -static int SetUnicodeFromAny _ANSI_ARGS_((Tcl_Interp *interp, - Tcl_Obj *objPtr)); - -/* - * The following object type represents a Unicode string. A Unicode string - * is an internationalized string. Conceptually, a Unicode string is an - * array of 16-bit quantities organized as a sequence of properly formed - * UTF-8 characters. There is a one-to-one map between Unicode and UTF - * characters. The Unicode ojbect is opitmized for the case where each UTF - * char in a string is only one byte. In this case, we store the value of - * numChars, but we don't copy the bytes to the unicodeObj->chars. Before - * accessing obj->chars, check if unicodeObj->numChars == obj->length. - */ - -Tcl_ObjType tclUnicodeType = { - "unicode", - FreeUnicodeInternalRep, - DupUnicodeInternalRep, - UpdateStringOfUnicode, - SetUnicodeFromAny -}; - -/* - * The following structure is the internal rep for a Unicode object. - * Keeps track of how much memory has been used and how much has been - * allocated for the Unicode to enable growing and shrinking of the - * Unicode object with fewer mallocs. - */ - -typedef struct Unicode { - int numChars; /* The number of chars in the unicode - * string. */ - size_t allocated; /* The amount of space actually allocated. */ - Tcl_UniChar chars[2]; /* The array of chars. The actual size of - * this field depends on the 'allocated' field - * above. */ -} Unicode; - -#define UNICODE_SIZE(len) \ - ((unsigned) (sizeof(Unicode) - (sizeof(Tcl_UniChar)*2) + (len))) -#define GET_UNICODE(objPtr) \ - ((Unicode *) (objPtr)->internalRep.otherValuePtr) -#define SET_UNICODE(objPtr, unicodePtr) \ - (objPtr)->internalRep.otherValuePtr = (VOID *) (unicodePtr) - - -/* - *---------------------------------------------------------------------- - * - * TclGetUnicodeFromObj -- - * - * Get the index'th Unicode character from the Unicode object. If - * the object is not already a Unicode object, an attempt will be - * made to convert it to one. The index is assumed to be in the - * appropriate range. - * - * Results: - * Returns a pointer to the object's internal unicode string. - * - * Side effects: - * Converts the object to have the Unicode internal rep. - * - *---------------------------------------------------------------------- - */ - -Tcl_UniChar * -TclGetUnicodeFromObj(objPtr) - Tcl_Obj *objPtr; /* The object to find the unicode string for. */ -{ - Unicode *unicodePtr; - int numBytes; - char *src; - - SetUnicodeFromAny(NULL, objPtr); - unicodePtr = GET_UNICODE(objPtr); - - if (unicodePtr->allocated == 0) { - - /* - * If all of the characters in the Utf string are 1 byte chars, - * we don't normally store the unicode str. Since this - * function must return a unicode string, and one has not yet - * been stored, force the Unicode to be calculated and stored - * now. - */ - - src = Tcl_GetStringFromObj(objPtr, &numBytes); - SetFullUnicodeFromAny(objPtr, src, numBytes, unicodePtr->numChars); - - /* - * We need to fetch the pointer again because we have just - * reallocated the structure to make room for the Unicode data. - */ - - unicodePtr = GET_UNICODE(objPtr); - } - return unicodePtr->chars; -} - -/* - *---------------------------------------------------------------------- - * - * TclGetUnicodeLengthFromObj -- - * - * Get the length of the Unicode string from the Tcl object. If - * the object is not already a Unicode object, an attempt will be - * made to convert it to one. - * - * Results: - * Pointer to unicode string representing the unicode object. - * - * Side effects: - * Frees old internal rep. Allocates memory for new internal rep. - * - *---------------------------------------------------------------------- - */ - -int -TclGetUnicodeLengthFromObj(objPtr) - Tcl_Obj *objPtr; /* The Unicode object. */ -{ - int length; - Unicode *unicodePtr; - - SetUnicodeFromAny(NULL, objPtr); - unicodePtr = GET_UNICODE(objPtr); - - length = unicodePtr->numChars; - return length; -} - -/* - *---------------------------------------------------------------------- - * - * TclGetUniCharFromObj -- - * - * Get the index'th Unicode character from the Unicode object. If - * the object is not already a Unicode object, an attempt will be - * made to convert it to one. The index is assumed to be in the - * appropriate range. - * - * Results: - * Returns the index'th Unicode character in the Object. - * - * Side effects: - * Fills unichar with the index'th Unicode character. - * - *---------------------------------------------------------------------- - */ - -Tcl_UniChar -TclGetUniCharFromObj(objPtr, index) - Tcl_Obj *objPtr; /* The Unicode object. */ - int index; /* Get the index'th character. */ -{ - Tcl_UniChar unichar; - Unicode *unicodePtr; - int length; - - SetUnicodeFromAny(NULL, objPtr); - unicodePtr = GET_UNICODE(objPtr); - length = objPtr->length; - - if (AllSingleByteChars(objPtr)) { - int length; - char *str; - - /* - * All of the characters in the Utf string are 1 byte chars, - * so we don't store the unicode char. We get the Utf string - * and convert the index'th byte to a Unicode character. - */ - - str = Tcl_GetStringFromObj(objPtr, &length); - Tcl_UtfToUniChar(&str[index], &unichar); - } else { - unichar = unicodePtr->chars[index]; - } - return unichar; -} - -/* - *---------------------------------------------------------------------- - * - * TclGetRangeFromObj -- - * - * Create a Tcl Object that contains the chars between first and last - * of the object indicated by "objPtr". If the object is not already - * a Unicode object, an attempt will be made to convert it to one. - * The first and last indices are assumed to be in the appropriate - * range. - * - * Results: - * Returns a new Tcl Object of either "string" or "unicode" type, - * containing the range of chars. - * - * Side effects: - * Changes the internal rep of "objPtr" to unicode. - * - *---------------------------------------------------------------------- - */ - -Tcl_Obj* -TclGetRangeFromObj(objPtr, first, last) - - Tcl_Obj *objPtr; /* The Tcl object to find the range of. */ - int first; /* First index of the range. */ - int last; /* Last index of the range. */ -{ - Tcl_Obj *newObjPtr; /* The Tcl object to find the range of. */ - Unicode *unicodePtr; - int length; - - SetUnicodeFromAny(NULL, objPtr); - unicodePtr = GET_UNICODE(objPtr); - length = objPtr->length; - - if (unicodePtr->numChars != length) { - newObjPtr = TclNewUnicodeObj(unicodePtr->chars + first, last-first+1); - } else { - int length; - char *str; - - /* - * All of the characters in the Utf string are 1 byte chars, - * so we don't store the unicode char. Create a new string - * object containing the specified range of chars. - */ - - str = Tcl_GetStringFromObj(objPtr, &length); - newObjPtr = Tcl_NewStringObj(&str[first], last-first+1); - } - return newObjPtr; -} - -/* - *---------------------------------------------------------------------- - * - * TclAppendObjToUnicodeObj -- - * - * This procedure appends the contents of "srcObjPtr" to the Unicode - * object "destPtr". - * - * Results: - * None. - * - * Side effects: - * If srcObjPtr doesn't have an internal rep, then it is given a - * Unicode internal rep. - * - *---------------------------------------------------------------------- - */ - -Tcl_Obj * -TclAppendObjToUnicodeObj(targetObjPtr, srcObjPtr) - register Tcl_Obj *targetObjPtr; /* Points to the object to - * append to. */ - register Tcl_Obj *srcObjPtr; /* Points to the object to - * append from. */ -{ - int numBytes, numChars; - Tcl_Obj *resultObjPtr; - char *utfSrcStr; - Tcl_UniChar *unicharSrcStr; - Unicode *unicodePtr; - Tcl_DString dsPtr; - - /* - * Duplicate the target if it is shared. - * Change the result's internal rep to Unicode object. - */ - - if (Tcl_IsShared(targetObjPtr)) { - resultObjPtr = Tcl_DuplicateObj(targetObjPtr); - } else { - resultObjPtr = targetObjPtr; - } - SetUnicodeFromAny(NULL, resultObjPtr); - - /* - * Case where target chars are 1 byte long: - * If src obj is of "string" or null type, then convert it to "unicode" - * type. Src objs of other types (such as int) are left in tact to keep - * them from shimmering between types. If the src obj is a unichar obj, - * and all src chars are also 1 byte long, the src string is appended to - * the target "unicode" obj, and the target obj maintains its "optimized" - * status. - */ - - if (AllSingleByteChars(resultObjPtr)) { - - int length; - char *stringRep; - - if (srcObjPtr->typePtr == &tclStringType - || srcObjPtr->typePtr == NULL) { - SetUnicodeFromAny(NULL, srcObjPtr); - } - - stringRep = Tcl_GetStringFromObj(srcObjPtr, &length); - Tcl_AppendToObj(resultObjPtr, stringRep, length); - - if ((srcObjPtr->typePtr == &tclUnicodeType) - && (AllSingleByteChars(srcObjPtr))) { - SetOptUnicodeFromAny(resultObjPtr, resultObjPtr->length); - } - return resultObjPtr; - } - - /* - * Extract a unicode string from "unicode" or "string" type objects. - * Extract the utf string from non-unicode objects, and convert the - * utf string to unichar string locally. - * If the src obj is a "string" obj, convert it to "unicode" type. - * Src objs of other types (such as int) are left in tact to keep - * them from shimmering between types. - */ - - Tcl_DStringInit(&dsPtr); - if (srcObjPtr->typePtr == &tclStringType || srcObjPtr->typePtr == NULL) { - SetUnicodeFromAny(NULL, srcObjPtr); - } - if (srcObjPtr->typePtr == &tclUnicodeType) { - if (AllSingleByteChars(srcObjPtr)) { - - unicodePtr = GET_UNICODE(srcObjPtr); - numChars = unicodePtr->numChars; - - utfSrcStr = Tcl_GetStringFromObj(srcObjPtr, &numBytes); - unicharSrcStr = (Tcl_UniChar *)Tcl_UtfToUniCharDString(utfSrcStr, - numBytes, &dsPtr); - } else { - unicodePtr = GET_UNICODE(srcObjPtr); - numChars = unicodePtr->numChars; - unicharSrcStr = unicodePtr->chars; - } - } else { - utfSrcStr = Tcl_GetStringFromObj(srcObjPtr, &numBytes); - numChars = Tcl_NumUtfChars(utfSrcStr, numBytes); - unicharSrcStr = (Tcl_UniChar *)Tcl_UtfToUniCharDString(utfSrcStr, - numBytes, &dsPtr); - } - if (numChars == 0) { - return resultObjPtr; - } - - /* - * Append the unichar src string to the result object. - */ - - AppendUniCharStrToObj(resultObjPtr, unicharSrcStr, numChars); - Tcl_DStringFree(&dsPtr); - return resultObjPtr; -} - -/* - *---------------------------------------------------------------------- - * - * AppendUniCharStrToObj -- - * - * This procedure appends the contents of "srcObjPtr" to the - * Unicode object "objPtr". - * - * Results: - * None. - * - * Side effects: - * If srcObjPtr doesn't have an internal rep, then it is given a - * Unicode internal rep. - * - *---------------------------------------------------------------------- - */ - -static void -AppendUniCharStrToObj(objPtr, unichars, numNewChars) - register Tcl_Obj *objPtr; /* Points to the object to append to. */ - Tcl_UniChar *unichars; /* The unicode string to append to the - * object. */ - int numNewChars; /* Number of chars in "unichars". */ -{ - Unicode *unicodePtr; - int numChars; - size_t numBytes; - - SetUnicodeFromAny(NULL, objPtr); - unicodePtr = GET_UNICODE(objPtr); - - numChars = numNewChars + unicodePtr->numChars; - numBytes = (numChars + 1) * sizeof(Tcl_UniChar); - - if (unicodePtr->allocated < numBytes) { - int allocatedBytes = numBytes * 2; - - /* - * There isn't currently enough space in the Unicode - * representation so allocate additional space. In fact, - * overallocate so that there is room for future growth without - * having to reallocate again. - */ - - unicodePtr = (Unicode *) ckrealloc((char*) unicodePtr, - UNICODE_SIZE(allocatedBytes)); - unicodePtr->allocated = allocatedBytes; - unicodePtr = SET_UNICODE(objPtr, unicodePtr); - } - memcpy((VOID *) (unicodePtr->chars + unicodePtr->numChars), - (VOID *) unichars, (size_t) numNewChars * sizeof(Tcl_UniChar)); - unicodePtr->chars[numChars] = 0; - unicodePtr->numChars = numChars; - - /* - * Invalidate the StringRep. - */ - - Tcl_InvalidateStringRep(objPtr); -} - -/* - *---------------------------------------------------------------------- - * - * TclAppendUnicodeToObj -- - * - * This procedure appends a Unicode string to an object in the - * most efficient manner possible. - * - * Results: - * None. - * - * Side effects: - * Invalidates the string rep and creates a new Unicode string. - * - *---------------------------------------------------------------------- - */ - -void -TclAppendUnicodeToObj(objPtr, unichars, length) - register Tcl_Obj *objPtr; /* Points to the object to append to. */ - Tcl_UniChar *unichars; /* The unicode string to append to the - * object. */ - int length; /* Number of chars in "unichars". */ -{ - Unicode *unicodePtr; - int numChars, i; - size_t newSize; - char *src; - Tcl_UniChar *dst; - - if (Tcl_IsShared(objPtr)) { - panic("TclAppendUnicodeToObj called with shared object"); - } - - SetUnicodeFromAny(NULL, objPtr); - unicodePtr = GET_UNICODE(objPtr); - - /* - * Make the buffer big enough for the result. - */ - - numChars = unicodePtr->numChars + length; - newSize = (numChars + 1) * sizeof(Tcl_UniChar); - - if (newSize > unicodePtr->allocated) { - int allocated = newSize * 2; - - unicodePtr = (Unicode *) ckrealloc((char*)unicodePtr, - UNICODE_SIZE(allocated)); - - if (unicodePtr->allocated == 0) { - /* - * If the original string was not in Unicode form, add it to the - * beginning of the buffer. - */ - - src = objPtr->bytes; - dst = unicodePtr->chars; - for (i = 0; i < unicodePtr->numChars; i++) { - src += Tcl_UtfToUniChar(src, dst++); - } - } - unicodePtr->allocated = allocated; - } - - /* - * Copy the new string onto the end of the old string, then add the - * trailing null. - */ - - memcpy((VOID*) (unicodePtr->chars + unicodePtr->numChars), unichars, - length * sizeof(Tcl_UniChar)); - unicodePtr->numChars = numChars; - unicodePtr->chars[numChars] = 0; - - SET_UNICODE(objPtr, unicodePtr); - - Tcl_InvalidateStringRep(objPtr); -} - -/* - *--------------------------------------------------------------------------- - * - * TclNewUnicodeObj -- - * - * This procedure is creates a new Unicode object and initializes - * it from the given Utf String. If the Utf String is the same size - * as the Unicode string, don't duplicate the data. - * - * Results: - * The newly created object is returned. This object will have no - * initial string representation. The returned object has a ref count - * of 0. - * - * Side effects: - * Memory allocated for new object and copy of Unicode argument. - * - *--------------------------------------------------------------------------- - */ - -Tcl_Obj * -TclNewUnicodeObj(unichars, numChars) - Tcl_UniChar *unichars; /* The unicode string used to initialize - * the new object. */ - int numChars; /* Number of characters in the unicode - * string. */ -{ - Tcl_Obj *objPtr; - Unicode *unicodePtr; - int numBytes, allocated; - - numBytes = numChars * sizeof(Tcl_UniChar); - - /* - * Allocate extra space for the null character - */ - - allocated = numBytes + sizeof(Tcl_UniChar); - - TclNewObj(objPtr); - objPtr->bytes = NULL; - objPtr->typePtr = &tclUnicodeType; - - unicodePtr = (Unicode *) ckalloc(UNICODE_SIZE(allocated)); - unicodePtr->numChars = numChars; - unicodePtr->allocated = allocated; - memcpy((VOID *) unicodePtr->chars, (VOID *) unichars, (size_t) numBytes); - unicodePtr->chars[numChars] = 0; - SET_UNICODE(objPtr, unicodePtr); - return objPtr; -} - -/* - *--------------------------------------------------------------------------- - * - * TclAllSingleByteChars -- - * - * Initialize the internal representation of a Unicode Tcl_Obj - * to a copy of the internal representation of an existing Unicode - * object. - * - * Results: - * None. - * - * Side effects: - * Allocates memory. - * - *--------------------------------------------------------------------------- - */ - -static int -AllSingleByteChars(objPtr) - Tcl_Obj *objPtr; /* Object whose char lengths to check. */ -{ - Unicode *unicodePtr; - int numBytes, numChars; - - unicodePtr = GET_UNICODE(objPtr); - numChars = unicodePtr->numChars; - numBytes = objPtr->length; - - if (numChars == numBytes) { - return 1; - } else { - return 0; - } -} - -/* - *--------------------------------------------------------------------------- - * - * DupUnicodeInternalRep -- - * - * Initialize the internal representation of a Unicode Tcl_Obj - * to a copy of the internal representation of an existing Unicode - * object. - * - * Results: - * None. - * - * Side effects: - * Allocates memory. - * - *--------------------------------------------------------------------------- - */ - -static void -DupUnicodeInternalRep(srcPtr, copyPtr) - Tcl_Obj *srcPtr; /* Object with internal rep to copy. */ - Tcl_Obj *copyPtr; /* Object with internal rep to set. */ -{ - Unicode *srcUnicodePtr = GET_UNICODE(srcPtr); - Unicode *copyUnicodePtr; - - /* - * If the src obj is a string of 1-byte Utf chars, then copy the - * string rep of the source object and create an "empty" Unicode - * internal rep for the new object. Otherwise, copy Unicode - * internal rep, and invalidate the string rep of the new object. - */ - - if (srcUnicodePtr->numChars == srcPtr->length) { - copyUnicodePtr = (Unicode *) ckalloc(sizeof(Unicode)); - copyUnicodePtr->allocated = 0; - } else { - int allocated = srcUnicodePtr->allocated; - - copyUnicodePtr = (Unicode *) ckalloc(UNICODE_SIZE(allocated)); - - copyUnicodePtr->allocated = allocated; - memcpy((VOID *) copyUnicodePtr->chars, - (VOID *) srcUnicodePtr->chars, - (size_t) (srcUnicodePtr->numChars + 1) * sizeof(Tcl_UniChar)); - } - copyUnicodePtr->numChars = srcUnicodePtr->numChars; - SET_UNICODE(copyPtr, copyUnicodePtr); - copyPtr->typePtr = &tclUnicodeType; -} - -/* - *--------------------------------------------------------------------------- - * - * UpdateStringOfUnicode -- - * - * Update the string representation for a Unicode data object. - * Note: This procedure does not invalidate an existing old string rep - * so storage will be lost if this has not already been done. - * - * Results: - * None. - * - * Side effects: - * The object's string is set to a valid string that results from - * the Unicode-to-string conversion. - * - * The object becomes a string object -- the internal rep is - * discarded and the typePtr becomes NULL. - * - *--------------------------------------------------------------------------- - */ - -static void -UpdateStringOfUnicode(objPtr) - Tcl_Obj *objPtr; /* Unicode object whose string rep to - * update. */ -{ - int i, length, size; - Tcl_UniChar *src; - char dummy[TCL_UTF_MAX]; - char *dst; - Unicode *unicodePtr; - - unicodePtr = GET_UNICODE(objPtr); - src = unicodePtr->chars; - length = unicodePtr->numChars * sizeof(Tcl_UniChar); - - /* - * How much space will string rep need? - */ - - size = 0; - for (i = 0; i < unicodePtr->numChars; i++) { - size += Tcl_UniCharToUtf((int) src[i], dummy); - } - - dst = (char *) ckalloc((unsigned) (size + 1)); - objPtr->bytes = dst; - objPtr->length = size; - - for (i = 0; i < unicodePtr->numChars; i++) { - dst += Tcl_UniCharToUtf(src[i], dst); - } - *dst = '\0'; -} - -/* - *--------------------------------------------------------------------------- - * - * SetOptUnicodeFromAny -- - * - * Generate the optimized Unicode internal rep from the string rep. - * - * Results: - * None. - * - * Side effects: - * The Unicode ojbect is opitmized for the case where each UTF char in - * a string is only one byte. In this case, we store the value of - * numChars, but we don't copy the bytes to the unicodeObj->chars. - * Before accessing obj->chars, check if all chars are 1 byte long. - * - *--------------------------------------------------------------------------- - */ - -static void -SetOptUnicodeFromAny(objPtr, numChars) - Tcl_Obj *objPtr; /* The object to convert to type Unicode. */ - int numChars; -{ - Tcl_ObjType *typePtr; - Unicode *unicodePtr; - - typePtr = objPtr->typePtr; - if ((typePtr != NULL) && (typePtr->freeIntRepProc) != NULL) { - (*typePtr->freeIntRepProc)(objPtr); - } - objPtr->typePtr = &tclUnicodeType; - - /* - * Allocate enough space for the basic Unicode structure. - */ - - unicodePtr = (Unicode *) ckalloc(sizeof(Unicode)); - unicodePtr->numChars = numChars; - unicodePtr->allocated = 0; - SET_UNICODE(objPtr, unicodePtr); -} - -/* - *--------------------------------------------------------------------------- - * - * SetFullUnicodeFromAny -- - * - * Generate the full (non-optimized) Unicode internal rep from the - * string rep. - * - * Results: - * None. - * - * Side effects: - * The Unicode internal rep will contain a copy of the string "src" in - * unicode format. - * - *--------------------------------------------------------------------------- - */ - -static void -SetFullUnicodeFromAny(objPtr, src, numBytes, numChars) - Tcl_Obj *objPtr; /* The object to convert to type Unicode. */ - char *src; - int numBytes; - int numChars; -{ - Tcl_ObjType *typePtr; - Unicode *unicodePtr; - char *srcEnd; - Tcl_UniChar *dst; - size_t length = (numChars + 1) * sizeof(Tcl_UniChar); - - unicodePtr = (Unicode *) ckalloc(UNICODE_SIZE(length)); - srcEnd = src + numBytes; - - for (dst = unicodePtr->chars; src < srcEnd; dst++) { - src += Tcl_UtfToUniChar(src, dst); - } - *dst = 0; - - unicodePtr->numChars = numChars; - unicodePtr->allocated = length; - - typePtr = objPtr->typePtr; - if ((typePtr != NULL) && (typePtr->freeIntRepProc) != NULL) { - (*typePtr->freeIntRepProc)(objPtr); - } - objPtr->typePtr = &tclUnicodeType; - SET_UNICODE(objPtr, unicodePtr); -} - -/* - *--------------------------------------------------------------------------- - * - * SetUnicodeFromAny -- - * - * Generate the Unicode internal rep from the string rep. - * - * Results: - * The return value is always TCL_OK. - * - * Side effects: - * A Unicode object is stored as the internal rep of objPtr. The Unicode - * object is opitmized for the case where each UTF char in a string is - * only one byte. In this case, we store the value of numChars, but we - * don't copy the bytes to the unicodeObj->chars. Before accessing - * obj->chars, check if all chars are 1 byte long. - * - *--------------------------------------------------------------------------- - */ - -static int -SetUnicodeFromAny(interp, objPtr) - Tcl_Interp *interp; /* Not used. */ - Tcl_Obj *objPtr; /* The object to convert to type Unicode. */ -{ - int numBytes, numChars; - char *src; - - if (objPtr->typePtr != &tclUnicodeType) { - src = Tcl_GetStringFromObj(objPtr, &numBytes); - - numChars = Tcl_NumUtfChars(src, numBytes); - if (numChars == numBytes) { - SetOptUnicodeFromAny(objPtr, numChars); - } else { - SetFullUnicodeFromAny(objPtr, src, numBytes, numChars); - } - } - return TCL_OK; -} - -/* - *---------------------------------------------------------------------- - * - * FreeUnicodeInternalRep -- - * - * Deallocate the storage associated with a Unicode data object's - * internal representation. - * - * Results: - * None. - * - * Side effects: - * Frees memory. - * - *---------------------------------------------------------------------- - */ - -static void -FreeUnicodeInternalRep(objPtr) - Tcl_Obj *objPtr; /* Object with internal rep to free. */ -{ - ckfree((char *) GET_UNICODE(objPtr)); -} |