diff options
-rw-r--r-- | generic/tclInt.h | 3 | ||||
-rw-r--r-- | generic/tclUnicodeObj.c | 142 |
2 files changed, 115 insertions, 30 deletions
diff --git a/generic/tclInt.h b/generic/tclInt.h index ed9002d..506c953 100644 --- a/generic/tclInt.h +++ b/generic/tclInt.h @@ -11,7 +11,7 @@ * See the file "license.terms" for information on usage and redistribution * of this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclInt.h,v 1.30 1999/06/08 02:59:24 hershey Exp $ + * RCS: @(#) $Id: tclInt.h,v 1.31 1999/06/08 23:30:24 hershey Exp $ */ #ifndef _TCLINT @@ -1642,6 +1642,7 @@ EXTERN Tcl_Obj* TclGetRangeFromObj _ANSI_ARGS_((Tcl_Obj *objPtr, int first, int last)); EXTERN Tcl_UniChar TclGetUniCharFromObj _ANSI_ARGS_((Tcl_Obj *objPtr, int index)); +EXTERN Tcl_UniChar * TclGetUnicodeFromObj _ANSI_ARGS_((Tcl_Obj *objPtr)); EXTERN int TclGetUnicodeLengthFromObj _ANSI_ARGS_(( Tcl_Obj *objPtr)); EXTERN int TclGlob _ANSI_ARGS_((Tcl_Interp *interp, diff --git a/generic/tclUnicodeObj.c b/generic/tclUnicodeObj.c index 869b8c7..e724491 100644 --- a/generic/tclUnicodeObj.c +++ b/generic/tclUnicodeObj.c @@ -9,7 +9,7 @@ * See the file "license.terms" for information on usage and redistribution * of this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclUnicodeObj.c,v 1.2 1999/06/08 02:59:27 hershey Exp $ + * RCS: @(#) $Id: tclUnicodeObj.c,v 1.3 1999/06/08 23:30:24 hershey Exp $ */ #include <math.h> @@ -35,6 +35,8 @@ static Tcl_Obj * TclNewUnicodeObj _ANSI_ARGS_((Tcl_UniChar *unichars, int numChars)); static void SetOptUnicodeFromAny _ANSI_ARGS_((Tcl_Obj *objPtr, int numChars)); +static void SetFullUnicodeFromAny _ANSI_ARGS_((Tcl_Obj *objPtr, + char *src, int numBytes, int numChars)); /* * The following object type represents a Unicode string. A Unicode string @@ -85,6 +87,54 @@ typedef struct Unicode { /* *---------------------------------------------------------------------- * + * TclGetUnicodeFromObj -- + * + * Get a pointer to the Unicode string held by the Unicode object. If + * the object is not already a Unicode object, an attempt will be + * made to convert it to one. 
If only the optimized (all + * single-byte) rep is stored, the full Unicode string is built first. + * + * Results: + * Returns a pointer to the object's internal unicode string. + * + * Side effects: + * Converts the object to have the Unicode internal rep. + * + *---------------------------------------------------------------------- + */ + +Tcl_UniChar * +TclGetUnicodeFromObj(objPtr) + Tcl_Obj *objPtr; /* The object to find the unicode string for. */ +{ + Unicode *unicodePtr; + int numBytes; + char *src; + + SetUnicodeFromAny(NULL, objPtr); + unicodePtr = GET_UNICODE(objPtr); + + if (AllSingleByteChars(objPtr) && (unicodePtr->allocated == 0)) { + + /* + * If all of the characters in the Utf string are 1 byte chars, + * we don't normally store the unicode str. Since this + * function must return a unicode string, and one has not yet + * been stored, force the Unicode to be calculated and stored + * now. + */ + + src = Tcl_GetStringFromObj(objPtr, &numBytes); + SetFullUnicodeFromAny(objPtr, src, numBytes, unicodePtr->numChars); + /* SetFullUnicodeFromAny installed a new internal rep; re-read it. */ + unicodePtr = GET_UNICODE(objPtr); + } + return (Tcl_UniChar *) unicodePtr->chars; +} + +/* + *---------------------------------------------------------------------- + * * TclGetUnicodeLengthFromObj -- * * Get the length of the Unicode string from the Tcl object. If @@ -650,17 +700,16 @@ UpdateStringOfUnicode(objPtr) * * SetOptUnicodeFromAny -- * - * Generate the Unicode internal rep from the string rep. + * Generate the optimized Unicode internal rep from the string rep. * * Results: - * The return value is always TCL_OK. + * None. * * Side effects: - * A Unicode object is stored as the internal rep of objPtr. The Unicode - * ojbect is opitmized for the case where each UTF char in a string is only - * one byte. In this case, we store the value of numChars, but we don't copy - * the bytes to the unicodeObj->chars. Before accessing obj->chars, check if - * all chars are 1 byte long. 
+ * The Unicode object is optimized for the case where each UTF char in + * a string is only one byte. In this case, we store the value of + * numChars, but we don't copy the bytes to the unicodeObj->chars. + * Before accessing obj->chars, check if all chars are 1 byte long. * *--------------------------------------------------------------------------- */ @@ -675,6 +724,8 @@ SetOptUnicodeFromAny(objPtr, numChars) unicodePtr = (Unicode *) ckalloc(UNICODE_SIZE(4)); unicodePtr->numChars = numChars; + unicodePtr->allocated = 0; + unicodePtr->used = 0; typePtr = objPtr->typePtr; if ((typePtr != NULL) && (typePtr->freeIntRepProc) != NULL) { @@ -687,6 +738,58 @@ SetOptUnicodeFromAny(objPtr, numChars) /* *--------------------------------------------------------------------------- * + * SetFullUnicodeFromAny -- + * + * Generate the full (non-optimized) Unicode internal rep from the + * string rep. + * + * Results: + * None. + * + * Side effects: + * The Unicode internal rep will contain a copy of the string "src" in + * unicode format. + * + *--------------------------------------------------------------------------- + */ + +static void +SetFullUnicodeFromAny(objPtr, src, numBytes, numChars) + Tcl_Obj *objPtr; /* The object to convert to type Unicode. 
*/ + char *src; /* The UTF string to convert. */ + int numBytes; /* Length of src, in bytes. */ + int numChars; /* Number of characters in src. */ +{ + Tcl_ObjType *typePtr; + Unicode *unicodePtr; + char *srcEnd; + unsigned char *dst; + + + unicodePtr = (Unicode *) ckalloc(UNICODE_SIZE(numChars + * sizeof(Tcl_UniChar))); + srcEnd = src + numBytes; + + for (dst = unicodePtr->chars; src < srcEnd; + dst += sizeof(Tcl_UniChar)) { + src += Tcl_UtfToUniChar(src, (Tcl_UniChar *) dst); + } + + unicodePtr->used = numChars * sizeof(Tcl_UniChar); + unicodePtr->numChars = numChars; + unicodePtr->allocated = numChars * sizeof(Tcl_UniChar); + + typePtr = objPtr->typePtr; + if ((typePtr != NULL) && (typePtr->freeIntRepProc) != NULL) { + (*typePtr->freeIntRepProc)(objPtr); + } + objPtr->typePtr = &tclUnicodeType; + SET_UNICODE(objPtr, unicodePtr); +} + +/* + *--------------------------------------------------------------------------- + * * SetUnicodeFromAny -- * * Generate the Unicode internal rep from the string rep. @@ -711,9 +814,7 @@ SetUnicodeFromAny(interp, objPtr) { Tcl_ObjType *typePtr; int numBytes, numChars; - char *src, *srcEnd; - Unicode *unicodePtr; - unsigned char *dst; + char *src; typePtr = objPtr->typePtr; if (typePtr != &tclUnicodeType) { @@ -723,24 +824,7 @@ SetUnicodeFromAny(interp, objPtr) if (numChars == numBytes) { SetOptUnicodeFromAny(objPtr, numChars); } else { - unicodePtr = (Unicode *) ckalloc(UNICODE_SIZE(numChars - * sizeof(Tcl_UniChar))); - srcEnd = src + numBytes; - - for (dst = unicodePtr->chars; src < srcEnd; - dst += sizeof(Tcl_UniChar)) { - src += Tcl_UtfToUniChar(src, (Tcl_UniChar *) dst); - } - - unicodePtr->used = numChars * sizeof(Tcl_UniChar); - unicodePtr->numChars = numChars; - unicodePtr->allocated = numChars * sizeof(Tcl_UniChar); - - if ((typePtr != NULL) && (typePtr->freeIntRepProc) != NULL) { - (*typePtr->freeIntRepProc)(objPtr); - } - objPtr->typePtr = &tclUnicodeType; - SET_UNICODE(objPtr, unicodePtr); + SetFullUnicodeFromAny(objPtr, src, numBytes, numChars); } } return TCL_OK; |