diff options
Diffstat (limited to 'generic/tclUnicodeObj.c')
| -rw-r--r-- | generic/tclUnicodeObj.c | 771 |
1 files changed, 0 insertions, 771 deletions
diff --git a/generic/tclUnicodeObj.c b/generic/tclUnicodeObj.c deleted file mode 100644 index 869b8c7..0000000 --- a/generic/tclUnicodeObj.c +++ /dev/null @@ -1,771 +0,0 @@ -/* - * tclUnicodeObj.c -- - * - * This file contains the implementation of the Unicode internal - * representation of Tcl objects. - * - * Copyright (c) 1999 by Scriptics Corporation. - * - * See the file "license.terms" for information on usage and redistribution - * of this file, and for a DISCLAIMER OF ALL WARRANTIES. - * - * RCS: @(#) $Id: tclUnicodeObj.c,v 1.2 1999/06/08 02:59:27 hershey Exp $ - */ - -#include <math.h> -#include "tclInt.h" -#include "tclPort.h" - -/* - * Prototypes for local procedures defined in this file: - */ - -static void DupUnicodeInternalRep _ANSI_ARGS_((Tcl_Obj *srcPtr, - Tcl_Obj *copyPtr)); -static void FreeUnicodeInternalRep _ANSI_ARGS_((Tcl_Obj *objPtr)); -static void UpdateStringOfUnicode _ANSI_ARGS_((Tcl_Obj *objPtr)); -static int SetUnicodeFromAny _ANSI_ARGS_((Tcl_Interp *interp, - Tcl_Obj *objPtr)); - -static int AllSingleByteChars _ANSI_ARGS_((Tcl_Obj *objPtr)); -static void TclAppendUniCharStrToObj _ANSI_ARGS_(( - register Tcl_Obj *objPtr, Tcl_UniChar *unichars, - int numChars)); -static Tcl_Obj * TclNewUnicodeObj _ANSI_ARGS_((Tcl_UniChar *unichars, - int numChars)); -static void SetOptUnicodeFromAny _ANSI_ARGS_((Tcl_Obj *objPtr, - int numChars)); - -/* - * The following object type represents a Unicode string. A Unicode string - * is an internationalized string. Conceptually, a Unicode string is an - * array of 16-bit quantities organized as a sequence of properly formed - * UTF-8 characters. There is a one-to-one map between Unicode and UTF - * characters. The Unicode ojbect is opitmized for the case where each UTF - * char in a string is only one byte. In this case, we store the value of - * numChars, but we don't copy the bytes to the unicodeObj->chars. Before - * accessing obj->chars, check if unicodeObj->numChars == obj->length. - */ - -Tcl_ObjType tclUnicodeType = { - "unicode", - FreeUnicodeInternalRep, - DupUnicodeInternalRep, - UpdateStringOfUnicode, - SetUnicodeFromAny -}; - -/* - * The following structure is the internal rep for a Unicode object. - * Keeps track of how much memory has been used and how much has been - * allocated for the Unicode to enable growing and shrinking of the - * Unicode object with fewer mallocs. - */ - -typedef struct Unicode { - int numChars; /* The number of chars in the unicode - * string. */ - int used; /* The number of bytes used in the unicode - * string. */ - int allocated; /* The amount of space actually allocated - * minus 1 byte. */ - unsigned char chars[4]; /* The array of chars. The actual size of - * this field depends on the 'allocated' field - * above. */ -} Unicode; - -#define UNICODE_SIZE(len) \ - ((unsigned) (sizeof(Unicode) - 4 + (len))) -#define GET_UNICODE(objPtr) \ - ((Unicode *) (objPtr)->internalRep.otherValuePtr) -#define SET_UNICODE(objPtr, unicodePtr) \ - (objPtr)->internalRep.otherValuePtr = (VOID *) (unicodePtr) - - -/* - *---------------------------------------------------------------------- - * - * TclGetUnicodeLengthFromObj -- - * - * Get the length of the Unicode string from the Tcl object. If - * the object is not already a Unicode object, an attempt will be - * made to convert it to one. - * - * Results: - * Pointer to unicode string representing the unicode object. - * - * Side effects: - * Frees old internal rep. Allocates memory for new internal rep. - * - *---------------------------------------------------------------------- - */ - -int -TclGetUnicodeLengthFromObj(objPtr) - Tcl_Obj *objPtr; /* The Unicode object. */ -{ - int length; - Unicode *unicodePtr; - - SetUnicodeFromAny(NULL, objPtr); - unicodePtr = GET_UNICODE(objPtr); - - length = unicodePtr->numChars; - return length; -} - -/* - *---------------------------------------------------------------------- - * - * TclGetUniCharFromObj -- - * - * Get the index'th Unicode character from the Unicode object. If - * the object is not already a Unicode object, an attempt will be - * made to convert it to one. The index is assumed to be in the - * appropriate range. - * - * Results: - * Returns the index'th Unicode character in the Object. - * - * Side effects: - * Fills unichar with the index'th Unicode character. - * - *---------------------------------------------------------------------- - */ - -Tcl_UniChar -TclGetUniCharFromObj(objPtr, index) - Tcl_Obj *objPtr; /* The Unicode object. */ - int index; /* Get the index'th character. */ -{ - Tcl_UniChar *unicharPtr, unichar; - Unicode *unicodePtr; - int length; - - SetUnicodeFromAny(NULL, objPtr); - unicodePtr = GET_UNICODE(objPtr); - length = objPtr->length; - - if (AllSingleByteChars(objPtr)) { - int length; - char *str; - - /* - * All of the characters in the Utf string are 1 byte chars, - * so we don't store the unicode char. We get the Utf string - * and convert the index'th byte to a Unicode character. - */ - - str = Tcl_GetStringFromObj(objPtr, &length); - Tcl_UtfToUniChar(&str[index], &unichar); - } else { - unicharPtr = (Tcl_UniChar *)unicodePtr->chars; - unichar = unicharPtr[index]; - } - return unichar; -} - -/* - *---------------------------------------------------------------------- - * - * TclGetRangeFromObj -- - * - * Create a Tcl Object that contains the chars between first and - * last of the object indicated by "objPtr". If the object is not - * already a Unicode object, an attempt will be made to convert it - * to one. The first and last indices are assumed to be in the - * appropriate range. - * - * Results: - * Returns a new Tcl Object of either "string" or "unicode" type, - * containing the range of chars. - * - * Side effects: - * Changes the internal rep of "objPtr" to unicode. - * - *---------------------------------------------------------------------- - */ - -Tcl_Obj* -TclGetRangeFromObj(objPtr, first, last) - - Tcl_Obj *objPtr; /* The Tcl object to find the range of. */ - int first; /* First index of the range. */ - int last; /* Last index of the range. */ -{ - Tcl_Obj *newObjPtr; /* The Tcl object to find the range of. */ - Tcl_UniChar *unicharPtr; - Unicode *unicodePtr; - int length; - - SetUnicodeFromAny(NULL, objPtr); - unicodePtr = GET_UNICODE(objPtr); - length = objPtr->length; - - if (unicodePtr->numChars != length) { - unicharPtr = (Tcl_UniChar *)unicodePtr->chars; - newObjPtr = TclNewUnicodeObj(&unicharPtr[first], last-first+1); - } else { - int length; - char *str; - - /* - * All of the characters in the Utf string are 1 byte chars, - * so we don't store the unicode char. Create a new string - * object containing the specified range of chars. - */ - - str = Tcl_GetStringFromObj(objPtr, &length); - newObjPtr = Tcl_NewStringObj(&str[first], last-first+1); - } - return newObjPtr; -} - -/* - *---------------------------------------------------------------------- - * - * TclAppendObjToUnicodeObj -- - * - * This procedure appends the contest of "srcObjPtr" to the Unicode - * object "destPtr". - * - * Results: - * None. - * - * Side effects: - * If srcObjPtr doesn't have an internal rep, then it is given a - * Unicode internal rep. - * - *---------------------------------------------------------------------- - */ - -Tcl_Obj * -TclAppendObjToUnicodeObj(targetObjPtr, srcObjPtr) - register Tcl_Obj *targetObjPtr; /* Points to the object to - * append to. */ - register Tcl_Obj *srcObjPtr; /* Points to the object to - * append from. */ -{ - int numBytes, numChars; - Tcl_Obj *resultObjPtr; - char *utfSrcStr; - Tcl_UniChar *unicharSrcStr; - Unicode *unicodePtr; - Tcl_DString dsPtr; - - /* - * Duplicate the target if it is shared. - * Change the result's internal rep to Unicode object. - */ - - if (Tcl_IsShared(targetObjPtr)) { - resultObjPtr = Tcl_DuplicateObj(targetObjPtr); - } else { - resultObjPtr = targetObjPtr; - } - SetUnicodeFromAny(NULL, resultObjPtr); - - /* - * Case where target chars are 1 byte long: - * If src obj is of "string" or null type, then convert it to "unicode" - * type. Src objs of other types (such as int) are left in tact to keep - * them from shimmering between types. If the src obj is a unichar obj, - * and all src chars are also 1 byte long, the src string is appended to - * the target "unicode" obj, and the target obj maintains its "optimized" - * status. - */ - - if (AllSingleByteChars(resultObjPtr)) { - - int length; - char *stringRep; - - if (srcObjPtr->typePtr == &tclStringType - || srcObjPtr->typePtr == NULL) { - SetUnicodeFromAny(NULL, srcObjPtr); - } - - stringRep = Tcl_GetStringFromObj(srcObjPtr, &length); - Tcl_AppendToObj(resultObjPtr, stringRep, length); - - if ((srcObjPtr->typePtr == &tclUnicodeType) - && (AllSingleByteChars(srcObjPtr))) { - SetOptUnicodeFromAny(resultObjPtr, resultObjPtr->length); - } - return resultObjPtr; - } - - /* - * Extract a unicode string from "unicode" or "string" type objects. - * Extract the utf string from non-unicode objects, and convert the - * utf string to unichar string locally. - * If the src obj is a "string" obj, convert it to "unicode" type. - * Src objs of other types (such as int) are left in tact to keep - * them from shimmering between types. - */ - - Tcl_DStringInit(&dsPtr); - if (srcObjPtr->typePtr == &tclStringType || srcObjPtr->typePtr == NULL) { - SetUnicodeFromAny(NULL, srcObjPtr); - } - if (srcObjPtr->typePtr == &tclUnicodeType) { - if (AllSingleByteChars(srcObjPtr)) { - - unicodePtr = GET_UNICODE(srcObjPtr); - numChars = unicodePtr->numChars; - - utfSrcStr = Tcl_GetStringFromObj(srcObjPtr, &numBytes); - unicharSrcStr = (Tcl_UniChar *)Tcl_UtfToUniCharDString(utfSrcStr, - numBytes, &dsPtr); - } else { - unicodePtr = GET_UNICODE(srcObjPtr); - numChars = unicodePtr->numChars; - unicharSrcStr = (Tcl_UniChar *)unicodePtr->chars; - } - } else { - utfSrcStr = Tcl_GetStringFromObj(srcObjPtr, &numBytes); - numChars = Tcl_NumUtfChars(utfSrcStr, numBytes); - unicharSrcStr = (Tcl_UniChar *)Tcl_UtfToUniCharDString(utfSrcStr, - numBytes, &dsPtr); - } - if (numChars == 0) { - return resultObjPtr; - } - - /* - * Append the unichar src string to the result object. - */ - - TclAppendUniCharStrToObj(resultObjPtr, unicharSrcStr, numChars); - Tcl_DStringFree(&dsPtr); - return resultObjPtr; -} - -/* - *---------------------------------------------------------------------- - * - * TclAppendUniCharStrToObj -- - * - * This procedure appends the contents of "srcObjPtr" to the - * Unicode object "objPtr". - * - * Results: - * None. - * - * Side effects: - * If srcObjPtr doesn't have an internal rep, then it is given a - * Unicode internal rep. - * - *---------------------------------------------------------------------- - */ - -void -TclAppendUniCharStrToObj(objPtr, unichars, numNewChars) - register Tcl_Obj *objPtr; /* Points to the object to append to. */ - Tcl_UniChar *unichars; /* The unicode string to append to the - * object. */ - int numNewChars; /* Number of chars in "unichars". */ -{ - Unicode *unicodePtr; - int usedBytes, numNewBytes, totalNumBytes, totalNumChars; - - /* - * Invalidate the StringRep. - */ - - Tcl_InvalidateStringRep(objPtr); - - unicodePtr = GET_UNICODE(objPtr); - - usedBytes = unicodePtr->used; - totalNumChars = numNewChars + unicodePtr->numChars; - totalNumBytes = totalNumChars * sizeof(Tcl_UniChar); - numNewBytes = numNewChars * sizeof(Tcl_UniChar); - - if (unicodePtr->allocated < totalNumBytes) { - int allocatedBytes = totalNumBytes * 2; - - /* - * There isn't currently enough space in the Unicode - * representation so allocate additional space. In fact, - * overallocate so that there is room for future growth without - * having to reallocate again. - */ - - unicodePtr = (Unicode *) ckrealloc(unicodePtr, - UNICODE_SIZE(allocatedBytes)); - memcpy((VOID *) (unicodePtr->chars + usedBytes), - (VOID *) unichars, (size_t) numNewBytes); - - unicodePtr->allocated = allocatedBytes; - unicodePtr = SET_UNICODE(objPtr, unicodePtr); - } - - memcpy((VOID *) (unicodePtr->chars + usedBytes), - (VOID *) unichars, (size_t) numNewBytes); - unicodePtr->used = totalNumBytes; - unicodePtr->numChars = totalNumChars; -} - -/* - *--------------------------------------------------------------------------- - * - * TclNewUnicodeObj -- - * - * This procedure is creates a new Unicode object and initializes - * it from the given Utf String. If the Utf String is the same size - * as the Unicode string, don't duplicate the data. - * - * Results: - * The newly created object is returned. This object will have no - * initial string representation. The returned object has a ref count - * of 0. - * - * Side effects: - * Memory allocated for new object and copy of Unicode argument. - * - *--------------------------------------------------------------------------- - */ - -Tcl_Obj * -TclNewUnicodeObj(unichars, numChars) - Tcl_UniChar *unichars; /* The unicode string used to initialize - * the new object. */ - int numChars; /* Number of characters in the unicode - * string. */ -{ - Tcl_Obj *objPtr; - Unicode *unicodePtr; - int numBytes; - - numBytes = numChars * sizeof(Tcl_UniChar); - - TclNewObj(objPtr); - objPtr->bytes = NULL; - objPtr->typePtr = &tclUnicodeType; - - unicodePtr = (Unicode *) ckalloc(UNICODE_SIZE(numBytes)); - unicodePtr->used = numBytes; - unicodePtr->numChars = numChars; - unicodePtr->allocated = numBytes; - memcpy((VOID *) unicodePtr->chars, (VOID *) unichars, (size_t) numBytes); - SET_UNICODE(objPtr, unicodePtr); - return objPtr; -} - -/* - *--------------------------------------------------------------------------- - * - * TclAllSingleByteChars -- - * - * Initialize the internal representation of a Unicode Tcl_Obj - * to a copy of the internal representation of an existing Unicode - * object. - * - * Results: - * None. - * - * Side effects: - * Allocates memory. - * - *--------------------------------------------------------------------------- - */ - -static int -AllSingleByteChars(objPtr) - Tcl_Obj *objPtr; /* Object whose char lengths to check. */ -{ - Unicode *unicodePtr; - int numBytes, numChars; - - unicodePtr = GET_UNICODE(objPtr); - numChars = unicodePtr->numChars; - numBytes = objPtr->length; - - if (numChars == numBytes) { - return 1; - } else { - return 0; - } -} - -/* - *--------------------------------------------------------------------------- - * - * DupUnicodeInternalRep -- - * - * Initialize the internal representation of a Unicode Tcl_Obj - * to a copy of the internal representation of an existing Unicode - * object. - * - * Results: - * None. - * - * Side effects: - * Allocates memory. - * - *--------------------------------------------------------------------------- - */ - -static void -DupUnicodeInternalRep(srcPtr, copyPtr) - Tcl_Obj *srcPtr; /* Object with internal rep to copy. */ - Tcl_Obj *copyPtr; /* Object with internal rep to set. */ -{ - Unicode *srcUnicodePtr = GET_UNICODE(srcPtr); - Unicode *copyUnicodePtr; /*GET_UNICODE(copyPtr);*/ - - /* - * If the src obj is a string of 1-byte Utf chars, then copy the - * string rep of the source object and create an "empty" Unicode - * internal rep for the new object. Otherwise, copy Unicode - * internal rep, and invalidate the string rep of the new object. - */ - - if (AllSingleByteChars(srcPtr)) { - copyUnicodePtr = (Unicode *) ckalloc(UNICODE_SIZE(4)); - } else { - int used = srcUnicodePtr->used; - int allocated = srcUnicodePtr->allocated; - Tcl_UniChar *unichars; - - unichars = (Tcl_UniChar *)srcUnicodePtr->chars; - - copyUnicodePtr = (Unicode *) ckalloc(UNICODE_SIZE(allocated)); - - copyUnicodePtr->used = used; - copyUnicodePtr->allocated = allocated; - memcpy((VOID *) copyUnicodePtr->chars, - (VOID *) srcUnicodePtr->chars, (size_t) used); - } - copyUnicodePtr->numChars = srcUnicodePtr->numChars; - SET_UNICODE(copyPtr, copyUnicodePtr); -} - -/* - *--------------------------------------------------------------------------- - * - * TclSetUnicodeObj -- - * - * Modify an object to be a Unicode object and to have the specified - * unicode string as its value. - * - * Results: - * None. - * - * Side effects: - * The object's old string rep and internal rep is freed. - * Memory allocated for copy of unicode argument. - * - *---------------------------------------------------------------------- - */ - -void -TclSetUnicodeObj(objPtr, chars, length) - Tcl_Obj *objPtr; /* Object to initialize as a Unicode obj. */ - unsigned char *chars; /* The unicode string to use as the new - * value. */ - int length; /* Length of the unicode string, which must - * be >= 0. */ -{ - Tcl_ObjType *typePtr; - Unicode *unicodePtr; - - if (Tcl_IsShared(objPtr)) { - panic("TclSetUnicodeObj called with shared object"); - } - typePtr = objPtr->typePtr; - if ((typePtr != NULL) && (typePtr->freeIntRepProc != NULL)) { - (*typePtr->freeIntRepProc)(objPtr); - } - Tcl_InvalidateStringRep(objPtr); - - unicodePtr = (Unicode *) ckalloc(UNICODE_SIZE(length)); - unicodePtr->used = length; - unicodePtr->allocated = length; - memcpy((VOID *) unicodePtr->chars, (VOID *) chars, (size_t) length); - - objPtr->typePtr = &tclUnicodeType; - SET_UNICODE(objPtr, unicodePtr); -} - -/* - *--------------------------------------------------------------------------- - * - * UpdateStringOfUnicode -- - * - * Update the string representation for a Unicode data object. - * Note: This procedure does not invalidate an existing old string rep - * so storage will be lost if this has not already been done. - * - * Results: - * None. - * - * Side effects: - * The object's string is set to a valid string that results from - * the Unicode-to-string conversion. - * - * The object becomes a string object -- the internal rep is - * discarded and the typePtr becomes NULL. - * - *--------------------------------------------------------------------------- - */ - -static void -UpdateStringOfUnicode(objPtr) - Tcl_Obj *objPtr; /* Unicode object whose string rep to - * update. */ -{ - int i, length, size; - Tcl_UniChar *src; - char dummy[TCL_UTF_MAX]; - char *dst; - Unicode *unicodePtr; - - unicodePtr = GET_UNICODE(objPtr); - src = (Tcl_UniChar *) unicodePtr->chars; - length = unicodePtr->used; - - /* - * How much space will string rep need? - */ - - size = 0; - for (i = 0; i < unicodePtr->numChars; i++) { - size += Tcl_UniCharToUtf((int) src[i], dummy); - } - - dst = (char *) ckalloc((unsigned) (size + 1)); - objPtr->bytes = dst; - objPtr->length = size; - - for (i = 0; i < unicodePtr->numChars; i++) { - dst += Tcl_UniCharToUtf(src[i], dst); - } - *dst = '\0'; -} - -/* - *--------------------------------------------------------------------------- - * - * SetOptUnicodeFromAny -- - * - * Generate the Unicode internal rep from the string rep. - * - * Results: - * The return value is always TCL_OK. - * - * Side effects: - * A Unicode object is stored as the internal rep of objPtr. The Unicode - * ojbect is opitmized for the case where each UTF char in a string is only - * one byte. In this case, we store the value of numChars, but we don't copy - * the bytes to the unicodeObj->chars. Before accessing obj->chars, check if - * all chars are 1 byte long. - * - *--------------------------------------------------------------------------- - */ - -static void -SetOptUnicodeFromAny(objPtr, numChars) - Tcl_Obj *objPtr; /* The object to convert to type Unicode. */ - int numChars; -{ - Tcl_ObjType *typePtr; - Unicode *unicodePtr; - - unicodePtr = (Unicode *) ckalloc(UNICODE_SIZE(4)); - unicodePtr->numChars = numChars; - - typePtr = objPtr->typePtr; - if ((typePtr != NULL) && (typePtr->freeIntRepProc) != NULL) { - (*typePtr->freeIntRepProc)(objPtr); - } - objPtr->typePtr = &tclUnicodeType; - SET_UNICODE(objPtr, unicodePtr); -} - -/* - *--------------------------------------------------------------------------- - * - * SetUnicodeFromAny -- - * - * Generate the Unicode internal rep from the string rep. - * - * Results: - * The return value is always TCL_OK. - * - * Side effects: - * A Unicode object is stored as the internal rep of objPtr. The Unicode - * ojbect is opitmized for the case where each UTF char in a string is only - * one byte. In this case, we store the value of numChars, but we don't copy - * the bytes to the unicodeObj->chars. Before accessing obj->chars, check if - * all chars are 1 byte long. - * - *--------------------------------------------------------------------------- - */ - -static int -SetUnicodeFromAny(interp, objPtr) - Tcl_Interp *interp; /* Not used. */ - Tcl_Obj *objPtr; /* The object to convert to type Unicode. */ -{ - Tcl_ObjType *typePtr; - int numBytes, numChars; - char *src, *srcEnd; - Unicode *unicodePtr; - unsigned char *dst; - - typePtr = objPtr->typePtr; - if (typePtr != &tclUnicodeType) { - src = Tcl_GetStringFromObj(objPtr, &numBytes); - - numChars = Tcl_NumUtfChars(src, numBytes); - if (numChars == numBytes) { - SetOptUnicodeFromAny(objPtr, numChars); - } else { - unicodePtr = (Unicode *) ckalloc(UNICODE_SIZE(numChars - * sizeof(Tcl_UniChar))); - srcEnd = src + numBytes; - - for (dst = unicodePtr->chars; src < srcEnd; - dst += sizeof(Tcl_UniChar)) { - src += Tcl_UtfToUniChar(src, (Tcl_UniChar *) dst); - } - - unicodePtr->used = numChars * sizeof(Tcl_UniChar); - unicodePtr->numChars = numChars; - unicodePtr->allocated = numChars * sizeof(Tcl_UniChar); - - if ((typePtr != NULL) && (typePtr->freeIntRepProc) != NULL) { - (*typePtr->freeIntRepProc)(objPtr); - } - objPtr->typePtr = &tclUnicodeType; - SET_UNICODE(objPtr, unicodePtr); - } - } - return TCL_OK; -} - -/* - *---------------------------------------------------------------------- - * - * FreeUnicodeInternalRep -- - * - * Deallocate the storage associated with a Unicode data object's - * internal representation. - * - * Results: - * None. - * - * Side effects: - * Frees memory. - * - *---------------------------------------------------------------------- - */ - -static void -FreeUnicodeInternalRep(objPtr) - Tcl_Obj *objPtr; /* Object with internal rep to free. */ -{ - ckfree((char *) GET_UNICODE(objPtr)); -} |
