summaryrefslogtreecommitdiffstats
path: root/generic/tclUnicodeObj.c
diff options
context:
space:
mode:
Diffstat (limited to 'generic/tclUnicodeObj.c')
-rw-r--r-- generic/tclUnicodeObj.c 214
1 files changed, 146 insertions, 68 deletions
diff --git a/generic/tclUnicodeObj.c b/generic/tclUnicodeObj.c
index 3a4709b..315644d 100644
--- a/generic/tclUnicodeObj.c
+++ b/generic/tclUnicodeObj.c
@@ -9,7 +9,7 @@
* See the file "license.terms" for information on usage and redistribution
* of this file, and for a DISCLAIMER OF ALL WARRANTIES.
*
- * RCS: @(#) $Id: tclUnicodeObj.c,v 1.4 1999/06/09 17:06:57 hershey Exp $
+ * RCS: @(#) $Id: tclUnicodeObj.c,v 1.5 1999/06/10 04:28:51 stanton Exp $
*/
#include <math.h>
@@ -20,23 +20,19 @@
* Prototypes for local procedures defined in this file:
*/
+static int AllSingleByteChars _ANSI_ARGS_((Tcl_Obj *objPtr));
+static void AppendUniCharStrToObj _ANSI_ARGS_((Tcl_Obj *objPtr,
+ Tcl_UniChar *unichars, int numNewChars));
static void DupUnicodeInternalRep _ANSI_ARGS_((Tcl_Obj *srcPtr,
Tcl_Obj *copyPtr));
static void FreeUnicodeInternalRep _ANSI_ARGS_((Tcl_Obj *objPtr));
static void UpdateStringOfUnicode _ANSI_ARGS_((Tcl_Obj *objPtr));
-static int SetUnicodeFromAny _ANSI_ARGS_((Tcl_Interp *interp,
- Tcl_Obj *objPtr));
-
-static int AllSingleByteChars _ANSI_ARGS_((Tcl_Obj *objPtr));
-static void TclAppendUniCharStrToObj _ANSI_ARGS_((
- register Tcl_Obj *objPtr, Tcl_UniChar *unichars,
- int numChars));
-static Tcl_Obj * TclNewUnicodeObj _ANSI_ARGS_((Tcl_UniChar *unichars,
- int numChars));
static void SetOptUnicodeFromAny _ANSI_ARGS_((Tcl_Obj *objPtr,
int numChars));
static void SetFullUnicodeFromAny _ANSI_ARGS_((Tcl_Obj *objPtr,
char *src, int numBytes, int numChars));
+static int SetUnicodeFromAny _ANSI_ARGS_((Tcl_Interp *interp,
+ Tcl_Obj *objPtr));
/*
* The following object type represents a Unicode string. A Unicode string
@@ -68,13 +64,13 @@ typedef struct Unicode {
int numChars; /* The number of chars in the unicode
* string. */
size_t allocated; /* The amount of space actually allocated. */
- unsigned char chars[4]; /* The array of chars. The actual size of
+ Tcl_UniChar chars[2]; /* The array of chars. The actual size of
* this field depends on the 'allocated' field
* above. */
} Unicode;
#define UNICODE_SIZE(len) \
- ((unsigned) (sizeof(Unicode) - 4 + (len)))
+ ((unsigned) (sizeof(Unicode) - (sizeof(Tcl_UniChar)*2) + (len)))
#define GET_UNICODE(objPtr) \
((Unicode *) (objPtr)->internalRep.otherValuePtr)
#define SET_UNICODE(objPtr, unicodePtr) \
@@ -104,7 +100,6 @@ Tcl_UniChar *
TclGetUnicodeFromObj(objPtr)
Tcl_Obj *objPtr; /* The object to find the unicode string for. */
{
- Tcl_UniChar *unicharPtr;
Unicode *unicodePtr;
int numBytes;
char *src;
@@ -124,9 +119,15 @@ TclGetUnicodeFromObj(objPtr)
src = Tcl_GetStringFromObj(objPtr, &numBytes);
SetFullUnicodeFromAny(objPtr, src, numBytes, unicodePtr->numChars);
+
+ /*
+ * We need to fetch the pointer again because we have just
+ * reallocated the structure to make room for the Unicode data.
+ */
+
+ unicodePtr = GET_UNICODE(objPtr);
}
- unicharPtr = (Tcl_UniChar *)unicodePtr->chars;
- return unicharPtr;
+ return unicodePtr->chars;
}
/*
@@ -185,7 +186,7 @@ TclGetUniCharFromObj(objPtr, index)
Tcl_Obj *objPtr; /* The Unicode object. */
int index; /* Get the index'th character. */
{
- Tcl_UniChar *unicharPtr, unichar;
+ Tcl_UniChar unichar;
Unicode *unicodePtr;
int length;
@@ -206,8 +207,7 @@ TclGetUniCharFromObj(objPtr, index)
str = Tcl_GetStringFromObj(objPtr, &length);
Tcl_UtfToUniChar(&str[index], &unichar);
} else {
- unicharPtr = (Tcl_UniChar *)unicodePtr->chars;
- unichar = unicharPtr[index];
+ unichar = unicodePtr->chars[index];
}
return unichar;
}
@@ -217,11 +217,11 @@ TclGetUniCharFromObj(objPtr, index)
*
* TclGetRangeFromObj --
*
- * Create a Tcl Object that contains the chars between first and
- * last of the object indicated by "objPtr". If the object is not
- * already a Unicode object, an attempt will be made to convert it
- * to one. The first and last indices are assumed to be in the
- * appropriate range.
+ * Create a Tcl Object that contains the chars between first and last
+ * of the object indicated by "objPtr". If the object is not already
+ * a Unicode object, an attempt will be made to convert it to one.
+ * The first and last indices are assumed to be in the appropriate
+ * range.
*
* Results:
* Returns a new Tcl Object of either "string" or "unicode" type,
@@ -241,7 +241,6 @@ TclGetRangeFromObj(objPtr, first, last)
int last; /* Last index of the range. */
{
Tcl_Obj *newObjPtr; /* The Tcl object to find the range of. */
- Tcl_UniChar *unicharPtr;
Unicode *unicodePtr;
int length;
@@ -250,8 +249,7 @@ TclGetRangeFromObj(objPtr, first, last)
length = objPtr->length;
if (unicodePtr->numChars != length) {
- unicharPtr = (Tcl_UniChar *)unicodePtr->chars;
- newObjPtr = TclNewUnicodeObj(&unicharPtr[first], last-first+1);
+ newObjPtr = TclNewUnicodeObj(unicodePtr->chars + first, last-first+1);
} else {
int length;
char *str;
@@ -273,7 +271,7 @@ TclGetRangeFromObj(objPtr, first, last)
*
* TclAppendObjToUnicodeObj --
*
- * This procedure appends the contest of "srcObjPtr" to the Unicode
+ * This procedure appends the contents of "srcObjPtr" to the Unicode
* object "destPtr".
*
* Results:
@@ -367,7 +365,7 @@ TclAppendObjToUnicodeObj(targetObjPtr, srcObjPtr)
} else {
unicodePtr = GET_UNICODE(srcObjPtr);
numChars = unicodePtr->numChars;
- unicharSrcStr = (Tcl_UniChar *)unicodePtr->chars;
+ unicharSrcStr = unicodePtr->chars;
}
} else {
utfSrcStr = Tcl_GetStringFromObj(srcObjPtr, &numBytes);
@@ -383,7 +381,7 @@ TclAppendObjToUnicodeObj(targetObjPtr, srcObjPtr)
* Append the unichar src string to the result object.
*/
- TclAppendUniCharStrToObj(resultObjPtr, unicharSrcStr, numChars);
+ AppendUniCharStrToObj(resultObjPtr, unicharSrcStr, numChars);
Tcl_DStringFree(&dsPtr);
return resultObjPtr;
}
@@ -391,7 +389,7 @@ TclAppendObjToUnicodeObj(targetObjPtr, srcObjPtr)
/*
*----------------------------------------------------------------------
*
- * TclAppendUniCharStrToObj --
+ * AppendUniCharStrToObj --
*
* This procedure appends the contents of "srcObjPtr" to the
* Unicode object "objPtr".
@@ -406,31 +404,25 @@ TclAppendObjToUnicodeObj(targetObjPtr, srcObjPtr)
*----------------------------------------------------------------------
*/
-void
-TclAppendUniCharStrToObj(objPtr, unichars, numNewChars)
+static void
+AppendUniCharStrToObj(objPtr, unichars, numNewChars)
register Tcl_Obj *objPtr; /* Points to the object to append to. */
Tcl_UniChar *unichars; /* The unicode string to append to the
* object. */
int numNewChars; /* Number of chars in "unichars". */
{
Unicode *unicodePtr;
- int usedBytes, numNewBytes, totalNumBytes, totalNumChars;
-
- /*
- * Invalidate the StringRep.
- */
-
- Tcl_InvalidateStringRep(objPtr);
+ int numChars;
+ size_t numBytes;
+ SetUnicodeFromAny(NULL, objPtr);
unicodePtr = GET_UNICODE(objPtr);
- usedBytes = unicodePtr->numChars * sizeof(Tcl_UniChar);
- totalNumChars = numNewChars + unicodePtr->numChars;
- totalNumBytes = totalNumChars * sizeof(Tcl_UniChar);
- numNewBytes = numNewChars * sizeof(Tcl_UniChar);
+ numChars = numNewChars + unicodePtr->numChars;
+ numBytes = (numChars + 1) * sizeof(Tcl_UniChar);
- if (unicodePtr->allocated <= totalNumBytes) {
- int allocatedBytes = totalNumBytes * 2;
+ if (unicodePtr->allocated < numBytes) {
+ int allocatedBytes = numBytes * 2;
/*
* There isn't currently enough space in the Unicode
@@ -439,15 +431,101 @@ TclAppendUniCharStrToObj(objPtr, unichars, numNewChars)
* having to reallocate again.
*/
- unicodePtr = (Unicode *) ckrealloc(unicodePtr,
+ unicodePtr = (Unicode *) ckrealloc((char*) unicodePtr,
UNICODE_SIZE(allocatedBytes));
unicodePtr->allocated = allocatedBytes;
unicodePtr = SET_UNICODE(objPtr, unicodePtr);
}
- memcpy((VOID *) (unicodePtr->chars + usedBytes),
- (VOID *) unichars, (size_t) numNewBytes);
- *((Tcl_UniChar *)unicodePtr->chars + totalNumChars) = 0;
- unicodePtr->numChars = totalNumChars;
+ memcpy((VOID *) (unicodePtr->chars + unicodePtr->numChars),
+ (VOID *) unichars, (size_t) numNewChars * sizeof(Tcl_UniChar));
+ unicodePtr->chars[numChars] = 0;
+ unicodePtr->numChars = numChars;
+
+ /*
+ * Invalidate the StringRep.
+ */
+
+ Tcl_InvalidateStringRep(objPtr);
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * TclAppendUnicodeToObj --
+ *
+ * This procedure appends "length" Tcl_UniChars taken from "unichars"
+ * to the Unicode internal representation of "objPtr", growing that
+ * representation's buffer when it is too small to hold the result
+ * plus a trailing null.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * Panics if "objPtr" is shared. Calls SetUnicodeFromAny on "objPtr",
+ * may reallocate the object's Unicode structure (the resulting
+ * pointer is stored back into the object), and invalidates the
+ * string rep.
+ *
+ *----------------------------------------------------------------------
+ */
+
+void
+TclAppendUnicodeToObj(objPtr, unichars, length)
+ register Tcl_Obj *objPtr; /* Points to the object to append to. */
+ Tcl_UniChar *unichars; /* The unicode string to append to the
+ * object. */
+ int length; /* Number of chars in "unichars". */
+{
+ Unicode *unicodePtr;
+ int numChars, i; /* numChars: char count after the append. */
+ size_t newSize; /* Bytes for numChars chars plus the null. */
+ char *src; /* Read cursor into objPtr->bytes. */
+ Tcl_UniChar *dst; /* Write cursor into unicodePtr->chars. */
+
+ if (Tcl_IsShared(objPtr)) {
+ panic("TclAppendUnicodeToObj called with shared object");
+ }
+
+ SetUnicodeFromAny(NULL, objPtr);
+ unicodePtr = GET_UNICODE(objPtr);
+
+ /*
+ * Make the buffer big enough for the result. The size computed here
+ * includes one extra Tcl_UniChar for the trailing null.
+ */
+
+ numChars = unicodePtr->numChars + length;
+ newSize = (numChars + 1) * sizeof(Tcl_UniChar);
+
+ if (newSize > unicodePtr->allocated) {
+ int allocated = newSize * 2; /* Twice what is needed; presumably to
+ * limit future reallocations. */
+
+ unicodePtr = (Unicode *) ckrealloc((char*)unicodePtr,
+ UNICODE_SIZE(allocated));
+
+ if (unicodePtr->allocated == 0) {
+ /*
+ * If the original string was not in Unicode form, add it to the
+ * beginning of the buffer.
+ *
+ * The "allocated" field tested above still holds the value it had
+ * before the ckrealloc call; it is only overwritten after this
+ * branch. SetOptUnicodeFromAny stores 0 there when it records
+ * numChars without filling in the chars array. The loop below
+ * converts numChars characters from objPtr->bytes, advancing src
+ * by the value Tcl_UtfToUniChar returns for each one.
+ */
+
+ src = objPtr->bytes;
+ dst = unicodePtr->chars;
+ for (i = 0; i < unicodePtr->numChars; i++) {
+ src += Tcl_UtfToUniChar(src, dst++);
+ }
+ }
+ unicodePtr->allocated = allocated;
+ }
+
+ /*
+ * Copy the new string onto the end of the old string, then add the
+ * trailing null. unicodePtr->numChars must still be the old count
+ * while the memcpy destination is computed, so it is updated only
+ * afterwards.
+ */
+
+ memcpy((VOID*) (unicodePtr->chars + unicodePtr->numChars), unichars,
+ length * sizeof(Tcl_UniChar));
+ unicodePtr->numChars = numChars;
+ unicodePtr->chars[numChars] = 0;
+
+ SET_UNICODE(objPtr, unicodePtr); /* Pointer may have changed in ckrealloc. */
+
+ Tcl_InvalidateStringRep(objPtr);
 }
/*
@@ -497,7 +575,7 @@ TclNewUnicodeObj(unichars, numChars)
unicodePtr->numChars = numChars;
unicodePtr->allocated = allocated;
memcpy((VOID *) unicodePtr->chars, (VOID *) unichars, (size_t) numBytes);
- *((Tcl_UniChar *)unicodePtr->chars + numChars) = 0;
+ unicodePtr->chars[numChars] = 0;
SET_UNICODE(objPtr, unicodePtr);
return objPtr;
}
@@ -572,12 +650,10 @@ DupUnicodeInternalRep(srcPtr, copyPtr)
*/
if (AllSingleByteChars(srcPtr)) {
- copyUnicodePtr = (Unicode *) ckalloc(UNICODE_SIZE(4));
+ copyUnicodePtr = (Unicode *) ckalloc(sizeof(Unicode));
+ copyUnicodePtr->allocated = 0;
} else {
int allocated = srcUnicodePtr->allocated;
- Tcl_UniChar *unichars;
-
- unichars = (Tcl_UniChar *)srcUnicodePtr->chars;
copyUnicodePtr = (Unicode *) ckalloc(UNICODE_SIZE(allocated));
@@ -624,7 +700,7 @@ UpdateStringOfUnicode(objPtr)
Unicode *unicodePtr;
unicodePtr = GET_UNICODE(objPtr);
- src = (Tcl_UniChar *) unicodePtr->chars;
+ src = unicodePtr->chars;
length = unicodePtr->numChars * sizeof(Tcl_UniChar);
/*
@@ -672,16 +748,20 @@ SetOptUnicodeFromAny(objPtr, numChars)
{
Tcl_ObjType *typePtr;
Unicode *unicodePtr;
-
- unicodePtr = (Unicode *) ckalloc(UNICODE_SIZE(4));
- unicodePtr->numChars = numChars;
- unicodePtr->allocated = 0;
typePtr = objPtr->typePtr;
if ((typePtr != NULL) && (typePtr->freeIntRepProc) != NULL) {
(*typePtr->freeIntRepProc)(objPtr);
}
objPtr->typePtr = &tclUnicodeType;
+
+ /*
+ * Allocate enough space for the basic Unicode structure.
+ */
+
+ unicodePtr = (Unicode *) ckalloc(sizeof(Unicode));
+ unicodePtr->numChars = numChars;
+ unicodePtr->allocated = 0;
SET_UNICODE(objPtr, unicodePtr);
}
@@ -719,7 +799,7 @@ SetFullUnicodeFromAny(objPtr, src, numBytes, numChars)
unicodePtr = (Unicode *) ckalloc(UNICODE_SIZE(length));
srcEnd = src + numBytes;
- for (dst = (Tcl_UniChar *) unicodePtr->chars; src < srcEnd; dst++) {
+ for (dst = unicodePtr->chars; src < srcEnd; dst++) {
src += Tcl_UtfToUniChar(src, dst);
}
*dst = 0;
@@ -747,10 +827,10 @@ SetFullUnicodeFromAny(objPtr, src, numBytes, numChars)
*
* Side effects:
* A Unicode object is stored as the internal rep of objPtr. The Unicode
- * ojbect is opitmized for the case where each UTF char in a string is only
- * one byte. In this case, we store the value of numChars, but we don't copy
- * the bytes to the unicodeObj->chars. Before accessing obj->chars, check if
- * all chars are 1 byte long.
+ * object is optimized for the case where each UTF char in a string is
+ * only one byte. In this case, we store the value of numChars, but we
+ * don't copy the bytes to the unicodeObj->chars. Before accessing
+ * obj->chars, check if all chars are 1 byte long.
*
*---------------------------------------------------------------------------
*/
@@ -760,12 +840,10 @@ SetUnicodeFromAny(interp, objPtr)
Tcl_Interp *interp; /* Not used. */
Tcl_Obj *objPtr; /* The object to convert to type Unicode. */
{
- Tcl_ObjType *typePtr;
int numBytes, numChars;
char *src;
- typePtr = objPtr->typePtr;
- if (typePtr != &tclUnicodeType) {
+ if (objPtr->typePtr != &tclUnicodeType) {
src = Tcl_GetStringFromObj(objPtr, &numBytes);
numChars = Tcl_NumUtfChars(src, numBytes);