summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--generic/tclInt.h3
-rw-r--r--generic/tclUnicodeObj.c142
2 files changed, 115 insertions, 30 deletions
diff --git a/generic/tclInt.h b/generic/tclInt.h
index ed9002d..506c953 100644
--- a/generic/tclInt.h
+++ b/generic/tclInt.h
@@ -11,7 +11,7 @@
* See the file "license.terms" for information on usage and redistribution
* of this file, and for a DISCLAIMER OF ALL WARRANTIES.
*
- * RCS: @(#) $Id: tclInt.h,v 1.30 1999/06/08 02:59:24 hershey Exp $
+ * RCS: @(#) $Id: tclInt.h,v 1.31 1999/06/08 23:30:24 hershey Exp $
*/
#ifndef _TCLINT
@@ -1642,6 +1642,7 @@ EXTERN Tcl_Obj* TclGetRangeFromObj _ANSI_ARGS_((Tcl_Obj *objPtr,
int first, int last));
EXTERN Tcl_UniChar TclGetUniCharFromObj _ANSI_ARGS_((Tcl_Obj *objPtr,
int index));
+EXTERN Tcl_UniChar * TclGetUnicodeFromObj _ANSI_ARGS_((Tcl_Obj *objPtr));
EXTERN int TclGetUnicodeLengthFromObj _ANSI_ARGS_((
Tcl_Obj *objPtr));
EXTERN int TclGlob _ANSI_ARGS_((Tcl_Interp *interp,
diff --git a/generic/tclUnicodeObj.c b/generic/tclUnicodeObj.c
index 869b8c7..e724491 100644
--- a/generic/tclUnicodeObj.c
+++ b/generic/tclUnicodeObj.c
@@ -9,7 +9,7 @@
* See the file "license.terms" for information on usage and redistribution
* of this file, and for a DISCLAIMER OF ALL WARRANTIES.
*
- * RCS: @(#) $Id: tclUnicodeObj.c,v 1.2 1999/06/08 02:59:27 hershey Exp $
+ * RCS: @(#) $Id: tclUnicodeObj.c,v 1.3 1999/06/08 23:30:24 hershey Exp $
*/
#include <math.h>
@@ -35,6 +35,8 @@ static Tcl_Obj * TclNewUnicodeObj _ANSI_ARGS_((Tcl_UniChar *unichars,
int numChars));
static void SetOptUnicodeFromAny _ANSI_ARGS_((Tcl_Obj *objPtr,
int numChars));
+static void SetFullUnicodeFromAny _ANSI_ARGS_((Tcl_Obj *objPtr,
+ char *src, int numBytes, int numChars));
/*
* The following object type represents a Unicode string. A Unicode string
@@ -85,6 +87,54 @@ typedef struct Unicode {
/*
*----------------------------------------------------------------------
*
+ * TclGetUnicodeFromObj --
+ *
+ * Get the Unicode (Tcl_UniChar) string held by the object. If
+ * the object is not already a Unicode object, an attempt will be
+ * made to convert it to one. If only the optimized (1-byte) rep
+ * is stored, the full Unicode string is generated first.
+ *
+ * Results:
+ * Returns a pointer to the object's internal unicode string.
+ *
+ * Side effects:
+ * Converts the object to have the Unicode internal rep.
+ *
+ *----------------------------------------------------------------------
+ */
+
+Tcl_UniChar *
+TclGetUnicodeFromObj(objPtr)
+    Tcl_Obj *objPtr; /* The object to find the unicode string for. */
+{
+    Unicode *unicodePtr;
+    int numBytes;
+    char *src;
+
+    /*
+     * Make sure the object carries the Unicode internal rep before
+     * looking at it.
+     */
+
+    SetUnicodeFromAny(NULL, objPtr);
+    unicodePtr = GET_UNICODE(objPtr);
+
+    if (AllSingleByteChars(objPtr) && (unicodePtr->allocated == 0)) {
+
+        /*
+         * If all of the characters in the Utf string are 1 byte chars,
+         * we don't normally store the unicode str. Since this
+         * function must return a unicode string, and one has not yet
+         * been stored, force the Unicode to be calculated and stored
+         * now.
+         */
+
+        src = Tcl_GetStringFromObj(objPtr, &numBytes);
+        SetFullUnicodeFromAny(objPtr, src, numBytes, unicodePtr->numChars);
+
+        /*
+         * SetFullUnicodeFromAny allocates a brand new Unicode struct,
+         * hands the previous internal rep to the type's freeIntRepProc
+         * and stores the new struct in the object. The pointer fetched
+         * above therefore refers to the old (optimized, character-less)
+         * rep and must not be used any more: fetch the new one.
+         */
+
+        unicodePtr = GET_UNICODE(objPtr);
+    }
+    return (Tcl_UniChar *) unicodePtr->chars;
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
* TclGetUnicodeLengthFromObj --
*
* Get the length of the Unicode string from the Tcl object. If
@@ -650,17 +700,16 @@ UpdateStringOfUnicode(objPtr)
*
* SetOptUnicodeFromAny --
*
- * Generate the Unicode internal rep from the string rep.
+ * Generate the optimized Unicode internal rep from the string rep.
*
* Results:
- * The return value is always TCL_OK.
+ * None.
*
* Side effects:
- * A Unicode object is stored as the internal rep of objPtr. The Unicode
- * ojbect is opitmized for the case where each UTF char in a string is only
- * one byte. In this case, we store the value of numChars, but we don't copy
- * the bytes to the unicodeObj->chars. Before accessing obj->chars, check if
- * all chars are 1 byte long.
+ * The Unicode object is optimized for the case where each UTF char in
+ * a string is only one byte. In this case, we store the value of
+ * numChars, but we don't copy the bytes to the unicodeObj->chars.
+ * Before accessing obj->chars, check if all chars are 1 byte long.
*
*---------------------------------------------------------------------------
*/
@@ -675,6 +724,8 @@ SetOptUnicodeFromAny(objPtr, numChars)
unicodePtr = (Unicode *) ckalloc(UNICODE_SIZE(4));
unicodePtr->numChars = numChars;
+ unicodePtr->allocated = 0;
+ unicodePtr->used = 0;
typePtr = objPtr->typePtr;
if ((typePtr != NULL) && (typePtr->freeIntRepProc) != NULL) {
@@ -687,6 +738,58 @@ SetOptUnicodeFromAny(objPtr, numChars)
/*
*---------------------------------------------------------------------------
*
+ * SetFullUnicodeFromAny --
+ *
+ * Generate the full (non-optimized) Unicode internal rep from the
+ * string rep.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * The Unicode internal rep will contain a copy of the string "src" in
+ * unicode format.
+ *
+ *---------------------------------------------------------------------------
+ */
+
+static void
+SetFullUnicodeFromAny(objPtr, src, numBytes, numChars)
+    Tcl_Obj *objPtr; /* The object to convert to type Unicode. */
+    char *src; /* String to convert; it is decoded with
+                * Tcl_UtfToUniChar. */
+    int numBytes; /* Number of bytes of src to convert. */
+    int numChars; /* Number of Tcl_UniChar slots to allocate;
+                   * also recorded as the rep's numChars. */
+{
+    Unicode *newRepPtr;
+    Tcl_UniChar *outPtr;
+    Tcl_ObjType *oldTypePtr;
+    char *limit;
+
+    /*
+     * Allocate the new internal rep with room for numChars Unicode
+     * characters and record its bookkeeping fields.
+     */
+
+    newRepPtr = (Unicode *) ckalloc(UNICODE_SIZE(numChars
+            * sizeof(Tcl_UniChar)));
+    newRepPtr->numChars = numChars;
+    newRepPtr->used = numChars * sizeof(Tcl_UniChar);
+    newRepPtr->allocated = numChars * sizeof(Tcl_UniChar);
+
+    /*
+     * Decode src one character at a time into the new rep's storage,
+     * stopping once numBytes bytes of input have been consumed.
+     */
+
+    limit = src + numBytes;
+    outPtr = (Tcl_UniChar *) newRepPtr->chars;
+    while (src < limit) {
+        src += Tcl_UtfToUniChar(src, outPtr);
+        outPtr++;
+    }
+
+    /*
+     * Let the previous type release its internal rep (if it has a
+     * freeIntRepProc), then install the new Unicode rep in the object.
+     */
+
+    oldTypePtr = objPtr->typePtr;
+    if ((oldTypePtr != NULL) && (oldTypePtr->freeIntRepProc != NULL)) {
+        (*oldTypePtr->freeIntRepProc)(objPtr);
+    }
+    objPtr->typePtr = &tclUnicodeType;
+    SET_UNICODE(objPtr, newRepPtr);
+}
+
+/*
+ *---------------------------------------------------------------------------
+ *
* SetUnicodeFromAny --
*
* Generate the Unicode internal rep from the string rep.
@@ -711,9 +814,7 @@ SetUnicodeFromAny(interp, objPtr)
{
Tcl_ObjType *typePtr;
int numBytes, numChars;
- char *src, *srcEnd;
- Unicode *unicodePtr;
- unsigned char *dst;
+ char *src;
typePtr = objPtr->typePtr;
if (typePtr != &tclUnicodeType) {
@@ -723,24 +824,7 @@ SetUnicodeFromAny(interp, objPtr)
if (numChars == numBytes) {
SetOptUnicodeFromAny(objPtr, numChars);
} else {
- unicodePtr = (Unicode *) ckalloc(UNICODE_SIZE(numChars
- * sizeof(Tcl_UniChar)));
- srcEnd = src + numBytes;
-
- for (dst = unicodePtr->chars; src < srcEnd;
- dst += sizeof(Tcl_UniChar)) {
- src += Tcl_UtfToUniChar(src, (Tcl_UniChar *) dst);
- }
-
- unicodePtr->used = numChars * sizeof(Tcl_UniChar);
- unicodePtr->numChars = numChars;
- unicodePtr->allocated = numChars * sizeof(Tcl_UniChar);
-
- if ((typePtr != NULL) && (typePtr->freeIntRepProc) != NULL) {
- (*typePtr->freeIntRepProc)(objPtr);
- }
- objPtr->typePtr = &tclUnicodeType;
- SET_UNICODE(objPtr, unicodePtr);
+ SetFullUnicodeFromAny(objPtr, src, numBytes, numChars);
}
}
return TCL_OK;