summary refs log tree commit diff stats
path: root/generic/tclStringObj.c
diff options
context:
space:
mode:
Diffstat (limited to 'generic/tclStringObj.c')
-rw-r--r-- generic/tclStringObj.c 86
1 files changed, 52 insertions, 34 deletions
diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c
index 59758bb..75638cf 100644
--- a/generic/tclStringObj.c
+++ b/generic/tclStringObj.c
@@ -466,7 +466,7 @@ Tcl_GetCharLength(
*----------------------------------------------------------------------
*/
-Tcl_UniChar
+int
Tcl_GetUniChar(
Tcl_Obj *objPtr, /* The object to get the Unicode character
* from. */
@@ -483,7 +483,7 @@ Tcl_GetUniChar(
if (TclIsPureByteArray(objPtr)) {
unsigned char *bytes = Tcl_GetByteArrayFromObj(objPtr, NULL);
- return (Tcl_UniChar) bytes[index];
+ return (int) bytes[index];
}
/*
@@ -493,7 +493,7 @@ Tcl_GetUniChar(
SetStringFromAny(NULL, objPtr);
stringPtr = GET_STRING(objPtr);
- if (stringPtr->hasUnicode == 0) {
+ if ((stringPtr->flags & TCL_HAS_UNICODE) == 0) {
/*
* If numChars is unknown, compute it.
*/
@@ -507,7 +507,7 @@ Tcl_GetUniChar(
FillUnicodeRep(objPtr);
stringPtr = GET_STRING(objPtr);
}
- return stringPtr->unicode[index];
+ return (int) stringPtr->unicode[index];
}
/*
@@ -569,7 +569,7 @@ Tcl_GetUnicodeFromObj(
SetStringFromAny(NULL, objPtr);
stringPtr = GET_STRING(objPtr);
- if (stringPtr->hasUnicode == 0) {
+ if ((stringPtr->flags & TCL_HAS_UNICODE) == 0) {
FillUnicodeRep(objPtr);
stringPtr = GET_STRING(objPtr);
}
@@ -607,6 +607,7 @@ Tcl_GetRange(
{
Tcl_Obj *newObjPtr; /* The Tcl object to find the range of. */
String *stringPtr;
+ int i, firstoffset = 0, lastoffset = 0;
/*
* Optimize the case where we're really dealing with a bytearray object
@@ -627,7 +628,7 @@ Tcl_GetRange(
SetStringFromAny(NULL, objPtr);
stringPtr = GET_STRING(objPtr);
- if (stringPtr->hasUnicode == 0) {
+ if ((stringPtr->flags & TCL_HAS_UNICODE) == 0) {
/*
* If numChars is unknown, compute it.
*/
@@ -651,7 +652,17 @@ Tcl_GetRange(
stringPtr = GET_STRING(objPtr);
}
- return Tcl_NewUnicodeObj(stringPtr->unicode + first, last-first+1);
+ for (i = 0; i <= last + lastoffset + firstoffset; i++) {
+ if ((stringPtr->unicode[i] & 0xfc00) == 0xd800) {
+ if (i < first + firstoffset) {
+ firstoffset++;
+ } else {
+ lastoffset++;
+ }
+ }
+ }
+
+ return Tcl_NewUnicodeObj(stringPtr->unicode + first + firstoffset, last-first+1 + lastoffset + firstoffset);
}
/*
@@ -783,7 +794,7 @@ Tcl_SetObjLength(
*/
stringPtr->numChars = -1;
- stringPtr->hasUnicode = 0;
+ stringPtr->flags = 0;
} else {
/*
* Changing length of pure unicode string.
@@ -802,7 +813,7 @@ Tcl_SetObjLength(
stringPtr->numChars = length;
stringPtr->unicode[length] = 0;
- stringPtr->hasUnicode = 1;
+ stringPtr->flags |= TCL_HAS_UNICODE;
/*
* Can only get here when objPtr->bytes == NULL. No need to invalidate
@@ -893,7 +904,7 @@ Tcl_AttemptSetObjLength(
*/
stringPtr->numChars = -1;
- stringPtr->hasUnicode = 0;
+ stringPtr->flags = 0;
} else {
/*
* Changing length of pure unicode string.
@@ -917,7 +928,7 @@ Tcl_AttemptSetObjLength(
stringPtr->unicode[length] = 0;
stringPtr->numChars = length;
- stringPtr->hasUnicode = 1;
+ stringPtr->flags |= TCL_HAS_UNICODE;
/*
* Can only get here when objPtr->bytes == NULL. No need to invalidate
@@ -1000,7 +1011,7 @@ SetUnicodeObj(
memcpy(stringPtr->unicode, unicode, numChars * sizeof(Tcl_UniChar));
stringPtr->unicode[numChars] = 0;
stringPtr->numChars = numChars;
- stringPtr->hasUnicode = 1;
+ stringPtr->flags |= TCL_HAS_UNICODE;
TclInvalidateStringRep(objPtr);
stringPtr->allocated = 0;
@@ -1071,7 +1082,7 @@ Tcl_AppendLimitedToObj(
SetStringFromAny(NULL, objPtr);
stringPtr = GET_STRING(objPtr);
- if (stringPtr->hasUnicode && stringPtr->numChars > 0) {
+ if ((stringPtr->flags & TCL_HAS_UNICODE) && stringPtr->numChars > 0) {
AppendUtfToUnicodeRep(objPtr, bytes, toCopy);
} else {
AppendUtfToUtfRep(objPtr, bytes, toCopy);
@@ -1082,7 +1093,7 @@ Tcl_AppendLimitedToObj(
}
stringPtr = GET_STRING(objPtr);
- if (stringPtr->hasUnicode && stringPtr->numChars > 0) {
+ if ((stringPtr->flags & TCL_HAS_UNICODE) && stringPtr->numChars > 0) {
AppendUtfToUnicodeRep(objPtr, ellipsis, strlen(ellipsis));
} else {
AppendUtfToUtfRep(objPtr, ellipsis, strlen(ellipsis));
@@ -1161,7 +1172,7 @@ Tcl_AppendUnicodeToObj(
* objPtr's string rep.
*/
- if (stringPtr->hasUnicode) {
+ if ((stringPtr->flags & TCL_HAS_UNICODE)) {
AppendUnicodeToUnicodeRep(objPtr, unicode, length);
} else {
AppendUnicodeToUtfRep(objPtr, unicode, length);
@@ -1267,7 +1278,7 @@ Tcl_AppendObjToObj(
* appendObjPtr and append it.
*/
- if (stringPtr->hasUnicode) {
+ if ((stringPtr->flags & TCL_HAS_UNICODE)) {
/*
* If appendObjPtr is not of the "String" type, don't convert it.
*/
@@ -1543,7 +1554,7 @@ AppendUtfToUtfRep(
*/
stringPtr->numChars = -1;
- stringPtr->hasUnicode = 0;
+ stringPtr->flags = 0;
if (bytes) {
memmove(objPtr->bytes + oldLength, bytes, numBytes);
@@ -2724,7 +2735,7 @@ TclStringRepeat(
if (!binary) {
if (objPtr->typePtr == &tclStringType) {
String *stringPtr = GET_STRING(objPtr);
- if (stringPtr->hasUnicode) {
+ if (stringPtr->flags & TCL_HAS_UNICODE) {
unichar = 1;
}
}
@@ -3444,7 +3455,7 @@ TclStringObjReverse(
SetStringFromAny(NULL, objPtr);
stringPtr = GET_STRING(objPtr);
- if (stringPtr->hasUnicode) {
+ if ((stringPtr->flags & TCL_HAS_UNICODE)) {
Tcl_UniChar *from = Tcl_GetUnicode(objPtr);
Tcl_UniChar *src = from + stringPtr->numChars;
@@ -3456,7 +3467,6 @@ TclStringObjReverse(
* Tcl_SetObjLength into growing the unicode rep buffer.
*/
- ch = 0;
objPtr = Tcl_NewUnicodeObj(&ch, 1);
Tcl_SetObjLength(objPtr, stringPtr->numChars);
to = Tcl_GetUnicode(objPtr);
@@ -3558,10 +3568,10 @@ ExtendUnicodeRepWithString(
int numAppendChars)
{
String *stringPtr = GET_STRING(objPtr);
- int needed, numOrigChars = 0;
- Tcl_UniChar *dst;
+ int incr, needed, numOrigChars = 0;
+ Tcl_UniChar *dst, unichar = 0;
- if (stringPtr->hasUnicode) {
+ if ((stringPtr->flags & TCL_HAS_UNICODE)) {
numOrigChars = stringPtr->numChars;
}
if (numAppendChars == -1) {
@@ -3575,14 +3585,19 @@ ExtendUnicodeRepWithString(
stringPtr = GET_STRING(objPtr);
}
- stringPtr->hasUnicode = 1;
+ stringPtr->flags |= TCL_HAS_UNICODE;
if (bytes) {
stringPtr->numChars = needed;
} else {
numAppendChars = 0;
}
for (dst=stringPtr->unicode + numOrigChars; numAppendChars-- > 0; dst++) {
- bytes += TclUtfToUniChar(bytes, dst);
+ bytes += (incr = TclUtfToUniChar(bytes, &unichar));
+ *dst = unichar;
+ if (!incr) {
+ bytes += TclUtfToUniChar(bytes, &unichar);
+ *++dst = unichar;
+ }
}
*dst = 0;
}
@@ -3625,7 +3640,7 @@ DupStringInternalRep(
return;
}
- if (srcStringPtr->hasUnicode) {
+ if (srcStringPtr->flags & TCL_HAS_UNICODE) {
int copyMaxChars;
if (srcStringPtr->maxChars / 2 >= srcStringPtr->numChars) {
@@ -3647,7 +3662,7 @@ DupStringInternalRep(
copyStringPtr->maxChars = 0;
copyStringPtr->unicode[0] = 0;
}
- copyStringPtr->hasUnicode = srcStringPtr->hasUnicode;
+ copyStringPtr->flags = srcStringPtr->flags;
copyStringPtr->numChars = srcStringPtr->numChars;
/*
@@ -3702,7 +3717,7 @@ SetStringFromAny(
stringPtr->numChars = -1;
stringPtr->allocated = objPtr->length;
stringPtr->maxChars = 0;
- stringPtr->hasUnicode = 0;
+ stringPtr->flags = 0;
SET_STRING(objPtr, stringPtr);
objPtr->typePtr = &tclStringType;
}
@@ -3761,8 +3776,8 @@ ExtendStringRepWithUnicode(
* Pre-condition: this is the "string" Tcl_ObjType.
*/
- int i, origLength, size = 0;
- char *dst;
+ int incr, i, origLength, size = 0, offset = 0;
+ char *dst, buf[TCL_UTF_MAX];
String *stringPtr = GET_STRING(objPtr);
if (numChars < 0) {
@@ -3787,9 +3802,11 @@ ExtendStringRepWithUnicode(
goto copyBytes;
}
- for (i = 0; i < numChars && size >= 0; i++) {
- size += TclUtfCount(unicode[i]);
+ for (i = 0; i < numChars + offset && size >= 0; i++) {
+ size += (incr = Tcl_UniCharToUtf((int) unicode[i], buf));
+ if (!incr) offset++;
}
+ offset = 0;
if (size < 0) {
Tcl_Panic("max size for a Tcl value (%d bytes) exceeded", INT_MAX);
}
@@ -3804,8 +3821,9 @@ ExtendStringRepWithUnicode(
copyBytes:
dst = objPtr->bytes + origLength;
- for (i = 0; i < numChars; i++) {
- dst += Tcl_UniCharToUtf((int) unicode[i], dst);
+ for (i = 0; i < numChars + offset; i++) {
+ dst += (incr = Tcl_UniCharToUtf((int) unicode[i], dst));
+ if (!incr) offset++;
}
*dst = '\0';
objPtr->length = dst - objPtr->bytes;