diff options
Diffstat (limited to 'generic/tclStringObj.c')
-rw-r--r-- | generic/tclStringObj.c | 86 |
1 files changed, 52 insertions, 34 deletions
diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c index 59758bb..75638cf 100644 --- a/generic/tclStringObj.c +++ b/generic/tclStringObj.c @@ -466,7 +466,7 @@ Tcl_GetCharLength( *---------------------------------------------------------------------- */ -Tcl_UniChar +int Tcl_GetUniChar( Tcl_Obj *objPtr, /* The object to get the Unicode charater * from. */ @@ -483,7 +483,7 @@ Tcl_GetUniChar( if (TclIsPureByteArray(objPtr)) { unsigned char *bytes = Tcl_GetByteArrayFromObj(objPtr, NULL); - return (Tcl_UniChar) bytes[index]; + return (int) bytes[index]; } /* @@ -493,7 +493,7 @@ Tcl_GetUniChar( SetStringFromAny(NULL, objPtr); stringPtr = GET_STRING(objPtr); - if (stringPtr->hasUnicode == 0) { + if ((stringPtr->flags & TCL_HAS_UNICODE) == 0) { /* * If numChars is unknown, compute it. */ @@ -507,7 +507,7 @@ Tcl_GetUniChar( FillUnicodeRep(objPtr); stringPtr = GET_STRING(objPtr); } - return stringPtr->unicode[index]; + return (int) stringPtr->unicode[index]; } /* @@ -569,7 +569,7 @@ Tcl_GetUnicodeFromObj( SetStringFromAny(NULL, objPtr); stringPtr = GET_STRING(objPtr); - if (stringPtr->hasUnicode == 0) { + if ((stringPtr->flags & TCL_HAS_UNICODE) == 0) { FillUnicodeRep(objPtr); stringPtr = GET_STRING(objPtr); } @@ -607,6 +607,7 @@ Tcl_GetRange( { Tcl_Obj *newObjPtr; /* The Tcl object to find the range of. */ String *stringPtr; + int i, firstoffset = 0, lastoffset = 0; /* * Optimize the case where we're really dealing with a bytearray object @@ -627,7 +628,7 @@ Tcl_GetRange( SetStringFromAny(NULL, objPtr); stringPtr = GET_STRING(objPtr); - if (stringPtr->hasUnicode == 0) { + if ((stringPtr->flags & TCL_HAS_UNICODE) == 0) { /* * If numChars is unknown, compute it. */ @@ -651,7 +652,17 @@ Tcl_GetRange( stringPtr = GET_STRING(objPtr); } - return Tcl_NewUnicodeObj(stringPtr->unicode + first, last-first+1); + for (i = 0; i <= last + lastoffset + firstoffset; i++) { + if ((stringPtr->unicode[i] & 0xfc00) == 0xd800) { + if (i < first + firstoffset) { + firstoffset++; + } else { + lastoffset++; + } + } + } + + return Tcl_NewUnicodeObj(stringPtr->unicode + first + firstoffset, last-first+1 + lastoffset + firstoffset); } /* @@ -783,7 +794,7 @@ Tcl_SetObjLength( */ stringPtr->numChars = -1; - stringPtr->hasUnicode = 0; + stringPtr->flags = 0; } else { /* * Changing length of pure unicode string. @@ -802,7 +813,7 @@ Tcl_SetObjLength( stringPtr->numChars = length; stringPtr->unicode[length] = 0; - stringPtr->hasUnicode = 1; + stringPtr->flags |= TCL_HAS_UNICODE; /* * Can only get here when objPtr->bytes == NULL. No need to invalidate @@ -893,7 +904,7 @@ Tcl_AttemptSetObjLength( */ stringPtr->numChars = -1; - stringPtr->hasUnicode = 0; + stringPtr->flags = 0; } else { /* * Changing length of pure unicode string. @@ -917,7 +928,7 @@ Tcl_AttemptSetObjLength( stringPtr->unicode[length] = 0; stringPtr->numChars = length; - stringPtr->hasUnicode = 1; + stringPtr->flags |= TCL_HAS_UNICODE; /* * Can only get here when objPtr->bytes == NULL. No need to invalidate @@ -1000,7 +1011,7 @@ SetUnicodeObj( memcpy(stringPtr->unicode, unicode, numChars * sizeof(Tcl_UniChar)); stringPtr->unicode[numChars] = 0; stringPtr->numChars = numChars; - stringPtr->hasUnicode = 1; + stringPtr->flags |= TCL_HAS_UNICODE; TclInvalidateStringRep(objPtr); stringPtr->allocated = 0; @@ -1071,7 +1082,7 @@ Tcl_AppendLimitedToObj( SetStringFromAny(NULL, objPtr); stringPtr = GET_STRING(objPtr); - if (stringPtr->hasUnicode && stringPtr->numChars > 0) { + if ((stringPtr->flags & TCL_HAS_UNICODE) && stringPtr->numChars > 0) { AppendUtfToUnicodeRep(objPtr, bytes, toCopy); } else { AppendUtfToUtfRep(objPtr, bytes, toCopy); @@ -1082,7 +1093,7 @@ Tcl_AppendLimitedToObj( } stringPtr = GET_STRING(objPtr); - if (stringPtr->hasUnicode && stringPtr->numChars > 0) { + if ((stringPtr->flags & TCL_HAS_UNICODE) && stringPtr->numChars > 0) { AppendUtfToUnicodeRep(objPtr, ellipsis, strlen(ellipsis)); } else { AppendUtfToUtfRep(objPtr, ellipsis, strlen(ellipsis)); @@ -1161,7 +1172,7 @@ Tcl_AppendUnicodeToObj( * objPtr's string rep. */ - if (stringPtr->hasUnicode) { + if ((stringPtr->flags & TCL_HAS_UNICODE)) { AppendUnicodeToUnicodeRep(objPtr, unicode, length); } else { AppendUnicodeToUtfRep(objPtr, unicode, length); @@ -1267,7 +1278,7 @@ Tcl_AppendObjToObj( * appendObjPtr and append it. */ - if (stringPtr->hasUnicode) { + if ((stringPtr->flags & TCL_HAS_UNICODE)) { /* * If appendObjPtr is not of the "String" type, don't convert it. */ @@ -1543,7 +1554,7 @@ AppendUtfToUtfRep( */ stringPtr->numChars = -1; - stringPtr->hasUnicode = 0; + stringPtr->flags = 0; if (bytes) { memmove(objPtr->bytes + oldLength, bytes, numBytes); @@ -2724,7 +2735,7 @@ TclStringRepeat( if (!binary) { if (objPtr->typePtr == &tclStringType) { String *stringPtr = GET_STRING(objPtr); - if (stringPtr->hasUnicode) { + if (stringPtr->flags & TCL_HAS_UNICODE) { unichar = 1; } } @@ -3444,7 +3455,7 @@ TclStringObjReverse( SetStringFromAny(NULL, objPtr); stringPtr = GET_STRING(objPtr); - if (stringPtr->hasUnicode) { + if ((stringPtr->flags & TCL_HAS_UNICODE)) { Tcl_UniChar *from = Tcl_GetUnicode(objPtr); Tcl_UniChar *src = from + stringPtr->numChars; @@ -3456,7 +3467,6 @@ TclStringObjReverse( * Tcl_SetObjLength into growing the unicode rep buffer. */ - ch = 0; objPtr = Tcl_NewUnicodeObj(&ch, 1); Tcl_SetObjLength(objPtr, stringPtr->numChars); to = Tcl_GetUnicode(objPtr); @@ -3558,10 +3568,10 @@ ExtendUnicodeRepWithString( int numAppendChars) { String *stringPtr = GET_STRING(objPtr); - int needed, numOrigChars = 0; - Tcl_UniChar *dst; + int incr, needed, numOrigChars = 0; + Tcl_UniChar *dst, unichar = 0; - if (stringPtr->hasUnicode) { + if ((stringPtr->flags & TCL_HAS_UNICODE)) { numOrigChars = stringPtr->numChars; } if (numAppendChars == -1) { @@ -3575,14 +3585,19 @@ ExtendUnicodeRepWithString( stringPtr = GET_STRING(objPtr); } - stringPtr->hasUnicode = 1; + stringPtr->flags |= TCL_HAS_UNICODE; if (bytes) { stringPtr->numChars = needed; } else { numAppendChars = 0; } for (dst=stringPtr->unicode + numOrigChars; numAppendChars-- > 0; dst++) { - bytes += TclUtfToUniChar(bytes, dst); + bytes += (incr = TclUtfToUniChar(bytes, &unichar)); + *dst = unichar; + if (!incr) { + bytes += TclUtfToUniChar(bytes, &unichar); + *++dst = unichar; + } } *dst = 0; } @@ -3625,7 +3640,7 @@ DupStringInternalRep( return; } - if (srcStringPtr->hasUnicode) { + if (srcStringPtr->flags & TCL_HAS_UNICODE) { int copyMaxChars; if (srcStringPtr->maxChars / 2 >= srcStringPtr->numChars) { @@ -3647,7 +3662,7 @@ DupStringInternalRep( copyStringPtr->maxChars = 0; copyStringPtr->unicode[0] = 0; } - copyStringPtr->hasUnicode = srcStringPtr->hasUnicode; + copyStringPtr->flags = srcStringPtr->flags; copyStringPtr->numChars = srcStringPtr->numChars; /* @@ -3702,7 +3717,7 @@ SetStringFromAny( stringPtr->numChars = -1; stringPtr->allocated = objPtr->length; stringPtr->maxChars = 0; - stringPtr->hasUnicode = 0; + stringPtr->flags = 0; SET_STRING(objPtr, stringPtr); objPtr->typePtr = &tclStringType; } @@ -3761,8 +3776,8 @@ ExtendStringRepWithUnicode( * Pre-condition: this is the "string" Tcl_ObjType. */ - int i, origLength, size = 0; - char *dst; + int incr, i, origLength, size = 0, offset = 0; + char *dst, buf[TCL_UTF_MAX]; String *stringPtr = GET_STRING(objPtr); if (numChars < 0) { @@ -3787,9 +3802,11 @@ ExtendStringRepWithUnicode( goto copyBytes; } - for (i = 0; i < numChars && size >= 0; i++) { - size += TclUtfCount(unicode[i]); + for (i = 0; i < numChars + offset && size >= 0; i++) { + size += (incr = Tcl_UniCharToUtf((int) unicode[i], buf)); + if (!incr) offset++; } + offset = 0; if (size < 0) { Tcl_Panic("max size for a Tcl value (%d bytes) exceeded", INT_MAX); } @@ -3804,8 +3821,9 @@ ExtendStringRepWithUnicode( copyBytes: dst = objPtr->bytes + origLength; - for (i = 0; i < numChars; i++) { - dst += Tcl_UniCharToUtf((int) unicode[i], dst); + for (i = 0; i < numChars + offset; i++) { + dst += (incr = Tcl_UniCharToUtf((int) unicode[i], dst)); + if (!incr) offset++; } *dst = '\0'; objPtr->length = dst - objPtr->bytes; |