summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- generic/tclBinary.c 4
-rw-r--r-- generic/tclCmdMZ.c 27
-rw-r--r-- generic/tclCompCmdsSZ.c 2
-rw-r--r-- generic/tclCompile.c 4
-rw-r--r-- generic/tclEncoding.c 10
-rw-r--r-- generic/tclExecute.c 2
-rw-r--r-- generic/tclParse.c 8
-rw-r--r-- generic/tclScan.c 4
-rw-r--r-- generic/tclStringObj.c 6
-rw-r--r-- generic/tclStubInit.c 4
-rw-r--r-- generic/tclUtf.c 141
-rw-r--r-- generic/tclUtil.c 2
-rw-r--r-- win/tclWin32Dll.c 4
13 files changed, 120 insertions, 98 deletions
diff --git a/generic/tclBinary.c b/generic/tclBinary.c
index 2874ea8..d810e84 100644
--- a/generic/tclBinary.c
+++ b/generic/tclBinary.c
@@ -1211,7 +1211,7 @@ BinaryFormatCmd(
badField:
{
Tcl_UniChar ch = 0;
- char buf[TCL_UTF_MAX + 1];
+ char buf[TCL_UTF_MAX + 1] = "";
TclUtfToUniChar(errorString, &ch);
buf[Tcl_UniCharToUtf(ch, buf)] = '\0';
@@ -1581,7 +1581,7 @@ BinaryScanCmd(
badField:
{
Tcl_UniChar ch = 0;
- char buf[TCL_UTF_MAX + 1];
+ char buf[TCL_UTF_MAX + 1] = "";
TclUtfToUniChar(errorString, &ch);
buf[Tcl_UniCharToUtf(ch, buf)] = '\0';
diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c
index 3a712f9..039cd16 100644
--- a/generic/tclCmdMZ.c
+++ b/generic/tclCmdMZ.c
@@ -1085,8 +1085,8 @@ Tcl_SplitObjCmd(
fullchar = ch;
#if TCL_UTF_MAX == 4
- if (!len) {
- len += TclUtfToUniChar(stringPtr, &ch);
+ if ((ch >= 0xD800) && (len < 3)) {
+ len += TclUtfToUniChar(stringPtr + len, &ch);
fullchar = (((fullchar & 0x3ff) << 10) | (ch & 0x3ff)) + 0x10000;
}
#endif
@@ -1425,9 +1425,14 @@ StringIndexCmd(
Tcl_SetObjResult(interp, Tcl_NewByteArrayObj(&uch, 1));
} else {
- char buf[TCL_UTF_MAX];
+ char buf[TCL_UTF_MAX] = "";
length = Tcl_UniCharToUtf(ch, buf);
+#if TCL_UTF_MAX > 3
+ if ((ch >= 0xD800) && (length < 3)) {
+ length += Tcl_UniCharToUtf(-1, buf + length);
+ }
+#endif
Tcl_SetObjResult(interp, Tcl_NewStringObj(buf, length));
}
}
@@ -1795,8 +1800,8 @@ StringIsCmd(
length2 = TclUtfToUniChar(string1, &ch);
fullchar = ch;
#if TCL_UTF_MAX == 4
- if (!length2) {
- length2 = TclUtfToUniChar(string1, &ch);
+ if ((ch >= 0xD800) && (length2 < 3)) {
+ length2 += TclUtfToUniChar(string1 + length2, &ch);
fullchar = (((fullchar & 0x3ff) << 10) | (ch & 0x3ff)) + 0x10000;
}
#endif
@@ -1876,7 +1881,7 @@ StringMapCmd(
const char *string = TclGetStringFromObj(objv[1], &length2);
if ((length2 > 1) &&
- strncmp(string, "-nocase", (size_t) length2) == 0) {
+ strncmp(string, "-nocase", length2) == 0) {
nocase = 1;
} else {
Tcl_SetObjResult(interp, Tcl_ObjPrintf(
@@ -2143,7 +2148,7 @@ StringMatchCmd(
const char *string = TclGetStringFromObj(objv[1], &length);
if ((length > 1) &&
- strncmp(string, "-nocase", (size_t) length) == 0) {
+ strncmp(string, "-nocase", length) == 0) {
nocase = TCL_MATCH_NOCASE;
} else {
Tcl_SetObjResult(interp, Tcl_ObjPrintf(
@@ -2611,10 +2616,10 @@ StringEqualCmd(
for (i = 1; i < objc-2; i++) {
string2 = TclGetStringFromObj(objv[i], &length2);
- if ((length2 > 1) && !strncmp(string2, "-nocase", (size_t)length2)) {
+ if ((length2 > 1) && !strncmp(string2, "-nocase", length2)) {
nocase = 1;
} else if ((length2 > 1)
- && !strncmp(string2, "-length", (size_t)length2)) {
+ && !strncmp(string2, "-length", length2)) {
if (i+1 >= objc-2) {
goto str_cmp_args;
}
@@ -2888,10 +2893,10 @@ int TclStringCmpOpts(
for (i = 1; i < objc-2; i++) {
string = TclGetStringFromObj(objv[i], &length);
- if ((length > 1) && !strncmp(string, "-nocase", (size_t)length)) {
+ if ((length > 1) && !strncmp(string, "-nocase", length)) {
*nocase = 1;
} else if ((length > 1)
- && !strncmp(string, "-length", (size_t)length)) {
+ && !strncmp(string, "-length", length)) {
if (i+1 >= objc-2) {
goto str_cmp_args;
}
diff --git a/generic/tclCompCmdsSZ.c b/generic/tclCompCmdsSZ.c
index c13376b..53bff6e 100644
--- a/generic/tclCompCmdsSZ.c
+++ b/generic/tclCompCmdsSZ.c
@@ -1496,7 +1496,7 @@ TclSubstCompile(
for (endTokenPtr = tokenPtr + parse.numTokens;
tokenPtr < endTokenPtr; tokenPtr = TokenAfter(tokenPtr)) {
int length, literal, catchRange, breakJump;
- char buf[TCL_UTF_MAX];
+ char buf[TCL_UTF_MAX] = "";
JumpFixup startFixup, okFixup, returnFixup, breakFixup;
JumpFixup continueFixup, otherFixup, endFixup;
diff --git a/generic/tclCompile.c b/generic/tclCompile.c
index f716195..6f90072 100644
--- a/generic/tclCompile.c
+++ b/generic/tclCompile.c
@@ -1723,7 +1723,7 @@ TclWordKnownAtCompileTime(
case TCL_TOKEN_BS:
if (tempPtr != NULL) {
- char utfBuf[TCL_UTF_MAX];
+ char utfBuf[TCL_UTF_MAX] = "";
int length = TclParseBackslash(tokenPtr->start,
tokenPtr->size, NULL, utfBuf);
@@ -2337,7 +2337,7 @@ TclCompileTokens(
{
Tcl_DString textBuffer; /* Holds concatenated chars from adjacent
* TCL_TOKEN_TEXT, TCL_TOKEN_BS tokens. */
- char buffer[TCL_UTF_MAX];
+ char buffer[TCL_UTF_MAX] = "";
int i, numObjsToConcat, length, adjust;
unsigned char *entryCodeNext = envPtr->codeNext;
#define NUM_STATIC_POS 20
diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c
index 51909c2..144954b 100644
--- a/generic/tclEncoding.c
+++ b/generic/tclEncoding.c
@@ -2365,8 +2365,8 @@ UtfToUtfProc(
src += len;
dst += Tcl_UniCharToUtf(*chPtr, dst);
#if TCL_UTF_MAX == 4
- if (!len) {
- src += TclUtfToUniChar(src, chPtr);
+ if ((*chPtr >= 0xD800) && (len < 3)) {
+ src += TclUtfToUniChar(src + len, chPtr);
dst += Tcl_UniCharToUtf(*chPtr, dst);
}
#endif
@@ -2987,7 +2987,7 @@ Iso88591FromUtfProc(
if (ch > 0xff
#if TCL_UTF_MAX == 4
- || !len
+ || ((ch >= 0xD800) && (len < 3))
#endif
) {
if (flags & TCL_ENCODING_STOPONERROR) {
@@ -2995,7 +2995,7 @@ Iso88591FromUtfProc(
break;
}
#if TCL_UTF_MAX == 4
- if (!len) len = 4;
+ if ((ch >= 0xD800) && (len < 3)) len = 4;
#endif
/*
@@ -3425,7 +3425,7 @@ EscapeFromUtfProc(
break;
}
memcpy(dst, subTablePtr->sequence,
- (size_t) subTablePtr->sequenceLen);
+ subTablePtr->sequenceLen);
dst += subTablePtr->sequenceLen;
}
}
diff --git a/generic/tclExecute.c b/generic/tclExecute.c
index fafd511..77a173e 100644
--- a/generic/tclExecute.c
+++ b/generic/tclExecute.c
@@ -5515,7 +5515,7 @@ TEBCresume(
objResultPtr = Tcl_NewStringObj((const char *)
valuePtr->bytes+index, 1);
} else {
- char buf[TCL_UTF_MAX];
+ char buf[TCL_UTF_MAX] = "";
Tcl_UniChar ch = Tcl_GetUniChar(valuePtr, index);
/*
diff --git a/generic/tclParse.c b/generic/tclParse.c
index 74b02ce..1532c05 100644
--- a/generic/tclParse.c
+++ b/generic/tclParse.c
@@ -844,7 +844,7 @@ TclParseBackslash(
Tcl_UniChar unichar = 0;
int result;
int count;
- char buf[TCL_UTF_MAX];
+ char buf[TCL_UTF_MAX] = "";
if (numBytes == 0) {
if (readPtr != NULL) {
@@ -993,8 +993,8 @@ TclParseBackslash(
}
count = Tcl_UniCharToUtf(result, dst);
#if TCL_UTF_MAX > 3
- if (!count) {
- count = Tcl_UniCharToUtf(-1, dst);
+ if ((result >= 0xD800) && (count < 3)) {
+ count += Tcl_UniCharToUtf(-1, dst + count);
}
#endif
return count;
@@ -2217,7 +2217,7 @@ TclSubstTokens(
Tcl_Obj *appendObj = NULL;
const char *append = NULL;
int appendByteLength = 0;
- char utfCharBytes[TCL_UTF_MAX];
+ char utfCharBytes[TCL_UTF_MAX] = "";
switch (tokenPtr->type) {
case TCL_TOKEN_TEXT:
diff --git a/generic/tclScan.c b/generic/tclScan.c
index 3dae3b3..ade5f33 100644
--- a/generic/tclScan.c
+++ b/generic/tclScan.c
@@ -260,7 +260,7 @@ ValidateFormat(
Tcl_UniChar ch = 0;
int objIndex, xpgSize, nspace = numVars;
int *nassign = TclStackAlloc(interp, nspace * sizeof(int));
- char buf[TCL_UTF_MAX+1];
+ char buf[TCL_UTF_MAX+1] = "";
Tcl_Obj *errorMsg; /* Place to build an error messages. Note that
* these are messy operations because we do
* not want to use the formatting engine;
@@ -889,7 +889,7 @@ Tcl_ScanObjCmd(
i = (int)sch;
#if TCL_UTF_MAX == 4
if (!offset) {
- offset = Tcl_UtfToUniChar(string, &sch);
+ offset = TclUtfToUniChar(string, &sch);
i = (((i<<10) & 0x0FFC00) + 0x10000) + (sch & 0x3FF);
}
#endif
diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c
index 72e4a3d..46bd1c1 100644
--- a/generic/tclStringObj.c
+++ b/generic/tclStringObj.c
@@ -2002,9 +2002,9 @@ Tcl_AppendFormatToObj(
}
length = Tcl_UniCharToUtf(code, buf);
#if TCL_UTF_MAX > 3
- if (!length) {
+ if ((code >= 0xD800) && (length < 3)) {
/* Special case for handling high surrogates. */
- length = Tcl_UniCharToUtf(-1, buf);
+ length += Tcl_UniCharToUtf(-1, buf + length);
}
#endif
segment = Tcl_NewStringObj(buf, length);
@@ -3176,7 +3176,7 @@ ExtendStringRepWithUnicode(
copyBytes:
dst = objPtr->bytes + origLength;
for (i = 0; i < numChars; i++) {
- dst += Tcl_UniCharToUtf((int) unicode[i], dst);
+ dst += Tcl_UniCharToUtf(unicode[i], dst);
}
*dst = '\0';
objPtr->length = dst - objPtr->bytes;
diff --git a/generic/tclStubInit.c b/generic/tclStubInit.c
index 690e801..3ff686c 100644
--- a/generic/tclStubInit.c
+++ b/generic/tclStubInit.c
@@ -287,6 +287,10 @@ Tcl_WinTCharToUtf(
}
blen = Tcl_UniCharToUtf(*w, p);
p += blen;
+ if ((*w >= 0xD800) && (blen < 3)) {
+ /* Indication that high surrogate is handled */
+ blen = 0;
+ }
w++;
}
if (!blen) {
diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index b33bf5f..40dc29f 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -158,23 +158,22 @@ Tcl_UniCharToUtf(
if ((ch & 0xF800) == 0xD800) {
if (ch & 0x0400) {
/* Low surrogate */
- if (((buf[0] & 0xF8) == 0xF0) && ((buf[1] & 0xC0) == 0x80)
- && ((buf[2] & 0xCF) == 0)) {
- /* Previous Tcl_UniChar was a High surrogate, so combine */
- buf[3] = (char) ((ch & 0x3F) | 0x80);
- buf[2] |= (char) (((ch >> 6) & 0x0F) | 0x80);
- return 4;
+ if (((buf[0] & 0xC0) == 0x80) && ((buf[1] & 0xCF) == 0)) {
+ /* Previous Tcl_UniChar was a high surrogate, so combine */
+ buf[2] = (char) ((ch & 0x3F) | 0x80);
+ buf[1] |= (char) (((ch >> 6) & 0x0F) | 0x80);
+ return 3;
}
- /* Previous Tcl_UniChar was not a High surrogate, so just output */
+ /* Previous Tcl_UniChar was not a high surrogate, so just output */
} else {
/* High surrogate */
ch += 0x40;
/* Fill buffer with specific 3-byte (invalid) byte combination,
- so following Low surrogate can recognize it and combine */
+ so following low surrogate can recognize it and combine */
buf[2] = (char) ((ch << 4) & 0x30);
buf[1] = (char) (((ch >> 2) & 0x3F) | 0x80);
buf[0] = (char) (((ch >> 8) & 0x07) | 0xF0);
- return 0;
+ return 1;
}
}
#endif
@@ -190,11 +189,14 @@ Tcl_UniCharToUtf(
return 4;
}
} else if (ch == -1) {
- if (((buf[0] & 0xF8) == 0xF0) && ((buf[1] & 0xC0) == 0x80)
- && ((buf[2] & 0xCF) == 0)) {
- ch = 0xD7C0 + ((buf[0] & 0x07) << 8) + ((buf[1] & 0x3F) << 2)
- + ((buf[2] & 0x30) >> 4);
- goto three;
+ if (((buf[0] & 0xC0) == 0x80) && ((buf[1] & 0xCF) == 0)
+ && ((buf[-1] & 0xF8) == 0xF0)) {
+ ch = 0xD7C0 + ((buf[-1] & 0x07) << 8) + ((buf[0] & 0x3F) << 2)
+ + ((buf[1] & 0x30) >> 4);
+ buf[1] = (char) ((ch | 0x80) & 0xBF);
+ buf[0] = (char) (((ch >> 6) | 0x80) & 0xBF);
+ buf[-1] = (char) ((ch >> 12) | 0xE0);
+ return 2;
}
#endif
}
@@ -298,7 +300,7 @@ Tcl_UtfToUniChar(
register Tcl_UniChar *chPtr)/* Filled with the Tcl_UniChar represented by
* the UTF-8 string. */
{
- register int byte;
+ Tcl_UniChar byte;
/*
* Unroll 1 to 3 (or 4) byte UTF-8 sequences.
@@ -312,7 +314,21 @@ Tcl_UtfToUniChar(
* characters representing themselves.
*/
- *chPtr = (Tcl_UniChar) byte;
+#if TCL_UTF_MAX == 4
+ /* If *chPtr contains a high surrogate (produced by a previous
+ * Tcl_UtfToUniChar() call) and the next 3 bytes are UTF-8 continuation
+ * bytes, then we must produce a follow-up low surrogate. We only
+ * do that if the high surrogate matches the bits we encounter.
+ */
+ if ((byte >= 0x80)
+ && (((((byte - 0x10) << 2) & 0xFC) | 0xD800) == (*chPtr & 0xFCFC))
+ && ((src[1] & 0xF0) == (((*chPtr << 4) & 0x30) | 0x80))
+ && ((src[2] & 0xC0) == 0x80)) {
+ *chPtr = ((src[1] & 0x0F) << 6) + (src[2] & 0x3F) + 0xDC00;
+ return 3;
+ }
+#endif
+ *chPtr = byte;
return 1;
} else if (byte < 0xE0) {
if ((src[1] & 0xC0) == 0x80) {
@@ -320,7 +336,7 @@ Tcl_UtfToUniChar(
* Two-byte-character lead-byte followed by a trail-byte.
*/
- *chPtr = (Tcl_UniChar) (((byte & 0x1F) << 6) | (src[1] & 0x3F));
+ *chPtr = (((byte & 0x1F) << 6) | (src[1] & 0x3F));
if ((unsigned)(*chPtr - 1) >= (UNICODE_SELF - 1)) {
return 2;
}
@@ -336,7 +352,7 @@ Tcl_UtfToUniChar(
* Three-byte-character lead byte followed by two trail bytes.
*/
- *chPtr = (Tcl_UniChar) (((byte & 0x0F) << 12)
+ *chPtr = (((byte & 0x0F) << 12)
| ((src[1] & 0x3F) << 6) | (src[2] & 0x3F));
if (*chPtr > 0x7FF) {
return 3;
@@ -355,26 +371,19 @@ Tcl_UtfToUniChar(
* Four-byte-character lead byte followed by three trail bytes.
*/
#if TCL_UTF_MAX == 4
- Tcl_UniChar surrogate;
-
- byte = (((byte & 0x07) << 18) | ((src[1] & 0x3F) << 12)
- | ((src[2] & 0x3F) << 6) | (src[3] & 0x3F)) - 0x10000;
- surrogate = (Tcl_UniChar) (0xD800 + (byte >> 10));
- if (byte & 0x100000) {
+ Tcl_UniChar high = (((byte & 0x07) << 8) | ((src[1] & 0x3F) << 2)
+ | ((src[2] & 0x3F) >> 4)) - 0x40;
+ if (high >= 0x400) {
/* out of range, < 0x10000 or > 0x10ffff */
- } else if (*chPtr != surrogate) {
- /* produce high surrogate, but don't advance source pointer */
- *chPtr = surrogate;
- return 0;
} else {
- /* produce low surrogate, and advance source pointer */
- *chPtr = (Tcl_UniChar) (0xDC00 | (byte & 0x3FF));
- return 4;
+ /* produce high surrogate, advance source pointer */
+ *chPtr = 0xD800 + high;
+ return 1;
}
#else
- *chPtr = (Tcl_UniChar) (((byte & 0x07) << 18) | ((src[1] & 0x3F) << 12)
+ *chPtr = (((byte & 0x07) << 18) | ((src[1] & 0x3F) << 12)
| ((src[2] & 0x3F) << 6) | (src[3] & 0x3F));
- if ((unsigned)(*chPtr - 0x10000) <= 0xFFFFF) {
+ if ((*chPtr - 0x10000) <= 0xFFFFF) {
return 4;
}
#endif
@@ -387,7 +396,7 @@ Tcl_UtfToUniChar(
}
#endif
- *chPtr = (Tcl_UniChar) byte;
+ *chPtr = byte;
return 1;
}
@@ -578,8 +587,8 @@ Tcl_UtfFindFirst(
len = TclUtfToUniChar(src, &find);
fullchar = find;
#if TCL_UTF_MAX == 4
- if (!len) {
- len += TclUtfToUniChar(src, &find);
+ if ((ch >= 0xD800) && (len < 3)) {
+ len += TclUtfToUniChar(src + len, &find);
fullchar = (((fullchar & 0x3ff) << 10) | (find & 0x3ff)) + 0x10000;
}
#endif
@@ -626,8 +635,8 @@ Tcl_UtfFindLast(
len = TclUtfToUniChar(src, &find);
fullchar = find;
#if TCL_UTF_MAX == 4
- if (!len) {
- len += TclUtfToUniChar(src, &find);
+ if ((ch >= 0xD800) && (len < 3)) {
+ len += TclUtfToUniChar(src + len, &find);
fullchar = (((fullchar & 0x3ff) << 10) | (find & 0x3ff)) + 0x10000;
}
#endif
@@ -669,8 +678,8 @@ Tcl_UtfNext(
int len = TclUtfToUniChar(src, &ch);
#if TCL_UTF_MAX == 4
- if (len == 0) {
- len = TclUtfToUniChar(src, &ch);
+ if ((ch >= 0xD800) && (len < 3)) {
+ len += TclUtfToUniChar(src + len, &ch);
}
#endif
return src + len;
@@ -779,15 +788,15 @@ Tcl_UtfAtIndex(
register int index) /* The position of the desired character. */
{
Tcl_UniChar ch = 0;
- int len = 1;
+ int len = 0;
while (index-- > 0) {
len = TclUtfToUniChar(src, &ch);
src += len;
}
#if TCL_UTF_MAX == 4
- if (!len) {
- /* Index points at character following High Surrogate */
+ if ((ch >= 0xD800) && (len < 3)) {
+ /* Index points at character following high Surrogate */
src += TclUtfToUniChar(src, &ch);
}
#endif
@@ -871,7 +880,7 @@ Tcl_UtfToUpper(
{
Tcl_UniChar ch = 0, upChar;
char *src, *dst;
- int bytes;
+ int len;
/*
* Iterate over the string until we hit the terminating null.
@@ -879,7 +888,7 @@ Tcl_UtfToUpper(
src = dst = str;
while (*src) {
- bytes = TclUtfToUniChar(src, &ch);
+ len = TclUtfToUniChar(src, &ch);
upChar = Tcl_UniCharToUpper(ch);
/*
@@ -888,13 +897,13 @@ Tcl_UtfToUpper(
* char to dst if its size is <= the original char.
*/
- if (bytes < UtfCount(upChar)) {
- memcpy(dst, src, (size_t) bytes);
- dst += bytes;
+ if (len < UtfCount(upChar)) {
+ memcpy(dst, src, len);
+ dst += len;
} else {
dst += Tcl_UniCharToUtf(upChar, dst);
}
- src += bytes;
+ src += len;
}
*dst = '\0';
return (dst - str);
@@ -924,7 +933,7 @@ Tcl_UtfToLower(
{
Tcl_UniChar ch = 0, lowChar;
char *src, *dst;
- int bytes;
+ int len;
/*
* Iterate over the string until we hit the terminating null.
@@ -932,7 +941,7 @@ Tcl_UtfToLower(
src = dst = str;
while (*src) {
- bytes = TclUtfToUniChar(src, &ch);
+ len = TclUtfToUniChar(src, &ch);
lowChar = Tcl_UniCharToLower(ch);
/*
@@ -941,13 +950,13 @@ Tcl_UtfToLower(
* char to dst if its size is <= the original char.
*/
- if (bytes < UtfCount(lowChar)) {
- memcpy(dst, src, (size_t) bytes);
- dst += bytes;
+ if (len < UtfCount(lowChar)) {
+ memcpy(dst, src, len);
+ dst += len;
} else {
dst += Tcl_UniCharToUtf(lowChar, dst);
}
- src += bytes;
+ src += len;
}
*dst = '\0';
return (dst - str);
@@ -978,7 +987,7 @@ Tcl_UtfToTitle(
{
Tcl_UniChar ch = 0, titleChar, lowChar;
char *src, *dst;
- int bytes;
+ int len;
/*
* Capitalize the first character and then lowercase the rest of the
@@ -988,32 +997,32 @@ Tcl_UtfToTitle(
src = dst = str;
if (*src) {
- bytes = TclUtfToUniChar(src, &ch);
+ len = TclUtfToUniChar(src, &ch);
titleChar = Tcl_UniCharToTitle(ch);
- if (bytes < UtfCount(titleChar)) {
- memcpy(dst, src, (size_t) bytes);
- dst += bytes;
+ if (len < UtfCount(titleChar)) {
+ memcpy(dst, src, len);
+ dst += len;
} else {
dst += Tcl_UniCharToUtf(titleChar, dst);
}
- src += bytes;
+ src += len;
}
while (*src) {
- bytes = TclUtfToUniChar(src, &ch);
+ len = TclUtfToUniChar(src, &ch);
lowChar = ch;
/* Special exception for Georgian Asomtavruli chars, no titlecase. */
if ((unsigned)(lowChar - 0x1C90) >= 0x30) {
lowChar = Tcl_UniCharToLower(lowChar);
}
- if (bytes < UtfCount(lowChar)) {
- memcpy(dst, src, (size_t) bytes);
- dst += bytes;
+ if (len < UtfCount(lowChar)) {
+ memcpy(dst, src, len);
+ dst += len;
} else {
dst += Tcl_UniCharToUtf(lowChar, dst);
}
- src += bytes;
+ src += len;
}
*dst = '\0';
return (dst - str);
diff --git a/generic/tclUtil.c b/generic/tclUtil.c
index d5cc7c2..c801b83 100644
--- a/generic/tclUtil.c
+++ b/generic/tclUtil.c
@@ -1649,7 +1649,7 @@ Tcl_Backslash(
int *readPtr) /* Fill in with number of characters read from
* src, unless NULL. */
{
- char buf[TCL_UTF_MAX];
+ char buf[TCL_UTF_MAX] = "";
Tcl_UniChar ch = 0;
Tcl_UtfBackslash(src, readPtr, buf);
diff --git a/win/tclWin32Dll.c b/win/tclWin32Dll.c
index 0fa86c9..c8bb98b 100644
--- a/win/tclWin32Dll.c
+++ b/win/tclWin32Dll.c
@@ -648,6 +648,10 @@ Tcl_WinTCharToUtf(
}
blen = Tcl_UniCharToUtf(*w, p);
p += blen;
+ if ((*w >= 0xD800) && (blen < 3)) {
+ /* Indication that high surrogate is handled */
+ blen = 0;
+ }
w++;
}
if (!blen) {