summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: jan.nijtmans <nijtmans@users.sourceforge.net> 2019-02-27 21:58:41 (GMT)
committer: jan.nijtmans <nijtmans@users.sourceforge.net> 2019-02-27 21:58:41 (GMT)
commit: b3e0d07c59b61fd7f3777b1949d323c39e442d3a (patch)
tree: a7848fa14d551f39fcd5fe5a0c13f96d03084ac0
parent: b8c0e58734798ec469c1e7bf11614dfe20ebf1c8 (diff)
parent: 050b6c3d5f632fea7bf549688c54be65fba7bee2 (diff)
download: tcl-b3e0d07c59b61fd7f3777b1949d323c39e442d3a.zip
tcl-b3e0d07c59b61fd7f3777b1949d323c39e442d3a.tar.gz
tcl-b3e0d07c59b61fd7f3777b1949d323c39e442d3a.tar.bz2
Merge 8.7
-rw-r--r-- generic/tclBinary.c 4
-rw-r--r-- generic/tclCmdMZ.c 14
-rw-r--r-- generic/tclCompCmdsSZ.c 2
-rw-r--r-- generic/tclCompile.c 4
-rw-r--r-- generic/tclEncoding.c 8
-rw-r--r-- generic/tclExecute.c 6
-rw-r--r-- generic/tclParse.c 8
-rw-r--r-- generic/tclScan.c 6
-rw-r--r-- generic/tclStringObj.c 4
-rw-r--r-- generic/tclUtf.c 182
-rw-r--r-- generic/tclZipfs.c 22
-rw-r--r-- win/tclWin32Dll.c 4
12 files changed, 129 insertions, 135 deletions
diff --git a/generic/tclBinary.c b/generic/tclBinary.c
index cfdeb86..53d0fbc 100644
--- a/generic/tclBinary.c
+++ b/generic/tclBinary.c
@@ -1340,7 +1340,7 @@ BinaryFormatCmd(
badField:
{
Tcl_UniChar ch = 0;
- char buf[TCL_UTF_MAX + 1];
+ char buf[TCL_UTF_MAX + 1] = "";
TclUtfToUniChar(errorString, &ch);
buf[Tcl_UniCharToUtf(ch, buf)] = '\0';
@@ -1711,7 +1711,7 @@ BinaryScanCmd(
badField:
{
Tcl_UniChar ch = 0;
- char buf[TCL_UTF_MAX + 1];
+ char buf[TCL_UTF_MAX + 1] = "";
TclUtfToUniChar(errorString, &ch);
buf[Tcl_UniCharToUtf(ch, buf)] = '\0';
diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c
index c0df3e0..ee1ec10 100644
--- a/generic/tclCmdMZ.c
+++ b/generic/tclCmdMZ.c
@@ -1223,8 +1223,8 @@ Tcl_SplitObjCmd(
fullchar = ch;
#if TCL_UTF_MAX <= 4
- if (!len) {
- len += TclUtfToUniChar(stringPtr, &ch);
+ if ((ch >= 0xD800) && (len < 3)) {
+ len += TclUtfToUniChar(stringPtr + len, &ch);
fullchar = (((fullchar & 0x3ff) << 10) | (ch & 0x3ff)) + 0x10000;
}
#endif
@@ -1446,11 +1446,11 @@ StringIndexCmd(
Tcl_SetObjResult(interp, Tcl_NewByteArrayObj(&uch, 1));
} else {
- char buf[4];
+ char buf[TCL_UTF_MAX] = "";
end = Tcl_UniCharToUtf(ch, buf);
- if (!end) {
- end = Tcl_UniCharToUtf(-1, buf);
+ if ((ch >= 0xD800) && (end < 3)) {
+ end += Tcl_UniCharToUtf(-1, buf + end);
}
Tcl_SetObjResult(interp, Tcl_NewStringObj(buf, end));
}
@@ -1858,8 +1858,8 @@ StringIsCmd(
length2 = TclUtfToUniChar(string1, &ch);
fullchar = ch;
#if TCL_UTF_MAX <= 4
- if (!length2) {
- length2 = TclUtfToUniChar(string1, &ch);
+ if ((ch >= 0xD800) && (length2 < 3)) {
+ length2 += TclUtfToUniChar(string1 + length2, &ch);
fullchar = (((fullchar & 0x3ff) << 10) | (ch & 0x3ff)) + 0x10000;
}
#endif
diff --git a/generic/tclCompCmdsSZ.c b/generic/tclCompCmdsSZ.c
index 81a44ee..1f8a8ea 100644
--- a/generic/tclCompCmdsSZ.c
+++ b/generic/tclCompCmdsSZ.c
@@ -1505,7 +1505,7 @@ TclSubstCompile(
tokenPtr < endTokenPtr; tokenPtr = TokenAfter(tokenPtr)) {
size_t length;
int literal, catchRange, breakJump;
- char buf[TCL_UTF_MAX];
+ char buf[TCL_UTF_MAX] = "";
JumpFixup startFixup, okFixup, returnFixup, breakFixup;
JumpFixup continueFixup, otherFixup, endFixup;
diff --git a/generic/tclCompile.c b/generic/tclCompile.c
index 3f28f15..38aad66 100644
--- a/generic/tclCompile.c
+++ b/generic/tclCompile.c
@@ -1725,7 +1725,7 @@ TclWordKnownAtCompileTime(
case TCL_TOKEN_BS:
if (tempPtr != NULL) {
- char utfBuf[TCL_UTF_MAX];
+ char utfBuf[TCL_UTF_MAX] = "";
size_t length = TclParseBackslash(tokenPtr->start,
tokenPtr->size, NULL, utfBuf);
@@ -2339,7 +2339,7 @@ TclCompileTokens(
{
Tcl_DString textBuffer; /* Holds concatenated chars from adjacent
* TCL_TOKEN_TEXT, TCL_TOKEN_BS tokens. */
- char buffer[TCL_UTF_MAX];
+ char buffer[TCL_UTF_MAX] = "";
int i, numObjsToConcat, adjust;
size_t length;
unsigned char *entryCodeNext = envPtr->codeNext;
diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c
index c477ca0..6faaef4 100644
--- a/generic/tclEncoding.c
+++ b/generic/tclEncoding.c
@@ -2322,8 +2322,8 @@ UtfToUtfProc(
src += len;
dst += Tcl_UniCharToUtf(*chPtr, dst);
#if TCL_UTF_MAX <= 4
- if (!len) {
- src += TclUtfToUniChar(src, chPtr);
+ if ((*chPtr >= 0xD800) && (len < 3)) {
+ src += TclUtfToUniChar(src + len, chPtr);
dst += Tcl_UniCharToUtf(*chPtr, dst);
}
#endif
@@ -2944,7 +2944,7 @@ Iso88591FromUtfProc(
if (ch > 0xff
#if TCL_UTF_MAX <= 4
- || !len
+ || ((ch >= 0xD800) && (len < 3))
#endif
) {
if (flags & TCL_ENCODING_STOPONERROR) {
@@ -2952,7 +2952,7 @@ Iso88591FromUtfProc(
break;
}
#if TCL_UTF_MAX <= 4
- if (!len) len = 4;
+ if ((ch >= 0xD800) && (len < 3)) len = 4;
#endif
/*
* Plunge on, using '?' as a fallback character.
diff --git a/generic/tclExecute.c b/generic/tclExecute.c
index 4285ed0..509a4e8 100644
--- a/generic/tclExecute.c
+++ b/generic/tclExecute.c
@@ -5006,7 +5006,7 @@ TEBCresume(
objResultPtr = Tcl_NewStringObj((const char *)
valuePtr->bytes+index, 1);
} else {
- char buf[4];
+ char buf[TCL_UTF_MAX] = "";
int ch = Tcl_GetUniChar(valuePtr, index);
/*
@@ -5018,8 +5018,8 @@ TEBCresume(
objResultPtr = Tcl_NewObj();
} else {
slength = Tcl_UniCharToUtf(ch, buf);
- if (!slength) {
- slength = Tcl_UniCharToUtf(-1, buf);
+ if ((ch >= 0xD800) && (slength < 3)) {
+ slength += Tcl_UniCharToUtf(-1, buf + slength);
}
objResultPtr = Tcl_NewStringObj(buf, slength);
}
diff --git a/generic/tclParse.c b/generic/tclParse.c
index 9d20fba..61b87a4 100644
--- a/generic/tclParse.c
+++ b/generic/tclParse.c
@@ -792,7 +792,7 @@ TclParseBackslash(
Tcl_UniChar unichar = 0;
int result;
size_t count;
- char buf[TCL_UTF_MAX];
+ char buf[TCL_UTF_MAX] = "";
if (numBytes == 0) {
if (readPtr != NULL) {
@@ -940,9 +940,9 @@ TclParseBackslash(
*readPtr = count;
}
count = Tcl_UniCharToUtf(result, dst);
- if (!count) {
+ if ((result >= 0xD800) && (count < 3)) {
/* Special case for handling high surrogates. */
- count = Tcl_UniCharToUtf(-1, dst);
+ count += Tcl_UniCharToUtf(-1, dst + count);
}
return count;
}
@@ -2153,7 +2153,7 @@ TclSubstTokens(
Tcl_Obj *appendObj = NULL;
const char *append = NULL;
int appendByteLength = 0;
- char utfCharBytes[TCL_UTF_MAX];
+ char utfCharBytes[TCL_UTF_MAX] = "";
switch (tokenPtr->type) {
case TCL_TOKEN_TEXT:
diff --git a/generic/tclScan.c b/generic/tclScan.c
index 1d7edf9..3529951 100644
--- a/generic/tclScan.c
+++ b/generic/tclScan.c
@@ -261,7 +261,7 @@ ValidateFormat(
Tcl_UniChar ch = 0;
int objIndex, xpgSize, nspace = numVars;
int *nassign = TclStackAlloc(interp, nspace * sizeof(int));
- char buf[TCL_UTF_MAX+1];
+ char buf[TCL_UTF_MAX+1] = "";
Tcl_Obj *errorMsg; /* Place to build an error messages. Note that
* these are messy operations because we do
* not want to use the formatting engine;
@@ -882,8 +882,8 @@ Tcl_ScanObjCmd(
offset = TclUtfToUniChar(string, &sch);
i = (int)sch;
#if TCL_UTF_MAX == 4
- if (!offset) {
- offset = TclUtfToUniChar(string, &sch);
+ if (((sch & 0xFC00) == 0xD800) && (offset < 3)) {
+ offset += TclUtfToUniChar(string+offset, &sch);
i = (((i<<10) & 0x0FFC00) + 0x10000) + (sch & 0x3FF);
}
#endif
diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c
index a4f442f..043497b 100644
--- a/generic/tclStringObj.c
+++ b/generic/tclStringObj.c
@@ -1951,9 +1951,9 @@ Tcl_AppendFormatToObj(
goto error;
}
length = Tcl_UniCharToUtf(code, buf);
- if (!length) {
+ if ((code >= 0xD800) && (length < 3)) {
/* Special case for handling high surrogates. */
- length = Tcl_UniCharToUtf(-1, buf);
+ length += Tcl_UniCharToUtf(-1, buf + length);
}
segment = Tcl_NewStringObj(buf, length);
Tcl_IncrRefCount(segment);
diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index 2897ace..d435017 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -145,23 +145,22 @@ Tcl_UniCharToUtf(
if ((ch & 0xF800) == 0xD800) {
if (ch & 0x0400) {
/* Low surrogate */
- if (((buf[0] & 0xF8) == 0xF0) && ((buf[1] & 0xC0) == 0x80)
- && ((buf[2] & 0xCF) == 0)) {
- /* Previous Tcl_UniChar was a High surrogate, so combine */
- buf[3] = (char) ((ch & 0x3F) | 0x80);
- buf[2] |= (char) (((ch >> 6) & 0x0F) | 0x80);
- return 4;
+ if (((buf[0] & 0xC0) == 0x80) && ((buf[1] & 0xCF) == 0)) {
+ /* Previous Tcl_UniChar was a high surrogate, so combine */
+ buf[2] = (char) ((ch & 0x3F) | 0x80);
+ buf[1] |= (char) (((ch >> 6) & 0x0F) | 0x80);
+ return 3;
}
- /* Previous Tcl_UniChar was not a High surrogate, so just output */
+ /* Previous Tcl_UniChar was not a high surrogate, so just output */
} else {
/* High surrogate */
ch += 0x40;
/* Fill buffer with specific 3-byte (invalid) byte combination,
- so following Low surrogate can recognize it and combine */
+ so following low surrogate can recognize it and combine */
buf[2] = (char) ((ch << 4) & 0x30);
buf[1] = (char) (((ch >> 2) & 0x3F) | 0x80);
buf[0] = (char) (((ch >> 8) & 0x07) | 0xF0);
- return 0;
+ return 1;
}
}
goto three;
@@ -174,11 +173,14 @@ Tcl_UniCharToUtf(
return 4;
}
} else if (ch == -1) {
- if (((buf[0] & 0xF8) == 0xF0) && ((buf[1] & 0xC0) == 0x80)
- && ((buf[2] & 0xCF) == 0)) {
- ch = 0xD7C0 + ((buf[0] & 0x07) << 8) + ((buf[1] & 0x3F) << 2)
- + ((buf[2] & 0x30) >> 4);
- goto three;
+ if (((buf[0] & 0xC0) == 0x80) && ((buf[1] & 0xCF) == 0)
+ && ((buf[-1] & 0xF8) == 0xF0)) {
+ ch = 0xD7C0 + ((buf[-1] & 0x07) << 8) + ((buf[0] & 0x3F) << 2)
+ + ((buf[1] & 0x30) >> 4);
+ buf[1] = (char) ((ch | 0x80) & 0xBF);
+ buf[0] = (char) (((ch >> 6) | 0x80) & 0xBF);
+ buf[-1] = (char) ((ch >> 12) | 0xE0);
+ return 2;
}
}
@@ -238,6 +240,9 @@ Tcl_UniCharToUtfDString(
}
len = Tcl_UniCharToUtf(*w, p);
p += len;
+ if ((*w >= 0xD800) && (len < 3)) {
+ len = 0; /* Indication that high surrogate was found */
+ }
w++;
}
if (!len) {
@@ -297,10 +302,10 @@ Tcl_UtfToUniChar(
register Tcl_UniChar *chPtr)/* Filled with the Tcl_UniChar represented by
* the UTF-8 string. */
{
- register int byte;
+ Tcl_UniChar byte;
/*
- * Unroll 1 to 3 (or 4) byte UTF-8 sequences.
+ * Unroll 1 to 4 byte UTF-8 sequences.
*/
byte = *((unsigned char *) src);
@@ -313,10 +318,24 @@ Tcl_UtfToUniChar(
* characters representing themselves.
*/
- if ((unsigned)(byte-0x80) < (unsigned) 0x20) {
- *chPtr = (Tcl_UniChar) cp1252[byte-0x80];
+#if TCL_UTF_MAX <= 4
+ /* If *chPtr contains a high surrogate (produced by a previous
+ * Tcl_UtfToUniChar() call) and the next 3 bytes are UTF-8 continuation
+ * bytes, then we must produce a follow-up low surrogate. We only
+ * do that if the high surrogate matches the bits we encounter.
+ */
+ if ((byte >= 0x80)
+ && (((((byte - 0x10) << 2) & 0xFC) | 0xD800) == (*chPtr & 0xFCFC))
+ && ((src[1] & 0xF0) == (((*chPtr << 4) & 0x30) | 0x80))
+ && ((src[2] & 0xC0) == 0x80)) {
+ *chPtr = ((src[1] & 0x0F) << 6) + (src[2] & 0x3F) + 0xDC00;
+ return 3;
+ }
+#endif
+ if (byte-0x80 < 0x20) {
+ *chPtr = cp1252[byte-0x80];
} else {
- *chPtr = (Tcl_UniChar) byte;
+ *chPtr = byte;
}
return 1;
} else if (byte < 0xE0) {
@@ -325,7 +344,7 @@ Tcl_UtfToUniChar(
* Two-byte-character lead-byte followed by a trail-byte.
*/
- *chPtr = (Tcl_UniChar) (((byte & 0x1F) << 6) | (src[1] & 0x3F));
+ *chPtr = (((byte & 0x1F) << 6) | (src[1] & 0x3F));
if ((unsigned)(*chPtr - 1) >= (UNICODE_SELF - 1)) {
return 2;
}
@@ -341,7 +360,7 @@ Tcl_UtfToUniChar(
* Three-byte-character lead byte followed by two trail bytes.
*/
- *chPtr = (Tcl_UniChar) (((byte & 0x0F) << 12)
+ *chPtr = (((byte & 0x0F) << 12)
| ((src[1] & 0x3F) << 6) | (src[2] & 0x3F));
if (*chPtr > 0x7FF) {
return 3;
@@ -359,26 +378,19 @@ Tcl_UtfToUniChar(
* Four-byte-character lead byte followed by three trail bytes.
*/
#if TCL_UTF_MAX <= 4
- Tcl_UniChar surrogate;
-
- byte = (((byte & 0x07) << 18) | ((src[1] & 0x3F) << 12)
- | ((src[2] & 0x3F) << 6) | (src[3] & 0x3F)) - 0x10000;
- surrogate = (Tcl_UniChar) (0xD800 + (byte >> 10));
- if (byte & 0x100000) {
+ Tcl_UniChar high = (((byte & 0x07) << 8) | ((src[1] & 0x3F) << 2)
+ | ((src[2] & 0x3F) >> 4)) - 0x40;
+ if (high >= 0x400) {
/* out of range, < 0x10000 or > 0x10ffff */
- } else if (*chPtr != surrogate) {
- /* produce high surrogate, but don't advance source pointer */
- *chPtr = surrogate;
- return 0;
} else {
- /* produce low surrogate, and advance source pointer */
- *chPtr = (Tcl_UniChar) (0xDC00 | (byte & 0x3FF));
- return 4;
+ /* produce high surrogate, advance source pointer */
+ *chPtr = 0xD800 + high;
+ return 1;
}
#else
- *chPtr = (Tcl_UniChar) (((byte & 0x07) << 18) | ((src[1] & 0x3F) << 12)
+ *chPtr = (((byte & 0x07) << 18) | ((src[1] & 0x3F) << 12)
| ((src[2] & 0x3F) << 6) | (src[3] & 0x3F));
- if ((unsigned)(*chPtr - 0x10000) <= 0xFFFFF) {
+ if ((*chPtr - 0x10000) <= 0xFFFFF) {
return 4;
}
#endif
@@ -390,7 +402,7 @@ Tcl_UtfToUniChar(
*/
}
- *chPtr = (Tcl_UniChar) byte;
+ *chPtr = byte;
return 1;
}
@@ -451,8 +463,8 @@ Tcl_UtfToUniCharDString(
while (p < end) {
if (Tcl_UtfCharComplete(p, end-p)) {
p += TclUtfToUniChar(p, &ch);
- } else if ((unsigned)((UCHAR(*p)-0x80)) < (unsigned) 0x20) {
- ch = (Tcl_UniChar) cp1252[UCHAR(*p++)-0x80];
+ } else if (((UCHAR(*p)-0x80)) < 0x20) {
+ ch = cp1252[UCHAR(*p++)-0x80];
} else {
ch = UCHAR(*p++);
}
@@ -583,8 +595,8 @@ Tcl_UtfFindFirst(
len = TclUtfToUniChar(src, &find);
fullchar = find;
#if TCL_UTF_MAX <= 4
- if (!len) {
- len += TclUtfToUniChar(src, &find);
+ if ((ch >= 0xD800) && (len < 3)) {
+ len += TclUtfToUniChar(src + len, &find);
fullchar = (((fullchar & 0x3ff) << 10) | (find & 0x3ff)) + 0x10000;
}
#endif
@@ -632,8 +644,8 @@ Tcl_UtfFindLast(
len = TclUtfToUniChar(src, &find);
fullchar = find;
#if TCL_UTF_MAX <= 4
- if (!len) {
- len += TclUtfToUniChar(src, &find);
+ if ((ch >= 0xD800) && (len < 3)) {
+ len += TclUtfToUniChar(src + len, &find);
fullchar = (((fullchar & 0x3ff) << 10) | (find & 0x3ff)) + 0x10000;
}
#endif
@@ -675,8 +687,8 @@ Tcl_UtfNext(
size_t len = TclUtfToUniChar(src, &ch);
#if TCL_UTF_MAX <= 4
- if (len == 0) {
- len = TclUtfToUniChar(src, &ch);
+ if ((ch >= 0xD800) && (len < 3)) {
+ len += TclUtfToUniChar(src + len, &ch);
}
#endif
return src + len;
@@ -757,7 +769,7 @@ Tcl_UniCharAtIndex(
Tcl_UniChar ch = 0;
int fullchar = 0;
#if TCL_UTF_MAX <= 4
- size_t len = 1;
+ size_t len = 0;
#endif
src += TclUtfToUniChar(src, &ch);
@@ -770,7 +782,7 @@ Tcl_UniCharAtIndex(
}
fullchar = ch;
#if TCL_UTF_MAX <= 4
- if (!len) {
+ if ((ch >= 0xD800) && (len < 3)) {
/* If last Tcl_UniChar was a high surrogate, combine with low surrogate */
(void)TclUtfToUniChar(src, &ch);
fullchar = (((fullchar & 0x3ff) << 10) | (ch & 0x3ff)) + 0x10000;
@@ -805,7 +817,7 @@ Tcl_UtfAtIndex(
{
Tcl_UniChar ch = 0;
#if TCL_UTF_MAX <= 4
- size_t len = 1;
+ size_t len = 0;
#endif
if (index != TCL_AUTO_LENGTH) {
@@ -817,10 +829,10 @@ Tcl_UtfAtIndex(
#endif
}
#if TCL_UTF_MAX <= 4
- if (!len) {
- /* Index points at character following High Surrogate */
- src += TclUtfToUniChar(src, &ch);
- }
+ if ((ch >= 0xD800) && (len < 3)) {
+ /* Index points at character following high surrogate */
+ src += TclUtfToUniChar(src, &ch);
+ }
#endif
}
return src;
@@ -903,7 +915,7 @@ Tcl_UtfToUpper(
Tcl_UniChar ch = 0;
int upChar;
char *src, *dst;
- size_t bytes;
+ size_t len;
/*
* Iterate over the string until we hit the terminating null.
@@ -911,12 +923,11 @@ Tcl_UtfToUpper(
src = dst = str;
while (*src) {
- bytes = TclUtfToUniChar(src, &ch);
+ len = TclUtfToUniChar(src, &ch);
upChar = ch;
#if TCL_UTF_MAX <= 4
- if (!bytes) {
- /* TclUtfToUniChar only returns 0 for chars > 0xffff ! */
- bytes = TclUtfToUniChar(src, &ch);
+ if ((ch >= 0xD800) && (len < 3)) {
+ len += TclUtfToUniChar(src + len, &ch);
/* Combine surrogates */
upChar = (((upChar & 0x3ff) << 10) | (ch & 0x3ff)) + 0x10000;
}
@@ -929,13 +940,13 @@ Tcl_UtfToUpper(
* char to dst if its size is <= the original char.
*/
- if ((bytes < TclUtfCount(upChar)) || ((upChar & 0xF800) == 0xD800)) {
- memcpy(dst, src, bytes);
- dst += bytes;
+ if ((len < TclUtfCount(upChar)) || ((upChar & 0xF800) == 0xD800)) {
+ memcpy(dst, src, len);
+ dst += len;
} else {
dst += Tcl_UniCharToUtf(upChar, dst);
}
- src += bytes;
+ src += len;
}
*dst = '\0';
return (dst - str);
@@ -966,7 +977,7 @@ Tcl_UtfToLower(
Tcl_UniChar ch = 0;
int lowChar;
char *src, *dst;
- size_t bytes;
+ size_t len;
/*
* Iterate over the string until we hit the terminating null.
@@ -974,12 +985,11 @@ Tcl_UtfToLower(
src = dst = str;
while (*src) {
- bytes = TclUtfToUniChar(src, &ch);
+ len = TclUtfToUniChar(src, &ch);
lowChar = ch;
#if TCL_UTF_MAX <= 4
- if (!bytes) {
- /* TclUtfToUniChar only returns 0 for chars > 0xffff ! */
- bytes = TclUtfToUniChar(src, &ch);
+ if ((ch >= 0xD800) && (len < 3)) {
+ len += TclUtfToUniChar(src + len, &ch);
/* Combine surrogates */
lowChar = (((lowChar & 0x3ff) << 10) | (ch & 0x3ff)) + 0x10000;
}
@@ -992,13 +1002,13 @@ Tcl_UtfToLower(
* char to dst if its size is <= the original char.
*/
- if ((bytes < TclUtfCount(lowChar)) || ((lowChar & 0xF800) == 0xD800)) {
- memcpy(dst, src, bytes);
- dst += bytes;
+ if ((len < TclUtfCount(lowChar)) || ((lowChar & 0xF800) == 0xD800)) {
+ memcpy(dst, src, len);
+ dst += len;
} else {
dst += Tcl_UniCharToUtf(lowChar, dst);
}
- src += bytes;
+ src += len;
}
*dst = '\0';
return (dst - str);
@@ -1030,7 +1040,7 @@ Tcl_UtfToTitle(
Tcl_UniChar ch = 0;
int titleChar, lowChar;
char *src, *dst;
- size_t bytes;
+ size_t len;
/*
* Capitalize the first character and then lowercase the rest of the
@@ -1040,33 +1050,31 @@ Tcl_UtfToTitle(
src = dst = str;
if (*src) {
- bytes = TclUtfToUniChar(src, &ch);
+ len = TclUtfToUniChar(src, &ch);
titleChar = ch;
#if TCL_UTF_MAX <= 4
- if (!bytes) {
- /* TclUtfToUniChar only returns 0 for chars > 0xffff ! */
- bytes = TclUtfToUniChar(src, &ch);
+ if ((ch >= 0xD800) && (len < 3)) {
+ len += TclUtfToUniChar(src + len, &ch);
/* Combine surrogates */
titleChar = (((titleChar & 0x3ff) << 10) | (ch & 0x3ff)) + 0x10000;
}
#endif
titleChar = Tcl_UniCharToTitle(titleChar);
- if ((bytes < TclUtfCount(titleChar)) || ((titleChar & 0xF800) == 0xD800)) {
- memcpy(dst, src, bytes);
- dst += bytes;
+ if ((len < TclUtfCount(titleChar)) || ((titleChar & 0xF800) == 0xD800)) {
+ memcpy(dst, src, len);
+ dst += len;
} else {
dst += Tcl_UniCharToUtf(titleChar, dst);
}
- src += bytes;
+ src += len;
}
while (*src) {
- bytes = TclUtfToUniChar(src, &ch);
+ len = TclUtfToUniChar(src, &ch);
lowChar = ch;
#if TCL_UTF_MAX <= 4
- if (!bytes) {
- /* TclUtfToUniChar only returns 0 for chars > 0xffff ! */
- bytes = TclUtfToUniChar(src, &ch);
+ if ((ch >= 0xD800) && (len < 3)) {
+ len += TclUtfToUniChar(src + len, &ch);
/* Combine surrogates */
lowChar = (((lowChar & 0x3ff) << 10) | (ch & 0x3ff)) + 0x10000;
}
@@ -1076,13 +1084,13 @@ Tcl_UtfToTitle(
lowChar = Tcl_UniCharToLower(lowChar);
}
- if ((bytes < TclUtfCount(lowChar)) || ((lowChar & 0xF800) == 0xD800)) {
- memcpy(dst, src, bytes);
- dst += bytes;
+ if ((len < TclUtfCount(lowChar)) || ((lowChar & 0xF800) == 0xD800)) {
+ memcpy(dst, src, len);
+ dst += len;
} else {
dst += Tcl_UniCharToUtf(lowChar, dst);
}
- src += bytes;
+ src += len;
}
*dst = '\0';
return (dst - str);
diff --git a/generic/tclZipfs.c b/generic/tclZipfs.c
index 90e8671..2322ba6 100644
--- a/generic/tclZipfs.c
+++ b/generic/tclZipfs.c
@@ -3149,21 +3149,6 @@ ZipFSListObjCmd(
#ifdef _WIN32
#define LIBRARY_SIZE 64
-
-static inline int
-WCharToUtf(
- const WCHAR *wSrc,
- char *dst)
-{
- char *start = dst;
-
- while (*wSrc != '\0') {
- dst += Tcl_UniCharToUtf(*wSrc, dst);
- wSrc++;
- }
- *dst = '\0';
- return (int) (dst - start);
-}
#endif /* _WIN32 */
Tcl_Obj *
@@ -3207,11 +3192,8 @@ TclZipfs_TclLibrary(void)
#if defined(_WIN32)
hModule = TclWinGetTclInstance();
- if (GetModuleFileNameW(hModule, wName, MAX_PATH) == 0) {
- GetModuleFileNameA(hModule, dllName, MAX_PATH);
- } else {
- WCharToUtf(wName, dllName);
- }
+ GetModuleFileNameW(hModule, wName, MAX_PATH);
+ WideCharToMultiByte(CP_UTF8, 0, wName, -1, dllName, sizeof(dllName), NULL, NULL);
if (ZipfsAppHookFindTclInit(dllName) == TCL_OK) {
return Tcl_NewStringObj(zipfs_literal_tcl_library, -1);
diff --git a/win/tclWin32Dll.c b/win/tclWin32Dll.c
index 5c04db6..c7e38cc 100644
--- a/win/tclWin32Dll.c
+++ b/win/tclWin32Dll.c
@@ -572,6 +572,10 @@ Tcl_WinTCharToUtf(
}
blen = Tcl_UniCharToUtf(*w, p);
p += blen;
+ if ((*w >= 0xD800) && (blen < 3)) {
+ /* Indication that high surrogate is handled */
+ blen = 0;
+ }
w++;
}
if (!blen) {