summary | refs | log | tree | commit | diff | stats
path: root/generic
diff options
context:
space:
mode:
author: jan.nijtmans <nijtmans@users.sourceforge.net> 2019-02-25 21:10:33 (GMT)
committer: jan.nijtmans <nijtmans@users.sourceforge.net> 2019-02-25 21:10:33 (GMT)
commit: 57d9952ece8f81fc6802097bace965a196bb849b (patch)
tree: d57232dd97b61e8123f256c0b77a052769a0aa3f /generic
parent: 1eba3f12e0ade2e605ab5a69642e97f7b2ab4214 (diff)
download: tcl-57d9952ece8f81fc6802097bace965a196bb849b.zip
tcl-57d9952ece8f81fc6802097bace965a196bb849b.tar.gz
tcl-57d9952ece8f81fc6802097bace965a196bb849b.tar.bz2
Finish the complete fix; all corner cases are handled correctly now. Also fix the spurious UTF-8 test-case failure (as seen on Travis).
Diffstat (limited to 'generic')
-rw-r--r-- generic/tclBinary.c 4
-rw-r--r-- generic/tclCmdMZ.c 4
-rw-r--r-- generic/tclCompCmdsSZ.c 2
-rw-r--r-- generic/tclCompile.c 4
-rw-r--r-- generic/tclExecute.c 4
-rw-r--r-- generic/tclParse.c 4
-rw-r--r-- generic/tclScan.c 2
-rw-r--r-- generic/tclUtf.c 38
-rw-r--r-- generic/tclUtil.c 2
9 files changed, 33 insertions, 31 deletions
diff --git a/generic/tclBinary.c b/generic/tclBinary.c
index 677213e..3590af4 100644
--- a/generic/tclBinary.c
+++ b/generic/tclBinary.c
@@ -1354,7 +1354,7 @@ BinaryFormatCmd(
badField:
{
Tcl_UniChar ch = 0;
- char buf[TCL_UTF_MAX + 1];
+ char buf[TCL_UTF_MAX + 1] = "";
TclUtfToUniChar(errorString, &ch);
buf[Tcl_UniCharToUtf(ch, buf)] = '\0';
@@ -1724,7 +1724,7 @@ BinaryScanCmd(
badField:
{
Tcl_UniChar ch = 0;
- char buf[TCL_UTF_MAX + 1];
+ char buf[TCL_UTF_MAX + 1] = "";
TclUtfToUniChar(errorString, &ch);
buf[Tcl_UniCharToUtf(ch, buf)] = '\0';
diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c
index a289a5c..38689fd 100644
--- a/generic/tclCmdMZ.c
+++ b/generic/tclCmdMZ.c
@@ -1444,11 +1444,11 @@ StringIndexCmd(
Tcl_SetObjResult(interp, Tcl_NewByteArrayObj(&uch, 1));
} else {
- char buf[4];
+ char buf[TCL_UTF_MAX] = "";
length = Tcl_UniCharToUtf(ch, buf);
if ((ch >= 0xD800) && (length < 3)) {
- length = Tcl_UniCharToUtf(-1, buf + length);
+ length += Tcl_UniCharToUtf(-1, buf + length);
}
Tcl_SetObjResult(interp, Tcl_NewStringObj(buf, length));
}
diff --git a/generic/tclCompCmdsSZ.c b/generic/tclCompCmdsSZ.c
index daab0d5..b97121e 100644
--- a/generic/tclCompCmdsSZ.c
+++ b/generic/tclCompCmdsSZ.c
@@ -1502,7 +1502,7 @@ TclSubstCompile(
for (endTokenPtr = tokenPtr + parse.numTokens;
tokenPtr < endTokenPtr; tokenPtr = TokenAfter(tokenPtr)) {
int length, literal, catchRange, breakJump;
- char buf[TCL_UTF_MAX];
+ char buf[TCL_UTF_MAX] = "";
JumpFixup startFixup, okFixup, returnFixup, breakFixup;
JumpFixup continueFixup, otherFixup, endFixup;
diff --git a/generic/tclCompile.c b/generic/tclCompile.c
index f6e6b81..d940ff7 100644
--- a/generic/tclCompile.c
+++ b/generic/tclCompile.c
@@ -1744,7 +1744,7 @@ TclWordKnownAtCompileTime(
case TCL_TOKEN_BS:
if (tempPtr != NULL) {
- char utfBuf[TCL_UTF_MAX];
+ char utfBuf[TCL_UTF_MAX] = "";
int length = TclParseBackslash(tokenPtr->start,
tokenPtr->size, NULL, utfBuf);
@@ -2358,7 +2358,7 @@ TclCompileTokens(
{
Tcl_DString textBuffer; /* Holds concatenated chars from adjacent
* TCL_TOKEN_TEXT, TCL_TOKEN_BS tokens. */
- char buffer[TCL_UTF_MAX];
+ char buffer[TCL_UTF_MAX] = "";
int i, numObjsToConcat, length, adjust;
unsigned char *entryCodeNext = envPtr->codeNext;
#define NUM_STATIC_POS 20
diff --git a/generic/tclExecute.c b/generic/tclExecute.c
index 3ae5571..78012f0 100644
--- a/generic/tclExecute.c
+++ b/generic/tclExecute.c
@@ -5215,7 +5215,7 @@ TEBCresume(
objResultPtr = Tcl_NewStringObj((const char *)
valuePtr->bytes+index, 1);
} else {
- char buf[4];
+ char buf[TCL_UTF_MAX] = "";
int ch = Tcl_GetUniChar(valuePtr, index);
/*
@@ -5228,7 +5228,7 @@ TEBCresume(
} else {
length = Tcl_UniCharToUtf(ch, buf);
if ((ch >= 0xD800) && (length < 3)) {
- length = Tcl_UniCharToUtf(-1, buf + length);
+ length += Tcl_UniCharToUtf(-1, buf + length);
}
objResultPtr = Tcl_NewStringObj(buf, length);
}
diff --git a/generic/tclParse.c b/generic/tclParse.c
index 8d07f7f..c791585 100644
--- a/generic/tclParse.c
+++ b/generic/tclParse.c
@@ -791,7 +791,7 @@ TclParseBackslash(
Tcl_UniChar unichar = 0;
int result;
int count;
- char buf[TCL_UTF_MAX];
+ char buf[TCL_UTF_MAX] = "";
if (numBytes == 0) {
if (readPtr != NULL) {
@@ -2151,7 +2151,7 @@ TclSubstTokens(
Tcl_Obj *appendObj = NULL;
const char *append = NULL;
int appendByteLength = 0;
- char utfCharBytes[TCL_UTF_MAX];
+ char utfCharBytes[TCL_UTF_MAX] = "";
switch (tokenPtr->type) {
case TCL_TOKEN_TEXT:
diff --git a/generic/tclScan.c b/generic/tclScan.c
index 21ad953..acf1a58 100644
--- a/generic/tclScan.c
+++ b/generic/tclScan.c
@@ -261,7 +261,7 @@ ValidateFormat(
Tcl_UniChar ch = 0;
int objIndex, xpgSize, nspace = numVars;
int *nassign = TclStackAlloc(interp, nspace * sizeof(int));
- char buf[TCL_UTF_MAX+1];
+ char buf[TCL_UTF_MAX+1] = "";
Tcl_Obj *errorMsg; /* Place to build an error messages. Note that
* these are messy operations because we do
* not want to use the formatting engine;
diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index 6b63ecb..67c0b08 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -145,12 +145,11 @@ Tcl_UniCharToUtf(
if ((ch & 0xF800) == 0xD800) {
if (ch & 0x0400) {
/* Low surrogate */
- if (((buf[0] & 0xF8) == 0xF0) && ((buf[1] & 0xC0) == 0x80)
- && ((buf[2] & 0xCF) == 0)) {
+ if (((buf[0] & 0xC0) == 0x80) && ((buf[1] & 0xCF) == 0)) {
/* Previous Tcl_UniChar was a high surrogate, so combine */
- buf[3] = (char) ((ch & 0x3F) | 0x80);
- buf[2] |= (char) (((ch >> 6) & 0x0F) | 0x80);
- return 4;
+ buf[2] = (char) ((ch & 0x3F) | 0x80);
+ buf[1] |= (char) (((ch >> 6) & 0x0F) | 0x80);
+ return 3;
}
/* Previous Tcl_UniChar was not a high surrogate, so just output */
} else {
@@ -161,7 +160,7 @@ Tcl_UniCharToUtf(
buf[2] = (char) ((ch << 4) & 0x30);
buf[1] = (char) (((ch >> 2) & 0x3F) | 0x80);
buf[0] = (char) (((ch >> 8) & 0x07) | 0xF0);
- return 0;
+ return 1;
}
}
goto three;
@@ -174,11 +173,14 @@ Tcl_UniCharToUtf(
return 4;
}
} else if (ch == -1) {
- if (((buf[0] & 0xF8) == 0xF0) && ((buf[1] & 0xC0) == 0x80)
- && ((buf[2] & 0xCF) == 0)) {
- ch = 0xD7C0 + ((buf[0] & 0x07) << 8) + ((buf[1] & 0x3F) << 2)
- + ((buf[2] & 0x30) >> 4);
- goto three;
+ if (((buf[0] & 0xC0) == 0x80) && ((buf[1] & 0xCF) == 0)
+ && ((buf[-1] & 0xF8) == 0xF0)) {
+ ch = 0xD7C0 + ((buf[-1] & 0x07) << 8) + ((buf[0] & 0x3F) << 2)
+ + ((buf[1] & 0x30) >> 4);
+ buf[1] = (char) ((ch | 0x80) & 0xBF);
+ buf[0] = (char) (((ch >> 6) | 0x80) & 0xBF);
+ buf[-1] = (char) ((ch >> 12) | 0xE0);
+ return 2;
}
}
@@ -302,7 +304,7 @@ Tcl_UtfToUniChar(
Tcl_UniChar byte;
/*
- * Unroll 1 to 3 (or 4) byte UTF-8 sequences.
+ * Unroll 1 to 4 byte UTF-8 sequences.
*/
byte = *((unsigned char *) src);
@@ -375,13 +377,13 @@ Tcl_UtfToUniChar(
* Four-byte-character lead byte followed by three trail bytes.
*/
#if TCL_UTF_MAX <= 4
- byte = (((byte & 0x07) << 8) | ((src[1] & 0x3F) << 2)
+ Tcl_UniChar high = (((byte & 0x07) << 8) | ((src[1] & 0x3F) << 2)
| ((src[2] & 0x3F) >> 4)) - 0x40;
- if (byte >= 0x400) {
+ if (high >= 0x400) {
/* out of range, < 0x10000 or > 0x10ffff */
} else {
/* produce high surrogate, advance source pointer */
- *chPtr = 0xD800 + byte;
+ *chPtr = 0xD800 + high;
return 1;
}
#else
@@ -778,8 +780,8 @@ Tcl_UniCharAtIndex(
fullchar = ch;
#if TCL_UTF_MAX <= 4
if ((ch >= 0xD800) && (len < 3)) {
- /* If last Tcl_UniChar was an high surrogate, combine with low surrogate */
- (void)TclUtfToUniChar(src + len, &ch);
+ /* If last Tcl_UniChar was a high surrogate, combine with low surrogate */
+ (void)TclUtfToUniChar(src, &ch);
fullchar = (((fullchar & 0x3ff) << 10) | (ch & 0x3ff)) + 0x10000;
}
#endif
@@ -819,7 +821,7 @@ Tcl_UtfAtIndex(
}
#if TCL_UTF_MAX <= 4
if ((ch >= 0xD800) && (len < 3)) {
- /* Index points at character following High Surrogate */
+ /* Index points at character following high Surrogate */
src += TclUtfToUniChar(src, &ch);
}
#endif
diff --git a/generic/tclUtil.c b/generic/tclUtil.c
index 3d4298e..4590e8f 100644
--- a/generic/tclUtil.c
+++ b/generic/tclUtil.c
@@ -1654,7 +1654,7 @@ Tcl_Backslash(
int *readPtr) /* Fill in with number of characters read from
* src, unless NULL. */
{
- char buf[TCL_UTF_MAX];
+ char buf[TCL_UTF_MAX] = "";
Tcl_UniChar ch = 0;
Tcl_UtfBackslash(src, readPtr, buf);