diff options
author | jan.nijtmans <nijtmans@users.sourceforge.net> | 2019-02-25 21:10:33 (GMT) |
---|---|---|
committer | jan.nijtmans <nijtmans@users.sourceforge.net> | 2019-02-25 21:10:33 (GMT) |
commit | 57d9952ece8f81fc6802097bace965a196bb849b (patch) | |
tree | d57232dd97b61e8123f256c0b77a052769a0aa3f /generic | |
parent | 1eba3f12e0ade2e605ab5a69642e97f7b2ab4214 (diff) | |
download | tcl-57d9952ece8f81fc6802097bace965a196bb849b.zip tcl-57d9952ece8f81fc6802097bace965a196bb849b.tar.gz tcl-57d9952ece8f81fc6802097bace965a196bb849b.tar.bz2 |
Complete the fix: all corner cases are now handled correctly. Also fix the spurious UTF-8 test-case failure (as seen on Travis).
Diffstat (limited to 'generic')
-rw-r--r-- | generic/tclBinary.c | 4 | ||||
-rw-r--r-- | generic/tclCmdMZ.c | 4 | ||||
-rw-r--r-- | generic/tclCompCmdsSZ.c | 2 | ||||
-rw-r--r-- | generic/tclCompile.c | 4 | ||||
-rw-r--r-- | generic/tclExecute.c | 4 | ||||
-rw-r--r-- | generic/tclParse.c | 4 | ||||
-rw-r--r-- | generic/tclScan.c | 2 | ||||
-rw-r--r-- | generic/tclUtf.c | 38 | ||||
-rw-r--r-- | generic/tclUtil.c | 2 |
9 files changed, 33 insertions, 31 deletions
diff --git a/generic/tclBinary.c b/generic/tclBinary.c index 677213e..3590af4 100644 --- a/generic/tclBinary.c +++ b/generic/tclBinary.c @@ -1354,7 +1354,7 @@ BinaryFormatCmd( badField: { Tcl_UniChar ch = 0; - char buf[TCL_UTF_MAX + 1]; + char buf[TCL_UTF_MAX + 1] = ""; TclUtfToUniChar(errorString, &ch); buf[Tcl_UniCharToUtf(ch, buf)] = '\0'; @@ -1724,7 +1724,7 @@ BinaryScanCmd( badField: { Tcl_UniChar ch = 0; - char buf[TCL_UTF_MAX + 1]; + char buf[TCL_UTF_MAX + 1] = ""; TclUtfToUniChar(errorString, &ch); buf[Tcl_UniCharToUtf(ch, buf)] = '\0'; diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c index a289a5c..38689fd 100644 --- a/generic/tclCmdMZ.c +++ b/generic/tclCmdMZ.c @@ -1444,11 +1444,11 @@ StringIndexCmd( Tcl_SetObjResult(interp, Tcl_NewByteArrayObj(&uch, 1)); } else { - char buf[4]; + char buf[TCL_UTF_MAX] = ""; length = Tcl_UniCharToUtf(ch, buf); if ((ch >= 0xD800) && (length < 3)) { - length = Tcl_UniCharToUtf(-1, buf + length); + length += Tcl_UniCharToUtf(-1, buf + length); } Tcl_SetObjResult(interp, Tcl_NewStringObj(buf, length)); } diff --git a/generic/tclCompCmdsSZ.c b/generic/tclCompCmdsSZ.c index daab0d5..b97121e 100644 --- a/generic/tclCompCmdsSZ.c +++ b/generic/tclCompCmdsSZ.c @@ -1502,7 +1502,7 @@ TclSubstCompile( for (endTokenPtr = tokenPtr + parse.numTokens; tokenPtr < endTokenPtr; tokenPtr = TokenAfter(tokenPtr)) { int length, literal, catchRange, breakJump; - char buf[TCL_UTF_MAX]; + char buf[TCL_UTF_MAX] = ""; JumpFixup startFixup, okFixup, returnFixup, breakFixup; JumpFixup continueFixup, otherFixup, endFixup; diff --git a/generic/tclCompile.c b/generic/tclCompile.c index f6e6b81..d940ff7 100644 --- a/generic/tclCompile.c +++ b/generic/tclCompile.c @@ -1744,7 +1744,7 @@ TclWordKnownAtCompileTime( case TCL_TOKEN_BS: if (tempPtr != NULL) { - char utfBuf[TCL_UTF_MAX]; + char utfBuf[TCL_UTF_MAX] = ""; int length = TclParseBackslash(tokenPtr->start, tokenPtr->size, NULL, utfBuf); @@ -2358,7 +2358,7 @@ TclCompileTokens( { Tcl_DString 
textBuffer; /* Holds concatenated chars from adjacent * TCL_TOKEN_TEXT, TCL_TOKEN_BS tokens. */ - char buffer[TCL_UTF_MAX]; + char buffer[TCL_UTF_MAX] = ""; int i, numObjsToConcat, length, adjust; unsigned char *entryCodeNext = envPtr->codeNext; #define NUM_STATIC_POS 20 diff --git a/generic/tclExecute.c b/generic/tclExecute.c index 3ae5571..78012f0 100644 --- a/generic/tclExecute.c +++ b/generic/tclExecute.c @@ -5215,7 +5215,7 @@ TEBCresume( objResultPtr = Tcl_NewStringObj((const char *) valuePtr->bytes+index, 1); } else { - char buf[4]; + char buf[TCL_UTF_MAX] = ""; int ch = Tcl_GetUniChar(valuePtr, index); /* @@ -5228,7 +5228,7 @@ TEBCresume( } else { length = Tcl_UniCharToUtf(ch, buf); if ((ch >= 0xD800) && (length < 3)) { - length = Tcl_UniCharToUtf(-1, buf + length); + length += Tcl_UniCharToUtf(-1, buf + length); } objResultPtr = Tcl_NewStringObj(buf, length); } diff --git a/generic/tclParse.c b/generic/tclParse.c index 8d07f7f..c791585 100644 --- a/generic/tclParse.c +++ b/generic/tclParse.c @@ -791,7 +791,7 @@ TclParseBackslash( Tcl_UniChar unichar = 0; int result; int count; - char buf[TCL_UTF_MAX]; + char buf[TCL_UTF_MAX] = ""; if (numBytes == 0) { if (readPtr != NULL) { @@ -2151,7 +2151,7 @@ TclSubstTokens( Tcl_Obj *appendObj = NULL; const char *append = NULL; int appendByteLength = 0; - char utfCharBytes[TCL_UTF_MAX]; + char utfCharBytes[TCL_UTF_MAX] = ""; switch (tokenPtr->type) { case TCL_TOKEN_TEXT: diff --git a/generic/tclScan.c b/generic/tclScan.c index 21ad953..acf1a58 100644 --- a/generic/tclScan.c +++ b/generic/tclScan.c @@ -261,7 +261,7 @@ ValidateFormat( Tcl_UniChar ch = 0; int objIndex, xpgSize, nspace = numVars; int *nassign = TclStackAlloc(interp, nspace * sizeof(int)); - char buf[TCL_UTF_MAX+1]; + char buf[TCL_UTF_MAX+1] = ""; Tcl_Obj *errorMsg; /* Place to build an error messages. 
Note that * these are messy operations because we do * not want to use the formatting engine; diff --git a/generic/tclUtf.c b/generic/tclUtf.c index 6b63ecb..67c0b08 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -145,12 +145,11 @@ Tcl_UniCharToUtf( if ((ch & 0xF800) == 0xD800) { if (ch & 0x0400) { /* Low surrogate */ - if (((buf[0] & 0xF8) == 0xF0) && ((buf[1] & 0xC0) == 0x80) - && ((buf[2] & 0xCF) == 0)) { + if (((buf[0] & 0xC0) == 0x80) && ((buf[1] & 0xCF) == 0)) { /* Previous Tcl_UniChar was a high surrogate, so combine */ - buf[3] = (char) ((ch & 0x3F) | 0x80); - buf[2] |= (char) (((ch >> 6) & 0x0F) | 0x80); - return 4; + buf[2] = (char) ((ch & 0x3F) | 0x80); + buf[1] |= (char) (((ch >> 6) & 0x0F) | 0x80); + return 3; } /* Previous Tcl_UniChar was not a high surrogate, so just output */ } else { @@ -161,7 +160,7 @@ Tcl_UniCharToUtf( buf[2] = (char) ((ch << 4) & 0x30); buf[1] = (char) (((ch >> 2) & 0x3F) | 0x80); buf[0] = (char) (((ch >> 8) & 0x07) | 0xF0); - return 0; + return 1; } } goto three; @@ -174,11 +173,14 @@ Tcl_UniCharToUtf( return 4; } } else if (ch == -1) { - if (((buf[0] & 0xF8) == 0xF0) && ((buf[1] & 0xC0) == 0x80) - && ((buf[2] & 0xCF) == 0)) { - ch = 0xD7C0 + ((buf[0] & 0x07) << 8) + ((buf[1] & 0x3F) << 2) - + ((buf[2] & 0x30) >> 4); - goto three; + if (((buf[0] & 0xC0) == 0x80) && ((buf[1] & 0xCF) == 0) + && ((buf[-1] & 0xF8) == 0xF0)) { + ch = 0xD7C0 + ((buf[-1] & 0x07) << 8) + ((buf[0] & 0x3F) << 2) + + ((buf[1] & 0x30) >> 4); + buf[1] = (char) ((ch | 0x80) & 0xBF); + buf[0] = (char) (((ch >> 6) | 0x80) & 0xBF); + buf[-1] = (char) ((ch >> 12) | 0xE0); + return 2; } } @@ -302,7 +304,7 @@ Tcl_UtfToUniChar( Tcl_UniChar byte; /* - * Unroll 1 to 3 (or 4) byte UTF-8 sequences. + * Unroll 1 to 4 byte UTF-8 sequences. */ byte = *((unsigned char *) src); @@ -375,13 +377,13 @@ Tcl_UtfToUniChar( * Four-byte-character lead byte followed by three trail bytes. 
*/ #if TCL_UTF_MAX <= 4 - byte = (((byte & 0x07) << 8) | ((src[1] & 0x3F) << 2) + Tcl_UniChar high = (((byte & 0x07) << 8) | ((src[1] & 0x3F) << 2) | ((src[2] & 0x3F) >> 4)) - 0x40; - if (byte >= 0x400) { + if (high >= 0x400) { /* out of range, < 0x10000 or > 0x10ffff */ } else { /* produce high surrogate, advance source pointer */ - *chPtr = 0xD800 + byte; + *chPtr = 0xD800 + high; return 1; } #else @@ -778,8 +780,8 @@ Tcl_UniCharAtIndex( fullchar = ch; #if TCL_UTF_MAX <= 4 if ((ch >= 0xD800) && (len < 3)) { - /* If last Tcl_UniChar was an high surrogate, combine with low surrogate */ - (void)TclUtfToUniChar(src + len, &ch); + /* If last Tcl_UniChar was a high surrogate, combine with low surrogate */ + (void)TclUtfToUniChar(src, &ch); fullchar = (((fullchar & 0x3ff) << 10) | (ch & 0x3ff)) + 0x10000; } #endif @@ -819,7 +821,7 @@ Tcl_UtfAtIndex( } #if TCL_UTF_MAX <= 4 if ((ch >= 0xD800) && (len < 3)) { - /* Index points at character following High Surrogate */ + /* Index points at character following high Surrogate */ src += TclUtfToUniChar(src, &ch); } #endif diff --git a/generic/tclUtil.c b/generic/tclUtil.c index 3d4298e..4590e8f 100644 --- a/generic/tclUtil.c +++ b/generic/tclUtil.c @@ -1654,7 +1654,7 @@ Tcl_Backslash( int *readPtr) /* Fill in with number of characters read from * src, unless NULL. */ { - char buf[TCL_UTF_MAX]; + char buf[TCL_UTF_MAX] = ""; Tcl_UniChar ch = 0; Tcl_UtfBackslash(src, readPtr, buf); |