diff options
| -rw-r--r-- | generic/tclEncoding.c | 45 | ||||
| -rw-r--r-- | tests/chanio.test | 2 | ||||
| -rw-r--r-- | tests/io.test | 2 |
3 files changed, 29 insertions, 20 deletions
diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index dea112a..ba9f811 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -537,8 +537,8 @@ TclInitEncodingSubsystem(void) unsigned size; unsigned short i; union { - char c; - short s; + char c; + short s; } isLe; if (encodingsInitialized) { @@ -1233,9 +1233,9 @@ Tcl_ExternalToUtf( } if (!noTerminate) { - if (dstLen < 1) { - return TCL_CONVERT_NOSPACE; - } + if (dstLen < 1) { + return TCL_CONVERT_NOSPACE; + } /* * If there are any null characters in the middle of the buffer, * they will converted to the UTF-8 null character (\xC080). To get @@ -1245,9 +1245,9 @@ Tcl_ExternalToUtf( dstLen--; } else { - if (dstLen < 0) { - return TCL_CONVERT_NOSPACE; - } + if (dstLen < 0) { + return TCL_CONVERT_NOSPACE; + } } do { Tcl_EncodingState savedState = *statePtr; @@ -1423,7 +1423,7 @@ Tcl_UtfToExternal( } if (dstLen < encodingPtr->nullSize) { - return TCL_CONVERT_NOSPACE; + return TCL_CONVERT_NOSPACE; } dstLen -= encodingPtr->nullSize; result = encodingPtr->fromUtfProc(encodingPtr->clientData, src, srcLen, @@ -2731,17 +2731,26 @@ TableToUtfProc( } byte = *((unsigned char *) src); if (prefixBytes[byte]) { - src++; - if (src >= srcEnd) { - src--; - result = TCL_CONVERT_MULTIBYTE; - break; + if (src >= srcEnd-1) { + /* Prefix byte but nothing after it */ + if (!(flags & TCL_ENCODING_END)) { + /* More data to come */ + result = TCL_CONVERT_MULTIBYTE; + break; + } else if (flags & TCL_ENCODING_STOPONERROR) { + result = TCL_CONVERT_SYNTAX; + break; + } else { + ch = (Tcl_UniChar)byte; + } + } else { + ch = toUnicode[byte][*((unsigned char *)++src)]; } - ch = toUnicode[byte][*((unsigned char *) src)]; } else { ch = pageZero[byte]; } if ((ch == 0) && (byte != 0)) { + /* Prefix+suffix pair is invalid */ if (flags & TCL_ENCODING_STOPONERROR) { result = TCL_CONVERT_SYNTAX; break; @@ -2749,14 +2758,14 @@ TableToUtfProc( if (prefixBytes[byte]) { src--; } - ch = (Tcl_UniChar) byte; + ch = (Tcl_UniChar)byte; } /* * Special case for 1-byte Utf chars for speed. */ - if (ch && ch < 0x80) { + if ((unsigned)ch - 1 < 0x7F) { *dst++ = (char) ch; } else { dst += Tcl_UniCharToUtf(ch, dst); @@ -2963,7 +2972,7 @@ Iso88591ToUtfProc( * Special case for 1-byte utf chars for speed. */ - if (ch && ch < 0x80) { + if ((unsigned)ch - 1 < 0x7F) { *dst++ = (char) ch; } else { dst += Tcl_UniCharToUtf(ch, dst); diff --git a/tests/chanio.test b/tests/chanio.test index 3452f78..aef6a1b 100644 --- a/tests/chanio.test +++ b/tests/chanio.test @@ -1098,7 +1098,7 @@ test chan-io-7.3 {FilterInputBytes: split up character at EOF} -setup { lappend x [chan gets $f line] $line } -cleanup { chan close $f -} -result [list 15 "1234567890123\uFF10\uFF11" 18 0 1 -1 ""] +} -result [list 16 "1234567890123\uFF10\uFF11\x82" 18 0 1 -1 ""] test chan-io-7.4 {FilterInputBytes: recover from split up character} -setup { variable x "" } -constraints {stdio fileevent} -body { diff --git a/tests/io.test b/tests/io.test index d20bc87..50a6018 100644 --- a/tests/io.test +++ b/tests/io.test @@ -1136,7 +1136,7 @@ test io-7.3 {FilterInputBytes: split up character at EOF} {testchannel} { lappend x [gets $f line] $line close $f set x -} [list 15 "1234567890123\uFF10\uFF11" 18 0 1 -1 ""] +} [list 16 "1234567890123\uFF10\uFF11\x82" 18 0 1 -1 ""] test io-7.4 {FilterInputBytes: recover from split up character} {stdio fileevent} { set f [open "|[list [interpreter] $path(cat)]" w+] fconfigure $f -encoding binary -buffering none |
