diff options
-rw-r--r-- | generic/tclEncoding.c | 10 | ||||
-rw-r--r-- | generic/tclIO.c | 12 | ||||
-rw-r--r-- | generic/tclIO.h | 2 | ||||
-rw-r--r-- | tests/encoding.test | 15 | ||||
-rw-r--r-- | tests/io.test | 21 | ||||
-rw-r--r-- | tests/unixInit.test | 3 |
6 files changed, 54 insertions, 9 deletions
diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index efe4b43..eb217b4 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -2391,7 +2391,7 @@ UtfToUtfProc( * If in input mode, and -strict is specified: This is an error. */ if (flags & TCL_ENCODING_MODIFIED) { - result = TCL_CONVERT_UNKNOWN; + result = TCL_CONVERT_SYNTAX; break; } @@ -2413,6 +2413,10 @@ UtfToUtfProc( result = TCL_CONVERT_MULTIBYTE; break; } + if (((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT)) { + result = TCL_CONVERT_SYNTAX; + break; + } ch = UCHAR(*src++); } else { char chbuf[2]; @@ -2424,8 +2428,8 @@ UtfToUtfProc( int low; const char *saveSrc = src; size_t len = TclUtfToUCS4(src, &ch); - if ((len < 2) && (ch != 0) && STOPONERROR - && (flags & TCL_ENCODING_MODIFIED)) { + if ((len < 2) && (ch != 0) && (flags & TCL_ENCODING_MODIFIED) + && (((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT))) { result = TCL_CONVERT_SYNTAX; break; } diff --git a/generic/tclIO.c b/generic/tclIO.c index 4002934..d5fbd18 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -4466,7 +4466,7 @@ Write( * current output encoding and strict encoding is active. */ - if (result == TCL_CONVERT_UNKNOWN) { + if (result == TCL_CONVERT_UNKNOWN || result == TCL_CONVERT_SYNTAX) { encodingError = 1; result = TCL_OK; } @@ -5517,6 +5517,11 @@ FilterInputBytes( &statePtr->inputEncodingState, dst, spaceLeft, &gsPtr->rawRead, &gsPtr->bytesWrote, &gsPtr->charsWrote); + if (result == TCL_CONVERT_UNKNOWN || result == TCL_CONVERT_SYNTAX) { + SetFlag(statePtr, CHANNEL_ENCODING_ERROR); + result = TCL_OK; + } + /* * Make sure that if we go through 'gets', that we reset the * TCL_ENCODING_START flag still. [Bug #523988] @@ -6345,6 +6350,11 @@ ReadChars( flags, &statePtr->inputEncodingState, dst, dstLimit, &srcRead, &dstDecoded, &numChars); + if (code == TCL_CONVERT_UNKNOWN || code == TCL_CONVERT_SYNTAX) { + SetFlag(statePtr, CHANNEL_ENCODING_ERROR); + code = TCL_OK; + } + /* * Perform the translation transformation in place. Read no more than * the dstDecoded bytes the encoding transformation actually produced. diff --git a/generic/tclIO.h b/generic/tclIO.h index 1da8478..fbd01ee 100644 --- a/generic/tclIO.h +++ b/generic/tclIO.h @@ -271,6 +271,8 @@ typedef struct ChannelState { * delivered for buffered data until * the state of the channel * changes. */ +#define CHANNEL_ENCODING_ERROR (1<<15) /* set if channel + * encountered an encoding error */ #define CHANNEL_RAW_MODE (1<<16) /* When set, notes that the Raw API is * being used. */ #define CHANNEL_ENCODING_NOCOMPLAIN (1<<17) /* set if option diff --git a/tests/encoding.test b/tests/encoding.test index d234e0c..9aa123d 100644 --- a/tests/encoding.test +++ b/tests/encoding.test @@ -673,6 +673,21 @@ test encoding-24.22 {Syntax error, two encodings} -body { test encoding-24.23 {Syntax error, two encodings} -body { encoding convertto iso8859-1 utf-8 "ZX\uD800" } -returnCodes 1 -result {wrong # args: should be "::tcl::encoding::convertto ?-nocomplain? ?-strict? ?-failindex var? ?encoding? data"} +test encoding-24.24 {Parse invalid utf-8 with -strict} -body { + encoding convertfrom -strict utf-8 "\xC0\x80\x00\x00" +} -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xC0'} +test encoding-24.25 {Parse invalid utf-8 with -strict} -body { + encoding convertfrom -strict utf-8 "\x40\x80\x00\x00" +} -returnCodes 1 -result {unexpected byte sequence starting at index 1: '\x80'} +test encoding-24.26 {Parse valid utf-8 with -strict} -body { + encoding convertfrom -strict utf-8 "\xF1\x80\x80\x80" +} -result \U40000 +test encoding-24.27 {Parse invalid utf-8 with -strict} -body { + encoding convertfrom -strict utf-8 "\xF0\x80\x80\x80" +} -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xF0'} +test encoding-24.28 {Parse invalid utf-8 with -strict} -body { + encoding convertfrom -strict utf-8 "\xFF\x00\x00" +} -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xFF'} file delete [file join [temporaryDirectory] iso2022.txt] diff --git a/tests/io.test b/tests/io.test index 9ae25bb..ef9e14d 100644 --- a/tests/io.test +++ b/tests/io.test @@ -9029,11 +9029,10 @@ test io-75.4 {shiftjis encoding error read results in raw bytes} -setup { removeFile io-75.4 } -result "4181ff41" -test io-75.5 {incomplete shiftjis encoding read is ignored} -setup { +test io-75.5 {invalid utf-8 encoding read is ignored} -setup { set fn [makeFile {} io-75.5] set f [open $fn w+] fconfigure $f -encoding binary - # \x81 announces a two byte sequence. puts -nonewline $f "A\x81" flush $f seek $f 0 @@ -9047,6 +9046,24 @@ test io-75.5 {incomplete shiftjis encoding read is ignored} -setup { removeFile io-75.5 } -result "4181" +test io-75.6 {invalid utf-8 encoding read is not ignored (-strictencoding 1)} -setup { + set fn [makeFile {} io-75.6] + set f [open $fn w+] + fconfigure $f -encoding binary + # \x81 is invalid in utf-8 + puts -nonewline $f "A\x81" + flush $f + seek $f 0 + fconfigure $f -encoding utf-8 -buffering none -eofchar "" -translation lf -strictencoding 1 +} -body { + set d [read $f] + binary scan $d H* hd + lappend hd [catch {read $f} msg] + close $f + lappend hd $msg +} -cleanup { + removeFile io-75.6 +} -result "41 0 {}" ; # Here, an exception should be thrown # ### ### ### ######### ######### ######### diff --git a/tests/unixInit.test b/tests/unixInit.test index 8e64c7a..16d9e64 100644 --- a/tests/unixInit.test +++ b/tests/unixInit.test @@ -346,8 +346,6 @@ test unixInit-3.1 {TclpSetInitialEncodings} -constraints { } -match regexp -result {^(iso8859-15?|utf-8)$} test unixInit-3.2 {TclpSetInitialEncodings} -setup { catch {set oldlc_all $env(LC_ALL)} - catch {set oldtcl_library $env(TCL_LIBRARY)} - unset -nocomplain env(TCL_LIBRARY) } -constraints {unix stdio knownBug} -body { set env(LANG) japanese set env(LC_ALL) japanese @@ -366,7 +364,6 @@ test unixInit-3.2 {TclpSetInitialEncodings} -setup { } -cleanup { unset -nocomplain env(LANG) env(LC_ALL) catch {set env(LC_ALL) $oldlc_all} - catch {set env(TCL_LIBRARY) $oldtcl_library} } -result 0 test unixInit-4.1 {TclpSetVariables} {unix} { |