From b17bb4724c7bee03cf081b87436b936da78681c5 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Tue, 11 Oct 2022 06:22:10 +0000 Subject: Making a start fixing [6978c01b65]: Channel encoding difference 8.6 <-> 9.0 --- generic/tclIO.c | 12 +++++++++++- generic/tclIO.h | 2 ++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/generic/tclIO.c b/generic/tclIO.c index 6a9c306..097b6ee 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -4466,7 +4466,7 @@ Write( * current output encoding and strict encoding is active. */ - if (result == TCL_CONVERT_UNKNOWN) { + if (result == TCL_CONVERT_UNKNOWN || result == TCL_CONVERT_SYNTAX) { encodingError = 1; result = TCL_OK; } @@ -5516,6 +5516,11 @@ FilterInputBytes( &statePtr->inputEncodingState, dst, spaceLeft, &gsPtr->rawRead, &gsPtr->bytesWrote, &gsPtr->charsWrote); + if (result == TCL_CONVERT_UNKNOWN || result == TCL_CONVERT_SYNTAX) { + SetFlag(statePtr, CHANNEL_ENCODING_ERROR); + result = TCL_OK; + } + /* * Make sure that if we go through 'gets', that we reset the * TCL_ENCODING_START flag still. [Bug #523988] @@ -6344,6 +6349,11 @@ ReadChars( flags, &statePtr->inputEncodingState, dst, dstLimit, &srcRead, &dstDecoded, &numChars); + if (code == TCL_CONVERT_UNKNOWN || code == TCL_CONVERT_SYNTAX) { + SetFlag(statePtr, CHANNEL_ENCODING_ERROR); + code = TCL_OK; + } + /* * Perform the translation transformation in place. Read no more than * the dstDecoded bytes the encoding transformation actually produced. diff --git a/generic/tclIO.h b/generic/tclIO.h index e8d2736..8f30cf0 100644 --- a/generic/tclIO.h +++ b/generic/tclIO.h @@ -271,6 +271,8 @@ typedef struct ChannelState { * delivered for buffered data until * the state of the channel * changes. */ +#define CHANNEL_ENCODING_ERROR (1<<15) /* set if channel + * encountered an encoding error */ #define CHANNEL_RAW_MODE (1<<16) /* When set, notes that the Raw API is * being used. */ #define CHANNEL_ENCODING_NOCOMPLAIN (1<<17) /* set if option -- cgit v0.12 From 91e64fb4758b08118646d3a4cb707a9288e920f3 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Sun, 20 Nov 2022 23:27:09 +0000 Subject: indenting --- generic/tclIO.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/generic/tclIO.c b/generic/tclIO.c index 6b9b48d..d5fbd18 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -5518,8 +5518,8 @@ FilterInputBytes( &gsPtr->bytesWrote, &gsPtr->charsWrote); if (result == TCL_CONVERT_UNKNOWN || result == TCL_CONVERT_SYNTAX) { - SetFlag(statePtr, CHANNEL_ENCODING_ERROR); - result = TCL_OK; + SetFlag(statePtr, CHANNEL_ENCODING_ERROR); + result = TCL_OK; } /* @@ -6351,7 +6351,7 @@ ReadChars( dst, dstLimit, &srcRead, &dstDecoded, &numChars); if (code == TCL_CONVERT_UNKNOWN || code == TCL_CONVERT_SYNTAX) { - SetFlag(statePtr, CHANNEL_ENCODING_ERROR); + SetFlag(statePtr, CHANNEL_ENCODING_ERROR); code = TCL_OK; } -- cgit v0.12 From d942525e228c522b5f0101ced6a494e80c2cd06d Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Tue, 22 Nov 2022 22:25:46 +0000 Subject: Better -strict checking, with testcases --- generic/tclEncoding.c | 4 ++-- tests/encoding.test | 17 ++++++++++------- tests/io.test | 21 +++++++++++++++++++-- 3 files changed, 31 insertions(+), 11 deletions(-) diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index 1df5e93..eb217b4 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -2428,8 +2428,8 @@ UtfToUtfProc( int low; const char *saveSrc = src; size_t len = TclUtfToUCS4(src, &ch); - if ((len < 2) && (ch != 0) && STOPONERROR - && (flags & TCL_ENCODING_MODIFIED)) { + if ((len < 2) && (ch != 0) && (flags & TCL_ENCODING_MODIFIED) + && (((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT))) { result = TCL_CONVERT_SYNTAX; break; } diff --git a/tests/encoding.test b/tests/encoding.test index b4f35db..9aa123d 100644 --- a/tests/encoding.test +++ b/tests/encoding.test @@ -674,16 +674,19 @@ test encoding-24.23 {Syntax error, two encodings} -body { encoding convertto iso8859-1 utf-8 "ZX\uD800" } -returnCodes 1 -result {wrong # args: should be "::tcl::encoding::convertto ?-nocomplain? ?-strict? ?-failindex var? ?encoding? data"} test encoding-24.24 {Parse invalid utf-8 with -strict} -body { - encoding convertfrom -strict utf-8 "\xC0\x80" + encoding convertfrom -strict utf-8 "\xC0\x80\x00\x00" } -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xC0'} test encoding-24.25 {Parse invalid utf-8 with -strict} -body { - encoding convertfrom -strict utf-8 "\x80" -} -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\x80'} -test encoding-24.26 {Parse invalid utf-8 with -strict} -body { - encoding convertfrom -strict utf-8 "\xF0" -} -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xF0'} + encoding convertfrom -strict utf-8 "\x40\x80\x00\x00" +} -returnCodes 1 -result {unexpected byte sequence starting at index 1: '\x80'} +test encoding-24.26 {Parse valid utf-8 with -strict} -body { + encoding convertfrom -strict utf-8 "\xF1\x80\x80\x80" +} -result \U40000 test encoding-24.27 {Parse invalid utf-8 with -strict} -body { - encoding convertfrom -strict utf-8 "\xFF" + encoding convertfrom -strict utf-8 "\xF0\x80\x80\x80" +} -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xF0'} +test encoding-24.28 {Parse invalid utf-8 with -strict} -body { + encoding convertfrom -strict utf-8 "\xFF\x00\x00" } -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xFF'} file delete [file join [temporaryDirectory] iso2022.txt] diff --git a/tests/io.test b/tests/io.test index 9ae25bb..ef9e14d 100644 --- a/tests/io.test +++ b/tests/io.test @@ -9029,11 +9029,10 @@ test io-75.4 {shiftjis encoding error read results in raw bytes} -setup { removeFile io-75.4 } -result "4181ff41" -test io-75.5 {incomplete shiftjis encoding read is ignored} -setup { +test io-75.5 {invalid utf-8 encoding read is ignored} -setup { set fn [makeFile {} io-75.5] set f [open $fn w+] fconfigure $f -encoding binary - # \x81 announces a two byte sequence. puts -nonewline $f "A\x81" flush $f seek $f 0 @@ -9047,6 +9046,24 @@ test io-75.5 {incomplete shiftjis encoding read is ignored} -setup { removeFile io-75.5 } -result "4181" +test io-75.6 {invalid utf-8 encoding read is not ignored (-strictencoding 1)} -setup { + set fn [makeFile {} io-75.6] + set f [open $fn w+] + fconfigure $f -encoding binary + # \x81 is invalid in utf-8 + puts -nonewline $f "A\x81" + flush $f + seek $f 0 + fconfigure $f -encoding utf-8 -buffering none -eofchar "" -translation lf -strictencoding 1 +} -body { + set d [read $f] + binary scan $d H* hd + lappend hd [catch {read $f} msg] + close $f + lappend hd $msg +} -cleanup { + removeFile io-75.6 +} -result "41 0 {}" ; # Here, an exception should be thrown # ### ### ### ######### ######### ######### -- cgit v0.12