From f5c47e4402864aa6d6f5f120c231c39423dcc360 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Mon, 20 Mar 2023 23:17:11 +0000 Subject: Proposed fix for [1bedc53c8c]: synchronous [read] with -strictencoding does not produce an error on invalid input --- generic/tclIO.c | 28 +++++++++++++++++++++++++++- tests/io.test | 16 ++++++++++++++++ 2 files changed, 43 insertions(+), 1 deletion(-) diff --git a/generic/tclIO.c b/generic/tclIO.c index 9944787..7f74e2e 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -6078,6 +6078,23 @@ DoReadChars( statePtr->inQueueTail = NULL; } } + + /* + * If CHANNEL_ENCODING_ERROR and CHANNEL_STICKY_EOF are both set, + * then CHANNEL_ENCODING_ERROR was caused by data that occurred + * after the EOF character was encountered, so it doesn't count as + * a real error. + */ + + if (GotFlag(statePtr, CHANNEL_ENCODING_ERROR) + && !GotFlag(statePtr, CHANNEL_STICKY_EOF) + && !GotFlag(statePtr, CHANNEL_NONBLOCKING)) { + /* Channel is blocking. Return an error so that callers + * like [read] can return an error. + */ + Tcl_SetErrno(EILSEQ); + goto finish; + } } if (copiedNow < 0) { @@ -6106,6 +6123,7 @@ DoReadChars( } } +finish: /* * Failure to fill a channel buffer may have left channel reporting a * "blocked" state, but so long as we fulfilled the request here, the @@ -6139,6 +6157,11 @@ DoReadChars( assert(!(GotFlag(statePtr, CHANNEL_EOF|CHANNEL_BLOCKED) == (CHANNEL_EOF|CHANNEL_BLOCKED))); UpdateInterest(chanPtr); + if (GotFlag(statePtr, CHANNEL_ENCODING_ERROR) + && (!copied || !GotFlag(statePtr, CHANNEL_NONBLOCKING))) { + Tcl_SetErrno(EILSEQ); + copied = -1; + } TclChannelRelease((Tcl_Channel)chanPtr); return copied; } @@ -6769,11 +6792,14 @@ TranslateInputEOL( * EOF character was seen in EOL translated range. Leave current file * position pointing at the EOF character, but don't store the EOF * character in the output string. 
+ * + * If CHANNEL_ENCODING_ERROR is set, it can only be because of data + * encountered after the EOF character, so it is nonsense. Unset it. */ SetFlag(statePtr, CHANNEL_EOF | CHANNEL_STICKY_EOF); statePtr->inputEncodingFlags |= TCL_ENCODING_END; - ResetFlag(statePtr, CHANNEL_BLOCKED|INPUT_SAW_CR); + ResetFlag(statePtr, CHANNEL_BLOCKED|INPUT_SAW_CR|CHANNEL_ENCODING_ERROR); } } diff --git a/tests/io.test b/tests/io.test index cf90936..9246bd8 100644 --- a/tests/io.test +++ b/tests/io.test @@ -9171,6 +9171,22 @@ test io-75.6 {invalid utf-8 encoding gets is not ignored (-profile strict)} -set removeFile io-75.6 } -match glob -returnCodes 1 -result {error reading "*": illegal byte sequence} +test io-75.7 {invalid utf-8 encoding gets is not ignored (-profile strict)} -setup { + set fn [makeFile {} io-75.7] + set f [open $fn w+] + fconfigure $f -encoding binary + # \x81 is invalid in utf-8 + puts -nonewline $f A\x81 + flush $f + seek $f 0 + fconfigure $f -encoding utf-8 -buffering none -eofchar "" -translation lf -profile strict +} -body { + read $f +} -cleanup { + close $f + removeFile io-75.7 +} -match glob -returnCodes 1 -result {error reading "*": illegal byte sequence} + test io-75.8 {invalid utf-8 encoding eof handling (-profile strict)} -setup { set fn [makeFile {} io-75.8] set f [open $fn w+] -- cgit v0.12 From 48dcbfcc5b65ce91d157d0faa2db21f6035879e9 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Tue, 21 Mar 2023 11:11:02 +0000 Subject: Some test-cases, which test for partial read without throwing EILSEQ immediately, only work with "-blocking 0". That's expected. 
--- tests/io.test | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/io.test b/tests/io.test index 9246bd8..58d276b 100644 --- a/tests/io.test +++ b/tests/io.test @@ -9110,10 +9110,10 @@ test io-75.3 {incomplete multibyte encoding read is ignored (-profile tcl8)} -se fconfigure $f -encoding utf-8 -buffering none -profile tcl8 } -body { set d [read $f] - close $f binary scan $d H* hd set hd } -cleanup { + close $f removeFile io-75.3 } -result 41c0 @@ -9148,10 +9148,10 @@ test io-75.5 {invalid utf-8 encoding read is ignored (-profile tcl8)} -setup { fconfigure $f -encoding utf-8 -buffering none -eofchar "" -translation lf -profile tcl8 } -body { set d [read $f] - close $f binary scan $d H* hd set hd } -cleanup { + close $f removeFile io-75.5 } -result 4181 @@ -9234,10 +9234,10 @@ test io-75.10 {incomplete multibyte encoding read is ignored} -setup { fconfigure $f -encoding utf-8 -buffering none } -body { set d [read $f] - close $f binary scan $d H* hd set hd } -cleanup { + close $f removeFile io-75.10 } -result 41c0 # The current result returns the orphan byte as byte. @@ -9254,7 +9254,7 @@ test io-75.11 {shiftjis encoding error read results in raw bytes} -setup { puts -nonewline $f A\x81\xFFA flush $f seek $f 0 - fconfigure $f -encoding shiftjis -buffering none -eofchar "" -translation lf -profile strict + fconfigure $f -encoding shiftjis -blocking 0 -eofchar "" -translation lf -profile strict } -body { set d [read $f] binary scan $d H* hd @@ -9289,7 +9289,7 @@ test io-75.13 {invalid utf-8 encoding read is not ignored (-profile strict)} -se puts -nonewline $f "A\x81" flush $f seek $f 0 - fconfigure $f -encoding utf-8 -buffering none -eofchar "" -translation lf -profile strict + fconfigure $f -encoding utf-8 -blocking 0 -eofchar "" -translation lf -profile strict } -body { set d [read $f] binary scan $d H* hd -- cgit v0.12