From f5c47e4402864aa6d6f5f120c231c39423dcc360 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Mon, 20 Mar 2023 23:17:11 +0000 Subject: Proposed fix for [1bedc53c8c]: synchronous [read] with -strictencoding does not produce an error on invalid input --- generic/tclIO.c | 28 +++++++++++++++++++++++++++- tests/io.test | 16 ++++++++++++++++ 2 files changed, 43 insertions(+), 1 deletion(-) diff --git a/generic/tclIO.c b/generic/tclIO.c index 9944787..7f74e2e 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -6078,6 +6078,23 @@ DoReadChars( statePtr->inQueueTail = NULL; } } + + /* + * If CHANNEL_ENCODING_ERROR and CHANNEL_STICKY_EOF are both set, + * then CHANNEL_ENCODING_ERROR was caused by data that occurred + * after the EOF character was encountered, so it doesn't count as + * a real error. + */ + + if (GotFlag(statePtr, CHANNEL_ENCODING_ERROR) + && !GotFlag(statePtr, CHANNEL_STICKY_EOF) + && !GotFlag(statePtr, CHANNEL_NONBLOCKING)) { + /* Channel is blocking. Return an error so that callers + * like [read] can return an error. + */ + Tcl_SetErrno(EILSEQ); + goto finish; + } } if (copiedNow < 0) { @@ -6106,6 +6123,7 @@ DoReadChars( } } +finish: /* * Failure to fill a channel buffer may have left channel reporting a * "blocked" state, but so long as we fulfilled the request here, the @@ -6139,6 +6157,11 @@ DoReadChars( assert(!(GotFlag(statePtr, CHANNEL_EOF|CHANNEL_BLOCKED) == (CHANNEL_EOF|CHANNEL_BLOCKED))); UpdateInterest(chanPtr); + if (GotFlag(statePtr, CHANNEL_ENCODING_ERROR) + && (!copied || !GotFlag(statePtr, CHANNEL_NONBLOCKING))) { + Tcl_SetErrno(EILSEQ); + copied = -1; + } TclChannelRelease((Tcl_Channel)chanPtr); return copied; } @@ -6769,11 +6792,14 @@ TranslateInputEOL( * EOF character was seen in EOL translated range. Leave current file * position pointing at the EOF character, but don't store the EOF * character in the output string. + * + * If CHANNEL_ENCODING_ERROR is set, it can only be because of data + * encountered after the EOF character, so it is nonsense. Unset it. */ SetFlag(statePtr, CHANNEL_EOF | CHANNEL_STICKY_EOF); statePtr->inputEncodingFlags |= TCL_ENCODING_END; - ResetFlag(statePtr, CHANNEL_BLOCKED|INPUT_SAW_CR); + ResetFlag(statePtr, CHANNEL_BLOCKED|INPUT_SAW_CR|CHANNEL_ENCODING_ERROR); } } diff --git a/tests/io.test b/tests/io.test index cf90936..9246bd8 100644 --- a/tests/io.test +++ b/tests/io.test @@ -9171,6 +9171,22 @@ test io-75.6 {invalid utf-8 encoding gets is not ignored (-profile strict)} -set removeFile io-75.6 } -match glob -returnCodes 1 -result {error reading "*": illegal byte sequence} +test io-75.7 {invalid utf-8 encoding gets is not ignored (-profile strict)} -setup { + set fn [makeFile {} io-75.7] + set f [open $fn w+] + fconfigure $f -encoding binary + # \x81 is invalid in utf-8 + puts -nonewline $f A\x81 + flush $f + seek $f 0 + fconfigure $f -encoding utf-8 -buffering none -eofchar "" -translation lf -profile strict +} -body { + read $f +} -cleanup { + close $f + removeFile io-75.7 +} -match glob -returnCodes 1 -result {error reading "*": illegal byte sequence} + test io-75.8 {invalid utf-8 encoding eof handling (-profile strict)} -setup { set fn [makeFile {} io-75.8] set f [open $fn w+] -- cgit v0.12