From bb2f06fc739eb91a9f1499fbfbc4fa346172660e Mon Sep 17 00:00:00 2001 From: pooryorick Date: Tue, 28 Feb 2023 11:42:15 +0000 Subject: Reverted [d156af9fb76dd2f4] and removed tests io-52.20 io-75.6 io-75.7, as this commit, intended to fix issue [b8f575aa2398b0e4], breaks the semantics of [read] and [gets]. Such a change would require an accepted TIP. See [b8f575aa2398b0e4] for further discussion. jn: @pouryorick See [b8f575aa2398b0e4] for the reason why this commit is not appropriate: It gets core-8-branch back in the buggy state it was, without even providing a real solution everyone agrees on. You shouldn't revert my patch just because I reverted yours. pooryorick: As I explained, the reason for this reversion is that it hard-codes an unapproved change in the semantics of [read] and [gets] into the test suite. Jan, your statement that it's a "revenge" reversion is false. I spent a month trying to find some alternative to this reversion before actually performing it. A commit that codifes in its tests changes in semantcs to [read]/[gets] simply shouldn't be on core-8-branch. --- generic/tclIO.c | 4 ++-- tests/io.test | 63 --------------------------------------------------------- 2 files changed, 2 insertions(+), 65 deletions(-) diff --git a/generic/tclIO.c b/generic/tclIO.c index 2e0cd1f..c96a406 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -7588,7 +7588,7 @@ Tcl_Eof( ChannelState *statePtr = ((Channel *) chan)->state; /* State of real channel structure. */ - return (GotFlag(statePtr, CHANNEL_EOF) && !GotFlag(statePtr, CHANNEL_ENCODING_ERROR)) ? 1 : 0; + return GotFlag(statePtr, CHANNEL_EOF) ? 1 : 0; } /* @@ -8283,7 +8283,7 @@ Tcl_SetChannelOption( statePtr->inputEncodingFlags = TCL_ENCODING_START; statePtr->outputEncodingState = NULL; statePtr->outputEncodingFlags = TCL_ENCODING_START; - ResetFlag(statePtr, CHANNEL_NEED_MORE_DATA|CHANNEL_ENCODING_ERROR); + ResetFlag(statePtr, CHANNEL_NEED_MORE_DATA); UpdateInterest(chanPtr); return TCL_OK; } else if (HaveOpt(2, "-eofchar")) { diff --git a/tests/io.test b/tests/io.test index 865ff7e..6821ff3 100644 --- a/tests/io.test +++ b/tests/io.test @@ -7609,27 +7609,6 @@ test io-52.19 {coverage of eofChar handling} { close $out file size $path(test2) } 8 -test io-52.20 {TclCopyChannel & encodings} -setup { - set out [open $path(utf8-fcopy.txt) w] - fconfigure $out -encoding utf-8 -translation lf - puts $out "Á" - close $out -} -constraints {fcopy} -body { - # binary to encoding => the input has to be - # in utf-8 to make sense to the encoder - - set in [open $path(utf8-fcopy.txt) r] - set out [open $path(kyrillic.txt) w] - - # Using "-encoding ascii" means reading the "Á" gives an error - fconfigure $in -encoding ascii -strictencoding 1 - fconfigure $out -encoding koi8-r -translation lf - - fcopy $in $out -} -cleanup { - close $in - close $out -} -returnCodes 1 -match glob -result {error reading "file*": illegal byte sequence} test io-52.21 {TclCopyChannel & encodings} -setup { set out [open $path(utf8-fcopy.txt) w] fconfigure $out -encoding utf-8 -translation lf @@ -9143,48 +9122,6 @@ test io-75.5 {invalid utf-8 encoding read is ignored (-nocomplainencoding 1)} -s removeFile io-75.5 } -result 4181 -test io-75.6 {invalid utf-8 encoding read is not ignored (-strictencoding 1)} -setup { - set fn [makeFile {} io-75.6] - set f [open $fn w+] - fconfigure $f -encoding binary - # \x81 is invalid in utf-8 - puts -nonewline $f A\x81 - flush $f - seek $f 0 - fconfigure $f -encoding utf-8 -buffering none -eofchar "" -translation lf -strictencoding 1 -} -body { - set d [read $f] - binary scan $d H* hd - lappend hd [catch {read $f} msg] - close $f - lappend hd $msg -} -cleanup { - removeFile io-75.6 -} -match glob -result {41 1 {error reading "*": illegal byte sequence}} - -test io-75.7 {invalid utf-8 encoding eof handling (-strictencoding 1)} -setup { - set fn [makeFile {} io-75.7] - set f [open $fn w+] - fconfigure $f -encoding binary - # \xA1 is invalid in utf-8. -eofchar is not detected, because it comes later. - puts -nonewline $f A\xA1\x1A - flush $f - seek $f 0 - fconfigure $f -encoding utf-8 -buffering none -eofchar \x1A -translation lf -strictencoding 1 -} -body { - set d [read $f] - binary scan $d H* hd - lappend hd [eof $f] - lappend hd [catch {read $f} msg] - lappend hd $msg - fconfigure $f -encoding iso8859-1 - lappend hd [read $f];# We changed encoding, so now we can read the \xA1 - close $f - set hd -} -cleanup { - removeFile io-75.7 -} -match glob -result {41 0 1 {error reading "*": illegal byte sequence} ¡} - test io-75.8 {invalid utf-8 encoding eof handling (-strictencoding 1)} -setup { set fn [makeFile {} io-75.8] set f [open $fn w+] -- cgit v0.12 From 70e40dd53dda7e7fca32fc8aec28cf6504e7ffec Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Tue, 28 Feb 2023 12:16:10 +0000 Subject: Restore previous behavior for non-blocking mode, as for this mode the semantics of [read]/[gets] were not broken. This was the 'some agreement'. The change in line 8286 is necessary for both blocking and non-blocking mode: Whenver the encoding change we need to reset the CHANNEL_ENCODING_ERROR flag. --- generic/tclIO.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/generic/tclIO.c b/generic/tclIO.c index c96a406..ae09690 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -7588,6 +7588,10 @@ Tcl_Eof( ChannelState *statePtr = ((Channel *) chan)->state; /* State of real channel structure. */ + if (GotFlag(statePtr, CHANNEL_NONBLOCKING) + && GotFlag(statePtr, CHANNEL_ENCODING_ERROR) { + return 0; + } return GotFlag(statePtr, CHANNEL_EOF) ? 1 : 0; } @@ -8283,7 +8287,7 @@ Tcl_SetChannelOption( statePtr->inputEncodingFlags = TCL_ENCODING_START; statePtr->outputEncodingState = NULL; statePtr->outputEncodingFlags = TCL_ENCODING_START; - ResetFlag(statePtr, CHANNEL_NEED_MORE_DATA); + ResetFlag(statePtr, CHANNEL_NEED_MORE_DATA|CHANNEL_ENCODING_ERROR); UpdateInterest(chanPtr); return TCL_OK; } else if (HaveOpt(2, "-eofchar")) { -- cgit v0.12