From b62a3d44eaf5682d190fb17bc414e45ed3b11901 Mon Sep 17 00:00:00 2001 From: oehhar Date: Sun, 12 Nov 2023 18:55:01 +0000 Subject: Bug [c4eb46a1]: endless loop on gets, non blocking, profile strict, encoding error: remove non-blocking exit condition and add test case --- generic/tclIO.c | 13 +++++++++++-- tests/io.test | 25 ++++++++++++++++++++++--- 2 files changed, 33 insertions(+), 5 deletions(-) diff --git a/generic/tclIO.c b/generic/tclIO.c index 6461909..c92fb64 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -4918,8 +4918,17 @@ Tcl_GetsObj( } goto gotEOL; } else if (gs.bytesWrote == 0 - && GotFlag(statePtr, CHANNEL_ENCODING_ERROR) - && !GotFlag(statePtr, CHANNEL_NONBLOCKING)) { + && GotFlag(statePtr, CHANNEL_ENCODING_ERROR)) { + /* Ticket c4eb46a1 Harald Oehlmann 2023-11-12 debugging session. + * In non blocking mode we loop indefinitely on a decoding error in + * this while-loop. + * Removed the following from the upper condition: + * "&& !GotFlag(statePtr, CHANNEL_NONBLOCKING)" + * In case of an encoding error with leading correct bytes, we pass here + * two times, as gs.bytesWrote is not 0 on the first pass. This feels + * like one pass too many, as the data is not used anyway. + */ + /* Set eol to the position that caused the encoding error, and then * continue to gotEOL, which stores the data that was decoded * without error to objPtr. 
This allows the caller to do something diff --git a/tests/io.test b/tests/io.test index 9f731ad..a6683c8 100644 --- a/tests/io.test +++ b/tests/io.test @@ -9193,7 +9193,7 @@ test io-75.5 {invalid utf-8 encoding read is ignored (-profile tcl8)} -setup { removeFile io-75.5 } -result 4181 -test io-75.6 {invalid utf-8 encoding, gets is not ignored (-profile strict)} -setup { +test io-75.6 {invalid utf-8 encoding, blocking gets is not ignored (-profile strict)} -setup { set fn [makeFile {} io-75.6] set f [open $fn w+] fconfigure $f -encoding binary @@ -9211,6 +9211,25 @@ test io-75.6 {invalid utf-8 encoding, gets is not ignored (-profile strict)} -se } -match glob -returnCodes 1 -result {error reading "file*":\ invalid or incomplete multibyte or wide character} +# TCL ticket c4eb46a196: non blocking case had endless loop, so test it +test io-75.6.2 {invalid utf-8 encoding, non blocking gets is not ignored (-profile strict)} -setup { + set fn [makeFile {} io-75.6.2] + set f [open $fn w+] + fconfigure $f -encoding binary + # \x81 is an incomplete byte sequence in utf-8 + puts -nonewline $f A\x81 + flush $f + seek $f 0 + fconfigure $f -encoding utf-8 -buffering none -eofchar {} \ + -translation lf -profile strict -blocking 0 +} -body { + gets $f +} -cleanup { + close $f + removeFile io-75.6.2 +} -match glob -returnCodes 1 -result {error reading "file*":\ + invalid or incomplete multibyte or wide character} + test io-75.7 { invalid utf-8 encoding read is not ignored (-profile strict) } -setup { @@ -9232,7 +9251,7 @@ test io-75.7 { } -match glob -result {1 {error reading "file*":\ invalid or incomplete multibyte or wide character}} -test io-75.8 {invalid utf-8 encoding eof handling (-profile strict)} -setup { +test io-75.8 {invalid utf-8 encoding eof first handling (-profile strict)} -setup { set fn [makeFile {} io-75.8] set f [open $fn w+] fconfigure $f -encoding binary @@ -9254,7 +9273,7 @@ test io-75.8 {invalid utf-8 encoding eof handling (-profile strict)} -setup { 
removeFile io-75.8 } -result {41 1 {}} -test io-75.8.eoflater {invalid utf-8 encoding eof handling (-profile strict)} -setup { +test io-75.8.eoflater {invalid utf-8 encoding eof after handling (-profile strict)} -setup { set fn [makeFile {} io-75.8] set f [open $fn w+] # This also configures the channel encoding profile as strict. -- cgit v0.12 From 44f9c28e418b785e842ac8b986daa9120d2a4b37 Mon Sep 17 00:00:00 2001 From: oehhar Date: Sun, 12 Nov 2023 19:32:07 +0000 Subject: bug [c4eb46a1]: fix was effective for test sequence "A\xC3B", but not for "A\x81". So add test io-75.6.1 with first sequence, io-75.6.2 is currently failing, as the gets does not return with an error. --- tests/io.test | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/tests/io.test b/tests/io.test index a6683c8..1078a50 100644 --- a/tests/io.test +++ b/tests/io.test @@ -9212,8 +9212,27 @@ test io-75.6 {invalid utf-8 encoding, blocking gets is not ignored (-profile str invalid or incomplete multibyte or wide character} # TCL ticket c4eb46a196: non blocking case had endless loop, so test it +# The first fix was successful with the test data A\xC3B, but not with A\x81. So, test both +test io-75.6.1 {invalid utf-8 encoding "A xc3 B", non blocking gets is not ignored (-profile strict)} -setup { + set fn [makeFile {} io-75.6.1] + set f [open $fn w+] + fconfigure $f -encoding binary + # utf-8: \xC3 requires a 2nd byte > x80, but Date: Mon, 13 Nov 2023 12:48:06 +0000 Subject: Bug [c4eb46a1]: non-blocking gets fires the error on 2nd call when sequence is incomplete. Added some test cases. 
--- generic/tclIO.c | 6 ++++++ tests/io.test | 56 ++++++++++++++++++++++++++++++++++++++++++++++++-------- 2 files changed, 54 insertions(+), 8 deletions(-) diff --git a/generic/tclIO.c b/generic/tclIO.c index c92fb64..bc1b1c6 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -4749,6 +4749,12 @@ Tcl_GetsObj( ResetFlag(statePtr, CHANNEL_BLOCKED); while (1) { if (dst >= dstEnd) { + /* + * In case of encoding errors, state gets flag + * CHANNEL_ENCODING_ERROR set in the call below. First, the + * EOF/EOL condition is checked, as we may have valid data with + * EOF/EOL before the encoding error. + */ if (FilterInputBytes(chanPtr, &gs) != 0) { goto restore; } diff --git a/tests/io.test b/tests/io.test index 1078a50..7e62e6b 100644 --- a/tests/io.test +++ b/tests/io.test @@ -9193,7 +9193,7 @@ test io-75.5 {invalid utf-8 encoding read is ignored (-profile tcl8)} -setup { removeFile io-75.5 } -result 4181 -test io-75.6 {invalid utf-8 encoding, blocking gets is not ignored (-profile strict)} -setup { +test io-75.6 {incomplete utf-8 encoding, blocking gets is not ignored (-profile strict)} -setup { set fn [makeFile {} io-75.6] set f [open $fn w+] fconfigure $f -encoding binary @@ -9211,9 +9211,7 @@ test io-75.6 {invalid utf-8 encoding, blocking gets is not ignored (-profile str } -match glob -returnCodes 1 -result {error reading "file*":\ invalid or incomplete multibyte or wide character} -# TCL ticket c4eb46a196: non blocking case had endless loop, so test it -# The first fix was successful with the test data A\xC3B, but not with A\x81. 
So, test both -test io-75.6.1 {invalid utf-8 encoding "A xc3 B", non blocking gets is not ignored (-profile strict)} -setup { +test io-75.6.1 {invalid utf-8 encoding, blocking gets is not ignored (-profile strict)} -setup { set fn [makeFile {} io-75.6.1] set f [open $fn w+] fconfigure $f -encoding binary @@ -9222,7 +9220,7 @@ test io-75.6.1 {invalid utf-8 encoding "A xc3 B", non blocking gets is not ignor flush $f seek $f 0 fconfigure $f -encoding utf-8 -buffering none -eofchar {} \ - -translation lf -profile strict -blocking 0 + -translation lf -profile strict } -body { gets $f } -cleanup { @@ -9231,8 +9229,48 @@ test io-75.6.1 {invalid utf-8 encoding "A xc3 B", non blocking gets is not ignor } -match glob -returnCodes 1 -result {error reading "file*":\ invalid or incomplete multibyte or wide character} -test io-75.6.2 {invalid utf-8 encoding, non blocking gets is not ignored (-profile strict)} -setup { - set fn [makeFile {} io-75.6.1] +test io-75.6.2 {invalid utf-8 encoding, blocking gets is not ignored (-profile strict), recover functionality} -setup { + set fn [makeFile {} io-75.6.2] + set f [open $fn w+] + fconfigure $f -encoding binary + # utf-8: \xC3 requires a 2nd byte > x80, but x80, but