diff options
-rw-r--r-- | generic/tclIO.c | 59 | ||||
-rw-r--r-- | generic/tclIOCmd.c | 25 | ||||
-rw-r--r-- | tests/io.test | 474 |
3 files changed, 99 insertions, 459 deletions
diff --git a/generic/tclIO.c b/generic/tclIO.c index 880b669..b12adf6 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -4645,7 +4645,6 @@ Tcl_GetsObj( /* State info for channel */ ChannelBuffer *bufPtr; int inEofChar, skip, copiedTotal, oldFlags, oldRemoved; - int reportError = 0; int oldLength; Tcl_Encoding encoding; char *dst, *dstEnd, *eol, *eof; @@ -4654,7 +4653,6 @@ Tcl_GetsObj( if (GotFlag(statePtr, CHANNEL_ENCODING_ERROR)) { UpdateInterest(chanPtr); Tcl_SetErrno(EILSEQ); - ResetFlag(statePtr, CHANNEL_ENCODING_ERROR); return TCL_INDEX_NONE; } @@ -4914,19 +4912,6 @@ Tcl_GetsObj( goto done; } goto gotEOL; - } else if (gs.bytesWrote == 0 - && GotFlag(statePtr, CHANNEL_ENCODING_ERROR)) { - /* Set eol to the position that caused the encoding error, and then - * coninue to gotEOL, which stores the data that was decoded - * without error to objPtr. This allows the caller to do something - * useful with the data decoded so far, and also results in the - * position of the file being the first byte that was not - * succesfully decoded, allowing further processing at exactly that - * point, if desired. - */ - eol = dstEnd; - reportError = 1; - goto gotEOL; } dst = dstEnd; } @@ -4970,16 +4955,7 @@ Tcl_GetsObj( Tcl_SetObjLength(objPtr, eol - objPtr->bytes); CommonGetsCleanup(chanPtr); ResetFlag(statePtr, CHANNEL_BLOCKED); - if (reportError) { - ResetFlag(statePtr, CHANNEL_BLOCKED|INPUT_SAW_CR|CHANNEL_ENCODING_ERROR); - /* reset CHANNEL_ENCODING_ERROR to afford a chance to reconfigure - * the channel and try again - */ - Tcl_SetErrno(EILSEQ); - copiedTotal = -1; - } else { - copiedTotal = gs.totalChars + gs.charsWrote - skip; - } + copiedTotal = gs.totalChars + gs.charsWrote - skip; goto done; /* @@ -6007,9 +5983,8 @@ DoReadChars( } if (GotFlag(statePtr, CHANNEL_ENCODING_ERROR)) { - /* TODO: UpdateInterest not needed here? */ + /* TODO: We don't need this call? */ UpdateInterest(chanPtr); - Tcl_SetErrno(EILSEQ); return -1; } @@ -6025,7 +6000,7 @@ DoReadChars( assert(statePtr->inputEncodingFlags & TCL_ENCODING_END); assert(!GotFlag(statePtr, CHANNEL_BLOCKED|INPUT_SAW_CR)); - /* TODO: UpdateInterest not needed here? */ + /* TODO: We don't need this call? */ UpdateInterest(chanPtr); return 0; } @@ -6039,7 +6014,7 @@ DoReadChars( } ResetFlag(statePtr, CHANNEL_BLOCKED|CHANNEL_EOF); statePtr->inputEncodingFlags &= ~TCL_ENCODING_END; - /* TODO: UpdateInterest not needed here? */ + /* TODO: We don't need this call? */ UpdateInterest(chanPtr); return 0; } @@ -6070,7 +6045,7 @@ DoReadChars( } /* - * Recycle current buffer if empty. + * If the current buffer is empty recycle it. */ bufPtr = statePtr->inQueueHead; @@ -6083,24 +6058,6 @@ DoReadChars( statePtr->inQueueTail = NULL; } } - - /* - * If CHANNEL_ENCODING_ERROR and CHANNEL_STICKY_EOF are both set, - * then CHANNEL_ENCODING_ERROR was caused by data that occurred - * after the EOF character was encountered, so it doesn't count as - * a real error. - */ - - if (GotFlag(statePtr, CHANNEL_ENCODING_ERROR) - && !GotFlag(statePtr, CHANNEL_STICKY_EOF) - && !GotFlag(statePtr, CHANNEL_NONBLOCKING)) { - /* Channel is synchronous. Return an error so that callers - * like [read] can return an error. - */ - Tcl_SetErrno(EILSEQ); - copied = -1; - goto finish; - } } if (copiedNow < 0) { @@ -6129,7 +6086,6 @@ DoReadChars( } } -finish: /* * Failure to fill a channel buffer may have left channel reporting a * "blocked" state, but so long as we fulfilled the request here, the @@ -6793,14 +6749,11 @@ TranslateInputEOL( * EOF character was seen in EOL translated range. Leave current file * position pointing at the EOF character, but don't store the EOF * character in the output string. - * - * If CHANNEL_ENCODING_ERROR is set, it can only be because of data - * encountered after the EOF character, so it is nonsense. Unset it. */ SetFlag(statePtr, CHANNEL_EOF | CHANNEL_STICKY_EOF); statePtr->inputEncodingFlags |= TCL_ENCODING_END; - ResetFlag(statePtr, CHANNEL_BLOCKED|INPUT_SAW_CR|CHANNEL_ENCODING_ERROR); + ResetFlag(statePtr, CHANNEL_BLOCKED|INPUT_SAW_CR); } } diff --git a/generic/tclIOCmd.c b/generic/tclIOCmd.c index 507e06c..e8a534f 100644 --- a/generic/tclIOCmd.c +++ b/generic/tclIOCmd.c @@ -296,9 +296,6 @@ Tcl_GetsObjCmd( int lineLen; /* Length of line just read. */ int mode; /* Mode in which channel is opened. */ Tcl_Obj *linePtr, *chanObjPtr; - /* - Tcl_Obj *resultDictPtr, *returnOptsPtr; - */ int code = TCL_OK; if ((objc != 2) && (objc != 3)) { @@ -321,6 +318,7 @@ Tcl_GetsObjCmd( lineLen = Tcl_GetsObj(chan, linePtr); if (lineLen < 0) { if (!Tcl_Eof(chan) && !Tcl_InputBlocked(chan)) { + Tcl_DecrRefCount(linePtr); /* * TIP #219. @@ -334,15 +332,6 @@ Tcl_GetsObjCmd( "error reading \"%s\": %s", TclGetString(chanObjPtr), Tcl_PosixError(interp))); } - /* - resultDictPtr = Tcl_NewDictObj(); - Tcl_DictObjPut(NULL, resultDictPtr, Tcl_NewStringObj("read", -1) - , linePtr); - returnOptsPtr = Tcl_NewDictObj(); - Tcl_DictObjPut(NULL, returnOptsPtr, Tcl_NewStringObj("-result", -1) - , resultDictPtr); - Tcl_SetReturnOptions(interp, returnOptsPtr); - */ code = TCL_ERROR; goto done; } @@ -393,9 +382,6 @@ Tcl_ReadObjCmd( int charactersRead; /* How many characters were read? */ int mode; /* Mode in which channel is opened. */ Tcl_Obj *resultPtr, *chanObjPtr; - /* - Tcl_Obj *resultDictPtr, *returnOptsPtr; - */ if ((objc != 2) && (objc != 3)) { Interp *iPtr; @@ -484,17 +470,8 @@ Tcl_ReadObjCmd( "error reading \"%s\": %s", TclGetString(chanObjPtr), Tcl_PosixError(interp))); } - /* - resultDictPtr = Tcl_NewDictObj(); - Tcl_DictObjPut(NULL, resultDictPtr, Tcl_NewStringObj("read", -1) - , resultPtr); - returnOptsPtr = Tcl_NewDictObj(); - Tcl_DictObjPut(NULL, returnOptsPtr, Tcl_NewStringObj("-result", -1) - , resultDictPtr); TclChannelRelease(chan); Tcl_DecrRefCount(resultPtr); - Tcl_SetReturnOptions(interp, returnOptsPtr); - */ return TCL_ERROR; } diff --git a/tests/io.test b/tests/io.test index 0f47a8e..4578a93 100644 --- a/tests/io.test +++ b/tests/io.test @@ -1547,53 +1547,19 @@ test io-12.8 {ReadChars: multibyte chars split} { close $f scan [string index $in end] %c } 160 - - -apply [list {} { - set template { - test io-12.9.@variant@ {ReadChars: multibyte chars split, default (strict)} -body { - set res {} - set f [open $path(test1) w] - fconfigure $f -translation binary - puts -nonewline $f [string repeat a 9]\xC2 - close $f - set f [open $path(test1)] - fconfigure $f -encoding utf-8 @strict@ -buffersize 10 - set status [catch {read $f} cres copts] - #set in [dict get $copts -result] - #lappend res $in - lappend res $status $cres - set status [catch {read $f} cres copts] - #set in [dict get $copts -result] - #lappend res $in - lappend res $status $cres - set res - } -cleanup { - catch {close $f} - } -match glob\ - } - - #append template {\ - # -result {{read aaaaaaaaa} 1\ - # {error reading "*": illegal byte sequence}\ - # {read {}} 1 {error reading "*": illegal byte sequence}} - #} - - append template {\ - -result {1\ - {error reading "*": illegal byte sequence}\ - 1 {error reading "*": illegal byte sequence}} - } - - # strict encoding may be the default in Tcl 9, but in 8 it is not - foreach variant {encodingstrict} strict {{-encodingprofile strict}} { - set script [string map [ - list @variant@ $variant @strict@ $strict] $template] - uplevel 1 $script - } -} [namespace current]] - - +test io-12.9 {ReadChars: multibyte chars split} -body { + set f [open $path(test1) w] + fconfigure $f -translation binary + puts -nonewline $f [string repeat a 9]\xC2 + close $f + set f [open $path(test1)] + fconfigure $f -encoding utf-8 -buffersize 10 + set in [read $f] + close $f + scan [string index $in end] %c +} -cleanup { + catch {close $f} +} -result 194 test io-12.10 {ReadChars: multibyte chars split} -body { set f [open $path(test1) w] fconfigure $f -translation binary @@ -9177,136 +9143,68 @@ test io-75.5 {invalid utf-8 encoding read is ignored (-encodingprofile tcl8)} -s removeFile io-75.5 } -result 4181 +test io-75.6 {invalid utf-8 encoding read is not ignored (-strictencoding 1)} -setup { + set fn [makeFile {} io-75.6] + set f [open $fn w+] + fconfigure $f -encoding binary + # \x81 is invalid in utf-8 + puts -nonewline $f A\x81 + flush $f + seek $f 0 + fconfigure $f -encoding utf-8 -buffering none -eofchar "" -translation lf -strictencoding 1 +} -body { + set d [read $f] + binary scan $d H* hd + lappend hd [catch {read $f} msg] + close $f + lappend hd $msg +} -cleanup { + removeFile io-75.6 +} -match glob -result {41 1 {error reading "*": illegal byte sequence}} -apply [list {} { - - - set test { - test io-75.6 {invalid utf-8 encoding read is not ignored (-encodingprofile strict)} -setup { - set hd {} - set fn [makeFile {} io-75.6] - set f [open $fn w+] - fconfigure $f -encoding binary - # \x81 is invalid in utf-8 - puts -nonewline $f A\x81 - flush $f - seek $f 0 - fconfigure $f -encoding utf-8 -buffering none -eofchar "" -translation lf -encodingprofile strict - } -body { - set status [catch {read $f} cres copts] - #set d [dict get $copts -result read] - #binary scan $d H* hd - lappend hd $status $cres - } -cleanup { - close $f - removeFile io-75.6 - } -match glob\ - } - - #append test {\ - # -result {41 1 {error reading "*": illegal byte sequence}} - #} - - append test {\ - -result {1 {error reading "*": illegal byte sequence}} - } - - uplevel 1 $test - - set test { - test io-75.7 {invalid utf-8 encoding eof handling (-encodingprofile strict)} -setup { - set hd {} - set fn [makeFile {} io-75.7] - set f [open $fn w+] - fconfigure $f -encoding binary - # \xA1 is invalid in utf-8. -eofchar is not detected, because it comes later. - puts -nonewline $f A\xA1\x1A - flush $f - seek $f 0 - fconfigure $f -encoding utf-8 -buffering none -eofchar \x1A -translation lf -encodingprofile strict - } -body { - set status [catch {read $f} cres copts] - #set d [dict get $copts -result read] - #binary scan $d H* hd - lappend hd [eof $f] - lappend hd $status - lappend hd $cres - fconfigure $f -encoding iso8859-1 - lappend hd [read $f];# We changed encoding, so now we can read the \xA1 - close $f - set hd - } -cleanup { - removeFile io-75.7 - } -match glob\ - } - - #append test {\ - # -result {41 0 1 {error reading "*": illegal byte sequence} ¡} - #} - - append test {\ - -result {0 1 {error reading "*": illegal byte sequence} ¡} - } - - uplevel 1 $test - - -} [namespace current]] - - - -test io-75.8.incomplete { - incomplete uft-8 char after eof char is not an error (-encodingprofile strict) -} -setup { - set hd {} - set fn [makeFile {} io-75.8] +test io-75.7 {invalid utf-8 encoding eof handling (-strictencoding 1)} -setup { + set fn [makeFile {} io-75.7] set f [open $fn w+] fconfigure $f -encoding binary - # \x81 is invalid and also incomplete utf-8 data, but because the eof - # character \x1A appears first, it's not an error. - puts -nonewline $f A\x1A\x81 + # \xA1 is invalid in utf-8. -eofchar is not detected, because it comes later. + puts -nonewline $f A\xA1\x1A flush $f seek $f 0 - fconfigure $f -encoding utf-8 -buffering none -eofchar \x1A -translation lf -encodingprofile strict + fconfigure $f -encoding utf-8 -buffering none -eofchar \x1A -translation lf -strictencoding 1 } -body { set d [read $f] binary scan $d H* hd lappend hd [eof $f] - # there should be no error on additional reads - lappend hd [read $f] + lappend hd [catch {read $f} msg] + lappend hd $msg + fconfigure $f -encoding iso8859-1 + lappend hd [read $f];# We changed encoding, so now we can read the \xA1 close $f set hd } -cleanup { - removeFile io-75.8 -} -result {41 1 {}} + removeFile io-75.7 +} -match glob -result {41 0 1 {error reading "*": illegal byte sequence} ¡} - -test io-75.8.invalid {invalid utf-8 after eof char is not an error (-encodingprofile strict)} -setup { - set res {} +test io-75.8 {invalid utf-8 encoding eof handling (-strictencoding 1)} -setup { set fn [makeFile {} io-75.8] set f [open $fn w+] fconfigure $f -encoding binary - # \xc0\x80 is invalid utf-8 data, but because the eof character \x1A - # appears first, it's not an error. - puts -nonewline $f A\x1a\xc0\x80 + # \x81 is invalid in utf-8, but since \x1A comes first, -eofchar takes precedence. + puts -nonewline $f A\x1A\x81 flush $f seek $f 0 fconfigure $f -encoding utf-8 -buffering none -eofchar \x1A -translation lf -encodingprofile strict } -body { set d [read $f] - foreach char [split $d {}] { - lappend res [format %x [scan $char %c]] - } - lappend res [eof $f] - # there should be no error on additional reads - lappend res [read $f] + binary scan $d H* hd + lappend hd [eof $f] + lappend hd [read $f] close $f - set res + set hd } -cleanup { removeFile io-75.8 } -result {41 1 {}} - test io-75.9 {unrepresentable character write passes and is replaced by ?} -setup { set fn [makeFile {} io-75.9] set f [open $fn w+] @@ -9321,7 +9219,9 @@ test io-75.9 {unrepresentable character write passes and is replaced by ?} -setu removeFile io-75.9 } -match glob -result [list {A} {error writing "*": illegal byte sequence}] - +# Incomplete sequence test. +# This error may IMHO only be detected with the close. +# But the read already returns the incomplete sequence. test io-75.10 {incomplete multibyte encoding read is ignored} -setup { set fn [makeFile {} io-75.10] set f [open $fn w+] @@ -9329,7 +9229,7 @@ test io-75.10 {incomplete multibyte encoding read is ignored} -setup { puts -nonewline $f A\xC0 flush $f seek $f 0 - fconfigure $f -encoding utf-8 -encodingprofile tcl8 -buffering none + fconfigure $f -encoding utf-8 -buffering none } -body { set d [read $f] close $f @@ -9338,135 +9238,39 @@ test io-75.10 {incomplete multibyte encoding read is ignored} -setup { } -cleanup { removeFile io-75.10 } -result 41c0 +# The current result returns the orphan byte as byte. +# This may be expected due to special utf-8 handling. +# As utf-8 has a special treatment in multi-byte decoding, also test another +# one. +test io-75.11 {shiftjis encoding error read results in raw bytes} -setup { + set fn [makeFile {} io-75.11] + set f [open $fn w+] + fconfigure $f -encoding binary + # In shiftjis, \x81 starts a two-byte sequence. + # But 2nd byte \xFF is not allowed + puts -nonewline $f A\x81\xFFA + flush $f + seek $f 0 + fconfigure $f -encoding shiftjis -buffering none -eofchar "" -translation lf -strictencoding 1 +} -body { + set d [read $f] + binary scan $d H* hd + lappend hd [catch {set d [read $f]} msg] + lappend hd $msg +} -cleanup { + close $f + removeFile io-75.11 +} -match glob -result {41 1 {error reading "*": illegal byte sequence}} -apply [list {} { - - set test { - test io-75.10_strict {incomplete multibyte encoding read is an error} -setup { - set res {} - set fn [makeFile {} io-75.10] - set f [open $fn w+] - fconfigure $f -encoding binary - puts -nonewline $f A\xC0 - flush $f - seek $f 0 - fconfigure $f -encoding utf-8 -encodingprofile strict -buffering none - } -body { - set status [catch {read $f} cres copts] - - #set d [dict get $copts -result read] - #binary scan $d H* hd - #lappend res $hd $cres - lappend res $cres - - chan configure $f -encoding iso8859-1 - - set d [read $f] - binary scan $d H* hd - lappend res $hd - close $f - return $res - } -cleanup { - removeFile io-75.10 - } -match glob\ - } - - #append test {\ - # -result {41 {error reading "*": illegal byte sequence} c0} - #} - - append test {\ - -result {{error reading "*": illegal byte sequence} c0} - } - - uplevel 1 $test - - - - set test { - # As utf-8 has a special treatment in multi-byte decoding, also test another - # one. - test io-75.11 {shiftjis encoding error read results in raw bytes} -setup { - set hd {} - set fn [makeFile {} io-75.11] - set f [open $fn w+] - fconfigure $f -encoding binary - # In shiftjis, \x81 starts a two-byte sequence. - # But 2nd byte \xFF is not allowed - puts -nonewline $f A\x81\xFFA - flush $f - seek $f 0 - fconfigure $f -encoding shiftjis -buffering none -eofchar "" \ - -translation lf -encodingprofile strict - } -body { - set status [catch {read $f} cres copts] - #set d [dict get $copts -result read] - #binary scan $d H* hd - lappend hd $status - lappend hd $cres - } -cleanup { - close $f - removeFile io-75.11 - } -match glob - } - - #append test {\ - # -result {41 1 {error reading "*": illegal byte sequence}} - #} - - append test {\ - -result {1 {error reading "*": illegal byte sequence}} - } - - - set test { - test io-75.12 {invalid utf-8 encoding read is an error} -setup { - set hd {} - set res {} - set fn [makeFile {} io-75.12] - set f [open $fn w+] - fconfigure $f -encoding binary - puts -nonewline $f A\x81 - flush $f - seek $f 0 - fconfigure $f -encoding utf-8 -buffering none -eofchar {} -translation lf \ - -encodingprofile strict - } -body { - set status [catch {read $f} cres copts] - #set d [dict get $copts -result read] - #binary scan $d H* hd - #lappend res $hd - lappend res $status $cres - return $res - } -cleanup { - catch {close $f} - removeFile io-75.12 - } -match glob\ - } - - #append test {\ - # -result {41 1 {error reading "*": illegal byte sequence}} - #} - - - append test {\ - -result {1 {error reading "*": illegal byte sequence}} - } - - uplevel 1 $test -} [namespace current]] - - -test io-75.12_ignore {invalid utf-8 encoding read is ignored} -setup { +test io-75.12 {invalid utf-8 encoding read is ignored} -setup { set fn [makeFile {} io-75.12] set f [open $fn w+] fconfigure $f -encoding binary puts -nonewline $f A\x81 flush $f seek $f 0 - fconfigure $f -encoding utf-8 -buffering none -eofchar {} \ - -translation lf -encodingprofile tcl8 + fconfigure $f -encoding utf-8 -buffering none -eofchar "" -translation lf } -body { set d [read $f] close $f @@ -9475,121 +9279,27 @@ test io-75.12_ignore {invalid utf-8 encoding read is ignored} -setup { } -cleanup { removeFile io-75.12 } -result 4181 - - -apply [list {} { - - set test { - test io-75.13 {invalid utf-8 encoding read is not ignored (-encodingprofile strict)} -setup { - set hd {} - set fn [makeFile {} io-75.13] - set f [open $fn w+] - fconfigure $f -encoding binary - # \x81 is invalid in utf-8 - puts -nonewline $f A\x81 - flush $f - seek $f 0 - fconfigure $f -encoding utf-8 -buffering none -eofchar "" \ - -translation lf -encodingprofile strict - } -body { - set status [catch {read $f} cres copts] - #set d [dict get $copts -result read] - #binary scan $d H* hd - lappend hd $status - lappend hd $cres - } -cleanup { - catch {close $f} - removeFile io-75.13 - } -match glob\ - } - - #append test {\ - # -result {41 1 {error reading "*": illegal byte sequence}} - #} - - append test {\ - -result {1 {error reading "*": illegal byte sequence}} - } - - uplevel 1 $test - - set test { - } - -} [namespace current]] - - -test io-75.14 { - invalid utf-8 encoding [gets] continues in non-strict mode after error -} -setup { - set res {} - set fn [makeFile {} io-75.14] +test io-75.13 {invalid utf-8 encoding read is not ignored (-strictencoding 1)} -setup { + set fn [makeFile {} io-75.13] set f [open $fn w+] - fconfigure $f -translation binary - # \xc0 is invalid in utf-8 - puts -nonewline $f a\nb\xc0\nc\n + fconfigure $f -encoding binary + # \x81 is invalid in utf-8 + puts -nonewline $f "A\x81" flush $f seek $f 0 - fconfigure $f -encoding utf-8 -buffering none -eofchar {} -translation lf -encodingprofile strict + fconfigure $f -encoding utf-8 -buffering none -eofchar "" -translation lf -strictencoding 1 } -body { - lappend res [gets $f] - set status [catch {gets $f} cres copts] - lappend res $status $cres - chan configure $f -encodingprofile tcl8 - lappend res [gets $f] - lappend res [gets $f] - close $f - return $res + set d [read $f] + binary scan $d H* hd + lappend hd [catch {read $f} msg] + close $f + lappend hd $msg } -cleanup { - removeFile io-75.14 -} -match glob -result {a 1 {error reading "*": illegal byte sequence} bÀ c} - - - -apply [list {} { - set test { - test io-75.15 {invalid utf-8 encoding strict gets should not hang} -setup { - set res {} - set fn [makeFile {} io-75.15] - set chan [open $fn w+] - fconfigure $chan -encoding binary - # This is not valid UTF-8 - puts $chan hello\nAB\xc0\x40CD\nEFG - close $chan - } -body { - #Now try to read it with [gets] - set chan [open $fn] - fconfigure $chan -encoding utf-8 -encodingprofile strict - lappend res [gets $chan] - set status [catch {gets $chan} cres copts] - lappend res $status $cres - set status [catch {gets $chan} cres copts] - lappend res $status $cres - #lappend res [dict get $copts -result] - chan configur $chan -encoding binary - foreach char [split [read $chan 2] {}] { - lappend res [format %x [scan $char %c]] - } - return $res - } -cleanup { - close $chan - removeFile io-75.15 - } -match glob\ - } + removeFile io-75.13 +} -match glob -result {41 1 {error reading "*": illegal byte sequence}} - #append test {\ - # -result {hello 1 {error reading "*": illegal byte sequence}\ - # 1 {error reading "*": illegal byte sequence} {read AB} c0 40} - #} - - append test {\ - -result {hello 1 {error reading "*": illegal byte sequence}\ - 1 {error reading "*": illegal byte sequence} c0 40} - } - - uplevel 1 $test +# ### ### ### ######### ######### ######### -} [namespace current]] test io-76.0 {channel modes} -setup { |