diff options
| author | oehhar <harald.oehlmann@elmicron.de> | 2023-11-06 15:24:37 (GMT) |
|---|---|---|
| committer | oehhar <harald.oehlmann@elmicron.de> | 2023-11-06 15:24:37 (GMT) |
| commit | 44db33ce5944984f3308f258787d5aea1d172827 (patch) | |
| tree | aa1eed66b610f8a021564cd2a471084eaf821e56 | |
| parent | 3928c1068a7075ee5c025b58033fbeb21bb4bb61 (diff) | |
| parent | 0f91665f15fa0bdec4a9f472680f7d123a691067 (diff) | |
| download | tcl-44db33ce5944984f3308f258787d5aea1d172827.zip tcl-44db33ce5944984f3308f258787d5aea1d172827.tar.gz tcl-44db33ce5944984f3308f258787d5aea1d172827.tar.bz2 | |
Merge core-8-branch
| -rw-r--r-- | doc/filename.n | 1 | ||||
| -rw-r--r-- | doc/gets.n | 37 | ||||
| -rw-r--r-- | doc/puts.n | 6 | ||||
| -rw-r--r-- | doc/read.n | 66 | ||||
| -rw-r--r-- | tests/io.test | 2 | ||||
| -rw-r--r-- | tests/utfext.test | 8 | ||||
| -rw-r--r-- | tests/winFCmd.test | 4 |
7 files changed, 117 insertions, 7 deletions
diff --git a/doc/filename.n b/doc/filename.n index 801e346..d8a3364 100644 --- a/doc/filename.n +++ b/doc/filename.n @@ -121,6 +121,7 @@ extra backslashes are superfluous. .RE .TP \fBZipfs\fR +.RS On all platforms where \fBzipfs\fR support is enabled, paths within mounted ZIP archives begin with the string returned by the \fBzipfs root\fR command. Zipfs paths are case-sensitive on all platforms. diff --git a/doc/gets.n b/doc/gets.n --- a/doc/gets.n +++ b/doc/gets.n @@ -47,6 +47,43 @@ produce the same results as if there were an input line consisting only of the end-of-line character(s). The \fBeof\fR and \fBfblocked\fR commands can be used to distinguish these three cases. +.SH "ENCODING ERRORS" +.PP +Encoding errors may occur if the encoding profile \fBstrict\fR is used. +Encoding errors are special, as introspection or recovery is +possible by changing to an encoding that accepts the data. +An encoding error is reported by the POSIX error code \fBEILSEQ\fR. +The file pointer is unchanged in the error case. +.PP +Here is an example with an encoding error in UTF-8 encoding, which is then +introspected by a switch to the binary encoding. The test file contains an +incomplete multi-byte sequence at position 1 (\fBA \\xC3 B\fR): +.PP +File creation for example +.CS +% set f [open test_A_195_B.txt wb]; puts -nonewline $f A\\xC3B; close $f +.CE +Encoding error example +.CS +% set f [open test_A_195_B.txt r] +file384b6a8 +% fconfigure $f -encoding utf-8 -profile strict +% catch {gets $f} e d +1 +% set d +-code 1 -level 0 +-errorstack {INNER {invokeStk1 gets file384b6a8}} +-errorcode {POSIX EILSEQ {invalid or incomplete multibyte or wide character}} +-errorinfo {...} -errorline 1 +% tell $f +0 +% fconfigure $f -encoding binary -profile strict +% gets $f +AÃB +.CE +In contrast to \fBread\fR, already decoded data is not consumed. +The file position is still at 0 and the recovery \fBgets\fR also returns the +leading data that was already decoded successfully.
.SH "EXAMPLE" This example reads a file one line at a time and prints it out with the current line number attached to the start of each line. diff --git a/doc/puts.n b/doc/puts.n --- a/doc/puts.n +++ b/doc/puts.n @@ -62,6 +62,12 @@ To avoid wasting memory, nonblocking I/O should normally be used in an event-driven fashion with the \fBfileevent\fR command (do not invoke \fBputs\fR unless you have recently been notified via a file event that the channel is ready for more output data). +.SH "ENCODING ERRORS" +.PP +Encoding errors may occur if the encoding profile \fBstrict\fR is used. +\fBputs\fR writes out data until an encoding error occurs and fails with +POSIX error code \fBEILSEQ\fR. + .SH EXAMPLES .PP Write a short message to the console (or wherever \fBstdout\fR is diff --git a/doc/read.n b/doc/read.n --- a/doc/read.n +++ b/doc/read.n @@ -50,6 +50,72 @@ newline characters according to the \fB\-translation\fR option for the channel. See the \fBfconfigure\fR manual entry for a discussion on ways in which \fBfconfigure\fR will alter input. +.SH "ENCODING ERRORS" +.PP +Encoding errors may occur if the encoding profile \fBstrict\fR is used. +Encoding errors are special, as introspection or recovery is +possible by changing to an encoding (or encoding profile) that accepts +the data. +An encoding error is reported by the POSIX error code \fBEILSEQ\fR. +.PP +In blocking mode, the error is thrown immediately, even if there is a +leading decodable data portion. +The file pointer is advanced to just before the encoding error. +Any successfully decoded data before the encoding error is lost. +It is proposed to return this portion within the additional key \fB-data\fR +in the error dictionary. +.PP +In non-blocking mode, any data without encoding error is returned first +(without error state). +In the next call, no data is returned and the \fBEILSEQ\fR error state is set. +.PP +Here is an example with an encoding error in UTF-8 encoding, which is then +introspected by a switch to the binary encoding.
The test file contains an +incomplete multi-byte sequence at position 1 (\fBA \\xC3 B\fR): +.PP +File creation for examples +. +.CS +% set f [open test_A_195_B.txt wb]; puts -nonewline $f A\\xC3B; close $f +.CE +Blocking example +. +.CS +% set f [open test_A_195_B.txt r] +file35a65a0 +% fconfigure $f -encoding utf-8 -profile strict -blocking 1 +% catch {read $f} e d +1 +% set d +-code 1 -level 0 +-errorstack {INNER {invokeStk1 read file35a65a0}} +-errorcode {POSIX EILSEQ {invalid or incomplete multibyte or wide character}} +-errorinfo {...} -errorline 1 +% tell $f +1 +% fconfigure $f -encoding binary -profile strict +% read $f +ÃB +% close $f +.CE +Non-blocking example +. +.CS +% set f [open test_A_195_B.txt r] +file35a65a0 +% fconfigure $f -encoding utf-8 -profile strict -blocking 0 +% read $f +A +% tell $f +1 +% catch {read $f} e d +1 +% set d +-code 1 -level 0 +-errorstack {INNER {invokeStk1 read file35a65a0}} +-errorcode {POSIX EILSEQ {invalid or incomplete multibyte or wide character}} +-errorinfo {...} -errorline 1 +.CE .SH "USE WITH SERIAL PORTS" '\" Note: this advice actually applies to many versions of Tcl .PP diff --git a/tests/io.test b/tests/io.test index 7826be4..997dadd 100644 --- a/tests/io.test +++ b/tests/io.test @@ -9299,7 +9299,7 @@ test io-strict-multibyte-eof { } -match glob -result {1 {error reading "*":\ invalid or incomplete multibyte or wide character} {}} -test io-75.9 {unrepresentable character write passes and is replaced by ?} -setup { +test io-75.9 {unrepresentable character write throws error in strict profile} -setup { set fn [makeFile {} io-75.9] set f [open $fn w+] fconfigure $f -encoding iso8859-1 -profile strict diff --git a/tests/utfext.test b/tests/utfext.test index ce50666..d2da50b 100644 --- a/tests/utfext.test +++ b/tests/utfext.test @@ -41,12 +41,12 @@ proc testbasic {direction enc hexin hexout {flags {start end}}} { set result [string range "$out$filler" 0 $dstlen-1] test $cmd-$enc-$hexin-[join $flags -] "$cmd - $enc - 
$hexin - $flags" -body \ [list testencoding $cmd $enc $in $flags {} $dstlen] \ - -result [list ok {} $result] + -result [list ok {} $result] -constraints testencoding foreach profile [encoding profiles] { set flags2 [linsert $flags end profile$profile] test $cmd-$enc-$hexin-[join $flags2 -] "$cmd - $enc - $hexin - $flags" -body \ [list testencoding $cmd $enc $in $flags2 {} $dstlen] \ - -result [list ok {} $result] + -result [list ok {} $result] -constraints testencoding } } @@ -67,7 +67,7 @@ foreach {enc utfhex hex} $utfExtMap { # Test for insufficient space test xx-bufferoverflow {buffer overflow Tcl_ExternalToUtf} -body { testencoding Tcl_UtfToExternal ucs-2 A {start end} {} 1 -} -result [list nospace {} \xFF] +} -result [list nospace {} \xFF] -constraints testencoding # Another bug - char limit not obeyed # % set cv 2 @@ -79,7 +79,7 @@ test TableToUtf-bug-5be203d6ca {Bug 5be203d6ca - truncated prefix in table encod lassign [testencoding Tcl_ExternalToUtf shiftjis $src {start} 0 16 srcRead dstWritten charsWritten] buf set result [list [testencoding Tcl_ExternalToUtf shiftjis $src {start} 0 16 srcRead dstWritten charsWritten] $srcRead $dstWritten $charsWritten] lappend result {*}[list [testencoding Tcl_ExternalToUtf shiftjis [string range $src $srcRead end] {end} 0 10 srcRead dstWritten charsWritten] $srcRead $dstWritten $charsWritten] -} -result [list [list multibyte 0 \xEF\xBC\x90\xEF\xBC\x91\x00\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF] 4 6 2 [list ok 0 \xC2\x82\x00\xFF\xFF\xFF\xFF\xFF\xFF\xFF] 1 2 1] +} -result [list [list multibyte 0 \xEF\xBC\x90\xEF\xBC\x91\x00\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF] 4 6 2 [list ok 0 \xC2\x82\x00\xFF\xFF\xFF\xFF\xFF\xFF\xFF] 1 2 1] -constraints testencoding ::tcltest::cleanupTests diff --git a/tests/winFCmd.test b/tests/winFCmd.test index 9b5e67e..ac5ae4e 100644 --- a/tests/winFCmd.test +++ b/tests/winFCmd.test @@ -48,7 +48,7 @@ proc contents {file} { proc cleanupRecurse {args} { # Assumes no loops via links! 
# Need to change permissions BEFORE deletion - testchmod 0o777 {*}$args + catch {testchmod 0o777 {*}$args} foreach victim $args { if {[file isdirectory $victim]} { cleanupRecurse {*}[glob -nocomplain -directory $victim td* tf* Test*] @@ -454,7 +454,7 @@ test winFCmd-2.11 {TclpCopyFile: CopyFile succeeds} -setup { } -result {tf1 tf1} test winFCmd-2.12 {TclpCopyFile: CopyFile succeeds} -setup { cleanup -} -constraints {win testfile} -body { +} -constraints {win testfile testchmod} -body { createfile tf1 tf1 file attribute tf1 -readonly 1 testfile cp tf1 tf2 |
